private void processControlWord()

in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java [1102:1378]


    /**
     * Reacts to the current control word, which is identified by probing
     * {@code equals(String)} with every name this method knows; any other word is
     * silently skipped.
     *
     * <p>Three steps run in order:</p>
     * <ol>
     *   <li>While {@code inHeader} is set the word goes through
     *       {@code processHeaderControlWord()}; otherwise bold/italic toggles are applied,
     *       but only when not inside a hyperlink ({@code fieldState == 0}).</li>
     *   <li>Words that stand for one literal character emit that character unless the
     *       current group is ignored.</li>
     *   <li>All remaining words update paragraph, list, annotation, shape, embedded
     *       object, upr or field state.</li>
     * </ol>
     *
     * <p>Steps 2 and 3 run for header words as well, so a word such as "pard" can both
     * end the header and reset styles in the same call.</p>
     *
     * @throws IOException   propagated from the output helpers this method calls
     * @throws SAXException  propagated from the output helpers this method calls
     * @throws TikaException propagated from the output helpers this method calls
     */
    private void processControlWord() throws IOException, SAXException, TikaException {
        if (inHeader) {
            processHeaderControlWord();
        } else if (fieldState == 0) {
            // only modify styles if we're not in a hyperlink
            processStyleControlWord();
        }

        // Snapshot taken after the header step, which may have just marked the group as
        // ignored; branches below that flip groupState.ignore do not alter it.
        final boolean suppressed = groupState.ignore;

        final int literal = literalCharForControlWord();
        if (literal >= 0) {
            if (!suppressed) {
                addOutputChar((char) literal);
            }
            return;
        }

        if (equals("pard")) {
            // Reset styles
            pushText();
            endStyles(groupState);
            if (inList()) {
                pendingListEnd();
            }
        } else if (equals("plain")) {
            // Reset styles, but only when one is actually active
            if (groupState.bold || groupState.italic) {
                pushText();
                endStyles(groupState);
            }
        } else if (equals("par")) {
            if (!suppressed) {
                endParagraph(true);
                if (inList()) {
                    pendingListEnd();
                }
            }
        } else if (equals("cell")) {
            // TODO: we should produce a table output here?
            endParagraph(true);
        } else if (equals("shptxt")) {
            // Text inside a shape: flush pending text and stop ignoring the group
            pushText();
            groupState.ignore = false;
        } else if (equals("chatn") || equals("atnid") || equals("atnauthor")) {
            // Annotation ID / author: emit a separating space and stop ignoring the group
            addOutputChar(SPACE);
            pushText();
            groupState.ignore = false;
        } else if (equals("annotation")) {
            // Annotation body: flag the group before flushing, then stop ignoring it
            groupState.annotation = true;
            pushText();
            groupState.ignore = false;
        } else if (equals("listtext")) {
            groupState.ignore = true;
        } else if (equals("sp")) {
            groupState.sp = true;
        } else if (equals("sn")) {
            embObjHandler.startSN();
            groupState.sn = true;
        } else if (equals("sv")) {
            embObjHandler.startSV();
            groupState.sv = true;
        } else if (equals("object")) {
            pushText();
            embObjHandler.setInObject(true);
            groupState.object = true;
        } else if (equals("objdata")) {
            groupState.objdata = true;
            embObjHandler.startObjData();
        } else if (equals("pict")) {
            pushText();
            // TODO: create img tag?  but can that support
            // embedded image data?
            groupState.pictDepth = 1;
            embObjHandler.startPict();
        } else if (equals("upr")) {
            uprState = 0;
        } else if (equals("ud") && uprState == 1) {
            // 2nd group inside the upr destination, which contains the unicode
            // encoding of the text, so we want to keep that:
            uprState = -1;
            groupState.ignore = false;
        } else if (equals("fldinst")) {
            fieldState = 1;
            groupState.ignore = false;
        } else if (equals("fldrslt") && fieldState == 2) {
            // Field result while a URL is pending: open the anchor element for it
            assert pendingURL != null;
            lazyStartParagraph();
            AttributesImpl hrefAttributes = new AttributesImpl();
            hrefAttributes.addAttribute(XHTML, "href", "href", "CDATA", pendingURL);
            out.startElement("", "a", "a", hrefAttributes);
            pendingURL = null;
            fieldState = 3;
            groupState.ignore = false;
        }
    }

    /**
     * Header-phase handling of the current control word. In order, it: records the
     * document charset, marks the colour/stylesheet/font table groups as ignored or
     * selects the current list table, chooses {@code nextMetaData}, tracks font table
     * progress, tracks list definitions, and finally clears {@code inHeader} when a
     * body-level word shows up in a group that is not ignored.
     */
    private void processHeaderControlWord() {
        // Charset declarations
        if (equals("ansi")) {
            globalCharset = WINDOWS_1252;
        } else if (equals("pca")) {
            globalCharset = CP850;
        } else if (equals("pc")) {
            globalCharset = CP437;
        } else if (equals("mac")) {
            globalCharset = MAC_ROMAN;
        }

        // Table destinations
        if (equals("colortbl") || equals("stylesheet") || equals("fonttbl")) {
            groupState.ignore = true;
        } else if (equals("listtable")) {
            currentListTable = listTable;
        } else if (equals("listoverridetable")) {
            currentListTable = listOverrideTable;
        }

        // Metadata destinations are only honoured while uprState is -1
        if (uprState == -1) {
            // TODO: \revtim, \printim, \version, etc. could be captured as well
            if (equals("author")) {
                nextMetaData = TikaCoreProperties.CREATOR;
            } else if (equals("title")) {
                nextMetaData = TikaCoreProperties.TITLE;
            } else if (equals("subject")) {
                nextMetaData = DublinCore.SUBJECT;
            } else if (equals("keywords")) {
                nextMetaData = Office.KEYWORDS;
            } else if (equals("category")) {
                nextMetaData = OfficeOpenXMLCore.CATEGORY;
            } else if (equals("comment")) {
                nextMetaData = TikaCoreProperties.COMMENTS;
            } else if (equals("company")) {
                nextMetaData = OfficeOpenXMLExtended.COMPANY;
            } else if (equals("manager")) {
                nextMetaData = OfficeOpenXMLExtended.MANAGER;
            } else if (equals("template")) {
                nextMetaData = OfficeOpenXMLExtended.TEMPLATE;
            } else if (equals("creatim")) {
                nextMetaData = TikaCoreProperties.CREATED;
            }
        }

        // Font table progress: 0 = not seen yet, 1 = inside it; it advances to 2 once
        // the group depth falls below the depth at which the table was opened.
        if (fontTableState == 0 && equals("fonttbl")) {
            fontTableState = 1;
            fontTableDepth = groupState.depth;
        } else if (fontTableState == 1 && groupState.depth < fontTableDepth) {
            fontTableState = 2;
        }

        // List table handling
        if (currentListTable != null) {
            if (equals("list") || equals("listoverride")) {
                currentList = new ListDescriptor();
                listTableLevel = -1;
            } else if (currentList != null) {
                if (equals("liststylename")) {
                    currentList.isStyle = true;
                } else if (equals("listlevel")) {
                    listTableLevel++;
                }
            }
        }

        if (groupState.ignore) {
            return;
        }
        // A body-level word in a group that is not ignored ends the header
        if (equals("par") || equals("pard") || equals("sect") || equals("sectd") ||
                equals("plain") || equals("ltrch") || equals("rtlch") ||
                equals("htmlrtf") || equals("line")) {
            inHeader = false;
        }
    }

    /**
     * Turns on bold ("b") or italic ("i") for the current group when that style is not
     * already active; every other control word is left alone.
     *
     * @throws IOException   propagated from the output helpers this method calls
     * @throws SAXException  propagated from the output helpers this method calls
     * @throws TikaException propagated from the output helpers this method calls
     */
    private void processStyleControlWord() throws IOException, SAXException, TikaException {
        if (equals("b")) {
            if (groupState.bold) {
                return;
            }
            pushText();
            lazyStartParagraph();
            if (groupState.italic) {
                // Make sure nesting is always <b><i>
                end("i");
            }
            groupState.bold = true;
            startStyles(groupState);
        } else if (equals("i")) {
            if (groupState.italic) {
                return;
            }
            pushText();
            lazyStartParagraph();
            groupState.italic = true;
            start("i");
        }
    }

    /**
     * Looks up the single output character that the current control word stands for.
     *
     * @return the character widened to {@code int}, or {@code -1} when the current
     *         control word is not one of the single-character words
     */
    private int literalCharForControlWord() {
        if (equals("line") || equals("page") || equals("softline") || equals("softpage")) {
            return '\n';
        }
        if (equals("column") || equals("softcolumn")) {
            return ' ';
        }
        if (equals("tab")) {
            return '\t';
        }
        if (equals("bullet")) {
            // unicode BULLET
            return '\u2022';
        }
        if (equals("endash")) {
            // unicode EN DASH
            return '\u2013';
        }
        if (equals("emdash")) {
            // unicode EM DASH
            return '\u2014';
        }
        if (equals("enspace")) {
            // unicode EN SPACE
            return '\u2002';
        }
        if (equals("qmspace")) {
            // quarter em space -> unicode FOUR-PER-EM SPACE
            return '\u2005';
        }
        if (equals("emspace")) {
            // unicode EM SPACE
            return '\u2003';
        }
        if (equals("lquote")) {
            // unicode LEFT SINGLE QUOTATION MARK
            return '\u2018';
        }
        if (equals("rquote")) {
            // unicode RIGHT SINGLE QUOTATION MARK
            return '\u2019';
        }
        if (equals("ldblquote")) {
            // unicode LEFT DOUBLE QUOTATION MARK
            return '\u201C';
        }
        if (equals("rdblquote")) {
            // unicode RIGHT DOUBLE QUOTATION MARK
            return '\u201D';
        }
        return -1;
    }