private void internalProcessRecord()

in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java [388:534]


        /**
         * Handles a single record from the workbook stream, dispatching on
         * its sid. Depending on the record type this starts or finishes a
         * sheet, remembers sheet names and the shared string table, or turns
         * the record's content into a cell via {@code addCell} /
         * {@code addTextCell}.
         * <p>
         * After the dispatch, {@code previousSid} is updated to this record's
         * sid, and {@code stringFormulaRecord} is cleared unless this record
         * is the formula that was just stored there (so that the immediately
         * following {@link StringRecord} can still be attached to it).
         *
         * @param record the record to process
         * @throws SAXException  if processing the record fails
         * @throws TikaException if processing the record fails
         * @throws IOException   if processing the record fails
         */
        private void internalProcessRecord(Record record)
                throws SAXException, TikaException, IOException {
            switch (record.getSid()) {
                case BOFRecord.sid: // start of workbook, worksheet etc. records
                    BOFRecord bof = (BOFRecord) record;
                    if (bof.getType() == BOFRecord.TYPE_WORKBOOK) {
                        currentSheetIndex = -1;
                    } else if (bof.getType() == BOFRecord.TYPE_CHART) {
                        if (previousSid == EOFRecord.sid) {
                            // This is a sheet which contains only a chart
                            newSheet();
                        } else {
                            // This is a chart within a normal sheet
                            // Handling of this is a bit hacky...
                            if (currentSheet != null) {
                                // Flush what has been collected so far, then
                                // step the index back before opening the
                                // replacement sheet
                                processSheet();
                                currentSheetIndex--;
                                newSheet();
                            }
                        }
                    } else if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
                        newSheet();
                    }
                    break;

                case EOFRecord.sid: // end of workbook, worksheet etc. records
                    if (currentSheet != null) {
                        processSheet();
                    }
                    currentSheet = null;
                    break;

                case BoundSheetRecord.sid: // Worksheet index record
                    BoundSheetRecord boundSheetRecord = (BoundSheetRecord) record;
                    sheetNames.add(boundSheetRecord.getSheetname());
                    break;

                case SSTRecord.sid: // holds all the strings for LabelSSTRecords
                    sstRecord = (SSTRecord) record;
                    break;

                case FormulaRecord.sid: // Cell value from a formula
                    FormulaRecord formula = (FormulaRecord) record;
                    if (formula.hasCachedResultString()) {
                        // The String itself should be the next record
                        stringFormulaRecord = formula;
                    } else {
                        addTextCell(record, formatListener.formatNumberDateCell(formula));
                    }
                    break;

                case StringRecord.sid:
                    if (previousSid == FormulaRecord.sid) {
                        // Cached string value of a string formula
                        StringRecord sr = (StringRecord) record;
                        addTextCell(stringFormulaRecord, sr.getString());
                    } else {
                        // Some other string not associated with a cell, skip
                    }
                    break;

                case LabelRecord.sid: // strings stored directly in the cell
                    LabelRecord label = (LabelRecord) record;
                    addTextCell(record, label.getValue());
                    break;

                case LabelSSTRecord.sid: // Ref. a string in the shared string table
                    LabelSSTRecord sst = (LabelSSTRecord) record;
                    if (sstRecord == null) {
                        // No shared string table has been seen yet, so the
                        // index cannot be resolved; skip the cell instead of
                        // failing with a NullPointerException
                        break;
                    }
                    UnicodeString unicode = sstRecord.getString(sst.getSSTIndex());
                    if (unicode == null) {
                        // Nothing to emit for an unresolved string
                        break;
                    }
                    String cellString = unicode.getString();
                    if (officeParserConfig.isConcatenatePhoneticRuns()) {
                        // Append the phonetic text (or an empty string when
                        // there is none) after a single space
                        String phonetic = "";
                        if (unicode.getExtendedRst() != null) {
                            String phoneticText = unicode.getExtendedRst().getPhoneticText();
                            if (phoneticText != null && !phoneticText.isBlank()) {
                                phonetic = phoneticText;
                            }
                        }
                        cellString = cellString + " " + phonetic;
                    }
                    addTextCell(record, cellString);
                    break;

                case NumberRecord.sid: // Contains a numeric cell value
                    NumberRecord number = (NumberRecord) record;
                    addTextCell(record, formatListener.formatNumberDateCell(number));
                    break;

                case RKRecord.sid: // Excel internal number record
                    RKRecord rk = (RKRecord) record;
                    addCell(record, new NumberCell(rk.getRKNumber(), format));
                    break;

                case HyperlinkRecord.sid: // holds a URL associated with a cell
                    if (currentSheet != null) {
                        HyperlinkRecord link = (HyperlinkRecord) record;
                        Point point = new Point(link.getFirstColumn(), link.getFirstRow());
                        // Only cells already present in the current sheet
                        // are (re-)added, wrapped with the link if it has
                        // an address
                        Cell cell = currentSheet.get(point);
                        if (cell != null) {
                            String address = link.getAddress();
                            if (address != null) {
                                addCell(record, new LinkedCell(cell, address));
                            } else {
                                addCell(record, cell);
                            }
                        }
                    }
                    break;

                case TextObjectRecord.sid:
                    if (extractor.officeParserConfig.isIncludeShapeBasedContent()) {
                        TextObjectRecord tor = (TextObjectRecord) record;
                        addTextCell(record, tor.getStr().getString());
                    }
                    break;

                case SeriesTextRecord.sid: // Chart label or title
                    SeriesTextRecord str = (SeriesTextRecord) record;
                    addTextCell(record, str.getText());
                    break;

                case DrawingGroupRecord.sid:
                    // Collect this now, we'll process later when all
                    //  the continue records are in
                    drawingGroups.add((DrawingGroupRecord) record);
                    break;

                case HeaderRecord.sid:
                    if (extractor.officeParserConfig.isIncludeHeadersAndFooters()) {
                        HeaderRecord headerRecord = (HeaderRecord) record;
                        addTextCell(record, headerRecord.getText());
                    }
                    break;

                case FooterRecord.sid:
                    if (extractor.officeParserConfig.isIncludeHeadersAndFooters()) {
                        FooterRecord footerRecord = (FooterRecord) record;
                        addTextCell(record, footerRecord.getText());
                    }
                    break;
            }

            previousSid = record.getSid();

            // Keep the pending string-formula only while it is the record
            // that was just processed; any other record invalidates it
            if (stringFormulaRecord != record) {
                stringFormulaRecord = null;
            }
        }