public void startElement()

in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/HtmlHandler.java [114:181]


    /**
     * Processes the start of an HTML element.
     * <p>
     * In order, this method:
     * <ol>
     *   <li>records the {@code lang} attribute of {@code HTML} as the content
     *       language;</li>
     *   <li>updates the script/title/body/discard nesting counters;</li>
     *   <li>while outside the body and outside any discarded element, records
     *       {@code META} values as metadata, emits {@code BASE} and
     *       {@code LINK} as empty elements, and remembers the attributes of a
     *       {@code SCRIPT} element;</li>
     *   <li>while inside the body and outside any discarded element, emits the
     *       element under the name returned by the mapper, if any;</li>
     *   <li>resets the {@code title} buffer to length zero;</li>
     *   <li>passes a {@code data:} URI found in {@code src}, and non-blank
     *       {@code IFRAME srcdoc} content, to their dedicated handlers.</li>
     * </ol>
     * Element names are compared against upper-case literals only.
     *
     * @param uri   namespace URI of the element (not used by this method)
     * @param local local name of the element (not used by this method)
     * @param name  element name, matched against upper-case HTML tag names
     * @param atts  attributes of the element; only read during this call, and
     *              copied when they must be retained
     * @throws SAXException if one of the delegated handler or helper calls fails
     */
    public void startElement(String uri, String local, String name, Attributes atts)
            throws SAXException {

        if ("HTML".equals(name) && atts.getValue("lang") != null) {
            metadata.set(Metadata.CONTENT_LANGUAGE, atts.getValue("lang"));
        }

        // Nesting counters: each one starts counting at its trigger element and,
        // once positive, is incremented for every nested start tag as well.
        if ("SCRIPT".equals(name)) {
            scriptLevel++;
        }
        if ("TITLE".equals(name) || titleLevel > 0) {
            titleLevel++;
        }
        if ("BODY".equals(name) || "FRAMESET".equals(name) || bodyLevel > 0) {
            bodyLevel++;
        }
        if (mapper.isDiscardElement(name) || discardLevel > 0) {
            discardLevel++;
        }

        // Outside the body and not inside a discarded element.
        if (bodyLevel == 0 && discardLevel == 0) {
            if ("META".equals(name) && atts.getValue("content") != null) {
                // TIKA-478: For cases where we have either a name or
                // "http-equiv", assume that XHTMLContentHandler will emit
                // these in the <head>, thus passing them through safely.
                if (atts.getValue("http-equiv") != null) {
                    addHtmlMetadata(atts.getValue("http-equiv"), atts.getValue("content"));
                } else if (atts.getValue("name") != null) {
                    // Record the meta tag in the metadata
                    addHtmlMetadata(atts.getValue("name"), atts.getValue("content"));
                } else if (atts.getValue("property") != null) {
                    // TIKA-983: Handle <meta property="og:xxx" content="yyy" /> tags
                    metadata.add(HTML.PREFIX_HTML_META + atts.getValue("property"), atts.getValue("content"));
                }
            } else if ("BASE".equals(name) && atts.getValue("href") != null) {
                // Emitted as an empty element: opened and closed immediately.
                startElementWithSafeAttributes("base", atts);
                xhtml.endElement("base");
                metadata.set(Metadata.CONTENT_LOCATION, resolve(atts.getValue("href")));
            } else if ("LINK".equals(name)) {
                // Emitted as an empty element: opened and closed immediately.
                startElementWithSafeAttributes("link", atts);
                xhtml.endElement("link");
            } else if ("SCRIPT".equals(name)) {
                // SAX only guarantees that an Attributes instance is valid for
                // the duration of this callback, so retain a private copy
                // instead of the parser-owned object. The class name is fully
                // qualified so that no additional import is required.
                scriptAtts = new org.xml.sax.helpers.AttributesImpl(atts);
            }
        }

        // Inside the body and not inside a discarded element: forward the
        // element only if the mapper returns a non-null name for it.
        if (bodyLevel > 0 && discardLevel == 0) {
            String safe = mapper.mapSafeElement(name);
            if (safe != null) {
                startElementWithSafeAttributes(safe, atts);
            }
        }

        // The title buffer is reset on every start tag.
        title.setLength(0);
        String value = atts.getValue("src");
        if (value != null && value.startsWith("data:")) {
            //don't extract data if we're in a script
            //and the user doesn't want to extract scripts
            if (scriptLevel == 0 || extractScripts) {
                handleDataURIScheme(value);
            }
        }
        if ("IFRAME".equals(name)) {
            String srcDoc = atts.getValue("srcdoc");
            if (!StringUtils.isBlank(srcDoc)) {
                handleSrcDoc(srcDoc);
            }
        }
    }