in inference/src/translator/html.cpp [247:317]
/// Consumes an entire element whose contents should not be interpreted, and
/// stores its attributes and raw body on `tag`.
///
/// The caller must already have consumed the element's TT_TAG_START token.
/// This function then reads tokens from `scanner` until (and including) the
/// TT_TAG_END token that closes the element, keeping a nesting count for
/// elements with the same name so that e.g. `<x><x></x></x>` is consumed as a
/// whole.
///
/// @param scanner Token source, positioned right after the opening tag's
///                TT_TAG_START token. On return its last token is the closing
///                TT_TAG_END of this element.
/// @param tag     Tag being filled in. Must be of type HTML::Tag::ELEMENT.
///                Each attribute found is appended to `tag.attributes` as
///                ` name="value"`, and `tag.data` is set to the raw text
///                between the opening tag and the final closing tag.
/// @param name    Name of the element to consume. It is compared against
///                `toLowerCase(scanner.tag())`, so it has to be lower-case
///                already for the comparison to match.
///
/// Aborts (via ABORT) on a scanner error or when the input ends before the
/// closing tag was found.
///
/// NOTE(review): `tag.data` is a std::string_view built from pointers returned
/// by `scanner.start()`; presumably those point into the scanner's input
/// buffer, which would then have to outlive `tag` -- confirm against Scanner.
void consumeIgnoredTag(markup::Scanner &scanner, HTML::Tag &tag, std::string const &name) {
  // Only full elements can be consumed this way. With void tags we don't know
  // where to stop scanning. All other types cannot be nested anyway.
  assert(tag.type == HTML::Tag::ELEMENT);

  // TT_TAG_START is already consumed.
  markup::Scanner::TokenType token;

  // Nesting depth of same-named elements. 0 means we're still reading the
  // attributes of the opening tag; it becomes 1 once we've entered the body.
  size_t inside = 0;

  // Consume the full open tag, i.e. all its attributes.
  while (!inside) {
    token = scanner.next();

    switch (token) {
      case markup::Scanner::TT_ERROR:
        ABORT("HTML parse error");
      case markup::Scanner::TT_EOF:
        ABORT("Did not find closing tag </{}>", name);
      case markup::Scanner::TT_ATTRIBUTE:
        tag.attributes += format(" {}=\"{}\"", scanner.attribute(), scanner.value());
        break;
      default:
        // Not an attribute! Must be something inside the body or the closing
        // tag already. Time to jump to the next loop.
        ++inside;
        break;
    }
  }

  // The token that ended the previous loop is the first one past the opening
  // tag, so the scanner's start() at this point is taken as the beginning of
  // the element's body. (The original note attributes this to
  // Scanner::scanBody(); that function is not visible from here.)
  const char *start = scanner.start();

  // Consume the rest of the HTML until (including) the final closing tag. We
  // start with the token that caused the previous loop to fall into the
  // default case, which is why the switch comes before the call to next().
  while (inside) {
    switch (token) {
      case markup::Scanner::TT_ERROR:
        ABORT("HTML parse error");
      case markup::Scanner::TT_EOF:
        // Pass the tag name along so the message's placeholder is filled in,
        // same as for the EOF case in the attribute loop above.
        ABORT("Did not find closing tag </{}>", name);
      case markup::Scanner::TT_TAG_START:
        // Note: Looking specifically for only our own type of tag so we don't
        // have to care about whether other tags we encounter are void tags or
        // not. Does assume the HTML is valid, as no stack is kept.
        if (toLowerCase(scanner.tag()) == name) ++inside;
        break;
      case markup::Scanner::TT_TAG_END:
        if (toLowerCase(scanner.tag()) == name) --inside;
        break;
      default:
        break;
    }

    // Only continue scanning if we're still inside. We could have just read
    // the TT_TAG_END token that ended this element, and we don't want to
    // continue consuming tokens at that point.
    if (inside) token = scanner.next();
  }

  // Only a TAG_END could have stopped the previous loop. We take the start
  // of the final closing tag as the end of our data.
  assert(token == markup::Scanner::TT_TAG_END);
  const char *end = scanner.start();

  // All data between the end of the first open element, and the start of the
  // last close element, we just treat as raw data that will be printed when
  // this tag is eventually printed.
  assert(end >= start);
  tag.data = std::string_view(start, end - start);
}