Scanner::TokenType Scanner::scanAttribute()

in inference/src/translator/xh_scanner.cpp [99:196]


// Scans the inside of an opening tag, after the tag name or a previous attribute.
//
// Each call handles one of the following:
//  - '>'  : end of the opening tag. Consumes it, switches scanFun_ to
//           scanSpecial (for elements whose content is treated as opaque) or
//           scanBody, and returns whatever that scanner returns.
//  - "/>" : self-closing tag. Consumes it, switches scanFun_ to scanBody and
//           returns TT_TAG_END. A '/' that is not followed by '>' yields
//           TT_ERROR.
//  - otherwise an attribute. attributeName_ is set to the span of the name and
//           value_ to the span of the value (or {nullptr, 0} when the
//           attribute has no value), and TT_ATTRIBUTE is returned.
//
// Returns TT_EOF when the input ends ('\0') inside an attribute name or an
// unquoted attribute value, and TT_ERROR on a '<' inside an attribute name or
// on an unterminated quoted value.
Scanner::TokenType Scanner::scanAttribute() {
  // Skip all whitespace between tag name or last attribute and next attribute or '>'
  skipWhitespace();

  // Check whether the tag ends here ('>' or '/>') before trying to read an attribute
  switch (input_.peek()) {
    case '>':
      input_.consume();

      // Treat some elements as opaque, e.g. <script>, <style>
      if (/*equalsCaseInsensitive(tagName_, "title") ||*/ equalsCaseInsensitive(tagName_, "script") ||
          equalsCaseInsensitive(tagName_, "style") || equalsCaseInsensitive(tagName_, "textarea") ||
          equalsCaseInsensitive(tagName_, "iframe") || equalsCaseInsensitive(tagName_, "noembed") ||
          equalsCaseInsensitive(tagName_, "noscript") || equalsCaseInsensitive(tagName_, "noframes")) {
        // script is special because we want to parse the attributes,
        // but not the content
        scanFun_ = &Scanner::scanSpecial;
        return scanSpecial();
      } else {
        scanFun_ = &Scanner::scanBody;
        return scanBody();
      }
    case '/':
      input_.consume();
      if (input_.peek() == '>') {
        // self closing tag
        input_.consume();
        scanFun_ = &Scanner::scanBody;
        return TT_TAG_END;
      } else {
        return TT_ERROR;
      }
  }

  // Start a new attribute: the name begins at the current position and there
  // is no value until we have seen an '='.
  attributeName_ = string_ref{input_.pos(), 0};
  value_ = string_ref{nullptr, 0};

  // attribute name...
  while (input_.peek() != '=') {
    switch (input_.peek()) {
      case '\0':
        return TT_EOF;
      case '>':
        return TT_ATTRIBUTE;  // attribute without value (HTML style) at end of tag
      case '<':
        return TT_ERROR;
      default:
        if (skipWhitespace()) {
          if (input_.peek() == '=') {
            // Whitespace between name and '=': leave the switch, the loop
            // condition then sees the '=' and ends the name.
            break;
          } else {
            return TT_ATTRIBUTE;  // attribute without value (HTML style) but not yet at end of tag
          }
        }
        input_.consume();
        ++attributeName_.size;
        break;
    }
  }

  // consume '=' and any following whitespace
  input_.consume();
  skipWhitespace();

  // attribute value...
  switch (input_.peek()) {
    case '"':
    case '\'': {
      // Either '"' or '\'' depending on which quote opened the value
      const char quote = input_.consume();
      value_ = string_ref{input_.pos(), 0};
      while (true) {
        if (input_.peek() == '\0') {
          return TT_ERROR;  // input ended before the closing quote
        } else if (input_.peek() == quote) {
          input_.consume();  // closing quote is not part of the value
          return TT_ATTRIBUTE;
        } else {
          input_.consume();
          ++value_.size;
        }
      }
      break;
    }
    default:
      // Unquoted value: runs until whitespace, '>' or end of input.
      value_ = string_ref{input_.pos(), 0};

      while (true) {
        // '\0' is treated as end of input, as in the attribute name loop
        // above. Without this check a document truncated inside an unquoted
        // value would keep this loop spinning and grow value_.size past the
        // end of the input.
        if (input_.peek() == '\0') return TT_EOF;
        if (isWhitespace(input_.peek())) return TT_ATTRIBUTE;
        if (input_.peek() == '>') return TT_ATTRIBUTE;  // '>' will be consumed next round
        input_.consume();
        ++value_.size;
      }
      break;
  }

  // Not reachable: both value loops above only exit by returning. Kept so every
  // control path visibly returns a value.
  return TT_ERROR;
}