void JSLexer::scanString()

in lib/Parser/JSLexer.cpp [1734:1882]


void JSLexer::scanString() {
  assert(*curCharPtr_ == '\'' || *curCharPtr_ == '"');
  char quoteCh = *curCharPtr_++;

  // Track whether we encounter any escapes or new line continuations. We need
  // that information in order to detect directives.
  bool escapes = false;

  tmpStorage_.clear();

  for (;;) {
    if (*curCharPtr_ == quoteCh) {
      ++curCharPtr_;
      break;
    } else if (!JSX && *curCharPtr_ == '\\') {
      escapes = true;
      ++curCharPtr_;
      switch ((unsigned char)*curCharPtr_) {
        case '\'':
        case '"':
        case '\\':
          tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          break;

        case 'b':
          ++curCharPtr_;
          tmpStorage_.push_back(8);
          break;
        case 'f':
          ++curCharPtr_;
          tmpStorage_.push_back(12);
          break;
        case 'n':
          ++curCharPtr_;
          tmpStorage_.push_back(10);
          break;
        case 'r':
          ++curCharPtr_;
          tmpStorage_.push_back(13);
          break;
        case 't':
          ++curCharPtr_;
          tmpStorage_.push_back(9);
          break;
        case 'v':
          ++curCharPtr_;
          tmpStorage_.push_back(11);
          break;

        case '\0': // EOF?
          if (curCharPtr_ == bufferEnd_) { // eof?
            error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
            sm_.note(token_.getStartLoc(), "string started here");
            goto breakLoop;
          } else {
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          }
          break;

        case '0':
          // '\0' is not an octal so handle it separately.
          if (!(curCharPtr_[1] >= '0' && curCharPtr_[1] <= '7')) {
            ++curCharPtr_;
            appendUnicodeToStorage(0);
            break;
          }
          LLVM_FALLTHROUGH;
        case '1':
        case '2':
        case '3':
          appendUnicodeToStorage(consumeOctal(3));
          break;
        case '4':
        case '5':
        case '6':
        case '7':
          appendUnicodeToStorage(consumeOctal(2));
          break;

        case 'x': {
          ++curCharPtr_;
          auto v = consumeHex(2);
          appendUnicodeToStorage(v ? *v : 0);
          break;
        }

        case 'u':
          --curCharPtr_;
          appendUnicodeToStorage(consumeUnicodeEscape());
          break;

        // Escaped line terminator. We just need to skip it.
        case '\n':
          ++curCharPtr_;
          break;
        case '\r':
          ++curCharPtr_;
          if (*curCharPtr_ == '\n') // skip CR LF
            ++curCharPtr_;
          break;
        case UTF8_LINE_TERMINATOR_CHAR0:
          if (matchUnicodeLineTerminatorOffset1(curCharPtr_)) {
            curCharPtr_ += 3;
            break;
          }
          appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
          break;

        default:
          if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_)))
            appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
          else
            tmpStorage_.push_back((unsigned char)*curCharPtr_++);
          break;
      }
    } else if (LLVM_UNLIKELY(*curCharPtr_ == '\n' || *curCharPtr_ == '\r')) {
      if (JSX) {
        tmpStorage_.push_back(*curCharPtr_++);
      } else {
        error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
        sm_.note(token_.getStartLoc(), "string started here");
        break;
      }
#if HERMES_PARSE_JSX
    } else if (LLVM_UNLIKELY(JSX && *curCharPtr_ == '&')) {
      auto codePoint = consumeHTMLEntityOptional();
      if (codePoint.hasValue()) {
        appendUnicodeToStorage(*codePoint);
      } else {
        tmpStorage_.push_back(*curCharPtr_++);
      }
#endif
    } else if (LLVM_UNLIKELY(*curCharPtr_ == 0 && curCharPtr_ == bufferEnd_)) {
      error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
      sm_.note(token_.getStartLoc(), "string started here");
      break;
    } else {
      if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
        // Decode and re-encode the character and append it to the string
        // storage
        appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
      } else {
        tmpStorage_.push_back(*curCharPtr_++);
      }
    }
  }
breakLoop:
  token_.setStringLiteral(getStringLiteral(tmpStorage_.str()), escapes);
}