in lib/Parser/JSLexer.cpp [1734:1882]
void JSLexer::scanString() {
assert(*curCharPtr_ == '\'' || *curCharPtr_ == '"');
char quoteCh = *curCharPtr_++;
// Track whether we encounter any escapes or new line continuations. We need
// that information in order to detect directives.
bool escapes = false;
tmpStorage_.clear();
for (;;) {
if (*curCharPtr_ == quoteCh) {
++curCharPtr_;
break;
} else if (!JSX && *curCharPtr_ == '\\') {
escapes = true;
++curCharPtr_;
switch ((unsigned char)*curCharPtr_) {
case '\'':
case '"':
case '\\':
tmpStorage_.push_back((unsigned char)*curCharPtr_++);
break;
case 'b':
++curCharPtr_;
tmpStorage_.push_back(8);
break;
case 'f':
++curCharPtr_;
tmpStorage_.push_back(12);
break;
case 'n':
++curCharPtr_;
tmpStorage_.push_back(10);
break;
case 'r':
++curCharPtr_;
tmpStorage_.push_back(13);
break;
case 't':
++curCharPtr_;
tmpStorage_.push_back(9);
break;
case 'v':
++curCharPtr_;
tmpStorage_.push_back(11);
break;
case '\0': // EOF?
if (curCharPtr_ == bufferEnd_) { // eof?
error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
sm_.note(token_.getStartLoc(), "string started here");
goto breakLoop;
} else {
tmpStorage_.push_back((unsigned char)*curCharPtr_++);
}
break;
case '0':
// '\0' is not an octal so handle it separately.
if (!(curCharPtr_[1] >= '0' && curCharPtr_[1] <= '7')) {
++curCharPtr_;
appendUnicodeToStorage(0);
break;
}
LLVM_FALLTHROUGH;
case '1':
case '2':
case '3':
appendUnicodeToStorage(consumeOctal(3));
break;
case '4':
case '5':
case '6':
case '7':
appendUnicodeToStorage(consumeOctal(2));
break;
case 'x': {
++curCharPtr_;
auto v = consumeHex(2);
appendUnicodeToStorage(v ? *v : 0);
break;
}
case 'u':
--curCharPtr_;
appendUnicodeToStorage(consumeUnicodeEscape());
break;
// Escaped line terminator. We just need to skip it.
case '\n':
++curCharPtr_;
break;
case '\r':
++curCharPtr_;
if (*curCharPtr_ == '\n') // skip CR LF
++curCharPtr_;
break;
case UTF8_LINE_TERMINATOR_CHAR0:
if (matchUnicodeLineTerminatorOffset1(curCharPtr_)) {
curCharPtr_ += 3;
break;
}
appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
break;
default:
if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_)))
appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
else
tmpStorage_.push_back((unsigned char)*curCharPtr_++);
break;
}
} else if (LLVM_UNLIKELY(*curCharPtr_ == '\n' || *curCharPtr_ == '\r')) {
if (JSX) {
tmpStorage_.push_back(*curCharPtr_++);
} else {
error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
sm_.note(token_.getStartLoc(), "string started here");
break;
}
#if HERMES_PARSE_JSX
} else if (LLVM_UNLIKELY(JSX && *curCharPtr_ == '&')) {
auto codePoint = consumeHTMLEntityOptional();
if (codePoint.hasValue()) {
appendUnicodeToStorage(*codePoint);
} else {
tmpStorage_.push_back(*curCharPtr_++);
}
#endif
} else if (LLVM_UNLIKELY(*curCharPtr_ == 0 && curCharPtr_ == bufferEnd_)) {
error(SMLoc::getFromPointer(curCharPtr_), "non-terminated string");
sm_.note(token_.getStartLoc(), "string started here");
break;
} else {
if (LLVM_UNLIKELY(isUTF8Start(*curCharPtr_))) {
// Decode and re-encode the character and append it to the string
// storage
appendUnicodeToStorage(_decodeUTF8SlowPath(curCharPtr_));
} else {
tmpStorage_.push_back(*curCharPtr_++);
}
}
}
breakLoop:
token_.setStringLiteral(getStringLiteral(tmpStorage_.str()), escapes);
}