export function getUnescapedString()

in packages/pyright-internal/src/parser/stringTokenUtils.ts [84:508]


/**
 * Converts the escaped text of a string token into its unescaped value.
 *
 * Raw strings are copied verbatim. For all other strings, backslash escapes
 * are decoded: the simple one-character escapes, octal escapes of up to three
 * digits, `\x`, `\u`, `\U`, `\N{...}` and backslash-newline continuations.
 * Escapes that cannot be decoded are left in the output as written and are
 * recorded in `unescapeErrors`.
 *
 * For format strings the text is additionally split into alternating literal
 * and expression segments. Doubled braces within a literal segment collapse
 * to a single brace, and string literals nested inside an expression are
 * copied without interpretation.
 *
 * @param stringToken Token whose `escapedValue` and `flags` are examined.
 * @returns The unescaped value, any errors found while unescaping, whether a
 * bytes string contained a character code of 128 or above, and the format
 * string segments (always empty for non-format strings).
 */
export function getUnescapedString(stringToken: StringToken): UnescapedString {
    const escapedString = stringToken.escapedValue;
    const isRaw = (stringToken.flags & StringTokenFlags.Raw) !== 0;
    const isFormat = (stringToken.flags & StringTokenFlags.Format) !== 0;

    // A raw string that is not a format string needs no processing at all.
    if (isRaw && !isFormat) {
        return {
            value: escapedString,
            unescapeErrors: [],
            nonAsciiInBytes: false,
            formatStringSegments: [],
        };
    }

    const charCodes: number[] = [];
    for (let index = 0; index < escapedString.length; index++) {
        charCodes.push(escapedString.charCodeAt(index));
    }

    const isBytes = (stringToken.flags & StringTokenFlags.Bytes) !== 0;

    // Handle the common case in an expedited manner: a non-format string
    // without backslashes or newlines is already in its final form.
    if (!isFormat) {
        if (
            !charCodes.some(
                (curChar) => curChar === Char.CarriageReturn || curChar === Char.LineFeed || curChar === Char.Backslash
            )
        ) {
            return {
                value: escapedString,
                unescapeErrors: [],
                nonAsciiInBytes: isBytes && charCodes.some((curChar) => curChar >= 128),
                formatStringSegments: [],
            };
        }
    }

    // Depth of unclosed '{' within the current format expression.
    let formatExpressionNestCount = 0;
    let formatSegment: IncompleteFormatStringSegment = {
        offset: 0,
        length: 0,
        valueParts: [],
        isExpression: false,
        hasFormatSpecifier: false,
    };
    // Index into charCodes of the character currently being examined.
    let strOffset = 0;
    const output: IncompleteUnescapedString = {
        valueParts: [],
        unescapeErrors: [],
        nonAsciiInBytes: false,
        formatStringSegments: [],
    };

    // Records an invalid escape. Expects strOffset to point at the character
    // that follows the backslash, so the reported range covers both.
    const addInvalidEscapeOffset = () => {
        // Invalid escapes are not reported for raw strings.
        if (!isRaw) {
            output.unescapeErrors.push({
                offset: strOffset - 1,
                length: 2,
                errorType: UnescapeErrorType.InvalidEscapeSequence,
            });
        }
    };

    // Returns the character code at strOffset + offset, or Char.EndOfText
    // when that position lies beyond the end of the text.
    const getEscapedCharacter = (offset = 0) => {
        if (strOffset + offset >= charCodes.length) {
            return Char.EndOfText;
        }

        return charCodes[strOffset + offset];
    };

    // Decodes a hex escape whose introducer character (x, u or U) is at
    // strOffset and must be followed by exactly digitCount hex digits.
    // Advances strOffset past whatever was consumed.
    const scanHexEscape = (digitCount: number) => {
        let foundIllegalHexDigit = false;
        let hexValue = 0;
        let localValue = '';

        for (let i = 0; i < digitCount; i++) {
            const charCode = getEscapedCharacter(1 + i);
            if (!_isHexCharCode(charCode)) {
                foundIllegalHexDigit = true;
                break;
            }
            hexValue = 16 * hexValue + _getHexDigitValue(charCode);
        }

        if (foundIllegalHexDigit) {
            // Keep the backslash and the introducer as written; the
            // characters after it are processed as ordinary text.
            addInvalidEscapeOffset();
            localValue = '\\' + String.fromCharCode(getEscapedCharacter());
            strOffset++;
        } else {
            // String.fromCharCode truncates its argument to 16 bits, which
            // mangles \U escapes above U+FFFF, so build the result from the
            // full code point. String.fromCodePoint throws for values past
            // U+10FFFF; those keep the previous truncating conversion.
            localValue = hexValue <= 0x10ffff ? String.fromCodePoint(hexValue) : String.fromCharCode(hexValue);
            strOffset += 1 + digitCount;
        }

        return localValue;
    };

    // Appends a character to both the overall value and the current segment.
    const appendOutputChar = (charCode: number) => {
        const char = String.fromCharCode(charCode);
        output.valueParts.push(char);
        formatSegment.valueParts.push(char);
    };

    while (true) {
        let curChar = getEscapedCharacter();
        if (curChar === Char.EndOfText) {
            if (isFormat) {
                if (formatSegment.isExpression) {
                    // The last format segment was an unterminated expression.
                    output.unescapeErrors.push({
                        offset: formatSegment.offset,
                        length: strOffset - formatSegment.offset,
                        errorType: UnescapeErrorType.UnterminatedFormatExpression,
                    });
                }

                // Push the last segment.
                if (strOffset !== formatSegment.offset) {
                    formatSegment.length = strOffset - formatSegment.offset;
                    output.formatStringSegments.push(formatSegment);
                }
            }
            return completeUnescapedString(output);
        }

        if (curChar === Char.Backslash) {
            if (isFormat && formatSegment.isExpression && !formatSegment.hasFormatSpecifier) {
                // Backslashes aren't allowed within format string expressions.
                output.unescapeErrors.push({
                    offset: strOffset,
                    length: 1,
                    errorType: UnescapeErrorType.EscapeWithinFormatExpression,
                });
            }

            // Move past the escape (backslash) character.
            strOffset++;

            if (isRaw) {
                // Raw strings keep the backslash; the character after it is
                // handled as ordinary text on the next iteration.
                appendOutputChar(curChar);
                continue;
            }

            // From here on the string is known not to be raw.
            curChar = getEscapedCharacter();
            let localValue = '';

            if (curChar === Char.CarriageReturn || curChar === Char.LineFeed) {
                // Backslash-newline is a line continuation: consume the
                // newline (CR+LF counts as one) and contribute no text.
                if (curChar === Char.CarriageReturn && getEscapedCharacter(1) === Char.LineFeed) {
                    strOffset++;
                }
                strOffset++;
            } else {
                switch (curChar) {
                    case Char.Backslash:
                    case Char.SingleQuote:
                    case Char.DoubleQuote:
                        localValue = String.fromCharCode(curChar);
                        strOffset++;
                        break;

                    case Char.a:
                        localValue = '\u0007';
                        strOffset++;
                        break;

                    case Char.b:
                        localValue = '\b';
                        strOffset++;
                        break;

                    case Char.f:
                        localValue = '\f';
                        strOffset++;
                        break;

                    case Char.n:
                        localValue = '\n';
                        strOffset++;
                        break;

                    case Char.r:
                        localValue = '\r';
                        strOffset++;
                        break;

                    case Char.t:
                        localValue = '\t';
                        strOffset++;
                        break;

                    case Char.v:
                        localValue = '\v';
                        strOffset++;
                        break;

                    case Char.x:
                        localValue = scanHexEscape(2);
                        break;

                    case Char.N: {
                        // Look ahead for "{name}", where the name may contain
                        // alphanumerics, hyphens and whitespace.
                        let foundIllegalChar = false;
                        let charCount = 1;
                        if (getEscapedCharacter(charCount) !== Char.OpenBrace) {
                            foundIllegalChar = true;
                        } else {
                            charCount++;
                            while (true) {
                                const lookaheadChar = getEscapedCharacter(charCount);
                                if (lookaheadChar === Char.CloseBrace) {
                                    break;
                                } else if (
                                    !_isAlphaNumericChar(lookaheadChar) &&
                                    lookaheadChar !== Char.Hyphen &&
                                    !_isWhitespaceChar(lookaheadChar)
                                ) {
                                    foundIllegalChar = true;
                                    break;
                                } else {
                                    charCount++;
                                }
                            }
                        }

                        if (foundIllegalChar) {
                            addInvalidEscapeOffset();
                            localValue = '\\' + String.fromCharCode(curChar);
                            strOffset++;
                        } else {
                            // We don't have the Unicode name database handy, so
                            // assume that the name is valid and use a '-' as a
                            // replacement character.
                            localValue = '-';
                            strOffset += 1 + charCount;
                        }
                        break;
                    }

                    case Char.u:
                        localValue = scanHexEscape(4);
                        break;

                    case Char.U:
                        localValue = scanHexEscape(8);
                        break;

                    default:
                        if (_isOctalCharCode(curChar)) {
                            // Octal escape of one to three digits.
                            let octalCode = curChar - Char._0;
                            strOffset++;
                            curChar = getEscapedCharacter();
                            if (_isOctalCharCode(curChar)) {
                                octalCode = octalCode * 8 + curChar - Char._0;
                                strOffset++;
                                curChar = getEscapedCharacter();

                                if (_isOctalCharCode(curChar)) {
                                    octalCode = octalCode * 8 + curChar - Char._0;
                                    strOffset++;
                                }
                            }

                            localValue = String.fromCharCode(octalCode);
                        } else {
                            // Unrecognized escape: keep the backslash. The
                            // character after it is not consumed here, so it
                            // is emitted as ordinary text on the next pass.
                            localValue = '\\';
                            addInvalidEscapeOffset();
                        }
                        break;
                }
            }

            output.valueParts.push(localValue);
            formatSegment.valueParts.push(localValue);
        } else if (curChar === Char.LineFeed || curChar === Char.CarriageReturn) {
            // An unescaped newline is copied to the output as-is (either one
            // or two characters, since CR+LF is kept together).
            if (curChar === Char.CarriageReturn && getEscapedCharacter(1) === Char.LineFeed) {
                appendOutputChar(curChar);
                strOffset++;
                curChar = getEscapedCharacter();
            }

            appendOutputChar(curChar);
            strOffset++;
        } else if (isFormat && curChar === Char.OpenBrace) {
            if (!formatSegment.isExpression && getEscapedCharacter(1) === Char.OpenBrace) {
                // A doubled open brace in a literal segment is a single
                // literal brace.
                appendOutputChar(curChar);
                strOffset += 2;
            } else {
                if (formatExpressionNestCount === 0) {
                    // A single open brace within a format literal indicates that
                    // an expression is starting.
                    formatSegment.length = strOffset - formatSegment.offset;
                    if (formatSegment.length > 0) {
                        output.formatStringSegments.push(formatSegment);
                    }
                    strOffset++;

                    // Start a new segment.
                    formatSegment = {
                        offset: strOffset,
                        length: 0,
                        valueParts: [],
                        isExpression: true,
                        hasFormatSpecifier: false,
                    };
                } else {
                    // A nested brace is part of the expression text.
                    appendOutputChar(curChar);
                    strOffset++;
                }
                formatExpressionNestCount++;
            }
        } else if (isFormat && curChar === Char.CloseBrace) {
            if (!formatSegment.isExpression && getEscapedCharacter(1) === Char.CloseBrace) {
                // A doubled close brace in a literal segment is a single
                // literal brace.
                appendOutputChar(curChar);
                strOffset += 2;
            } else if (formatExpressionNestCount === 0) {
                output.unescapeErrors.push({
                    offset: strOffset,
                    length: 1,
                    errorType: UnescapeErrorType.SingleCloseBraceWithinFormatLiteral,
                });
                strOffset++;
            } else {
                formatExpressionNestCount--;

                if (formatExpressionNestCount === 0) {
                    // A close brace within a format expression indicates that
                    // the expression is complete.
                    formatSegment.length = strOffset - formatSegment.offset;
                    output.formatStringSegments.push(formatSegment);
                    strOffset++;

                    // Start a new segment.
                    formatSegment = {
                        offset: strOffset,
                        length: 0,
                        valueParts: [],
                        isExpression: false,
                        hasFormatSpecifier: false,
                    };
                } else {
                    // A nested brace is part of the expression text.
                    appendOutputChar(curChar);
                    strOffset++;
                }
            }
        } else if (formatSegment.isExpression && (curChar === Char.SingleQuote || curChar === Char.DoubleQuote)) {
            // We're within an expression, and we've encountered a string literal.
            // Copy it through without interpreting its contents.
            const quoteChar = curChar;
            appendOutputChar(curChar);
            const isTriplicate = getEscapedCharacter(1) === quoteChar && getEscapedCharacter(2) === quoteChar;
            if (isTriplicate) {
                // appendOutputChar already writes to both the overall value
                // and the current segment, so each remaining quote of the
                // opening delimiter is appended exactly once.
                strOffset += 2;
                appendOutputChar(curChar);
                appendOutputChar(curChar);
            }

            while (true) {
                strOffset++;
                let strChar = getEscapedCharacter();
                if (strChar === Char.EndOfText) {
                    break;
                }

                if (strChar === Char.Backslash) {
                    // Copy the backslash and the character it escapes so that
                    // an escaped quote does not end the nested literal.
                    appendOutputChar(strChar);
                    strOffset++;
                    strChar = getEscapedCharacter();
                    if (strChar === Char.EndOfText) {
                        // The text ends right after the backslash. Stop here
                        // so the end-of-text sentinel is not emitted as a
                        // character and strOffset stays within the text.
                        break;
                    }
                    appendOutputChar(strChar);
                    continue;
                }

                if (strChar === Char.LineFeed || strChar === Char.CarriageReturn) {
                    // Only triple-quoted literals may span lines.
                    if (!isTriplicate) {
                        break;
                    }
                }

                if (strChar === quoteChar) {
                    if (!isTriplicate) {
                        strOffset++;
                        appendOutputChar(strChar);
                        break;
                    }

                    if (getEscapedCharacter(1) === quoteChar && getEscapedCharacter(2) === quoteChar) {
                        strOffset += 3;
                        appendOutputChar(strChar);
                        appendOutputChar(strChar);
                        appendOutputChar(strChar);
                        break;
                    }
                }

                appendOutputChar(strChar);
            }
        } else {
            // A colon within an expression is taken as the start of a format
            // specifier.
            if (formatSegment.isExpression && curChar === Char.Colon) {
                formatSegment.hasFormatSpecifier = true;
            }

            // There's nothing to unescape, so output the escaped character directly.
            if (isBytes && curChar >= 128) {
                output.nonAsciiInBytes = true;
            }

            appendOutputChar(curChar);
            strOffset++;
        }
    }
}