fn tokenize_triple_quoted_string()

in src/tokenizer.rs [3673:3790]


    /// Exercises triple-quoted string tokenization (BigQuery dialect) for both
    /// quote characters, with unescaping on and off, verifies that unterminated
    /// literals report an error, and checks that a dialect without triple-quote
    /// support tokenizes `''''''` as a plain quoted string.
    fn tokenize_triple_quoted_string() {
        /// Runs the full matrix for one quote character.
        ///
        /// * `q` – the triple-quote character under test
        /// * `r` – an alternate quote character (must NOT terminate the string)
        /// * `quote_token` – constructor for the expected token variant
        fn check<F>(
            q: char, // The quote character to test
            r: char, // An alternate quote character.
            quote_token: F,
        ) where
            F: Fn(String) -> Token,
        {
            let dialect = BigQueryDialect {};

            // (input SQL, expected raw content, expected unescaped content)
            for (sql, expected, expected_unescaped) in [
                // Empty string
                (format!(r#"{q}{q}{q}{q}{q}{q}"#), "".into(), "".into()),
                // Should not count escaped quote as end of string.
                (
                    format!(r#"{q}{q}{q}ab{q}{q}\{q}{q}cd{q}{q}{q}"#),
                    format!(r#"ab{q}{q}\{q}{q}cd"#),
                    format!(r#"ab{q}{q}{q}{q}cd"#),
                ),
                // Simple string
                (
                    format!(r#"{q}{q}{q}abc{q}{q}{q}"#),
                    "abc".into(),
                    "abc".into(),
                ),
                // Mix single-double quotes unescaped.
                (
                    format!(r#"{q}{q}{q}ab{r}{r}{r}c{r}def{r}{r}{r}{q}{q}{q}"#),
                    format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
                    format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
                ),
                // Escaped quote.
                (
                    format!(r#"{q}{q}{q}ab{q}{q}c{q}{q}\{q}de{q}{q}f{q}{q}{q}"#),
                    format!(r#"ab{q}{q}c{q}{q}\{q}de{q}{q}f"#),
                    format!(r#"ab{q}{q}c{q}{q}{q}de{q}{q}f"#),
                ),
                // backslash-escaped quote characters.
                (
                    format!(r#"{q}{q}{q}a\'\'b\'c\'d{q}{q}{q}"#),
                    r#"a\'\'b\'c\'d"#.into(),
                    r#"a''b'c'd"#.into(),
                ),
                // backslash-escaped characters
                (
                    format!(r#"{q}{q}{q}abc\0\n\rdef{q}{q}{q}"#),
                    r#"abc\0\n\rdef"#.into(),
                    "abc\0\n\rdef".into(),
                ),
            ] {
                // With unescaping disabled the escape sequences are preserved verbatim.
                let tokens = Tokenizer::new(&dialect, sql.as_str())
                    .with_unescape(false)
                    .tokenize()
                    .unwrap();
                // `expected` is already a String; pass it through without re-allocating.
                compare(vec![quote_token(expected)], tokens);

                // With unescaping enabled the backslash escapes are resolved.
                let tokens = Tokenizer::new(&dialect, sql.as_str())
                    .with_unescape(true)
                    .tokenize()
                    .unwrap();
                compare(vec![quote_token(expected_unescaped)], tokens);
            }

            // Inputs that never close the triple-quoted literal must error out.
            for sql in [
                format!(r#"{q}{q}{q}{q}{q}\{q}"#),
                format!(r#"{q}{q}{q}abc{q}{q}\{q}"#),
                format!(r#"{q}{q}{q}{q}"#),
                format!(r#"{q}{q}{q}{r}{r}"#),
                format!(r#"{q}{q}{q}abc{q}"#),
                format!(r#"{q}{q}{q}abc{q}{q}"#),
                format!(r#"{q}{q}{q}abc"#),
            ] {
                // Reuse the dialect constructed above instead of rebuilding it
                // on every iteration.
                let mut tokenizer = Tokenizer::new(&dialect, sql.as_str());
                assert_eq!(
                    "Unterminated string literal",
                    tokenizer.tokenize().unwrap_err().message.as_str(),
                );
            }
        }

        check('"', '\'', Token::TripleDoubleQuotedString);

        check('\'', '"', Token::TripleSingleQuotedString);

        let dialect = BigQueryDialect {};

        // Two adjacent empty strings with different quote characters must not be
        // confused with a triple-quote opener.
        let sql = r#"""''"#;
        let tokens = Tokenizer::new(&dialect, sql)
            .with_unescape(true)
            .tokenize()
            .unwrap();
        let expected = vec![
            Token::DoubleQuotedString("".to_string()),
            Token::SingleQuotedString("".to_string()),
        ];
        compare(expected, tokens);

        let sql = r#"''"""#;
        let tokens = Tokenizer::new(&dialect, sql)
            .with_unescape(true)
            .tokenize()
            .unwrap();
        let expected = vec![
            Token::SingleQuotedString("".to_string()),
            Token::DoubleQuotedString("".to_string()),
        ];
        compare(expected, tokens);

        // Non-triple quoted string dialect: `''''''` is one string containing
        // two escaped (doubled) single quotes, not a triple-quoted empty string.
        let dialect = SnowflakeDialect {};
        let sql = r#"''''''"#;
        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
        let expected = vec![Token::SingleQuotedString("''".to_string())];
        compare(expected, tokens);
    }