in cli/src/generate/prepare_grammar/expand_tokens.rs [452:685]
// Table-driven test for `expand_tokens`.
//
// Each `Row` describes one lexical grammar (token rules plus separator rules)
// together with example inputs. For every example, the grammar produced by
// `expand_tokens` is run through `simulate_nfa` and the result is compared with
// the expected value: `Some((rule_index, matched_text))`, or `None` when no
// match is expected.
fn test_rule_expansion() {
    struct Row {
        // Token rules; the position of a rule in this list is the index that
        // the expected results refer to.
        rules: Vec<Rule>,
        // Separator rules handed to the grammar alongside the token rules.
        separators: Vec<Rule>,
        // Pairs of (input, expected result of `simulate_nfa`).
        examples: Vec<(&'static str, Option<(usize, &'static str)>)>,
    }

    let table = [
        // regex with sequences and alternatives
        Row {
            rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
            separators: vec![],
            examples: vec![
                ("ade1", Some((0, "ade"))),
                ("bdf1", Some((0, "bdf"))),
                ("bdfh1", Some((0, "bdfh"))),
                ("ad1", None),
            ],
        },
        // regex with repeats
        Row {
            rules: vec![Rule::pattern("a*")],
            separators: vec![],
            examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
        },
        // regex with repeats in sequences
        Row {
            rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
            separators: vec![],
            examples: vec![
                ("af1", Some((0, "af"))),
                ("adedef1", Some((0, "adedef"))),
                ("abcbcbcf1", Some((0, "abcbcbcf"))),
                ("a", None),
            ],
        },
        // regex with character ranges
        Row {
            rules: vec![Rule::pattern("[a-fA-F0-9]+")],
            separators: vec![],
            examples: vec![("A1ff0.", Some((0, "A1ff0")))],
        },
        // regex with perl character classes
        Row {
            rules: vec![Rule::pattern("\\w\\d\\s")],
            separators: vec![],
            examples: vec![("_0 ", Some((0, "_0 ")))],
        },
        // string
        Row {
            rules: vec![Rule::string("abc")],
            separators: vec![],
            examples: vec![("abcd", Some((0, "abc"))), ("ab", None)],
        },
        // complex rule containing strings and regexes
        Row {
            rules: vec![Rule::repeat(Rule::seq(vec![
                Rule::string("{"),
                Rule::pattern("[a-f]+"),
                Rule::string("}"),
            ]))],
            separators: vec![],
            examples: vec![
                ("{a}{", Some((0, "{a}"))),
                ("{a}{d", Some((0, "{a}"))),
                ("ab", None),
            ],
        },
        // longest match rule
        Row {
            rules: vec![
                Rule::pattern("a|bc"),
                Rule::pattern("aa"),
                Rule::pattern("bcd"),
            ],
            separators: vec![],
            examples: vec![
                ("a.", Some((0, "a"))),
                ("bc.", Some((0, "bc"))),
                ("aa.", Some((1, "aa"))),
                ("bcd?", Some((2, "bcd"))),
                ("b.", None),
                ("c.", None),
            ],
        },
        // regex with an alternative including the empty string
        Row {
            rules: vec![Rule::pattern("a(b|)+c")],
            separators: vec![],
            examples: vec![
                ("ac.", Some((0, "ac"))),
                ("abc.", Some((0, "abc"))),
                ("abbc.", Some((0, "abbc"))),
            ],
        },
        // separators
        Row {
            rules: vec![Rule::pattern("[a-f]+")],
            separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
            examples: vec![
                (" a", Some((0, "a"))),
                (" \nb", Some((0, "b"))),
                (" \\a", None),
                (" \\\na", Some((0, "a"))),
            ],
        },
        // shorter tokens with higher precedence
        Row {
            rules: vec![
                Rule::prec(2, Rule::pattern("abc")),
                Rule::prec(1, Rule::pattern("ab[cd]e")),
                Rule::pattern("[a-e]+"),
            ],
            separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
            examples: vec![
                ("abceef", Some((0, "abc"))),
                ("abdeef", Some((1, "abde"))),
                ("aeeeef", Some((2, "aeeee"))),
            ],
        },
        // immediate tokens with higher precedence
        Row {
            rules: vec![
                Rule::prec(1, Rule::pattern("[^a]+")),
                Rule::immediate_token(Rule::prec(2, Rule::pattern("[^ab]+"))),
            ],
            separators: vec![Rule::pattern("\\s")],
            examples: vec![("cccb", Some((1, "ccc")))],
        },
        // sequence of strings containing a choice between strings
        Row {
            rules: vec![Rule::seq(vec![
                Rule::string("a"),
                Rule::choice(vec![Rule::string("b"), Rule::string("c")]),
                Rule::string("d"),
            ])],
            separators: vec![],
            examples: vec![
                ("abd", Some((0, "abd"))),
                ("acd", Some((0, "acd"))),
                ("abc", None),
                ("ad", None),
                ("d", None),
                ("a", None),
            ],
        },
        // nested choices within sequences
        Row {
            rules: vec![Rule::seq(vec![
                Rule::pattern("[0-9]+"),
                Rule::choice(vec![
                    Rule::Blank,
                    Rule::choice(vec![Rule::seq(vec![
                        Rule::choice(vec![Rule::string("e"), Rule::string("E")]),
                        Rule::choice(vec![
                            Rule::Blank,
                            Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
                        ]),
                        Rule::pattern("[0-9]+"),
                    ])]),
                ]),
            ])],
            separators: vec![],
            examples: vec![
                ("12", Some((0, "12"))),
                ("12e", Some((0, "12"))),
                ("12g", Some((0, "12"))),
                ("12e3", Some((0, "12e3"))),
                ("12e+", Some((0, "12"))),
                ("12E+34 +", Some((0, "12E+34"))),
                ("12e34", Some((0, "12e34"))),
            ],
        },
        // nested groups
        Row {
            rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
            separators: vec![],
            examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
        },
        // allowing unrecognized escape sequences
        Row {
            rules: vec![
                // Escaped forward slash (used in JS because '/' is the regex delimiter)
                Rule::pattern(r#"\/"#),
                // Escaped quotes
                Rule::pattern(r#"\"\'"#),
                // Quote preceded by a literal backslash
                Rule::pattern(r#"[\\']+"#),
            ],
            separators: vec![],
            examples: vec![
                ("/", Some((0, "/"))),
                ("\"\'", Some((1, "\"\'"))),
                (r#"'\'a"#, Some((2, r#"'\'"#))),
            ],
        },
        // allowing un-escaped curly braces
        Row {
            rules: vec![
                // Un-escaped curly braces
                Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
                // Already-escaped curly braces
                Rule::pattern(r#"\{[ab]{3}\}"#),
                // Unicode codepoints
                Rule::pattern(r#"\u{1000A}"#),
            ],
            separators: vec![],
            examples: vec![
                ("u{1234} ok", Some((0, "u{1234}"))),
                ("{aba}}", Some((1, "{aba}"))),
                ("\u{1000A}", Some((2, "\u{1000A}"))),
            ],
        },
    ];

    // The row index is carried along purely for diagnostics, so that a failure
    // identifies the offending table entry.
    for (row_index, row) in table.iter().enumerate() {
        let grammar = expand_tokens(ExtractedLexicalGrammar {
            separators: row.separators.clone(),
            variables: row
                .rules
                .iter()
                .map(|rule| Variable::named("", rule.clone()))
                .collect(),
        })
        .unwrap_or_else(|error| panic!("row {}: expand_tokens failed: {:?}", row_index, error));

        for (haystack, needle) in &row.examples {
            assert_eq!(
                simulate_nfa(&grammar, haystack),
                *needle,
                "row {}: unexpected result for input {:?}",
                row_index,
                haystack
            );
        }
    }
}