src/core/rule/rule_lexer.mll (129 lines of code) (raw):
(* Copyright (c) 2017 Uber Technologies, Inc. *)
(* *)
(* Permission is hereby granted, free of charge, to any person obtaining a copy *)
(* of this software and associated documentation files (the "Software"), to deal *)
(* in the Software without restriction, including without limitation the rights *)
(* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell *)
(* copies of the Software, and to permit persons to whom the Software is *)
(* furnished to do so, subject to the following conditions: *)
(* *)
(* The above copyright notice and this permission notice shall be included in *)
(* all copies or substantial portions of the Software. *)
(* *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *)
(* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, *)
(* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN *)
(* THE SOFTWARE. *)
{
open Rule_parser
}
(* Main entry point of the lexer: returns the next Rule_parser token.

   ocamllex selects the longest match and, when two rules match the same
   length, the rule listed first. The order below relies on that:
   - keywords and the boolean literals precede the identifier rule;
   - GTGT, GT, AND and OR precede the generic operator rule;
   - the line-comment and block-comment openers are longer than the lone
     slash that starts a regular-expression literal.

   Raises Neal.Rule.SyntaxError on an unrecognised character and on an
   integer literal that does not fit in a native int. *)
rule read = parse
(* Keywords *)
| "rule" { RULE }
| "where" { WHERE }
| "tests" { TESTS }
| "expect_ok" { EXPECT_OK }
| "expect_warn" { EXPECT_WARN }
| "expect_fail" { EXPECT_FAIL }
| "from_file" { FROM_FILE }
| "import" { IMPORT }
| "from" { FROM }
| "var" { VAR }
| "condition" { CONDITION }
(* Punctuation *)
| "{" { LBRACE }
| "}" { RBRACE }
| "(" { LPAREN }
| ")" { RPAREN }
| "::" { COLONCOLON }
| ":" { COLON }
| "," { COMMA }
| "." { DOT }
| "*" { STAR }
| ":=" { ASGN }
| ">>" { GTGT }
| ">" { GT }
(* Literals and identifiers *)
| "true" { BOOL(true) }
| "false" { BOOL(false) }
| ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']* { IDENTIFIER(Lexing.lexeme lexbuf) }
| '"' { read_string (Buffer.create 17) lexbuf }
(* Operators: the dedicated tokens win ties against the generic OP rule *)
| "&&" { AND }
| "||" { OR }
| ['!' '=' '<' '>' '|' '&']+ { OP(Lexing.lexeme lexbuf) }
| ['0' - '9']+ {
    let digits = Lexing.lexeme lexbuf in
    (* A digit-only string can only make int_of_string fail by overflowing;
       report that as a syntax error instead of leaking a bare Failure. *)
    let value =
      try int_of_string digits
      with Failure _ ->
        raise (Neal.Rule.SyntaxError("Integer literal out of range: '" ^ digits ^ "'"))
    in
    INT(value)
  }
(* Comments *)
| "//" [^'\n']* { read lexbuf }
| "/*" { multiline_comment lexbuf }
| eof { EOF }
(* Whitespace: newlines update the position info, everything else is skipped.
   Tabs and carriage returns are accepted so that tab-indented files and
   files with CRLF line endings lex correctly. *)
| '\n' { Lexing.new_line lexbuf; read lexbuf }
| [' ' '\t' '\r']+ { read lexbuf }
(* A lone slash opens a regular-expression literal *)
| '/' { regexp (Buffer.create 32) lexbuf }
| _ { raise (Neal.Rule.SyntaxError("Unknown token: '" ^ Lexing.lexeme lexbuf ^ "'")) }
(* Reads the body of a regular-expression literal into buf and returns a
   REGEXP token holding the compiled Str regexp. The opening slash has already
   been consumed by the read rule.

   The literal ends at an unescaped slash. Because ocamllex prefers the longest
   match, a closing slash immediately followed by the letter i selects the
   case-insensitive variant.

   Grouping and alternation are translated to Str syntax: in Str the
   backslash-prefixed forms of the parentheses and the bar are the special
   ones, so the bare characters are emitted with a backslash and the escaped
   characters are emitted bare (literal).

   Raises Neal.Rule.SyntaxError when the input ends before the closing
   slash. *)
and regexp buf = parse
| "/i" { REGEXP(Str.regexp_case_fold (Buffer.contents buf)) }
(* Escape sequences that stand for a single control character *)
| "\\n" {
    Buffer.add_char buf '\n';
    regexp buf lexbuf
  }
| "\\r" {
    Buffer.add_char buf '\r';
    regexp buf lexbuf
  }
| "\\t" {
    Buffer.add_char buf '\t';
    regexp buf lexbuf
  }
| "\\\\" {
    Buffer.add_char buf '\\';
    regexp buf lexbuf
  }
(* An escaped slash is a literal slash rather than the end of the literal *)
| "\\/" {
    Buffer.add_char buf '/';
    regexp buf lexbuf
  }
(* Escaped metacharacter: emit the bare character *)
| "\\(" {
    Buffer.add_char buf '(';
    regexp buf lexbuf
  }
(* Bare metacharacter: emit the backslash-prefixed form *)
| "(" {
    Buffer.add_string buf "\\(";
    regexp buf lexbuf
  }
| "\\)" {
    Buffer.add_char buf ')';
    regexp buf lexbuf
  }
| ")" {
    Buffer.add_string buf "\\)";
    regexp buf lexbuf
  }
| "\\|" {
    Buffer.add_char buf '|';
    regexp buf lexbuf
  }
| "|" {
    Buffer.add_string buf "\\|";
    regexp buf lexbuf
  }
| '/' {
    REGEXP(Str.regexp (Buffer.contents buf))
  }
(* A raw newline is still copied into the pattern, but the line counter is
   advanced so that positions reported after the literal stay accurate. *)
| '\n' {
    Lexing.new_line lexbuf;
    Buffer.add_char buf '\n';
    regexp buf lexbuf
  }
(* Without this case, hitting end of input here fails with the generic
   ocamllex empty-token Failure instead of a syntax error. *)
| eof { raise (Neal.Rule.SyntaxError("Regular expression is not terminated")) }
(* Any other character is copied through unchanged *)
| _ {
    Buffer.add_char buf (Lexing.lexeme_char lexbuf 0);
    regexp buf lexbuf
  }
(* Skips the body of a C-style block comment whose opener has already been
   consumed, then resumes normal lexing with the read rule. Newlines inside
   the comment still advance the line counter.

   Raises Neal.Rule.SyntaxError when the input ends before the comment is
   closed; without the eof case ocamllex would fail with its generic
   empty-token Failure. *)
and multiline_comment = parse
| "*/" { read lexbuf }
| '\n' { Lexing.new_line lexbuf; multiline_comment lexbuf }
| eof { raise (Neal.Rule.SyntaxError("Comment is not terminated")) }
| _ { multiline_comment lexbuf }
(* Collects the characters of a double-quoted string literal into buf and
   returns them as a STRING token once the closing quote is reached. The
   opening quote is consumed by the read rule before this rule runs.

   The only escape sequence understood is a backslash followed by a double
   quote, which contributes a single double quote. A newline or end of input
   before the closing quote, or any other use of a backslash, raises
   Neal.Rule.SyntaxError. *)
and read_string buf = parse
  (* Escaped quote: keep the quote, drop the backslash *)
  | "\\\"" {
      Buffer.add_char buf '"';
      read_string buf lexbuf
    }
  (* Closing quote: the literal is complete *)
  | '"' {
      STRING (Buffer.contents buf)
    }
  (* Longest run of ordinary characters, appended in one go *)
  | [^ '"' '\\' '\n']+ as chunk {
      Buffer.add_string buf chunk;
      read_string buf lexbuf
    }
  | '\n' | eof {
      raise (Neal.Rule.SyntaxError ("String is not terminated"))
    }
  (* Anything left over, i.e. a backslash not followed by a quote *)
  | _ as bad {
      raise (Neal.Rule.SyntaxError ("Illegal string character: " ^ String.make 1 bad))
    }