nubia/internal/ui/lexer.py (145 lines of code) (raw):

#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#

import re

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Keyword, Name, Number, Operator, Punctuation, String, Text

from nubia import context
from nubia.internal import parser

# An identifier: a letter or underscore followed by letters, digits,
# underscores or dashes.
_identifier = r"[a-zA-Z_][a-zA-Z0-9_\-]*"
_unquoted_string = "([a-zA-Z0-9{}]+)".format(
    re.escape(parser.allowed_symbols_in_string)
)
# A command name: an identifier with an optional leading colon. The group is
# capturing on purpose, command_callback reads it through match.group().
_command = r"(:?[a-zA-Z_][a-zA-Z0-9_\-]*)"


def command_callback(lexer, match):
    """
    Pygments rule callback that highlights a command (and, optionally, the
    word following it) based on the command registry of the current context.

    The callback is used with two patterns: one with two groups
    (command, whitespace) and one with four groups
    (command, whitespace, second word, whitespace).

    Emitted tokens:
      * the command is Name.Command when the registry knows it, otherwise
        Name.InvalidCommand;
      * for the four-group pattern, the second word is Name.SubCommand when
        the command is a super command that has this sub-command,
        Name.Symbol (a positional argument) when the command is known but is
        not a super command, and Name.InvalidCommand in every other case;
      * whitespace groups are emitted as Text.

    Args:
        lexer: the lexer instance invoking the callback (unused).
        match: the regular expression match object for the rule.

    Yields:
        (index, token_type, value) tuples, as expected by RegexLexer.
    """
    command = match.group(1)

    # We do need to know whether we are parsing two groups (command) or four
    # (command with subcommand)
    command_with_argument = len(match.groups()) > 2

    ctx = context.get_context()
    cmd = ctx.registry.find_command(command.strip())

    # Assume nothing is known until the registry lookup says otherwise
    command_token = Name.InvalidCommand
    subcommand_token = Name.InvalidCommand
    if cmd:
        # We know this command
        command_token = Name.Command
        # Now, let's see if this is a super command or not.
        if command_with_argument:
            if cmd.super_command:
                # That's a sub-command, is this a valid sub-command?
                # An unknown sub-command keeps the InvalidCommand token.
                subcmd = match.group(3)
                if cmd.has_subcommand(subcmd):
                    subcommand_token = Name.SubCommand
            else:
                # Just a positional
                subcommand_token = Name.Symbol

    yield (match.start(1), command_token, command)
    # matches the spaces
    yield (match.start(2), Text, match.group(2))
    if command_with_argument:
        yield (match.start(3), subcommand_token, match.group(3))
        # matches the spaces
        yield (match.start(4), Text, match.group(4))


class NubiaLexer(RegexLexer):
    """
    Regex based lexer for the Nubia interactive shell.

    The "root" state highlights commands, sub-commands, key=value arguments
    and literals. A line starting with SELECT switches the lexer to the
    "query" state, which highlights SQL keywords, builtin type names,
    operators and literals.
    """

    name = "Nubia Interactive Lexer"
    filenames = ["*.nubia"]
    # All patterns below are matched case-insensitively
    flags = re.IGNORECASE

    tokens = {
        str("root"): [
            # We want to change the state of the lexer if the first word is
            # select so that we can have the sql lexer
            (r"^SELECT\s", Name.Command, str("query")),
            (r"\s+", Text),
            (r"^(\?|help)\s*$", Name.Help),
            (r"^(q|quit|exit)\s*$", Name.Exit),
            # key=value argument: the key and the "=" operator
            (r"(" + _identifier + r")(\s*=\s*)", bygroups(Name.Key, Operator)),
            # Command with Subcommands (or with a positional argument)
            (r"^" + _command + r"(\s+)" + _command + r"(\s+)", command_callback),
            # Commands
            (r"^" + _command + r"(\s+|$)", command_callback),
            # Booleans must be tried before the generic identifier rule,
            # which would otherwise always win and make this rule
            # unreachable. The lookahead keeps identifiers that merely start
            # with a boolean word (e.g. "trueish", "true-x") intact.
            (r"(True|False|true|false)(?![a-zA-Z0-9_\-])", Keyword),
            (r"(" + _identifier + r")(\s*)", Name.Symbol),
            (r"\-?[0-9]+", Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            # not a real string literal in ANSI SQL
            (r'"(""|[^"])*"', String.Symbol),
            (r"[;:()\[\],\.]", Punctuation),
        ],
        str("query"): [
            (r"\s+", Text),
            (
                r"(ABORT|ABS|ABSOLUTE|ACCESS|ADA|ADD|ADMIN|AFTER|AGGREGATE|"
                r"ALIAS|ALL|ALLOCATE|ALTER|ANALYSE|ANALYZE|AND|ANY|ARE|AS|"
                r"ASC|ASENSITIVE|ASSERTION|ASSIGNMENT|ASYMMETRIC|AT|ATOMIC|"
                r"AUTHORIZATION|AVG|BACKWARD|BEFORE|BEGIN|BETWEEN|BITVAR|"
                r"BIT_LENGTH|BOTH|BREADTH|BY|C|CACHE|CALL|CALLED|CARDINALITY|"
                r"CASCADE|CASCADED|CASE|CAST|CATALOG|CATALOG_NAME|CHAIN|"
                r"CHARACTERISTICS|CHARACTER_LENGTH|CHARACTER_SET_CATALOG|"
                r"CHARACTER_SET_NAME|CHARACTER_SET_SCHEMA|CHAR_LENGTH|CHECK|"
                r"CHECKED|CHECKPOINT|CLASS|CLASS_ORIGIN|CLOB|CLOSE|CLUSTER|"
                r"COALESCE|COBOL|COLLATE|COLLATION|COLLATION_CATALOG|"
                r"COLLATION_NAME|COLLATION_SCHEMA|COLUMN|COLUMN_NAME|"
                r"COMMAND_FUNCTION|COMMAND_FUNCTION_CODE|COMMENT|COMMIT|"
                r"COMMITTED|COMPLETION|CONDITION_NUMBER|CONNECT|CONNECTION|"
                r"CONNECTION_NAME|CONSTRAINT|CONSTRAINTS|CONSTRAINT_CATALOG|"
                r"CONSTRAINT_NAME|CONSTRAINT_SCHEMA|CONSTRUCTOR|CONTAINS|"
                r"CONTINUE|CONVERSION|CONVERT|COPY|CORRESPONDING|COUNT|"
                r"CREATE|CREATEDB|CREATEUSER|CROSS|CUBE|CURRENT|CURRENT_DATE|"
                r"CURRENT_PATH|CURRENT_ROLE|CURRENT_TIME|CURRENT_TIMESTAMP|"
                r"CURRENT_USER|CURSOR|CURSOR_NAME|CYCLE|DATA|DATABASE|"
                r"DATETIME_INTERVAL_CODE|DATETIME_INTERVAL_PRECISION|DAY|"
                r"DEALLOCATE|DECLARE|DEFAULT|DEFAULTS|DEFERRABLE|DEFERRED|"
                r"DEFINED|DEFINER|DELETE|DELIMITER|DELIMITERS|DEREF|DESC|"
                r"DESCRIBE|DESCRIPTOR|DESTROY|DESTRUCTOR|DETERMINISTIC|"
                r"DIAGNOSTICS|DICTIONARY|DISCONNECT|DISPATCH|DISTINCT|DO|"
                r"DOMAIN|DROP|DYNAMIC|DYNAMIC_FUNCTION|DYNAMIC_FUNCTION_CODE|"
                # END-EXEC has to precede END: "\b" succeeds between "END"
                # and "-", so the shorter alternative would always win.
                r"EACH|ELSE|ENCODING|ENCRYPTED|END-EXEC|END|EQUALS|ESCAPE|EVERY|"
                r"EXCEPT|EXCEPTION|EXCLUDING|EXCLUSIVE|EXEC|EXECUTE|EXISTING|"
                r"EXISTS|EXPLAIN|EXTERNAL|EXTRACT|FALSE|FETCH|FINAL|FIRST|FOR|"
                r"FORCE|FOREIGN|FORTRAN|FORWARD|FOUND|FREE|FREEZE|FROM|FULL|"
                r"FUNCTION|G|GENERAL|GENERATED|GET|GLOBAL|GO|GOTO|GRANT|GRANTED|"
                r"GROUP|GROUPING|HANDLER|HAVING|HIERARCHY|HOLD|HOST|IDENTITY|"
                r"IGNORE|ILIKE|IMMEDIATE|IMMUTABLE|IMPLEMENTATION|IMPLICIT|IN|"
                r"INCLUDING|INCREMENT|INDEX|INDICATOR|INFIX|INHERITS|INITIALIZE|"
                r"INITIALLY|INNER|INOUT|INPUT|INSENSITIVE|INSERT|INSTANTIABLE|"
                r"INSTEAD|INTERSECT|INTO|INVOKER|IS|ISNULL|ISOLATION|ITERATE|JOIN|"
                r"KEY|KEY_MEMBER|KEY_TYPE|LANCOMPILER|LANGUAGE|LARGE|LAST|"
                r"LATERAL|LEADING|LEFT|LENGTH|LESS|LEVEL|LIKE|LIMIT|LISTEN|LOAD|"
                r"LOCAL|LOCALTIME|LOCALTIMESTAMP|LOCATION|LOCATOR|LOCK|LOWER|"
                r"MAP|MATCH|MAX|MAXVALUE|MESSAGE_LENGTH|MESSAGE_OCTET_LENGTH|"
                r"MESSAGE_TEXT|METHOD|MIN|MINUTE|MINVALUE|MOD|MODE|MODIFIES|"
                r"MODIFY|MONTH|MORE|MOVE|MUMPS|NAMES|NATIONAL|NATURAL|NCHAR|"
                r"NCLOB|NEW|NEXT|NO|NOCREATEDB|NOCREATEUSER|NONE|NOT|NOTHING|"
                r"NOTIFY|NOTNULL|NULL|NULLABLE|NULLIF|OBJECT|OCTET_LENGTH|OF|OFF|"
                r"OFFSET|OIDS|OLD|ON|ONLY|OPEN|OPERATION|OPERATOR|OPTION|OPTIONS|"
                r"OR|ORDER|ORDINALITY|OUT|OUTER|OUTPUT|OVERLAPS|OVERLAY|OVERRIDING|"
                r"OWNER|PAD|PARAMETER|PARAMETERS|PARAMETER_MODE|PARAMETER_NAME|"
                r"PARAMETER_ORDINAL_POSITION|PARAMETER_SPECIFIC_CATALOG|"
                r"PARAMETER_SPECIFIC_NAME|PARAMETER_SPECIFIC_SCHEMA|PARTIAL|"
                r"PASCAL|PENDANT|PLACING|PLI|POSITION|POSTFIX|PRECISION|PREFIX|"
                r"PREORDER|PREPARE|PRESERVE|PRIMARY|PRIOR|PRIVILEGES|PROCEDURAL|"
                r"PROCEDURE|PUBLIC|READ|READS|RECHECK|RECURSIVE|REF|REFERENCES|"
                r"REFERENCING|REINDEX|RELATIVE|RENAME|REPEATABLE|REPLACE|RESET|"
                r"RESTART|RESTRICT|RESULT|RETURN|RETURNED_LENGTH|"
                r"RETURNED_OCTET_LENGTH|RETURNED_SQLSTATE|RETURNS|REVOKE|RIGHT|"
                r"ROLE|ROLLBACK|ROLLUP|ROUTINE|ROUTINE_CATALOG|ROUTINE_NAME|"
                r"ROUTINE_SCHEMA|ROW|ROWS|ROW_COUNT|RULE|SAVE_POINT|SCALE|SCHEMA|"
                r"SCHEMA_NAME|SCOPE|SCROLL|SEARCH|SECOND|SECURITY|SELECT|SELF|"
                r"SENSITIVE|SERIALIZABLE|SERVER_NAME|SESSION|SESSION_USER|SET|"
                r"SETOF|SETS|SHARE|SHOW|SIMILAR|SIMPLE|SIZE|SOME|SOURCE|SPACE|"
                r"SPECIFIC|SPECIFICTYPE|SPECIFIC_NAME|SQL|SQLCODE|SQLERROR|"
                r"SQLEXCEPTION|SQLSTATE|SQLWARNING|STABLE|START|STATE|STATEMENT|"
                r"STATIC|STATISTICS|STDIN|STDOUT|STORAGE|STRICT|STRUCTURE|STYPE|"
                r"SUBCLASS_ORIGIN|SUBLIST|SUBSTRING|SUM|SYMMETRIC|SYSID|SYSTEM|"
                r"SYSTEM_USER|TABLE|TABLE_NAME|TEMP|TEMPLATE|TEMPORARY|TERMINATE|"
                r"THAN|THEN|TIMESTAMP|TIMEZONE_HOUR|TIMEZONE_MINUTE|TO|TOAST|"
                r"TRAILING|TRANSACTION|TRANSACTIONS_COMMITTED|"
                r"TRANSACTIONS_ROLLED_BACK|TRANSACTION_ACTIVE|TRANSFORM|"
                r"TRANSFORMS|TRANSLATE|TRANSLATION|TREAT|TRIGGER|TRIGGER_CATALOG|"
                r"TRIGGER_NAME|TRIGGER_SCHEMA|TRIM|TRUE|TRUNCATE|TRUSTED|TYPE|"
                r"UNCOMMITTED|UNDER|UNENCRYPTED|UNION|UNIQUE|UNKNOWN|UNLISTEN|"
                r"UNNAMED|UNNEST|UNTIL|UPDATE|UPPER|USAGE|USER|"
                r"USER_DEFINED_TYPE_CATALOG|USER_DEFINED_TYPE_NAME|"
                r"USER_DEFINED_TYPE_SCHEMA|USING|VACUUM|VALID|VALIDATOR|VALUES|"
                r"VARIABLE|VERBOSE|VERSION|VIEW|VOLATILE|WHEN|WHENEVER|WHERE|"
                r"WITH|WITHOUT|WORK|WRITE|YEAR|ZONE)\b",
                Keyword,
            ),
            (
                r"(ARRAY|BIGINT|BINARY|BIT|BLOB|BOOLEAN|CHAR|CHARACTER|DATE|"
                r"DEC|DECIMAL|FLOAT|INT|INTEGER|INTERVAL|NUMBER|NUMERIC|REAL|"
                r"SERIAL|SMALLINT|VARCHAR|VARYING|INT8|SERIAL8|TEXT)\b",
                Name.Builtin,
            ),
            (r"[+*/<>=~!@#%^&|`?-]", Operator),
            (r"[0-9]+", Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            # not a real string literal in ANSI SQL
            (r'"(""|[^"])*"', String.Symbol),
            (r"[a-zA-Z_][a-zA-Z0-9_]*", Name),
            (r"[;:()\[\],\.]", Punctuation),
        ],
    }