tree-sitter-preproc/grammar.js (100 lines of code) (raw):
module.exports = grammar({
name: 'preproc',
externals: $ => [
$.raw_string_literal,
],
rules: {
translation_unit: $ => repeat($._top_level_item),
_top_level_item: $ => choice(
$.string_literal,
$.char_literal,
$.raw_string_literal,
$.comment,
$.nothing,
$.define,
$.undef,
$.preproc_if,
$.preproc_include,
$.preproc_nothing,
$.integer_literal,
),
identifier: $ => /[a-zA-Z_]\w*/,
nothing: $ => token(
choice(
/[^R"'\/#0-9]+/,
'R',
'#',
'/',
'\n',
)
),
preproc_continuation_line: $ => token.immediate(
/.*\\\r?\n/
),
preproc_line: $ => token.immediate(
/.*/,
),
preproc_include: $ => seq(
/[ \t]*#[ \t]*include(_next)?[ \t]*/,
choice(
$.string_literal,
seq(
'<',
$.path,
'>',
),
$.identifier,
),
),
path: $ => token(
/[^>]*/,
),
define: $ => seq(
/[ \t]*#[ \t]*define[ \t]+/,
$.identifier,
repeat($.preproc_continuation_line),
$.preproc_line,
'\n',
),
preproc_if: $ => seq(
/[ \t]*#[ \t]*(ifdef|ifndef|if).*\n/,
repeat($._top_level_item),
repeat($.preproc_elif),
optional($.preproc_else),
/[ \t]*#[ \t]*endif.*\n/,
),
preproc_elif: $ => seq(
/[ \t]*#[ \t]*elif.*\n/,
repeat($._top_level_item),
),
preproc_else: $ => seq(
/[ \t]*#[ \t]*else.*\n/,
repeat($._top_level_item),
),
undef: $ => seq(
/[ \t]*#[ \t]*undef[ \t]+/,
$.identifier,
repeat($.preproc_continuation_line),
$.preproc_line,
'\n',
),
preproc_nothing: $ => seq(
token.immediate(/[ \t]*#[ \t]*(error|pragma|line)/),
repeat($.preproc_continuation_line),
$.preproc_line,
),
string_literal: $ => seq(
/"([^\\"]|\\(.|\n))*"/,
),
char_literal: $ => seq(
/'([^\\']|\\(.|\n))*'/,
),
integer_literal: $ => token.immediate(
/[0-9]+[0-9']*/
),
// http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890
comment: $ => token(choice(
seq('//', /(\\(.|\r?\n)|[^\\\n])*/),
seq(
'/*',
/[^*]*\*+([^/*][^*]*\*+)*/,
'/'
)
)),
},
});