tree-sitter-preproc/grammar.js

module.exports = grammar({ name: 'preproc', externals: $ => [ $.raw_string_literal, ], rules: { translation_unit: $ => repeat($._top_level_item), _top_level_item: $ => choice( $.string_literal, $.char_literal, $.raw_string_literal, $.comment, $.nothing, $.define, $.undef, $.preproc_if, $.preproc_include, $.preproc_nothing, $.integer_literal, ), identifier: $ => /[a-zA-Z_]\w*/, nothing: $ => token( choice( /[^R"'\/#0-9]+/, 'R', '#', '/', '\n', ) ), preproc_continuation_line: $ => token.immediate( /.*\\\r?\n/ ), preproc_line: $ => token.immediate( /.*/, ), preproc_include: $ => seq( /[ \t]*#[ \t]*include(_next)?[ \t]*/, choice( $.string_literal, seq( '<', $.path, '>', ), $.identifier, ), ), path: $ => token( /[^>]*/, ), define: $ => seq( /[ \t]*#[ \t]*define[ \t]+/, $.identifier, repeat($.preproc_continuation_line), $.preproc_line, '\n', ), preproc_if: $ => seq( /[ \t]*#[ \t]*(ifdef|ifndef|if).*\n/, repeat($._top_level_item), repeat($.preproc_elif), optional($.preproc_else), /[ \t]*#[ \t]*endif.*\n/, ), preproc_elif: $ => seq( /[ \t]*#[ \t]*elif.*\n/, repeat($._top_level_item), ), preproc_else: $ => seq( /[ \t]*#[ \t]*else.*\n/, repeat($._top_level_item), ), undef: $ => seq( /[ \t]*#[ \t]*undef[ \t]+/, $.identifier, repeat($.preproc_continuation_line), $.preproc_line, '\n', ), preproc_nothing: $ => seq( token.immediate(/[ \t]*#[ \t]*(error|pragma|line)/), repeat($.preproc_continuation_line), $.preproc_line, ), string_literal: $ => seq( /"([^\\"]|\\(.|\n))*"/, ), char_literal: $ => seq( /'([^\\']|\\(.|\n))*'/, ), integer_literal: $ => token.immediate( /[0-9]+[0-9']*/ ), // http://stackoverflow.com/questions/13014947/regex-to-match-a-c-style-multiline-comment/36328890#36328890 comment: $ => token(choice( seq('//', /(\\(.|\r?\n)|[^\\\n])*/), seq( '/*', /[^*]*\*+([^/*][^*]*\*+)*/, '/' ) )), }, });

tree-sitter-preproc/grammar.js (100 lines of code) (raw):