def tokenize_template()

in Utilities/gyb.py [0:0]


def tokenize_template(template_text):
    r"""Given the text of a template, returns an iterator over
    (tokenType, token, match) tuples.

    **Note**: this is template syntax tokenization, not Python
    tokenization.

    When a non-literal token is matched, a client may call
    iter.send(pos) on the iterator to reset the position in
    template_text at which scanning will resume.

    This function provides a base level of tokenization which is
    then refined by ParseContext.token_generator.

    >>> from pprint import *
    >>> pprint(list((kind, text) for kind, text, _ in tokenize_template(
    ...   '%for x in range(10):\n%  print x\n%end\njuicebox')))
    [('gybLines', '%for x in range(10):\n%  print x'),
     ('gybLinesClose', '%end'),
     ('literal', 'juicebox')]

    >>> pprint(list((kind, text) for kind, text, _ in tokenize_template(
    ... '''Nothing
    ... % if x:
    ... %    for i in range(3):
    ... ${i}
    ... %    end
    ... % else:
    ... THIS SHOULD NOT APPEAR IN THE OUTPUT
    ... ''')))
    [('literal', 'Nothing\n'),
     ('gybLines', '% if x:\n%    for i in range(3):'),
     ('substitutionOpen', '${'),
     ('literal', '\n'),
     ('gybLinesClose', '%    end'),
     ('gybLines', '% else:'),
     ('literal', 'THIS SHOULD NOT APPEAR IN THE OUTPUT\n')]

    >>> for kind, text, _ in tokenize_template('''
    ... This is $some$ literal stuff containing a ${substitution}
    ... followed by a %{...} block:
    ...   %{
    ...   # Python code
    ...   }%
    ... and here $${are} some %-lines:
    ...   % x = 1
    ...   % y = 2
    ...   % if z == 3:
    ...   %    print '${hello}'
    ...   % end
    ...   %    for x in zz:
    ...   %        print x
    ...     % # different indentation
    ... % twice
    ... and some lines that literally start with a %% token
    ... %% first line
    ...   %% second line
    ... '''):
    ...     print((kind, text.strip().split('\n',1)[0]))
    ('literal', 'This is $some$ literal stuff containing a')
    ('substitutionOpen', '${')
    ('literal', 'followed by a %{...} block:')
    ('gybBlockOpen', '%{')
    ('literal', 'and here ${are} some %-lines:')
    ('gybLines', '% x = 1')
    ('gybLinesClose', '% end')
    ('gybLines', '%    for x in zz:')
    ('gybLines', '% # different indentation')
    ('gybLines', '% twice')
    ('literal', 'and some lines that literally start with a % token')