in Utilities/gyb.py [0:0]
def tokenize_template(template_text):
r"""Given the text of a template, returns an iterator over
(tokenType, token, match) tuples.
**Note**: this is template syntax tokenization, not Python
tokenization.
When a non-literal token is matched, a client may call
iter.send(pos) on the iterator to reset the position in
template_text at which scanning will resume.
This function provides a base level of tokenization which is
then refined by ParseContext.token_generator.
>>> from pprint import *
>>> pprint(list((kind, text) for kind, text, _ in tokenize_template(
... '%for x in range(10):\n% print x\n%end\njuicebox')))
[('gybLines', '%for x in range(10):\n% print x'),
('gybLinesClose', '%end'),
('literal', 'juicebox')]
>>> pprint(list((kind, text) for kind, text, _ in tokenize_template(
... '''Nothing
... % if x:
... % for i in range(3):
... ${i}
... % end
... % else:
... THIS SHOULD NOT APPEAR IN THE OUTPUT
... ''')))
[('literal', 'Nothing\n'),
('gybLines', '% if x:\n% for i in range(3):'),
('substitutionOpen', '${'),
('literal', '\n'),
('gybLinesClose', '% end'),
('gybLines', '% else:'),
('literal', 'THIS SHOULD NOT APPEAR IN THE OUTPUT\n')]
>>> for kind, text, _ in tokenize_template('''
... This is $some$ literal stuff containing a ${substitution}
... followed by a %{...} block:
... %{
... # Python code
... }%
... and here $${are} some %-lines:
... % x = 1
... % y = 2
... % if z == 3:
... % print '${hello}'
... % end
... % for x in zz:
... % print x
... % # different indentation
... % twice
... and some lines that literally start with a %% token
... %% first line
... %% second line
... '''):
... print((kind, text.strip().split('\n',1)[0]))
('literal', 'This is $some$ literal stuff containing a')
('substitutionOpen', '${')
('literal', 'followed by a %{...} block:')
('gybBlockOpen', '%{')
('literal', 'and here ${are} some %-lines:')
('gybLines', '% x = 1')
('gybLinesClose', '% end')
('gybLines', '% for x in zz:')
('gybLines', '% # different indentation')
('gybLines', '% twice')
('literal', 'and some lines that literally start with a % token')