in courses/machine_learning/deepdive2/structured/labs/serving/application/lib/jinja2/lexer.py [0:0]
def tokeniter(self, source, name, filename=None, state=None):
"""This method tokenizes the text and returns the tokens in a
generator. Use this method if you just want to tokenize a template.
"""
source = text_type(source)
lines = source.splitlines()
if self.keep_trailing_newline and source:
for newline in ('\r\n', '\r', '\n'):
if source.endswith(newline):
lines.append('')
break
source = '\n'.join(lines)
pos = 0
lineno = 1
stack = ['root']
if state is not None and state != 'root':
assert state in ('variable', 'block'), 'invalid state'
stack.append(state + '_begin')
else:
state = 'root'
statetokens = self.rules[stack[-1]]
source_length = len(source)
balancing_stack = []
while 1:
# tokenizer loop
for regex, tokens, new_state in statetokens:
m = regex.match(source, pos)
# if no match we try again with the next rule
if m is None:
continue
# we only match blocks and variables if braces / parentheses
# are balanced. continue parsing with the lower rule which
# is the operator rule. do this only if the end tags look
# like operators
if balancing_stack and \
tokens in ('variable_end', 'block_end',
'linestatement_end'):
continue
# tuples support more options
if isinstance(tokens, tuple):
for idx, token in enumerate(tokens):
# failure group
if token.__class__ is Failure:
raise token(lineno, filename)
# bygroup is a bit more complex, in that case we
# yield for the current token the first named
# group that matched
elif token == '#bygroup':
for key, value in iteritems(m.groupdict()):
if value is not None:
yield lineno, key, value
lineno += value.count('\n')
break
else:
raise RuntimeError('%r wanted to resolve '
'the token dynamically'
' but no group matched'
% regex)
# normal group
else:
data = m.group(idx + 1)
if data or token not in ignore_if_empty:
yield lineno, token, data
lineno += data.count('\n')
# strings as token just are yielded as it.
else:
data = m.group()
# update brace/parentheses balance
if tokens == 'operator':
if data == '{':
balancing_stack.append('}')
elif data == '(':
balancing_stack.append(')')
elif data == '[':
balancing_stack.append(']')
elif data in ('}', ')', ']'):
if not balancing_stack:
raise TemplateSyntaxError('unexpected \'%s\'' %
data, lineno, name,
filename)
expected_op = balancing_stack.pop()
if expected_op != data:
raise TemplateSyntaxError('unexpected \'%s\', '
'expected \'%s\'' %
(data, expected_op),
lineno, name,
filename)
# yield items
if data or tokens not in ignore_if_empty:
yield lineno, tokens, data
lineno += data.count('\n')
# fetch new position into new variable so that we can check
# if there is a internal parsing error which would result
# in an infinite loop
pos2 = m.end()
# handle state changes
if new_state is not None:
# remove the uppermost state
if new_state == '#pop':
stack.pop()
# resolve the new state by group checking
elif new_state == '#bygroup':
for key, value in iteritems(m.groupdict()):
if value is not None:
stack.append(key)
break
else:
raise RuntimeError('%r wanted to resolve the '
'new state dynamically but'
' no group matched' %
regex)
# direct state name given
else:
stack.append(new_state)
statetokens = self.rules[stack[-1]]
# we are still at the same position and no stack change.
# this means a loop without break condition, avoid that and
# raise error
elif pos2 == pos:
raise RuntimeError('%r yielded empty string without '
'stack change' % regex)
# publish new function and start again
pos = pos2
break
# if loop terminated without break we haven't found a single match
# either we are at the end of the file or we have a problem
else:
# end of text
if pos >= source_length:
return
# something went wrong
raise TemplateSyntaxError('unexpected char %r at %d' %
(source[pos], pos), lineno,
name, filename)