library/compiler/symbols.py (500 lines of code) (raw):
# Portions copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
# pyre-unsafe
"""Module symbol-table generator"""
from __future__ import print_function
import ast
import sys
from .consts import (
SC_CELL,
SC_FREE,
SC_GLOBAL_EXPLICIT,
SC_GLOBAL_IMPLICIT,
SC_LOCAL,
SC_UNKNOWN,
)
from .misc import mangle
from .visitor import ASTVisitor
if sys.version_info[0] >= 3:
long = int
MANGLE_LEN = 256
DEF_NORMAL = 1
DEF_COMP_ITER = 2
class Scope:
# XXX how much information do I need about each name?
def __init__(self, name, module, klass=None, lineno=0):
self.name = name
self.module = module
self.lineno = lineno
self.defs = {}
self.uses = {}
self.globals = {}
self.explicit_globals = {}
self.nonlocals = {}
self.params = {}
self.frees = {}
self.cells = {}
self.children = []
self.parent = None
self.coroutine = False
self.comp_iter_target = self.comp_iter_expr = 0
# nested is true if the class could contain free variables,
# i.e. if it is nested within another function.
self.nested = None
# It's possible to define a scope (class, function) at the nested level,
# but explicitly mark it as global. Bytecode-wise, this is handled
# automagically, but we need to generate proper __qualname__ for these.
self.global_scope = False
self.generator = False
self.klass = None
self.suppress_jit = False
if klass is not None:
for i in range(len(klass)):
if klass[i] != "_":
self.klass = klass[i:]
break
def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, self.name)
def mangle(self, name):
if self.klass is None:
return name
return mangle(name, self.klass)
def add_def(self, name, kind=DEF_NORMAL):
mangled = self.mangle(name)
self.defs[mangled] = kind | self.defs.get(mangled, 1)
def add_use(self, name):
self.uses[self.mangle(name)] = 1
def add_global(self, name):
name = self.mangle(name)
if name in self.uses or name in self.defs:
pass # XXX warn about global following def/use
if name in self.params:
raise SyntaxError("%s in %s is global and parameter" % (name, self.name))
self.explicit_globals[name] = 1
self.module.add_def(name)
# Seems to be behavior of Py3.5, "global foo" sets foo as
# explicit global for module too
self.module.explicit_globals[name] = 1
def add_free(self, name):
self.add_frees([name])
def add_param(self, name):
name = self.mangle(name)
self.defs[name] = 1
self.params[name] = 1
def get_names(self):
d = {}
d.update(self.defs)
d.update(self.uses)
d.update(self.globals)
return d.keys()
def add_child(self, child):
self.children.append(child)
child.parent = self
def get_children(self):
return self.children
def DEBUG(self):
print(self.name, self.nested and "nested" or "")
print("\tglobals: ", self.globals)
print("\texplicit_globals: ", self.explicit_globals)
print("\tcells: ", self.cells)
print("\tdefs: ", self.defs)
print("\tuses: ", self.uses)
print("\tfrees:", self.frees)
def check_name(self, name):
"""Return scope of name.
The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.
"""
if name in self.explicit_globals:
return SC_GLOBAL_EXPLICIT
if name in self.globals:
return SC_GLOBAL_IMPLICIT
if name in self.cells:
return SC_CELL
if name in self.frees:
return SC_FREE
if name in self.defs:
return SC_LOCAL
if self.nested and name in self.uses:
return SC_FREE
if self.nested:
return SC_UNKNOWN
else:
return SC_GLOBAL_IMPLICIT
def get_free_vars(self):
if not self.nested:
# If we're not nested we can't possibly have any free variables,
# as we can't close over class variables. The exception to this
# rule is __class__, which we indeed can close over.
if "__class__" in self.frees:
return ["__class__"]
return []
free = {}
free.update(self.frees)
for name in self.uses.keys():
if name not in self.defs and name not in self.globals:
free[name] = 1
return sorted(free.keys())
def handle_children(self):
for child in self.children:
if child.name in self.explicit_globals:
child.global_scope = True
if child.nested:
frees = child.get_free_vars()
globals = self.add_frees(frees)
for name in globals:
child.force_global(name)
elif "__class__" in child.frees:
self.add_frees(["__class__"])
elif "__class__" in child.uses and "__class__" not in child.defs:
child.frees = {"__class__"}
self.add_frees(["__class__"])
def force_global(self, name):
"""Force name to be global in scope.
Some child of the current node had a free reference to name.
When the child was processed, it was labelled a free
variable. Now that all its enclosing scope have been
processed, the name is known to be a global or builtin. So
walk back down the child chain and set the name to be global
rather than free.
Be careful to stop if a child does not think the name is
free.
"""
self.globals[name] = 1
if name in self.frees:
del self.frees[name]
for child in self.children:
if child.check_name(name) == SC_FREE:
child.force_global(name)
def add_frees(self, names):
"""Process list of free vars from nested scope.
Returns a list of names that are either 1) declared global in the
parent or 2) undefined in a top-level parent. In either case,
the nested scope should treat them as globals.
"""
child_globals = []
for name in names:
sc = self.check_name(name)
if self.nested:
if name == "__class__":
self.cells[name] = 1
elif sc == SC_UNKNOWN or sc == SC_FREE or isinstance(self, ClassScope):
self.frees[name] = 1
elif sc == SC_GLOBAL_IMPLICIT:
child_globals.append(name)
elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
self.cells[name] = 1
elif sc != SC_CELL:
child_globals.append(name)
else:
if name == "__class__":
if isinstance(self, ClassScope):
self.cells[name] = 1
continue
elif self.findParentClass() is not None:
self.frees[name] = 1
continue
if sc == SC_LOCAL:
self.cells[name] = 1
elif sc != SC_CELL:
child_globals.append(name)
return child_globals
def get_cell_vars(self):
return sorted(self.cells.keys())
def findParentClass(self):
parent = self.parent
while not isinstance(parent, ClassScope):
if parent is None:
break
parent = parent.parent
return parent
class ModuleScope(Scope):
__super_init = Scope.__init__
def __init__(self):
# Set lineno to 0 so it sorted guaranteedly before any other scope
self.__super_init("global", self, lineno=0)
class FunctionScope(Scope):
pass
class GenExprScope(FunctionScope):
__super_init = Scope.__init__
__counter = 1
def __init__(self, module, klass=None, name="<genexpr>", lineno=0):
self.__counter += 1
self.__super_init(name, module, klass, lineno=lineno)
self.add_param(".0")
class LambdaScope(FunctionScope):
__super_init = Scope.__init__
__counter = 1
def __init__(self, module, klass=None, lineno=0):
self.__counter += 1
self.__super_init("<lambda>", module, klass, lineno=lineno)
class ClassScope(Scope):
__super_init = Scope.__init__
def __init__(self, name, module, lineno=0):
self.__super_init(name, module, name, lineno=lineno)
class SymbolVisitor(ASTVisitor):
def __init__(self):
super().__init__()
self.scopes: Dict[ast.AST, Scope] = {}
self.klass = None
# node that define new scopes
def visitModule(self, node):
scope = self.module = self.scopes[node] = ModuleScope()
self.visit(node.body, scope)
def visitInteractive(self, node):
scope = self.module = self.scopes[node] = ModuleScope()
self.visit(node.body, scope)
visitExpression = visitModule
def visitFunctionDef(self, node, parent):
if node.decorator_list:
self.visit(node.decorator_list, parent)
parent.add_def(node.name)
scope = FunctionScope(node.name, self.module, self.klass, lineno=node.lineno)
scope.coroutine = isinstance(node, ast.AsyncFunctionDef)
scope.parent = parent
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
self.scopes[node] = scope
self._do_args(scope, node.args)
if node.returns:
self.visit(node.returns, parent)
self.visit(node.body, scope)
self.handle_free_vars(scope, parent)
visitAsyncFunctionDef = visitFunctionDef
_scope_names = {
ast.GeneratorExp: "<genexpr>",
ast.ListComp: "<listcomp>",
ast.DictComp: "<dictcomp>",
ast.SetComp: "<setcomp>",
}
def visitAwait(self, node, scope):
scope.coroutine = True
self.visit(node.value, scope)
def visitGeneratorExp(self, node, parent):
scope = GenExprScope(
self.module,
self.klass,
name=self._scope_names[type(node)],
lineno=node.lineno,
)
scope.parent = parent
# bpo-37757: For now, disallow *all* assignment expressions in the
# outermost iterator expression of a comprehension, even those inside
# a nested comprehension or a lambda expression.
scope.comp_iter_expr = parent.comp_iter_expr
if isinstance(node, ast.GeneratorExp):
scope.generator = True
if (
parent.nested
or isinstance(parent, FunctionScope)
or isinstance(parent, GenExprScope)
):
scope.nested = 1
parent.comp_iter_expr += 1
self.visit(node.generators[0].iter, parent)
parent.comp_iter_expr -= 1
self.visitcomprehension(node.generators[0], scope, True)
for comp in node.generators[1:]:
self.visit(comp, scope, False)
if isinstance(node, ast.DictComp):
self.visit(node.value, scope)
self.visit(node.key, scope)
else:
self.visit(node.elt, scope)
self.scopes[node] = scope
self.handle_free_vars(scope, parent)
# Whether to generate code for comprehensions inline or as nested scope
# is configurable, but we compute nested scopes for them unconditionally
# TODO: this may be not correct, check.
visitSetComp = visitGeneratorExp
visitListComp = visitGeneratorExp
visitDictComp = visitGeneratorExp
def visitcomprehension(self, node, scope, is_outmost):
if node.is_async:
scope.coroutine = True
scope.comp_iter_target = 1
self.visit(node.target, scope)
scope.comp_iter_target = 0
if is_outmost:
scope.add_use(".0")
else:
scope.comp_iter_expr += 1
self.visit(node.iter, scope)
scope.comp_iter_expr -= 1
for if_ in node.ifs:
self.visit(if_, scope)
def visitGenExprInner(self, node, scope):
for genfor in node.quals:
self.visit(genfor, scope)
self.visit(node.expr, scope)
def visitGenExprFor(self, node, scope):
self.visit(node.assign, scope)
self.visit(node.iter, scope)
for if_ in node.ifs:
self.visit(if_, scope)
def visitGenExprIf(self, node, scope):
self.visit(node.test, scope)
def visitLambda(self, node, parent):
scope = LambdaScope(self.module, self.klass, lineno=node.lineno)
scope.parent = parent
# bpo-37757: For now, disallow *all* assignment expressions in the
# outermost iterator expression of a comprehension, even those inside
# a nested comprehension or a lambda expression.
scope.comp_iter_expr = parent.comp_iter_expr
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
self.scopes[node] = scope
self._do_args(scope, node.args)
self.visit(node.body, scope)
self.handle_free_vars(scope, parent)
def _do_args(self, scope, args):
for n in args.defaults:
self.visit(n, scope.parent)
for n in args.kw_defaults:
if n:
self.visit(n, scope.parent)
for arg in args.args:
name = arg.arg
scope.add_param(name)
if arg.annotation:
self.visit(arg.annotation, scope.parent)
for arg in getattr(args, "posonlyargs", ()):
name = arg.arg
scope.add_param(name)
if arg.annotation:
self.visit(arg.annotation, scope.parent)
for arg in args.kwonlyargs:
name = arg.arg
scope.add_param(name)
if arg.annotation:
self.visit(arg.annotation, scope.parent)
if args.vararg:
scope.add_param(args.vararg.arg)
if args.vararg.annotation:
self.visit(args.vararg.annotation, scope.parent)
if args.kwarg:
scope.add_param(args.kwarg.arg)
if args.kwarg.annotation:
self.visit(args.kwarg.annotation, scope.parent)
def handle_free_vars(self, scope, parent):
parent.add_child(scope)
scope.handle_children()
def visitClassDef(self, node, parent):
if node.decorator_list:
self.visit(node.decorator_list, parent)
for kw in node.keywords:
self.visit(kw.value, parent)
parent.add_def(node.name)
for n in node.bases:
self.visit(n, parent)
scope = ClassScope(node.name, self.module, lineno=node.lineno)
# Set parent ASAP. TODO: Probably makes sense to do that for
# other scope types either.
scope.parent = parent
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
doc = ast.get_docstring(node, False)
if doc is not None:
scope.add_def("__doc__")
scope.add_def("__module__")
scope.add_def("__qualname__")
self.scopes[node] = scope
prev = self.klass
self.klass = node.name
self.visit(node.body, scope)
self.klass = prev
self.handle_free_vars(scope, parent)
# name can be a def or a use
def visitName(self, node, scope):
if isinstance(node.ctx, ast.Store):
if scope.comp_iter_target:
# This name is an iteration variable in a comprehension,
# so check for a binding conflict with any named expressions.
# Otherwise, mark it as an iteration variable so subsequent
# named expressions can check for conflicts.
if node.id in scope.nonlocals or node.id in scope.globals:
raise SyntaxError(
f"comprehension inner loop cannot rebind assignment expression target '{node.id}'"
)
scope.add_def(node.id, DEF_COMP_ITER)
scope.add_def(node.id)
elif isinstance(node.ctx, ast.Del):
# We do something to var, so even if we "undefine" it, it's a def.
# Implementation-wise, delete is storing special value (usually
# NULL) to var.
scope.add_def(node.id)
else:
scope.add_use(node.id)
if node.id == "super" and isinstance(scope, FunctionScope):
# If super() is used, and special cell var __class__ to class
# definition, and free var to the method. This is complicated
# by the fact that original Python2 implementation supports
# free->cell var relationship only if free var is defined in
# a scope marked as "nested", which normal method in a class
# isn't.
scope.add_use("__class__")
# operations that bind new names
def visitNamedExpr(self, node, scope):
if scope.comp_iter_expr:
# Assignment isn't allowed in a comprehension iterable expression
raise SyntaxError(
"assignment expression cannot be used in a comprehension iterable expression"
)
name = node.target.id
if isinstance(scope, GenExprScope):
cur = scope
while cur:
if isinstance(cur, GenExprScope):
if cur.defs.get(name, 0) & DEF_COMP_ITER:
raise SyntaxError(
f"assignment expression cannot rebind comprehension iteration variable '{name}'"
)
elif isinstance(cur, FunctionScope):
# If we find a FunctionBlock entry, add as GLOBAL/LOCAL or NONLOCAL/LOCAL
scope.frees[name] = 1
if name not in cur.explicit_globals:
scope.nonlocals[name] = 1
else:
scope.add_use(name)
cur.add_def(name)
break
elif isinstance(cur, ModuleScope):
scope.globals[name] = 1
scope.add_use(name)
cur.add_def(name)
break
elif isinstance(cur, ClassScope):
raise SyntaxError(
"assignment expression within a comprehension cannot be used in a class body"
)
cur = cur.parent
self.visit(node.value, scope)
self.visit(node.target, scope)
def visitFor(self, node, scope):
self.visit(node.target, scope)
self.visit(node.iter, scope)
self.visit(node.body, scope)
if node.orelse:
self.visit(node.orelse, scope)
visitAsyncFor = visitFor
def visitImportFrom(self, node, scope):
for alias in node.names:
if alias.name == "*":
continue
scope.add_def(alias.asname or alias.name)
def visitImport(self, node, scope):
for alias in node.names:
name = alias.name
i = name.find(".")
if i > -1:
name = name[:i]
scope.add_def(alias.asname or name)
def visitGlobal(self, node, scope):
for name in node.names:
scope.add_global(name)
def visitNonlocal(self, node, scope):
# TODO: Check that var exists in outer scope
for name in node.names:
scope.frees[name] = 1
scope.nonlocals[name] = 1
def visitAssign(self, node, scope):
"""Propagate assignment flag down to child nodes.
The Assign node doesn't itself contains the variables being
assigned to. Instead, the children in node.nodes are visited
with the assign flag set to true. When the names occur in
those nodes, they are marked as defs.
Some names that occur in an assignment target are not bound by
the assignment, e.g. a name occurring inside a slice. The
visitor handles these nodes specially; they do not propagate
the assign flag to their children.
"""
for n in node.targets:
self.visit(n, scope)
self.visit(node.value, scope)
def visitAnnAssign(self, node, scope):
target = node.target
if isinstance(target, ast.Name):
if target.id in scope.nonlocals or target.id in scope.explicit_globals:
is_nonlocal = target.id in scope.nonlocals
raise SyntaxError(
f"annotated name '{target.id}' can't be {'nonlocal' if is_nonlocal else 'global'}"
)
if node.simple or node.value:
scope.add_def(target.id)
else:
self.visit(node.target, scope)
self.visit(node.annotation, scope)
if node.value:
self.visit(node.value, scope)
def visitAssName(self, node, scope):
scope.add_def(node.name)
def visitAssAttr(self, node, scope):
self.visit(node.expr, scope)
def visitSubscript(self, node, scope):
self.visit(node.value, scope)
self.visit(node.slice, scope)
def visitAttribute(self, node, scope):
self.visit(node.value, scope)
def visitSlice(self, node, scope):
if node.lower:
self.visit(node.lower, scope)
if node.upper:
self.visit(node.upper, scope)
if node.step:
self.visit(node.step, scope)
def visitAugAssign(self, node, scope):
# If the LHS is a name, then this counts as assignment.
# Otherwise, it's just use.
self.visit(node.target, scope)
if isinstance(node.target, ast.Name):
self.visit(node.target, scope)
self.visit(node.value, scope)
# prune if statements if tests are false
_const_types = str, bytes, int, long, float
def visitIf(self, node, scope):
self.visit(node.test, scope)
self.visit(node.body, scope)
if node.orelse:
self.visit(node.orelse, scope)
# a yield statement signals a generator
def visitYield(self, node, scope):
scope.generator = True
if node.value:
self.visit(node.value, scope)
def visitYieldFrom(self, node, scope):
scope.generator = True
if node.value:
self.visit(node.value, scope)
def visitTry(self, node, scope):
self.visit(node.body, scope)
# Handle exception capturing vars
for handler in node.handlers:
if handler.type:
self.visit(handler.type, scope)
if handler.name:
scope.add_def(handler.name)
self.visit(handler.body, scope)
self.visit(node.orelse, scope)
self.visit(node.finalbody, scope)
def list_eq(l1, l2):
return sorted(l1) == sorted(l2)