in common_components/monitoring/json_to_dot.py [0:0]
def find_real(nodes):
    """Remove nodes that have no timing information.

    DBT can have nodes that have no timing information and do not
    actually run in the data warehouse (e.g., ephemeral queries). These
    are removed from the graph, while preserving the overall dependency
    graph: each surviving node is re-pointed at the nearest timed nodes
    reachable through the removed ones.

    Args:
        nodes: Mapping of node name to node dict. For each node dict,
            ``"start_time"`` is read (missing or falsy means the node is
            not real) and ``"depends_on"`` is read as an iterable of node
            names (missing or ``None`` is treated as empty). The mapping
            and the surviving node dicts are modified in place.

    Returns:
        None. Afterwards ``nodes`` contains only the nodes with a truthy
        ``"start_time"``, and each one's ``"depends_on"`` is a ``set`` of
        names of surviving nodes.
    """

    def is_real(name):
        # A node counts as "real" only if it carries timing information.
        return bool(nodes[name].get("start_time"))

    def direct_deps(name):
        return nodes[name].get("depends_on") or ()

    def expand_real(depends):
        """Return the timed nodes reachable from *depends* via untimed ones."""
        real = set()
        seen = set()
        pending = list(depends)
        # Iterative walk with a visited set: terminates on cyclic input,
        # cannot hit the recursion limit on long chains, and expands each
        # untimed node at most once per call.
        while pending:
            dep = pending.pop()
            if dep in seen:
                continue
            seen.add(dep)
            # Not in the graph (e.g. a source; ideally these would be
            # present too): there is nothing to link to, so drop it.
            if dep not in nodes:
                continue
            if is_real(dep):
                real.add(dep)
            else:
                # Look through the untimed node to whatever it depends on.
                pending.extend(direct_deps(dep))
        return real

    # Resolve every node's real dependencies before deleting anything:
    # the expansion needs the untimed nodes to still be in the graph.
    real_depends = {name: expand_real(direct_deps(name)) for name in nodes}

    # Drop the untimed nodes and switch the rest to their real dependencies.
    for name in list(nodes):
        if is_real(name):
            nodes[name]["depends_on"] = real_depends[name]
        else:
            del nodes[name]