def find_real()

in common_components/monitoring/json_to_dot.py [0:0]


def find_real(nodes):
    """ Remove nodes that have no timing information

        DBT can have nodes that have no timing information
        and not actually run in the data warehouse (e.g.,
        ephemeral queries). These are removed from the graph,
        while preserving the overall dependency graph.

        ``nodes`` is a dict mapping a node name to a dict holding at
        least ``start_time`` and ``depends_on`` (an iterable of node
        names). It is modified in place and nothing is returned:

        * every node whose ``start_time`` is missing or falsy is deleted;
        * every remaining node gets ``depends_on`` replaced by a ``set``
          of the nearest upstream nodes that do have a ``start_time``,
          reached by walking through the deleted nodes.

        Dependencies naming something that is not a key of ``nodes``
        are dropped. A missing ``depends_on`` is treated as empty.

        Assumes there is no dependency cycle passing only through
        nodes without timing; such a cycle would recurse without bound.
    """

    # Resolved dependencies of nodes *without* timing, keyed by node name.
    # Without this cache a layered diamond of such nodes is re-walked once
    # per path, which is exponential in the number of layers.
    resolved = {}

    def has_timing(name):
        # A missing key counts the same as an empty/None start time.
        return bool(nodes[name].get("start_time"))

    def expand_real(depends):
        real = set()

        for dep in depends:

            # Not a node of this graph (e.g. a source): nothing to link to.
            if dep not in nodes:
                continue

            if has_timing(dep):
                real.add(dep)
                continue

            # Node without timing: substitute its own real dependencies.
            if dep not in resolved:
                resolved[dep] = expand_real(nodes[dep].get("depends_on") or ())
            real.update(resolved[dep])

        return real

    # Resolve everything first, while the full graph is still available.
    real_depends_on = {
        name: expand_real(node.get("depends_on") or ())
        for name, node in nodes.items()
    }

    # Then drop the nodes without timing and rewire the survivors.
    # Iterate over a snapshot of the keys because entries are deleted.
    for name in list(nodes):
        if not has_timing(name):
            del nodes[name]
            continue
        nodes[name]["depends_on"] = real_depends_on[name]