macros/recursive_dag.sql (236 lines of code) (raw):

{% macro recursive_dag() %} {{ return(adapter.dispatch('recursive_dag', 'dbt_project_evaluator')()) }} {% endmacro %} {% macro default__recursive_dag() %} with recursive direct_relationships as ( select * from {{ ref('int_direct_relationships') }} where resource_type <> 'test' ), -- should this be a fct_ model? -- recursive CTE -- one record for every resource and each of its downstream children (including itself) all_relationships ( parent_id, parent, parent_resource_type, parent_model_type, parent_materialized, parent_access, parent_is_public, parent_source_name, parent_file_path, parent_directory_path, parent_file_name, parent_is_excluded, child_id, child, child_resource_type, child_model_type, child_materialized, child_access, child_is_public, child_source_name, child_file_path, child_directory_path, child_file_name, child_is_excluded, distance, path, is_dependent_on_chain_of_views ) as ( -- anchor select distinct resource_id as parent_id, resource_name as parent, resource_type as parent_resource_type, model_type as parent_model_type, materialized as parent_materialized, access as parent_access, is_public as parent_is_public, source_name as parent_source_name, file_path as parent_file_path, directory_path as parent_directory_path, file_name as parent_file_name, is_excluded as parent_is_excluded, resource_id as child_id, resource_name as child, resource_type as child_resource_type, model_type as child_model_type, materialized as child_materialized, access as child_access, is_public as child_is_public, source_name as child_source_name, file_path as child_file_path, directory_path as child_directory_path, file_name as child_file_name, is_excluded as child_is_excluded, 0 as distance, {{ dbt.array_construct(['resource_name']) }} as path, cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views from direct_relationships -- where direct_parent_id is null {# optional lever to change filtering of anchor clause to only include root resources #} union all -- recursive clause select all_relationships.parent_id as parent_id, all_relationships.parent as parent, all_relationships.parent_resource_type as parent_resource_type, all_relationships.parent_model_type as parent_model_type, all_relationships.parent_materialized as parent_materialized, all_relationships.parent_access as parent_access, all_relationships.parent_is_public as parent_is_public, all_relationships.parent_source_name as parent_source_name, all_relationships.parent_file_path as parent_file_path, all_relationships.parent_directory_path as parent_directory_path, all_relationships.parent_file_name as parent_file_name, all_relationships.parent_is_excluded as parent_is_excluded, direct_relationships.resource_id as child_id, direct_relationships.resource_name as child, direct_relationships.resource_type as child_resource_type, direct_relationships.model_type as child_model_type, direct_relationships.materialized as child_materialized, direct_relationships.access as child_access, direct_relationships.is_public as child_is_public, direct_relationships.source_name as child_source_name, direct_relationships.file_path as child_file_path, direct_relationships.directory_path as child_directory_path, direct_relationships.file_name as child_file_name, direct_relationships.is_excluded as child_is_excluded, all_relationships.distance+1 as distance, {{ dbt.array_append('all_relationships.path', 'direct_relationships.resource_name') }} as path, case when all_relationships.child_materialized in ('view', 'ephemeral') and coalesce(all_relationships.is_dependent_on_chain_of_views, true) then true else false end as is_dependent_on_chain_of_views from direct_relationships inner join all_relationships on all_relationships.child_id = direct_relationships.direct_parent_id {% if var('max_depth_dag') | int > 0 %} {% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %} {% do exceptions.raise_compiler_error( 'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.' ) %} {% else %} where distance <= {{ var('max_depth_dag')}} {% endif %} {% endif %} ) {% endmacro %} {% macro bigquery__recursive_dag() %} -- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs {% set max_depth = var('max_depth_dag') | int %} {% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %} {% do exceptions.raise_compiler_error( 'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.' ) %} {% endif %} with direct_relationships as ( select * from {{ ref('int_direct_relationships') }} where resource_type <> 'test' ) -- must do distinct prior to creating array because BigQuery doesn't support distinct on array type , get_distinct as ( select distinct resource_id as parent_id, resource_id as child_id, resource_name, materialized as child_materialized, is_public as child_is_public, access as child_access, is_excluded as child_is_excluded from direct_relationships ) , cte_0 as ( select parent_id, child_id, child_materialized, child_is_public, child_access, child_is_excluded, 0 as distance, {{ dbt.array_construct(['resource_name']) }} as path, cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views from get_distinct ) {% for i in range(1,max_depth) %} {% set prev_cte_path %}cte_{{ i - 1 }}.path{% endset %} , cte_{{i}} as ( select cte_{{i - 1}}.parent_id as parent_id, direct_relationships.resource_id as child_id, direct_relationships.materialized as child_materialized, direct_relationships.is_public as child_is_public, direct_relationships.access as child_access, direct_relationships.is_excluded as child_is_excluded, cte_{{i - 1}}.distance+1 as distance, {{ dbt.array_append(prev_cte_path, 'direct_relationships.resource_name') }} as path, case when cte_{{i - 1}}.child_materialized in ('view', 'ephemeral') and coalesce(cte_{{i - 1}}.is_dependent_on_chain_of_views, true) then true else false end as is_dependent_on_chain_of_views from direct_relationships inner join cte_{{i - 1}} on cte_{{i - 1}}.child_id = direct_relationships.direct_parent_id ) {% endfor %} , all_relationships_unioned as ( {% for i in range(max_depth) %} select * from cte_{{i}} {% if not loop.last %}union all{% endif %} {% endfor %} ) , resource_info as ( select * from {{ ref('int_all_graph_resources') }} ) , all_relationships as ( select parent.resource_id as parent_id, parent.resource_name as parent, parent.resource_type as parent_resource_type, parent.model_type as parent_model_type, parent.materialized as parent_materialized, parent.is_public as parent_is_public, parent.access as parent_access, parent.source_name as parent_source_name, parent.file_path as parent_file_path, parent.directory_path as parent_directory_path, parent.file_name as parent_file_name, parent.is_excluded as parent_is_excluded, child.resource_id as child_id, child.resource_name as child, child.resource_type as child_resource_type, child.model_type as child_model_type, child.materialized as child_materialized, child.is_public as child_is_public, child.access as child_access, child.source_name as child_source_name, child.file_path as child_file_path, child.directory_path as child_directory_path, child.file_name as child_file_name, child.is_excluded as child_is_excluded, cast(all_relationships_unioned.distance as {{ dbt.type_int() }}) as distance, all_relationships_unioned.path, all_relationships_unioned.is_dependent_on_chain_of_views from all_relationships_unioned left join resource_info as parent on all_relationships_unioned.parent_id = parent.resource_id left join resource_info as child on all_relationships_unioned.child_id = child.resource_id ) {% endmacro %} {% macro clickhouse__recursive_dag() %} {{ return(bigquery__recursive_dag()) }} {% endmacro %} {% macro spark__recursive_dag() %} -- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs {{ return(bigquery__recursive_dag()) }} {% endmacro %} {% macro trino__recursive_dag() %} {#-- Although Trino supports a recursive WITH-queries, -- it is less performant than creating CTEs with loops and union them --#} {{ return(bigquery__recursive_dag()) }} {% endmacro %} {% macro athena__recursive_dag() %} {{ return(bigquery__recursive_dag()) }} {% endmacro %}