def _update_references()

in bigquery_etl/cli/stage.py [0:0]


def _update_references(artifact_files, project_id, dataset_suffix, sql_dir):
    """Rewrite references to the given artifacts in all SQL files under `sql_dir`.

    The artifact name, dataset and project of every artifact file are taken
    from its three enclosing directory names
    (``<project>/<dataset>/<name>/<file>``). References to that artifact are
    rewritten to point at `project_id` and at a dataset named
    ``<dataset>_<project with "-" replaced by "_">``, followed by
    ``_<dataset_suffix>`` when a suffix is given. The datasets
    ``INFORMATION_SCHEMA``, ``region-eu`` and ``region-us`` keep their name.

    Every ``*.sql`` file found recursively under `sql_dir` is rendered with
    `render`, has the substitutions applied and is overwritten in place.

    Args:
        artifact_files: Iterable of path objects (anything exposing
            ``.parent`` / ``.name`` like `pathlib.Path`) of the artifacts
            whose references should be rewritten.
        project_id: Project that rewritten references point to.
        dataset_suffix: Optional suffix appended to the deployed dataset
            name; nothing is appended when it is falsy.
        sql_dir: Directory that is searched recursively for ``*.sql`` files.

    Returns:
        None. The SQL files are modified on disk.
    """
    # Datasets that are referenced under their original name after deployment.
    unchanged_datasets = ("INFORMATION_SCHEMA", "region-eu", "region-us")

    replace_references = []
    replace_partial_references = []
    for artifact_file in artifact_files:
        name = artifact_file.parent.name
        original_dataset = artifact_file.parent.parent.name
        original_project = artifact_file.parent.parent.parent.name

        # The identifiers come from directory names, so escape all of them
        # before embedding them in a pattern: this matches a literal "*" in
        # wildcard table names and keeps any other regex metacharacter from
        # changing (or breaking) the pattern.
        name_pattern = re.escape(name)
        dataset_pattern = re.escape(original_dataset)
        project_pattern = re.escape(original_project)

        deployed_dataset = original_dataset
        if original_dataset not in unchanged_datasets:
            deployed_dataset += f"_{original_project.replace('-', '_')}"
            if dataset_suffix:
                deployed_dataset += f"_{dataset_suffix}"

        deployed_project = project_id

        # Replace references, preserving fully quoted references: a reference
        # quoted as a whole stays quoted as a whole, anything else is
        # rewritten with each path component quoted separately.
        quoted_as_whole = f"`{deployed_project}.{deployed_dataset}.{name}`"
        quoted_per_part = f"`{deployed_project}`.`{deployed_dataset}`.`{name}`"

        replace_partial_references += [
            # partially qualified references (like "telemetry.main")
            (
                re.compile(rf"(?<![\._])`{dataset_pattern}\.{name_pattern}`"),
                quoted_as_whole,
                original_project,
            ),
            (
                re.compile(
                    rf"(?<![\._])`?{dataset_pattern}`?\.`?{name_pattern}(?![a-zA-Z0-9_])`?"
                ),
                quoted_per_part,
                original_project,
            ),
        ]
        replace_references += [
            # fully qualified references (like "moz-fx-data-shared-prod.telemetry.main")
            (
                re.compile(
                    rf"`{project_pattern}\.{dataset_pattern}\.{name_pattern}`"
                ),
                quoted_as_whole,
                original_project,
            ),
            (
                re.compile(
                    rf"(?<![a-zA-Z0-9_])`?{project_pattern}`?\.`?{dataset_pattern}`?\.`?{name_pattern}(?![a-zA-Z0-9_])`?"
                ),
                quoted_per_part,
                original_project,
            ),
        ]

    for path in map(Path, glob(f"{sql_dir}/**/*.sql", recursive=True)):
        # apply substitutions
        if path.is_file():
            if "is_init()" in path.read_text():
                # Render both outcomes of is_init() and keep them inside the
                # original conditional, so the rewritten file still contains
                # the init and the non-init variant.
                init_sql = render(
                    path.name,
                    template_folder=path.parent,
                    format=False,
                    **{"is_init": lambda: True},
                )
                query_sql = render(
                    path.name,
                    template_folder=path.parent,
                    format=False,
                    **{"is_init": lambda: False},
                )
                sql = f"""
                    {{% if is_init() %}}
                    {init_sql}
                    {{% else %}}
                    {query_sql}
                    {{% endif %}}
                """
            else:
                sql = render(path.name, template_folder=path.parent, format=False)

            # Fully qualified references are rewritten first, in every file.
            # A callable replacement inserts the text literally, so a
            # backslash in it is never interpreted as a group reference.
            for pattern, replacement, _ in replace_references:
                sql = pattern.sub(lambda _match, text=replacement: text, sql)

            # Partially qualified references are only rewritten in files that
            # live in the artifact's own project directory (presumably
            # because a reference without a project resolves against the
            # project of the file it appears in).
            file_project = path.parent.parent.parent.name
            for pattern, replacement, artifact_project in replace_partial_references:
                if file_project == artifact_project:
                    sql = pattern.sub(lambda _match, text=replacement: text, sql)

            path.write_text(sql)