in bigquery_etl/cli/stage.py [0:0]
def _update_references(artifact_files, project_id, dataset_suffix, sql_dir):
    """Rewrite table references in the SQL files under ``sql_dir``.

    For every artifact file, the artifact name, dataset and project are taken
    from the three directory levels above the file
    (``<project>/<dataset>/<name>/<file>``).  References to that artifact are
    then replaced with references to ``project_id`` and a derived dataset
    named ``<dataset>_<project with '-' replaced by '_'>[_<dataset_suffix>]``.
    Datasets named ``INFORMATION_SCHEMA``, ``region-eu`` and ``region-us`` keep
    their name.

    Two kinds of references are rewritten:

    * fully qualified (``project.dataset.name``) -- in every SQL file;
    * partially qualified (``dataset.name``) -- only in SQL files whose own
      project directory (three levels above the file) equals the artifact's
      original project.

    Args:
        artifact_files: Iterable of path objects pointing at artifact files.
        project_id: Project that replaced references should point to.
        dataset_suffix: Optional suffix appended to the derived dataset name;
            skipped when falsy.
        sql_dir: Directory searched recursively for ``*.sql`` files.  Each
            regular file found is rendered with ``render`` and overwritten in
            place with the rewritten SQL.

    Returns:
        None.
    """
    # Qualifiers that are not renamed when deriving the deployed dataset.
    unrenamed_datasets = ("INFORMATION_SCHEMA", "region-eu", "region-us")

    full_rules = []  # (compiled pattern, replacement)
    partial_rules = []  # (compiled pattern, replacement, original project)

    for artifact_file in artifact_files:
        name = artifact_file.parent.name
        original_dataset = artifact_file.parent.parent.name
        original_project = artifact_file.parent.parent.parent.name

        deployed_project = project_id
        deployed_dataset = original_dataset
        if original_dataset not in unrenamed_datasets:
            deployed_dataset += f"_{original_project.replace('-', '_')}"
            if dataset_suffix:
                deployed_dataset += f"_{dataset_suffix}"

        # Escape every identifier so it is matched literally (e.g. a `*` in a
        # name) and so unusual directory names cannot produce an invalid or
        # unintentionally broad pattern.
        name_pattern = re.escape(name)
        dataset_pattern = re.escape(original_dataset)
        project_pattern = re.escape(original_project)

        # A reference quoted as a whole stays quoted as a whole; any other
        # quoting style is replaced with individually quoted parts.
        quoted_as_whole = f"`{deployed_project}.{deployed_dataset}.{name}`"
        quoted_per_part = f"`{deployed_project}`.`{deployed_dataset}`.`{name}`"

        # Order matters: the whole-quoted form is tried before the looser one.
        partial_rules += [
            # partially qualified references (like "telemetry.main")
            (
                re.compile(rf"(?<![\._])`{dataset_pattern}\.{name_pattern}`"),
                quoted_as_whole,
                original_project,
            ),
            (
                re.compile(
                    rf"(?<![\._])`?{dataset_pattern}`?\.`?{name_pattern}(?![a-zA-Z0-9_])`?"
                ),
                quoted_per_part,
                original_project,
            ),
        ]
        full_rules += [
            # fully qualified references
            # (like "moz-fx-data-shared-prod.telemetry.main")
            (
                re.compile(
                    rf"`{project_pattern}\.{dataset_pattern}\.{name_pattern}`"
                ),
                quoted_as_whole,
            ),
            (
                re.compile(
                    rf"(?<![a-zA-Z0-9_])`?{project_pattern}`?\.`?{dataset_pattern}`?\.`?{name_pattern}(?![a-zA-Z0-9_])`?"
                ),
                quoted_per_part,
            ),
        ]

    for path in map(Path, glob(f"{sql_dir}/**/*.sql", recursive=True)):
        if not path.is_file():
            continue

        if "is_init()" in path.read_text():
            # Render both outcomes of is_init() and stitch them back together
            # so the written file still contains the is_init() branch.
            init_sql = render(
                path.name,
                template_folder=path.parent,
                format=False,
                is_init=lambda: True,
            )
            query_sql = render(
                path.name,
                template_folder=path.parent,
                format=False,
                is_init=lambda: False,
            )
            sql = (
                "\n{% if is_init() %}\n"
                f"{init_sql}\n"
                "{% else %}\n"
                f"{query_sql}\n"
                "{% endif %}\n"
            )
        else:
            sql = render(path.name, template_folder=path.parent, format=False)

        # A callable replacement inserts the text verbatim; a plain string
        # would be parsed as a template (backslash escapes, group references).
        for pattern, replacement in full_rules:
            sql = pattern.sub(lambda _match, text=replacement: text, sql)

        # Partially qualified references implicitly belong to the project of
        # the file they appear in, so only rules for that project apply.
        file_project = path.parent.parent.parent.name
        for pattern, replacement, rule_project in partial_rules:
            if rule_project == file_project:
                sql = pattern.sub(lambda _match, text=replacement: text, sql)

        path.write_text(sql)