in bigquery_etl/dryrun.py [0:0]
def get_referenced_tables(self):
    """Return referenced tables by dry running the SQL file.

    When the dry run result is valid, its ``referencedTables`` entry is
    returned directly. Otherwise, if ``strip_dml`` is set and the dry run
    failed only because a partition filter is required, the query is
    dry-run again with a filter on the partition column appended, and the
    tables referenced by that filtered query are returned.

    Returns:
        The ``referencedTables`` value of the (possibly filtered) dry run
        result, or an empty list when it cannot be determined.

    Raises:
        Exception: If the file is not skipped and the dry run is not valid.
    """
    if not self.skip() and not self.is_valid():
        raise Exception(f"Error when dry running SQL file {self.sqlfile}")

    if self.skip():
        print(f"\t...Ignoring dryrun results for {self.sqlfile}")

    result = self.dry_run_result
    if result and result["valid"] and "referencedTables" in result:
        return result["referencedTables"]

    # Only views/queries that fail because they require a date filter can
    # be retried; everything else has no referenced tables to report.
    if not (
        result
        and self.strip_dml
        and self.get_error() == Errors.DATE_FILTER_NEEDED
    ):
        return []

    # Since different queries require different partition filters
    # (submission_date, crash_date, timestamp, submission_timestamp, ...)
    # we extract the filter name from the error message
    # (by capturing the next word after "column(s)").
    # Example error:
    # "Cannot query over table <table_name> without a filter over column(s)
    # <date_filter_name> that can be used for partition elimination."
    error = result["errors"][0].get("message", "")
    # NOTE(review): `or ""` guards against find_next_word returning a falsy
    # value (e.g. None) when "column(s)" is absent -- confirm its contract.
    date_filter = find_next_word("column(s)", error) or ""

    # "timestamp" is checked first so that a column name containing both
    # substrings (e.g. "update_timestamp") is compared against a timestamp,
    # without first issuing a throw-away dry run for the date variant.
    if "timestamp" in date_filter:
        current_value = "current_timestamp()"
    elif "date" in date_filter:
        current_value = "current_date()"
    else:
        # Unknown kind of partition column: no filter can be constructed,
        # so fall back to "no referenced tables" instead of failing on an
        # unbound `filtered_content`.
        return []

    sql = self.get_sql()
    predicate = f"{date_filter} > {current_value}"

    stripped_dml_result = DryRun(
        sqlfile=self.sqlfile,
        content=f"{sql}WHERE {predicate}",
        client=self.client,
        id_token=self.id_token,
    )
    if stripped_dml_result.get_error() == Errors.DATE_FILTER_NEEDED_AND_SYNTAX:
        # If the date filter (e.g. WHERE crash_date > current_date())
        # is added to a query that already has a WHERE clause,
        # it will throw an error. To fix this, we need to
        # append 'AND' instead of 'WHERE'.
        stripped_dml_result = DryRun(
            sqlfile=self.sqlfile,
            content=f"{sql}AND {predicate}",
            client=self.client,
            id_token=self.id_token,
        )

    if (
        stripped_dml_result.get_error() is None
        and "referencedTables" in stripped_dml_result.dry_run_result
    ):
        return stripped_dml_result.dry_run_result["referencedTables"]

    return []