in data_validation.py [0:0]
def retrieve_data_validation_metrics(metrics_source):
    """
    Pull all the sanitization job data validation metrics.

    Arguments:
    - metrics_source: a string. The name of the table containing the data
      validation metrics to be fetched, e.g. myproject.mydataset.my_table.
      Only ASCII letters, digits, '.', '-' and '_' are accepted.

    Returns: A dataframe of the data validation metrics, ordered by
    finished_at ascending.

    Raises:
    - TypeError: if metrics_source is not a string.
    - ValueError: if metrics_source contains any character outside the
      accepted set (this is what guards the query against injection).
    """
    if not isinstance(metrics_source, str):
        raise TypeError(
            f"metrics_source must be a string, got {type(metrics_source).__name__}"
        )

    # The table name is interpolated straight into the SQL text below, so it
    # must be restricted to a safe character set first. Backticks, whitespace,
    # quotes and semicolons are all rejected by this allow-list.
    if not re.fullmatch(r"[A-Za-z0-9._\-]+", metrics_source):
        raise ValueError(
            "metrics_source in incorrect format. This should be a fully "
            "qualified table name like myproject.mydataset.my_table"
        )

    # We are using f-strings here because BQ does not allow table names to be
    # parametrized and we need to be able to run the same script in the staging
    # and prod db environments for reliable testing outcomes.
    query = f"""
        SELECT
            *
        FROM `{metrics_source}` AS metadata
        ORDER BY finished_at ASC;
    """

    # NOTE(review): `project` is not defined in this function; presumably it is
    # a module-level variable -- confirm it is set before this is called.
    client = bigquery.Client(project=project)
    query_job = client.query(query)
    return query_job.result().to_dataframe()