def retrieve_data_validation

def retrieve_data_validation_metrics()

in data_validation.py [0:0]

12 lines of code
5 McCabe index (conditional complexity)


def retrieve_data_validation_metrics(metrics_source: str):
    """
    Pull all the sanitization job data validation metrics.

    Arguments:

    - metrics_source: a string. The name of the table containing the data
      validation metrics to be fetched. Only ASCII letters, digits, dots,
      hyphens and underscores are accepted, e.g.
      myproject.mydataset.my_table.

    Returns: A dataframe of the data validation metrics, ordered by
    finished_at ascending.

    Raises: ValueError if metrics_source contains any character outside the
    accepted set (or is empty).
    """
    # Guard against SQL injection: the table name is interpolated straight
    # into the query text below, so anything outside this strict allowlist
    # (backticks, whitespace, semicolons, quotes, ...) is rejected up front.
    if not re.fullmatch(r"[A-Za-z0-9\.\-\_]+", metrics_source):
        raise ValueError(
            "metrics_source is in an incorrect format. This should be a fully qualified table name like myproject.mydataset.my_table"
        )
    validated_table = metrics_source

    # We are using f-strings here because BQ does not allow table names to be parametrized
    # and we need to be able to run the same script in the staging and prod db environments for reliable testing outcomes.
    query = f"""
    SELECT
        *
        FROM `{validated_table}` AS metadata
    ORDER BY finished_at ASC;
    """
    # NOTE(review): `project` is not defined in this function; presumably a
    # module-level name set elsewhere in the file — confirm.
    client = bigquery.Client(project=project)
    # result() blocks until the query job finishes before converting the rows.
    return client.query(query).result().to_dataframe()