def update_sample()

in redash/tasks/queries/samples.py [0:0]


def update_sample(data_source_id, table_name, table_id, sample_updated_at):
    """
    For a given table, look up a sample row for it and update
    the "example" fields for it in the column_metadata table.

    The refresh is skipped when the first persisted column already has a
    non-empty example and ``sample_updated_at`` is more recent than
    ``settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS`` days ago.

    :param data_source_id: id passed to ``models.DataSource.get_by_id`` to
        load the data source whose query runner provides the sample.
    :param table_name: name of the table handed to the query runner's
        ``get_table_sample``; also used in log messages.
    :param table_id: id used to select the table's ``ColumnMetadata`` rows.
    :param sample_updated_at: time of the previous sample refresh; a falsy
        value always forces a refresh.
    :return: the sample returned by the query runner once the examples have
        been committed, or ``None`` when the refresh was skipped or no
        sample could be fetched.
    """
    logger.info(u"task=update_sample state=start table_name=%s", table_name)
    start_time = time.time()
    ds = models.DataSource.get_by_id(data_source_id)

    # Only existing columns of this table are considered, and only the
    # attributes needed here are loaded.
    persisted_columns = models.ColumnMetadata.query.filter(
        models.ColumnMetadata.exists.is_(True),
        models.ColumnMetadata.table_id == table_id,
    ).options(load_only("id", "name", "example"))

    update_threshold = utils.utcnow() - datetime.timedelta(
        days=settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS
    )

    first_column = persisted_columns.first()

    if (
        first_column
        and sample_updated_at
        and first_column.example
        and sample_updated_at > update_threshold
    ):
        # Look at the first example in the persisted columns.
        # If this is *not* empty AND sample_updated_at is recent, don't update sample
        logger.info(
            u"task=update_sample state=abort - recent sample exists table_name=%s",
            table_name,
        )
        return

    sample = None
    try:
        sample = ds.query_runner.get_table_sample(table_name)
    except NotSupported:
        # Lazy %-style arguments, consistent with the other log calls here;
        # the emitted text is unchanged.
        logger.info(u"Unable to fetch samples for %s", table_name)

    if not sample:
        return

    #  If a column exists, add a sample to it.
    for persisted_column in persisted_columns.all():
        column_example = sample.get(persisted_column.name)
        # NOTE(review): a column that is missing from the sample (or whose
        # sampled value is None) ends up with the literal string "None" as
        # its example -- confirm this is intended.
        column_example = (
            column_example if isinstance(column_example, str) else str(column_example)
        )
        persisted_column.example = truncate_long_string(column_example, 4000)
        models.db.session.add(persisted_column)

    models.db.session.commit()
    logger.info(
        u"task=update_sample state=finished table_name=%s runtime=%.2f",
        table_name,
        time.time() - start_time,
    )
    return sample