in redash/tasks/queries/samples.py [0:0]
def update_sample(data_source_id, table_name, table_id, sample_updated_at):
    """
    For a given table, look up a sample row for it and update
    the "example" fields for it in the column_metadata table.

    The refresh is skipped when the table already has a persisted column
    with a non-empty example and ``sample_updated_at`` is newer than
    ``settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS`` days before now.

    :param data_source_id: id passed to ``models.DataSource.get_by_id``.
    :param table_name: table name handed to the query runner's
        ``get_table_sample``; also used in the log lines.
    :param table_id: value matched against ``ColumnMetadata.table_id`` to
        select the columns to update.
    :param sample_updated_at: value compared against the update threshold;
        a falsy value always triggers a refresh.
    :return: the sample returned by the query runner when examples were
        written; ``None`` when the refresh was skipped, when the query
        runner raised ``NotSupported``, or when the sample was empty.
    """
    logger.info(u"task=update_sample state=start table_name=%s", table_name)
    start_time = time.time()

    ds = models.DataSource.get_by_id(data_source_id)

    # Only columns flagged as existing for this table; just the attributes
    # this task reads or writes are requested.
    persisted_columns = models.ColumnMetadata.query.filter(
        models.ColumnMetadata.exists.is_(True),
        models.ColumnMetadata.table_id == table_id,
    ).options(load_only("id", "name", "example"))

    update_threshold = utils.utcnow() - datetime.timedelta(
        days=settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS
    )

    # Look at the first example in the persisted columns.
    # If this is *not* empty AND sample_updated_at is recent, don't update sample
    first_column = persisted_columns.first()
    if (
        first_column
        and sample_updated_at
        and first_column.example
        and sample_updated_at > update_threshold
    ):
        logger.info(
            u"task=update_sample state=abort - recent sample exists table_name=%s",
            table_name,
        )
        return

    try:
        sample = ds.query_runner.get_table_sample(table_name)
    except NotSupported:
        # Lazy %-style arguments, consistent with the other log calls here.
        logger.info(u"Unable to fetch samples for %s", table_name)
        return

    if not sample:
        return

    # If a column exists, add a sample to it.
    for persisted_column in persisted_columns.all():
        column_example = sample.get(persisted_column.name, None)
        # NOTE(review): a column absent from the sample (or whose sampled
        # value is None) is stringified to "None" here. Kept as-is because
        # the recency check above relies on a non-empty example -- confirm
        # whether that is the intended stored value.
        if not isinstance(column_example, str):
            column_example = str(column_example)
        persisted_column.example = truncate_long_string(column_example, 4000)
        models.db.session.add(persisted_column)
    models.db.session.commit()

    logger.info(
        u"task=update_sample state=finished table_name=%s runtime=%.2f",
        table_name,
        time.time() - start_time,
    )
    return sample