in backfill/2024-03-23-clients_last_seen/telemetry_derived_clients_last_seen_v2_20240322/backfill_clients_last_seen_v2_in_BQproject_backfill_1.py [0:0]
def main():
    """Backfill table `backfills_staging_derived.telemetry_derived_clients_last_seen_v2_20240322` in parallel.

    Parses the command-line arguments, loads the BigQuery schema from the
    ``schema.yaml`` file in ``this_dir`` and then, for every date yielded by
    the ``BackfillDateRange`` built from ``--start_date``/``--end_date``,
    calls ``_backfill_staging_table`` once for each integer in
    ``range(sample_start, sample_end)`` on a shared thread pool.

    Dates are processed one after another; only the calls belonging to a
    single date run concurrently (``pool.map`` blocks until they finish).
    """
    args = parser.parse_args()
    client = bigquery.Client(args.project_id)

    if args.dry_run:
        print("Do a dry run")
    # The query cache is always disabled; only the dry-run flag varies.
    job_config = bigquery.QueryJobConfig(
        dry_run=bool(args.dry_run), use_query_cache=False
    )

    date_range = BackfillDateRange(
        args.start_date,
        args.end_date,
        range_type=PartitionType.DAY,
    )

    schema_file_path = os.path.join(this_dir, "schema.yaml")
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(schema_file_path, "r", encoding="utf-8") as yaml_file:
        schema_yaml = yaml.safe_load(yaml_file)
    bigquery_schema = get_bigquery_schema(schema_yaml)

    # Loop-invariant: the same values are fanned out for every date.
    samples = range(args.sample_start, args.sample_end)

    # Create the pool once and reuse it for all dates instead of spawning and
    # tearing down a fresh set of worker threads per date.
    with ThreadPool(args.parallelism) as pool:
        for backfill_date in date_range:
            pool.map(
                partial(
                    _backfill_staging_table,
                    client,
                    job_config,
                    args.project_id,
                    args.dataset,
                    args.table,
                    bigquery_schema,
                    backfill_date,
                ),
                samples,
            )