in backfill/2022-08-15-clients-first-seen/backfill.py [0:0]
def main():
    """Backfill clients_first_seen_v1 in parallel.

    Parses the command-line arguments, submits the destination-table
    creation query and calls ``.result()`` on it, then maps
    ``_create_temp_table`` over the sample ids using a thread pool with
    ``args.parallelism`` workers.  When ``args.dry_run`` is truthy, the
    shared job config is built with ``dry_run=True``.
    """
    args = parser.parse_args()
    bq_client = bigquery.Client(args.project_id)

    is_dry_run = bool(args.dry_run)
    if is_dry_run:
        print("Do a dry run")
    # The query cache is disabled in both modes; only the dry_run flag differs.
    query_config = bigquery.QueryJobConfig(
        dry_run=is_dry_run, use_query_cache=False
    )

    # Create the destination table; the same job config is reused below.
    create_table_job = bq_client.query(
        CREATE_TABLE_QUERY.format(dataset=args.dataset, table=args.table),
        job_config=query_config,
    )
    create_table_job.result()

    with ThreadPool(args.parallelism) as pool:
        # Bind the arguments shared by every call, so the pool only has to
        # supply the sample id as the final positional argument.
        create_temp_table = partial(
            _create_temp_table,
            bq_client,
            query_config,
            args.dataset,
            args.table,
        )
        # Create a temporary table for each sample_id.
        # NOTE(review): only ids 0 and 1 are processed here -- confirm this
        # is the intended sample_id range and not a debugging leftover.
        sample_ids = range(2)
        pool.map(create_temp_table, sample_ids)