def main()

in backfill/2022-08-15-clients-first-seen/backfill.py [0:0]


def main():
    """Backfill clients_first_seen_v1 in parallel.

    Reads the command-line arguments from the module-level ``parser``,
    issues the create-table query for the destination table, and then maps
    ``_create_temp_table`` over a thread pool sized by ``args.parallelism``.
    When ``args.dry_run`` is truthy, every query is submitted with
    ``dry_run=True``.
    """
    args = parser.parse_args()
    client = bigquery.Client(args.project_id)

    # A single job config is shared by every query issued below; the query
    # cache is always disabled and dry-run mode follows the CLI flag.
    is_dry_run = bool(args.dry_run)
    if is_dry_run:
        print("Do a dry run")
    job_config = bigquery.QueryJobConfig(
        dry_run=is_dry_run, use_query_cache=False
    )

    # create the destination table
    create_table_sql = CREATE_TABLE_QUERY.format(
        dataset=args.dataset, table=args.table
    )
    create_table_job = client.query(create_table_sql, job_config=job_config)
    create_table_job.result()

    with ThreadPool(args.parallelism) as pool:
        # create a temporary table for each sample_id
        # NOTE(review): only the values 0 and 1 are mapped here -- confirm
        # whether that is the intended sample_id range.
        create_temp_table = partial(
            _create_temp_table, client, job_config, args.dataset, args.table
        )
        pool.map(create_temp_table, list(range(0, 2)))