in scripts/copy_select_table_column_bigquery.py [0:0]
def copy_tables(project_id, source_dataset, destination_dataset, df):
client = bigquery.Client()
unique_table_names = df['TableName'].unique()
for table_name in unique_table_names:
print(f"Processing table: {table_name}")
dest_table_id = f"{project_id}.{destination_dataset}.{table_name}"
orig_table_id = f"{project_id}.{source_dataset}.{table_name}"
# Copy the table (all columns initially)
job = client.copy_table(orig_table_id, dest_table_id)
job.result()
# Get columns to preserve
columns_to_preserve = df[df['TableName'] == table_name]['ColumnName'].tolist()
# Drop unwanted columns
dest_table = client.get_table(dest_table_id)
all_columns = [c.name for c in dest_table.schema]
for column in all_columns:
print(f'Cheking if column {column} should be preserved')
if column not in columns_to_preserve:
query = f"ALTER TABLE {dest_table_id} DROP COLUMN IF EXISTS {column}"
client.query(query).result()
# Update descriptions
for column_name in columns_to_preserve:
print(f'Updating description for column {column_name}')
description = df[(df['TableName'] == table_name) & (df['ColumnName'] == column_name)]['Description'].iloc[0]
query = f"""
ALTER TABLE {dest_table_id}
ALTER COLUMN {column_name} SET OPTIONS(description='{description}')
"""
try:
client.query(query).result()
except Exception as e:
print(f"An error occurred while additon desciption to the column {column_name} in table {dest_table_id}: {e}")