in common/py_libs/cdc.py [0:0]
def create_cdc_table(bq_client: bigquery.Client, table_setting: dict,
                     cdc_project: str, cdc_dataset: str,
                     schema: list[tuple[str, str]]):
    """Creates the CDC table for a configured base table if it is missing.

    The table id is "<cdc_project>.<cdc_dataset>.<base_table>", where the
    base table name is taken from the table settings and lower-cased. When
    a table with that id can already be fetched, a warning is logged and
    nothing is created. Otherwise the table is created from `schema`, with
    partitioning and clustering applied when the table settings provide
    them.

    Args:
        bq_client: BQ Client.
        table_setting: Table config as defined in the settings file. Must
            contain "base_table"; "partition_details" and "cluster_details"
            are optional.
        cdc_project: BQ CDC project.
        cdc_dataset: BQ CDC dataset name.
        schema: CDC table schema as a list of tuples
            (column name, column type).
    """
    full_table_id = ".".join(
        (cdc_project, cdc_dataset, table_setting["base_table"].lower()))

    try:
        bq_client.get_table(full_table_id)
    except NotFound:
        # The lookup found nothing, so the table is created here.
        logger.info("Table '%s' does not exists. Creating it.", full_table_id)

        columns = [
            bigquery.SchemaField(name=column[0], field_type=column[1])
            for column in schema
        ]
        new_table = bigquery.Table(full_table_id, schema=columns)

        # Partitioning and clustering are optional; apply whichever is set.
        if partitioning := table_setting.get("partition_details"):
            new_table = add_partition_to_table_def(new_table, partitioning)
        if clustering := table_setting.get("cluster_details"):
            new_table = add_cluster_to_table_def(new_table, clustering)

        bq_client.create_table(new_table)
        logger.info("Created table '%s'.", full_table_id)
    else:
        # Lookup succeeded: leave the existing table untouched.
        logger.warning("Table '%s' already exists. Not creating it again.",
                       full_table_id)