in common/materializer/generate_build_files.py [0:0]
def _create_build_files(global_settings: dict, bq_obj_settings: dict,
                        task_dep_objs: dict[str, dag_types.BqObject],
                        module_name: str, tgt_dataset_name: str,
                        tgt_dataset_type: str, generated_files_dir: Path,
                        private_worker_pool: bool, enable_debug: bool) -> None:
    """
    Generates cloud build files that will create target artifacts such as BQ
    tables / views, K9 DAGs, etc.

    Args:
        global_settings: Global deployment settings. Must contain
            "config_dict" and "config_file".
        bq_obj_settings: Object settings with optional
            "bq_independent_objects" and "bq_dependent_objects" lists.
        task_dep_objs: Map of task name to BqObject, passed through to
            _process_bq_object_settings for dependency resolution.
        module_name: Name of the module being materialized.
        tgt_dataset_name: Target BQ dataset name (also used in the generated
            build file names).
        tgt_dataset_type: Target BQ dataset type.
        generated_files_dir: Directory into which build files are written.
        private_worker_pool: Whether the build runs on a private worker pool.
        enable_debug: Whether debug output is enabled in the build.
    """
    logging.info("Creating build files that will create bq objects...")
    logging.debug("module_name = '%s'", module_name)

    config_dict = global_settings["config_dict"]
    # Read turboMode exactly once so the step-ordering decision and the
    # batching decision below cannot disagree. (Previously one site used
    # config_dict["turboMode"] and the other .get("turboMode", True).)
    turbo_mode = config_dict.get("turboMode", True)

    build_files_master_list = []
    independent_objects_settings = bq_obj_settings.get("bq_independent_objects")
    dependent_objects_settings = bq_obj_settings.get("bq_dependent_objects")

    # Process independent tables first, accounting for Turbo Mode: in Turbo
    # Mode independent steps need not wait for the previous step.
    if independent_objects_settings:
        build_files_master_list.extend(
            _process_bq_object_settings(global_settings,
                                        independent_objects_settings,
                                        task_dep_objs, not turbo_mode))
    # Process dependent tables; these always wait on the previous step.
    if dependent_objects_settings:
        build_files_master_list.extend(
            _process_bq_object_settings(global_settings,
                                        dependent_objects_settings,
                                        task_dep_objs, True))

    # Cloud Build limits a build to 100 steps, so split the master list into
    # chunks of at most 95 entries, leaving room for extra housekeeping
    # steps. Each chunk becomes one "big" build file that creates target BQ
    # objects one object at a time. When turboMode is false, we limit it to a
    # single step per file, emulating the original pre-Turbo behavior.
    max_build_steps = 95 if turbo_mode else 1
    build_files_lists = [
        build_files_master_list[start:start + max_build_steps]
        for start in range(0, len(build_files_master_list), max_build_steps)
    ]

    # Generate one build file for each chunk, using Jinja.
    environment = Environment(loader=FileSystemLoader(_CLOUDBUILD_TEMPLATE_DIR))
    build_file_template = environment.get_template(_CLOUDBUILD_TEMPLATE_FILE)
    for build_file_counter, build_files_list in enumerate(build_files_lists,
                                                          start=1):
        build_file_text = build_file_template.render({
            "module_name": module_name,
            "target_dataset_type": tgt_dataset_type,
            "target_dataset_name": tgt_dataset_name,
            "load_test_data": config_dict["testData"],
            "debug": enable_debug,
            "config_file": global_settings["config_file"],
            "build_files_list": build_files_list,
            "private_worker_pool": private_worker_pool,
            "allow_telemetry": config_dict.get("allowTelemetry", True),
            "bq_location": config_dict["location"]
        })
        # Zero-pad the counter so generated files sort in creation order.
        build_file_num = f"{build_file_counter:03d}"
        build_file_name = (
            f"cloudbuild.materializer.{tgt_dataset_name}.{build_file_num}.yaml")
        build_file = Path(generated_files_dir, build_file_name)
        logging.debug("Creating build file : '%s'", build_file)
        logging.debug("Build File Text = '%s'", build_file_text)
        build_file.write_text(build_file_text, encoding="utf-8")