in common/materializer/generate_build_files.py [0:0]
def main():
    """Generate the Cloud Build files for one module's target dataset.

    Steps, in order:
      1. Parse command line arguments and load the config file.
      2. Pick the materializer settings file, switching to the task
         dependent settings file when
         `generate_assets.get_enabled_task_dep_settings_file` returns one.
      3. Recreate the per-module generated files directory from scratch.
      4. Resolve the fully qualified target dataset name from the config.
      5. Write the jinja substitution data file and the build files.
      6. Create the target dataset.

    Raises:
        RuntimeError: If task dependencies are in use and the materializer
            settings cannot be parsed for task dependencies. The original
            exception is chained as the cause.
    """
    # Parse and validate arguments.
    (module_name, tgt_dataset_type, config_file, materializer_settings_file,
     k9_manifest, private_worker_pool, enable_debug) = _parse_args()

    logging.info("Generating %s Build files....", tgt_dataset_type)

    # Load and validate the config file.
    config_dict = configs.load_config_file(config_file)

    # Telemetry is opt-out: a missing "allowTelemetry" key means True.
    allow_telemetry = config_dict.get("allowTelemetry", True)

    # Decide which materializer settings file to load. A truthy result here
    # means the task dependent settings file replaces the one passed in.
    td_settings = generate_assets.get_enabled_task_dep_settings_file(
        Path(materializer_settings_file), config_dict)
    use_task_deps = bool(td_settings)
    if use_task_deps:
        materializer_settings_file = str(td_settings)
        logging.info(
            "Task dependencies are enabled and %s exists. Using "
            "task dependent settings.", td_settings.name)

    materializer_settings = generate_assets.get_materializer_settings(
        materializer_settings_file)

    # Start from a clean output directory: drop any previous run's files,
    # then create the directory again.
    generated_files_dir = generate_assets.GENERATED_BUILD_DIR_NAME / module_name
    if generated_files_dir.exists():
        logging.debug("Removing existing generated files directory '%s'....",
                      generated_files_dir)
        shutil.rmtree(generated_files_dir)

    logging.debug("Creating directory '%s' to store generated files....",
                  generated_files_dir)
    Path(generated_files_dir).mkdir(parents=True)

    # Dataset keys in the config are the lower-cased dataset type with
    # spaces replaced by underscores.
    lower_tgt_dataset_type = tgt_dataset_type.lower().replace(" ", "_")

    # Marketing modules are nested under "marketing"; every other module
    # sits at the top level of the config.
    module_parent = (config_dict["marketing"]
                     if module_name in constants.MARKETING_MODULES else
                     config_dict)
    tgt_dataset = module_parent[module_name]["datasets"][lower_tgt_dataset_type]
    tgt_dataset_full_name = config_dict["projectIdTarget"] + "." + tgt_dataset

    # Create jinja template substitution file. It is needed later in the
    # process when the individual BQ table creation SQL build files are run.
    _create_jinja_data_file(config_dict, generated_files_dir)

    global_settings = dict(config_file=config_file,
                           config_dict=config_dict,
                           k9_manifest=k9_manifest)

    # Task dependent objects, when enabled, do not get DAGs generated in
    # _create_build_files. Instead their DAGs are generated through
    # generate_dependent_dags.py called from deploy.sh.
    task_dep_objs = {}
    if use_task_deps:
        try:
            task_dep_objs = dependent_dags.get_task_deps(materializer_settings)
        except Exception as err:
            parse_failure_msg = (
                "The following materializer settings could not be parsed for "
                "task dependencies. Please check that the reporting settings "
                "file conforms to the type defined at "
                "src/common/materializer/dag_types.ReportingObjects:\n"
                f"{materializer_settings}")
            raise RuntimeError(parse_failure_msg) from err

    # Create build files.
    _create_build_files(global_settings, materializer_settings, task_dep_objs,
                        module_name, tgt_dataset_full_name, tgt_dataset_type,
                        generated_files_dir, private_worker_pool, enable_debug)

    # Create target dataset if not present. The telemetry opt-in is passed
    # through as the label_dataset argument.
    _create_tgt_dataset(tgt_dataset_full_name,
                        config_dict["location"],
                        label_dataset=allow_telemetry)

    logging.info("Finished generating Cloud Build files for %s for %s.",
                 tgt_dataset_type, module_name)