#-- Copyright 2024 Google LLC
#--
#-- Licensed under the Apache License, Version 2.0 (the "License");
#-- you may not use this file except in compliance with the License.
#-- You may obtain a copy of the License at
#--
#-- https://www.apache.org/licenses/LICENSE-2.0
#--
#-- Unless required by applicable law or agreed to in writing, software
#-- distributed under the License is distributed on an "AS IS" BASIS,
#-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#-- See the License for the specific language governing permissions and
#-- limitations under the License.
# This build file generates all the necessary objects (DAG files, BigQuery
# tables and views, etc.) for a Cortex deployment for a Salesforce system.
steps:
# init_deployment_config.py leaves the validated config.json file in workspace/config so it's available for other build steps
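  # For illustration only (the authoritative schema is enforced by
  # init_deployment_config.py), a minimal ${_CONFIG_FILE} might look like:
  #   {
  #     "targetBucket": "my-dags-bucket",
  #     "deployDataMesh": false,
  #     "SFDC": {
  #       "deployCDC": true,
  #       "createMappingViews": true,
  #       "createPlaceholders": true
  #     }
  #   }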
- name: gcr.io/kittycorn-public/deploy-kittycorn:v2.0
id: "init_deploy_config"
script: |
#!/usr/bin/env bash
set -e
echo "Initial configuration ${_CONFIG_FILE}:"
cat "${_CONFIG_FILE}"
# Save absolute config file path to .env file
# that can be accessed by all Cloud Build steps since exported env
# variables do not persist between steps.
realpath "${_CONFIG_FILE}" > /workspace/config_file_full_path.env
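      # For example, with the default _CONFIG_FILE of "config/config.json",
      # the .env file now holds a single absolute path such as
      # "/workspace/config/config.json" (illustrative; depends on checkout dir).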
      python3 src/common/init_deployment_config.py \
        --config-file "${_CONFIG_FILE}" \
        --sub-validator "src"
      echo "Processed configuration:"
      cat "${_CONFIG_FILE}"
echo -e "\n--------------------------------"
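  # Generate Airflow DAGs and SQL for the Salesforce raw and CDC layers and
  # copy them to the target GCS bucket, honoring the deployCDC,
  # createMappingViews and createPlaceholders flags from the config file.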
- name: "gcr.io/kittycorn-public/deploy-kittycorn:v2.0"
script: |
      #!/usr/bin/env bash
set -e
export PYTHONPATH=${PYTHONPATH}:./src
      _DEPLOY_CDC_=$(jq -r ."SFDC.deployCDC" "${_CONFIG_FILE}")
      create_mapping_views=$(jq -r ."SFDC.createMappingViews" "${_CONFIG_FILE}")
      create_placeholder_tables=$(jq -r ."SFDC.createPlaceholders" "${_CONFIG_FILE}")
      # jq -r prints the literal string "null" for a missing key, so treat
      # both "null" and an empty string as unset and default to "true".
      if [[ ${create_mapping_views} == "" || ${create_mapping_views} == "null" ]]
      then
        create_mapping_views="true"
      fi
      if [[ ${create_placeholder_tables} == "" || ${create_placeholder_tables} == "null" ]]
      then
        create_placeholder_tables="true"
      fi
      # The _TGT_BUCKET substitution, when set, overrides targetBucket from
      # the config file.
      if [[ ${_TGT_BUCKET} != "" ]]
      then
        _TGT_BUCKET_=${_TGT_BUCKET}
      else
        _TGT_BUCKET_=$(jq -r ."targetBucket" "${_CONFIG_FILE}")
      fi
if [[ ${_DEPLOY_CDC_} == "true" ]]
then
# RAW DAG generation
echo "Generating Raw DAGs and SQL files."
python src/raw_dag_generator/generate_dags.py
echo "β
Generated Raw DAGs and SQL files."
# If create_mapping_views is true, we create CDC views. Otherwise, we create CDC DAGs.
if [[ ${create_mapping_views} == "true" ]]
then
# CDC view generation
echo "Generating CDC views."
python src/cdc_dag_generator/generate_views.py
echo "β
CDC views have been generated."
else
# CDC DAG generation
echo "Generating CDC DAGs and SQL files."
python src/cdc_dag_generator/generate_dags.py
echo "β
Generated CDC DAGs and SQL files."
fi
else
echo "==== Skipping RAW and CDC DAG generation for Salesforce ===="
fi
if [[ ${_DEPLOY_CDC_} == "true" ]]
then
# Copy generated SFDC DAG sql files to Target GCS bucket
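        # The find call counts files (stderr discarded so a missing directory
        # is not an error); gsutil only runs when there is something to copy.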
if [[ $(find generated_sql -type f 2> /dev/null | wc -l) -gt 0 ]]
then
echo "Copying SFDC SQL files to gs://${_TGT_BUCKET_}/dags/sfdc."
gsutil -m cp -r './generated_sql/*' gs://${_TGT_BUCKET_}/dags/
echo "β
SFDC SQL files have been copied."
else
echo "πͺNo SQL files found under generated_sql/sfdc directory or the directory does not exist. Skipping copy.πͺ"
fi
# Copy generated SFDC DAG python and related files to Target GCS bucket
if [[ $(find generated_dag -type f 2> /dev/null | wc -l) -gt 0 ]]
then
echo "Copying SFDC DAG files to gs://${_TGT_BUCKET_}/dags/sfdc."
gsutil -m cp -r './generated_dag/*' gs://${_TGT_BUCKET_}/dags/
echo "β
SFDC DAG files have been copied."
else
echo "πͺNo Python files found under generated_dag/sfdc directory or the directory does not exist. Skipping copy.πͺ"
fi
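        # Placeholder CDC tables / views keep downstream Reporting objects
        # from breaking before the CDC DAGs have run for the first time.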
        if [[ ${create_placeholder_tables} == "true" ]]
        then
          echo "Creating placeholder CDC tables / views in case they do not exist."
          declare -a _WORKER_POOL_OPTIONS
          if [[ -n "${_WORKER_POOL_NAME}" ]]; then
            _WORKER_POOL_OPTIONS+=(--worker_pool_name "${_WORKER_POOL_NAME}")
          fi
          if [[ -n "${_CLOUD_BUILD_REGION}" ]]; then
            _WORKER_POOL_OPTIONS+=(--region "${_CLOUD_BUILD_REGION}")
          fi
          src/common/materializer/deploy.sh \
              --gcs_logs_bucket "${_GCS_BUCKET}" \
              --gcs_tgt_bucket "${_TGT_BUCKET_}" \
              --module_name "SFDC" \
              --target_type "CDC" \
              --config_file "${_CONFIG_FILE}" \
              --materializer_settings_file config/cdc_placeholder_settings.yaml \
              "${_WORKER_POOL_OPTIONS[@]}"
          echo "✅ Placeholder CDC tables / views have been created."
        else
          echo "createPlaceholders flag is false. Skipping placeholder table / view generation."
        fi
fi
# Generate required K9 Processing tables in case they don't exist, so as not to break Reporting.
- name: "gcr.io/kittycorn-public/deploy-kittycorn:v2.0"
id: "generate_k9_placeholder"
script: |
      #!/usr/bin/env bash
      set -e
      echo "Creating placeholder K9 Processing tables in case they do not exist."
export PYTHONPATH=${PYTHONPATH}:./src
      if [[ ${_TGT_BUCKET} != "" ]]
      then
        _TGT_BUCKET_=${_TGT_BUCKET}
      else
        _TGT_BUCKET_=$(jq -r ."targetBucket" "${_CONFIG_FILE}")
      fi
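      # Optional arguments are collected in an array so that unset
      # substitutions contribute no (not even empty) arguments to deploy.sh.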
declare -a _WORKER_POOL_OPTIONS
if [[ -n "${_WORKER_POOL_NAME}" ]]; then
_WORKER_POOL_OPTIONS+=(--worker_pool_name "${_WORKER_POOL_NAME}")
fi
if [[ -n "${_CLOUD_BUILD_REGION}" ]]; then
_WORKER_POOL_OPTIONS+=(--region "${_CLOUD_BUILD_REGION}")
fi
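      # Example (hypothetical values): with
      # _WORKER_POOL_NAME="projects/my-proj/locations/us-central1/workerPools/pool1"
      # and _CLOUD_BUILD_REGION="us-central1", deploy.sh is invoked with
      # --worker_pool_name <that name> --region us-central1.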
      src/common/materializer/deploy.sh \
          --gcs_logs_bucket "${_GCS_BUCKET}" \
          --gcs_tgt_bucket "${_TGT_BUCKET_}" \
          --module_name "k9" \
          --target_type "processing" \
          --config_file "${_CONFIG_FILE}" \
          --materializer_settings_file config/k9_placeholder_settings.yaml \
          "${_WORKER_POOL_OPTIONS[@]}"
echo "β
Placeholder K9 Processing tables have been created if they did not exist."
# Generate SFDC Reporting bigquery views / tables via Materializer.
- name: gcr.io/kittycorn-public/deploy-kittycorn:v2.0
id: "reporting"
script: |
      #!/usr/bin/env bash
      set -e
      export PYTHONPATH=${PYTHONPATH}:./src
      if [[ ${_TGT_BUCKET} != "" ]]
      then
        _TGT_BUCKET_=${_TGT_BUCKET}
      else
        _TGT_BUCKET_=$(jq -r ."targetBucket" "${_CONFIG_FILE}")
      fi
declare -a _WORKER_POOL_OPTIONS
if [[ -n "${_WORKER_POOL_NAME}" ]]; then
_WORKER_POOL_OPTIONS+=(--worker_pool_name "${_WORKER_POOL_NAME}")
fi
if [[ -n "${_CLOUD_BUILD_REGION}" ]]; then
_WORKER_POOL_OPTIONS+=(--region "${_CLOUD_BUILD_REGION}")
fi
      src/common/materializer/deploy.sh \
          --gcs_logs_bucket "${_GCS_BUCKET}" \
          --gcs_tgt_bucket "${_TGT_BUCKET_}" \
          --module_name "SFDC" \
          --target_type "Reporting" \
          --config_file "${_CONFIG_FILE}" \
          --materializer_settings_file config/reporting_settings.yaml \
          "${_WORKER_POOL_OPTIONS[@]}"
# Deploy Datamesh
- name: "gcr.io/kittycorn-public/deploy-kittycorn:v2.0"
id: "datamesh"
waitFor: ["reporting"]
script: |
#!/usr/bin/env bash
set -e
      _DEPLOY_DATA_MESH_=$(jq -r ."deployDataMesh" "${_CONFIG_FILE}")
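      # e.g. "deployDataMesh": true in the config file yields the string
      # "true" here (jq -r prints raw values).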
if [[ ${_DEPLOY_DATA_MESH_} == "true" ]]; then
        python3 src/common/data_mesh/deploy_data_mesh.py \
            --config-file "${_CONFIG_FILE}" \
            --tag-template-directories config/tag_templates \
            --policy-directories config/policy_taxonomies \
            --lake-directories config/lakes \
            --annotation-directories config/annotations/reporting
else
echo "==Skipping Data Mesh=="
fi
logsBucket: "gs://${_GCS_BUCKET}"
serviceAccount: "${_BUILD_ACCOUNT}"
timeout: 10200s
substitutions:
_CONFIG_FILE: "config/config.json"
options:
substitution_option: "ALLOW_LOOSE"
automapSubstitutions: true
pool:
name: "${_WORKER_POOL_NAME}"
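# Example invocation (substitution values are illustrative):
#   gcloud builds submit --config=cloudbuild.sfdc.yaml \
#     --substitutions=_GCS_BUCKET=my-logs-bucket,_TGT_BUCKET=my-dags-bucket,_BUILD_ACCOUNT=projects/my-project/serviceAccounts/builder@my-project.iam.gserviceaccount.com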