def create_default

def create_default_config()

in google_cloud_automlops/utils/utils.py [0:0]
86 lines of code
8 McCabe index (conditional complexity)

def create_default_config(artifact_repo_location: str,
                          artifact_repo_name: str,
                          artifact_repo_type: str,
                          base_image: str,
                          build_trigger_location: str,
                          build_trigger_name: str,
                          deployment_framework: str,
                          naming_prefix: str,
                          orchestration_framework: str,
                          pipeline_job_location: str,
                          pipeline_job_runner_service_account: str,
                          pipeline_job_submission_service_location: str,
                          pipeline_job_submission_service_name: str,
                          pipeline_job_submission_service_type: str,
                          project_id: str,
                          provisioning_framework: str,
                          pubsub_topic_name: str,
                          schedule_location: str,
                          schedule_name: str,
                          schedule_pattern: str,
                          setup_model_monitoring: bool,
                          source_repo_branch: str,
                          source_repo_name: str,
                          source_repo_type: str,
                          storage_bucket_location: str,
                          storage_bucket_name: str,
                          use_ci: bool,
                          vpc_connector: str) -> dict:
    """Builds the contents of the defaults.yaml file as a nested dict.

    The result always has the top-level sections 'gcp', 'pipelines' and 'tooling'. A
    'monitoring' section, with every field set to None, is added only when
    setup_model_monitoring is truthy. Several 'gcp' entries (build trigger, submission
    service, pubsub topic, schedule, source repository and vpc connector) are written
    only when use_ci is truthy.

    Args:
        artifact_repo_location (str): Region of the artifact repo (default use with Artifact
            Registry).
        artifact_repo_name (str): Name of the artifact repo where components are stored
            (default use with Artifact Registry).
        artifact_repo_type (str): Kind of artifact repository (e.g. Artifact Registry, JFrog,
            etc.)
        base_image (str): Image used in the component base dockerfile.
        build_trigger_location (str): Location of the build trigger (for cloud build).
            Written only when use_ci is truthy.
        build_trigger_name (str): Name of the build trigger (for cloud build). Written only
            when use_ci is truthy.
        deployment_framework (str): Name of the CI tool (e.g. cloud build, github actions, etc.)
        naming_prefix (str): Unique value that differentiates pipelines and services across
            AutoMLOps runs; also part of the GCS pipeline job spec path.
        orchestration_framework (str): Orchestration framework (e.g. kfp, tfx, etc.)
        pipeline_job_location (str): Location to run the Pipeline Job in.
        pipeline_job_runner_service_account (str): Service Account that runs PipelineJobs.
        pipeline_job_submission_service_location (str): Location of the cloud submission
            service. Written only when use_ci is truthy.
        pipeline_job_submission_service_name (str): Name of the cloud submission service.
            Written only when use_ci is truthy.
        pipeline_job_submission_service_type (str): Tool hosting the cloud submission service
            (e.g. cloud run, cloud functions). Written only when use_ci is truthy.
        project_id (str): The project ID.
        provisioning_framework (str): IaC tool (e.g. Terraform, Pulumi, etc.)
        pubsub_topic_name (str): Name of the pubsub topic to publish to. Written only when
            use_ci is truthy.
        schedule_location (str): Location of the scheduler resource. Written only when use_ci
            is truthy.
        schedule_name (str): Name of the scheduler resource. Written only when use_ci is
            truthy.
        schedule_pattern (str): Cron formatted value used to create a Scheduled retrain job.
            Written only when use_ci is truthy.
        setup_model_monitoring (bool): Whether to set up a Vertex AI Model Monitoring Job;
            when truthy the 'monitoring' section is added.
        source_repo_branch (str): Branch of the source repository; stored under the key
            'source_repository_branch' when use_ci is truthy.
        source_repo_name (str): Name of the source repository; stored under the key
            'source_repository_name' when use_ci is truthy.
        source_repo_type (str): Kind of source repository (e.g. gitlab, github, etc.); stored
            under the key 'source_repository_type' when use_ci is truthy.
        storage_bucket_location (str): Region of the GS bucket; also stored as
            'pipeline_region' in the 'pipelines' section.
        storage_bucket_name (str): GS bucket name where pipeline run metadata is stored; used
            to build the gs:// pipeline paths.
        use_ci (bool): Whether to use Cloud CI/CD.
        vpc_connector (str): Name of the vpc connector. Written only when use_ci is truthy.

    Returns:
        dict: Defaults yaml file content.
    """
    # 'gcp' keys that are emitted only when use_ci is truthy.
    ci_only_keys = frozenset((
        'build_trigger_location',
        'build_trigger_name',
        'pipeline_job_submission_service_location',
        'pipeline_job_submission_service_name',
        'pipeline_job_submission_service_type',
        'pubsub_topic_name',
        'schedule_location',
        'schedule_name',
        'schedule_pattern',
        'source_repository_branch',
        'source_repository_name',
        'source_repository_type',
        'vpc_connector',
    ))

    # Every candidate 'gcp' entry, listed in the exact order it appears in the output so
    # that the filtered dict keeps the same key ordering.
    gcp_candidates = {
        'artifact_repo_location': artifact_repo_location,
        'artifact_repo_name': artifact_repo_name,
        'artifact_repo_type': artifact_repo_type,
        'base_image': base_image,
        'build_trigger_location': build_trigger_location,
        'build_trigger_name': build_trigger_name,
        'naming_prefix': naming_prefix,
        'pipeline_job_location': pipeline_job_location,
        'pipeline_job_runner_service_account': pipeline_job_runner_service_account,
        'pipeline_job_submission_service_location': pipeline_job_submission_service_location,
        'pipeline_job_submission_service_name': pipeline_job_submission_service_name,
        'pipeline_job_submission_service_type': pipeline_job_submission_service_type,
        'project_id': project_id,
        'setup_model_monitoring': setup_model_monitoring,
        'pubsub_topic_name': pubsub_topic_name,
        'schedule_location': schedule_location,
        'schedule_name': schedule_name,
        'schedule_pattern': schedule_pattern,
        'source_repository_branch': source_repo_branch,
        'source_repository_name': source_repo_name,
        'source_repository_type': source_repo_type,
        'storage_bucket_location': storage_bucket_location,
        'storage_bucket_name': storage_bucket_name,
        'vpc_connector': vpc_connector,
    }
    gcp_section = {
        key: value
        for key, value in gcp_candidates.items()
        if use_ci or key not in ci_only_keys
    }

    # NOTE(review): 'pipeline_region' is filled from storage_bucket_location, not from
    # pipeline_job_location -- confirm this is intended.
    pipelines_section = {
        'gs_pipeline_job_spec_path': f'gs://{storage_bucket_name}/pipeline_root/{naming_prefix}/pipeline_job.yaml',
        'parameter_values_path': GENERATED_PARAMETER_VALUES_PATH,
        'pipeline_component_directory': 'components',
        'pipeline_job_spec_path': GENERATED_PIPELINE_JOB_SPEC_PATH,
        'pipeline_region': storage_bucket_location,
        'pipeline_storage_path': f'gs://{storage_bucket_name}/pipeline_root',
    }

    tooling_section = {
        'deployment_framework': deployment_framework,
        'provisioning_framework': provisioning_framework,
        'orchestration_framework': orchestration_framework,
        'use_ci': use_ci,
    }

    config = {
        'gcp': gcp_section,
        'pipelines': pipelines_section,
        'tooling': tooling_section,
    }

    if not setup_model_monitoring:
        return config

    # These fields will be set up if and when AutoMLOps.monitor() is called, so each one
    # starts out as None.
    config['monitoring'] = dict.fromkeys((
        'target_field',
        'model_endpoint',
        'alert_emails',
        'auto_retraining_params',
        'drift_thresholds',
        'gs_auto_retraining_params_path',
        'job_display_name',
        'log_sink_name',
        'monitoring_interval',
        'monitoring_location',
        'sample_rate',
        'skew_thresholds',
        'training_dataset',
    ))
    return config