# cli/readme.py

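# This script regenerates the GitHub Actions workflow files under
# ../.github/workflows/ and rebuilds the tables in README.md from the sample
# YAML files, shell scripts, and notebooks found in this directory tree.
#
# Usage (run from the cli directory):
#
#   python3 readme.py                 # regenerate workflows and README.md
#   python3 readme.py --check-readme  # additionally exit 2 if README.md changed
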
# imports
import os
import json
import glob
import argparse
import hashlib
import random
import string
import yaml

# define constants
EXCLUDED_JOBS = ["java", "spark-job-component", "storage_pe", "user-assigned-identity"]
# TODO: Re-include the endpoints and deployments below once the workflow
# generation code supports substituting vars in .yaml files.
EXCLUDED_ENDPOINTS = [
    "1-uai-create-endpoint",
    "1-sai-create-endpoint",
    "tfserving-endpoint",
]
EXCLUDED_DEPLOYMENTS = [
    "minimal-multimodel-deployment",
    "minimal-single-model-conda-in-dockerfile-deployment",
    "mlflow-deployment",
    "r-deployment",
    "torchserve-deployment",
    "triton-cc-deployment",
    "2-sai-deployment",
    "kubernetes-green-deployment",
]
EXCLUDED_RESOURCES = [
    "workspace",
    "datastore",
    "vm-attach",
    "instance",
    "connections",
    "compute/cluster-user-identity",
    "compute/attached-spark",
    "compute/attached-spark-system-identity",
    "compute/attached-spark-user-identity",
    "registry",
]
EXCLUDED_ASSETS = ["conda-yamls", "mlflow-models"]
EXCLUDED_SCHEDULES = []
EXCLUDED_SCRIPTS = [
    "setup",
    "cleanup",
    "run-job",
    "run-pipeline-job-with-registry-components",
    "deploy-custom-container-multimodel-minimal",
    "run-pipeline-jobs",
]
READONLY_HEADER = "# This code is autogenerated.\
\n# Code is generated by running custom script: python3 readme.py\
\n# Any manual changes to this file may cause incorrect behavior.\
\n# Any manual changes will be overwritten if the code is regenerated.\n"
BRANCH = "main"  # default - do not change

# Duplicate name in working directory during checkout
# https://github.com/actions/checkout/issues/739
GITHUB_WORKSPACE = "${{ github.workspace }}"
GITHUB_CONCURRENCY_GROUP = (
    "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
)
# BRANCH = "sdk-preview"  # this should be deleted when this branch is merged to main

hours_between_runs = 12


# define functions
def main(args):
    # get list of notebooks
    notebooks = sorted(glob.glob("**/*.ipynb", recursive=True))

    # make all notebooks consistent
    modify_notebooks(notebooks)

    # get list of jobs
    jobs = sorted(glob.glob("jobs/**/*job*.yml", recursive=True))
    jobs += sorted(glob.glob("jobs/basics/*.yml", recursive=False))
    jobs += sorted(glob.glob("jobs/*/basics/**/*job*.yml", recursive=True))
    jobs += sorted(glob.glob("jobs/pipelines/**/*pipeline*.yml", recursive=True))
    jobs += sorted(glob.glob("jobs/spark/*.yml", recursive=False))
    jobs += sorted(
        glob.glob("jobs/automl-standalone-jobs/**/cli-automl-*.yml", recursive=True)
    )
    jobs += sorted(
        glob.glob("jobs/pipelines-with-components/**/*pipeline*.yml", recursive=True)
    )
    jobs += sorted(
        glob.glob("jobs/automl-standalone-jobs/**/*cli-automl*.yml", recursive=True)
    )
    jobs += sorted(glob.glob("responsible-ai/**/cli-*.yml", recursive=True))
    jobs += sorted(glob.glob("jobs/parallel/**/*pipeline*.yml", recursive=True))
    jobs = [
        job.replace(".yml", "")
        for job in jobs
        if not any(excluded in job for excluded in EXCLUDED_JOBS)
    ]

    jobs_using_registry_components = sorted(
        glob.glob(
            "jobs/pipelines-with-components/basics/**/*pipeline*.yml", recursive=True
        )
    )
    jobs_using_registry_components = [
        job.replace(".yml", "")
        for job in jobs_using_registry_components
        if not any(excluded in job.replace(os.sep, "/") for excluded in EXCLUDED_JOBS)
    ]

    # get list of endpoints
    endpoints = sorted(glob.glob("endpoints/**/*endpoint.yml", recursive=True))
    endpoints = [
        endpoint.replace(".yml", "")
        for endpoint in endpoints
        if not any(
            excluded in endpoint.replace(os.sep, "/") for excluded in EXCLUDED_ENDPOINTS
        )
    ]

    # get list of resources
    resources = sorted(glob.glob("resources/**/*.yml", recursive=True))
    resources = [
        resource.replace(".yml", "")
        for resource in resources
        if not any(
            excluded in resource.replace(os.sep, "/") for excluded in EXCLUDED_RESOURCES
        )
    ]

    # get list of assets
    assets = sorted(glob.glob("assets/**/*.yml", recursive=True))
    assets = [
        asset.replace(".yml", "")
        for asset in assets
        if not any(
            excluded in asset.replace(os.sep, "/") for excluded in EXCLUDED_ASSETS
        )
    ]

    # get list of scripts
    scripts = sorted(glob.glob("*.sh", recursive=False))
    scripts = [
        script.replace(".sh", "")
        for script in scripts
        if not any(
            excluded in script.replace(os.sep, "/") for excluded in EXCLUDED_SCRIPTS
        )
    ]

    # get list of schedules
    schedules = sorted(glob.glob("schedules/**/*schedule.yml", recursive=True))
    schedules = [
        schedule.replace(".yml", "")
        for schedule in schedules
        if not any(
            excluded in schedule.replace(os.sep, "/") for excluded in EXCLUDED_SCHEDULES
        )
    ]

    # write workflows
    write_workflows(
        jobs,
        jobs_using_registry_components,
        endpoints,
        resources,
        assets,
        scripts,
        schedules,
    )

    # read existing README.md
    with open("README.md", "r") as f:
        readme_before = f.read()

    # write README.md
    write_readme(jobs, endpoints, resources, assets, scripts, schedules)

    # read modified README.md
    with open("README.md", "r") as f:
        readme_after = f.read()

    # check if readme matches
    if args.check_readme:
        if not check_readme(readme_before, readme_after):
            print("README.md file did not match...")
            exit(2)
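
# Each list above is collected the same way: glob for matching files, strip
# the extension, then drop any path that contains an excluded substring. A
# minimal sketch of the idiom, using hypothetical paths:
#
#   candidates = ["jobs/basics/hello-world.yml", "jobs/misc/spark-job-component.yml"]
#   kept = [
#       c.replace(".yml", "")
#       for c in candidates
#       if not any(excluded in c for excluded in EXCLUDED_JOBS)
#   ]
#   # kept == ["jobs/basics/hello-world"], since the second path matches the
#   # excluded substring "spark-job-component"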


def modify_notebooks(notebooks):
    # setup variables
    kernelspec = {
        "display_name": "Python 3.8 - AzureML",
        "language": "python",
        "name": "python38-azureml",
    }

    # for each notebook
    for notebook in notebooks:
        # read in notebook
        with open(notebook, "r") as f:
            data = json.load(f)

        # update metadata
        data["metadata"]["kernelspec"] = kernelspec

        # write notebook
        with open(notebook, "w") as f:
            json.dump(data, f, indent=1)
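
# modify_notebooks leaves each notebook's cells untouched and only pins the
# kernel metadata, so every sample notebook ends up with this kernelspec in
# its metadata:
#
#   "kernelspec": {
#     "display_name": "Python 3.8 - AzureML",
#     "language": "python",
#     "name": "python38-azureml"
#   }
#
# indent=1 appears to match the indentation Jupyter itself writes, which
# keeps the rewrite from producing spurious diffs.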


def write_readme(jobs, endpoints, resources, assets, scripts, schedules):
    # read in prefix.md and suffix.md
    with open("prefix.md", "r") as f:
        prefix = f.read()
    with open("suffix.md", "r") as f:
        suffix = f.read()

    # define markdown tables
    jobs_table = "\n**Jobs** ([jobs](jobs))\n\npath|status|description\n-|-|-\n"
    endpoints_table = (
        "\n**Endpoints** ([endpoints](endpoints))\n\npath|status|description\n-|-|-\n"
    )
    resources_table = (
        "\n**Resources** ([resources](resources))\n\npath|status|description\n-|-|-\n"
    )
    assets_table = "\n**Assets** ([assets](assets))\n\npath|status|description\n-|-|-\n"
    scripts_table = "\n**Scripts**\n\npath|status\n-|-\n"
    schedules_table = "\n**Schedules**\n\npath|status\n-|-\n"

    # process jobs
    for job in jobs:
        # build entry for the jobs table
        posix_job = job.replace(os.sep, "/")
        job_name = posix_job.replace("/", "-")
        status = f"[![{posix_job}](https://github.com/Azure/azureml-examples/workflows/cli-{job_name}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-{job_name}.yml)"
        description = "*no description*"
        try:
            with open(f"{job}.yml", "r") as f:
                for line in f.readlines():
                    if "description: " in str(line):
                        description = line.split(": ")[-1].strip()
                        break
        except:
            pass

        # add row to the jobs table
        row = f"[{posix_job}.yml]({posix_job}.yml)|{status}|{description}\n"
        jobs_table += row

    # process endpoints
    for endpoint in endpoints:
        # build entry for the endpoints table
        posix_endpoint = endpoint.replace(os.sep, "/")
        endpoint_name = posix_endpoint.replace("/", "-")
        status = f"[![{posix_endpoint}](https://github.com/Azure/azureml-examples/workflows/cli-{endpoint_name}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-{endpoint_name}.yml)"
        description = "*no description*"
        try:
            with open(f"{endpoint}.yml", "r") as f:
                for line in f.readlines():
                    if "description: " in str(line):
                        description = line.split(": ")[-1].strip()
                        break
        except:
            pass

        # add row to the endpoints table
        row = f"[{posix_endpoint}.yml]({posix_endpoint}.yml)|{status}|{description}\n"
        endpoints_table += row

    # process resources
    for resource in resources:
        # build entry for the resources table
        posix_resource = resource.replace(os.sep, "/")
        resource_name = posix_resource.replace("/", "-")
        status = f"[![{posix_resource}](https://github.com/Azure/azureml-examples/workflows/cli-{resource_name}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-{resource_name}.yml)"
        description = "*no description*"
        try:
            with open(f"{resource}.yml", "r") as f:
                for line in f.readlines():
                    if "description: " in str(line):
                        description = line.split(": ")[-1].strip()
                        break
        except:
            pass

        # add row to the resources table
        row = f"[{posix_resource}.yml]({posix_resource}.yml)|{status}|{description}\n"
        resources_table += row

    # process assets
    for asset in assets:
        # build entry for the assets table
        posix_asset = asset.replace(os.sep, "/")
        asset_name = posix_asset.replace("/", "-")
        status = f"[![{posix_asset}](https://github.com/Azure/azureml-examples/workflows/cli-{asset_name}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-{asset_name}.yml)"
        description = "*no description*"
        try:
            with open(f"{asset}.yml", "r") as f:
                for line in f.readlines():
                    if "description: " in str(line):
                        description = line.split(": ")[-1].strip()
                        break
        except:
            pass

        # add row to the assets table
        row = f"[{posix_asset}.yml]({posix_asset}.yml)|{status}|{description}\n"
        assets_table += row

    # process scripts
    for script in scripts:
        # build entry for the scripts table
        posix_script = script.replace(os.sep, "/")
        status = f"[![{posix_script}](https://github.com/Azure/azureml-examples/workflows/cli-scripts-{script}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-scripts-{script}.yml)"

        # add row to the scripts table
        row = f"[{posix_script}.sh]({posix_script}.sh)|{status}\n"
        scripts_table += row

    # process schedules
    for schedule in schedules:
        # build entry for the schedules table; the badge name must match the
        # hyphenated workflow name produced by write_schedule_workflow
        posix_schedule = schedule.replace(os.sep, "/")
        schedule_name = posix_schedule.replace("/", "-")
        status = f"[![{posix_schedule}](https://github.com/Azure/azureml-examples/workflows/cli-schedules-{schedule_name}/badge.svg?branch={BRANCH})](https://github.com/Azure/azureml-examples/actions/workflows/cli-schedules-{schedule_name}.yml)"

        # add row to the schedules table
        row = f"[{posix_schedule}.yml]({posix_schedule}.yml)|{status}\n"
        schedules_table += row

    # write README.md
    print("writing README.md...")
    with open("README.md", "w") as f:
        f.write(
            prefix
            + scripts_table
            + jobs_table
            + endpoints_table
            + resources_table
            + assets_table
            + schedules_table
            + suffix
        )
    print("Finished writing README.md...")
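
# For a hypothetical job at jobs/basics/hello-world, the row appended to
# jobs_table is a single Markdown table line (wrapped here for readability):
#
#   [jobs/basics/hello-world.yml](jobs/basics/hello-world.yml)|
#     [![jobs/basics/hello-world](https://github.com/Azure/azureml-examples/workflows/cli-jobs-basics-hello-world/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-basics-hello-world.yml)|
#     *no description*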


def write_workflows(
    jobs,
    jobs_using_registry_components,
    endpoints,
    resources,
    assets,
    scripts,
    schedules,
):
    print("writing .github/workflows...")

    # process jobs
    for job in jobs:
        # write workflow file
        write_job_workflow(job)

    # process jobs_using_registry_components
    for job in jobs_using_registry_components:
        # write workflow file
        write_job_using_registry_components_workflow(job)

    # process endpoints
    for endpoint in endpoints:
        # write workflow file
        write_endpoint_workflow(endpoint)

    # process resources (resources reuse the asset workflow template)
    for resource in resources:
        # write workflow file
        write_asset_workflow(resource)

    # process assets
    for asset in assets:
        # write workflow file
        write_asset_workflow(asset)

    # process scripts
    for script in scripts:
        # write workflow file
        write_script_workflow(script)

    # process schedules
    for schedule in schedules:
        # write workflow file
        write_schedule_workflow(schedule)


def check_readme(before, after):
    return before == after


def parse_path(path):
    filename = None
    project_dir = None
    hyphenated = None
    try:
        filename = path.split(os.sep)[-1]
    except:
        pass
    try:
        project_dir = os.sep.join(path.split(os.sep)[:-1])
    except:
        pass
    try:
        hyphenated = path.replace(os.sep, "-").replace("/", "-")
    except:
        pass
    return filename, project_dir, hyphenated
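
# Example of parse_path on a POSIX system (hypothetical input):
#
#   parse_path("jobs/basics/hello-world")
#   # -> ("hello-world", "jobs/basics", "jobs-basics-hello-world")
#
# The hyphenated form becomes the generated workflow's name, e.g.
# .github/workflows/cli-jobs-basics-hello-world.yml.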


def write_job_workflow(job):
    filename, project_dir, hyphenated = parse_path(job)
    posix_project_dir = project_dir.replace(os.sep, "/")
    is_pipeline_sample = "jobs/pipelines" in job
    is_spark_sample = "jobs/spark" in job
    schedule_hour, schedule_minute = get_schedule_time(filename)
    # Duplicate name in working directory during checkout
    # https://github.com/actions/checkout/issues/739
    workflow_yaml = f"""{READONLY_HEADER}
name: cli-{hyphenated}
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{posix_project_dir}/**
      - infra/bootstrapping/**
      - .github/workflows/cli-{hyphenated}.yml\n"""
    if is_pipeline_sample:
        workflow_yaml += "      - cli/run-pipeline-jobs.sh\n"
    if is_spark_sample:
        workflow_yaml += "      - cli/jobs/spark/data/titanic.csv\n"
    workflow_yaml += f"""      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          echo '{GITHUB_CONCURRENCY_GROUP}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none\n"""
    if is_spark_sample:
        workflow_yaml += get_spark_setup_workflow(job, posix_project_dir, filename)
    workflow_yaml += f"""      - name: run job
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";\n"""
    if "automl" in job and "image" in job:
        workflow_yaml += f"""          bash \"{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh\" replace_template_values \"prepare_data.py\";
          pip install azure-identity
          bash \"{GITHUB_WORKSPACE}/sdk/python/setup.sh\"
          python prepare_data.py --subscription $SUBSCRIPTION_ID --group $RESOURCE_GROUP_NAME --workspace $WORKSPACE_NAME\n"""
    elif "autotuning" in job:
        workflow_yaml += f"""          bash -x generate-yml.sh\n"""
        # workflow_yaml += f"""          bash -x {os.path.relpath(".", project_dir)}/run-job.sh generate-yml.yml\n"""
    workflow_yaml += f"""          bash -x {os.path.relpath(".", project_dir).replace(os.sep, "/")}/run-job.sh {filename}.yml
        working-directory: cli/{posix_project_dir}
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{posix_project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false\n"""

    # write workflow
    with open(
        f"..{os.sep}.github{os.sep}workflows{os.sep}cli-{job.replace(os.sep, '-').replace('/', '-')}.yml",
        "w",
    ) as f:
        f.write(workflow_yaml)


# Like write_job_workflow, but the generated workflow name and file get a
# "-registry" suffix and the run step calls
# run-pipeline-job-with-registry-components.sh with the pipeline file name
# and its folder name instead of run-job.sh.
def write_job_using_registry_components_workflow(job):
    filename, project_dir, hyphenated = parse_path(job)
    posix_project_dir = project_dir.replace(os.sep, "/")
    folder_name = project_dir.split(os.sep)[-1]
    is_pipeline_sample = "jobs/pipelines" in job
    schedule_hour, schedule_minute = get_schedule_time(filename)
    # Duplicate name in working directory during checkout
    # https://github.com/actions/checkout/issues/739
    workflow_yaml = f"""{READONLY_HEADER}
name: cli-{hyphenated}-registry
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{posix_project_dir}/**
      - infra/bootstrapping/**
      - .github/workflows/cli-{hyphenated}-registry.yml\n"""
    if is_pipeline_sample:
        workflow_yaml += "      - cli/run-pipeline-jobs.sh\n"
    workflow_yaml += f"""      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          echo '{GITHUB_CONCURRENCY_GROUP}';
          bash bootstrap.sh
        working-directory: infra
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{posix_project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: run job
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";\n"""
    if "automl" in job and "image" in job:
        workflow_yaml += f"""          bash \"{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh\" replace_template_values \"prepare_data.py\";
          pip install azure-identity
          bash \"{GITHUB_WORKSPACE}/sdk/python/setup.sh\"
          python prepare_data.py --subscription $SUBSCRIPTION_ID --group $RESOURCE_GROUP_NAME --workspace $WORKSPACE_NAME\n"""
    workflow_yaml += f"""          bash -x {os.path.relpath(".", project_dir).replace(os.sep, "/")}/run-pipeline-job-with-registry-components.sh {filename} {folder_name}
        working-directory: cli/{posix_project_dir}\n"""

    # write workflow
    with open(
        f"..{os.sep}.github{os.sep}workflows{os.sep}cli-{job.replace(os.sep, '-').replace('/', '-')}-registry.yml",
        "w",
    ) as f:
        f.write(workflow_yaml)


def write_endpoint_workflow(endpoint):
    filename, project_dir, hyphenated = parse_path(endpoint)
    project_dir = project_dir.replace(os.sep, "/")
    deployments = sorted(
        glob.glob(project_dir + "/*deployment.yml", recursive=True)
        + glob.glob(project_dir + "/*deployment.yaml", recursive=True)
    )
    deployments = [
        deployment
        for deployment in deployments
        if not any(excluded in deployment for excluded in EXCLUDED_DEPLOYMENTS)
    ]
    schedule_hour, schedule_minute = get_schedule_time(filename)
    endpoint_type = (
        "online"
        if "endpoints/online/" in endpoint
        else "batch"
        if "endpoints/batch/" in endpoint
        else "unknown"
    )
    endpoint_name = hyphenated[-28:].replace("-", "") + str(
        random.randrange(1000, 9999)
    )

    create_endpoint_yaml = f"""{READONLY_HEADER}
name: cli-{hyphenated}
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{project_dir}/**
      - cli/endpoints/{endpoint_type}/**
      - infra/bootstrapping/**
      - .github/workflows/cli-{hyphenated}.yml
      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: delete endpoint if existing
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          az ml {endpoint_type}-endpoint delete -n {endpoint_name} -y
        working-directory: cli
        continue-on-error: true
      - name: create endpoint
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          cat {endpoint}.yml
          az ml {endpoint_type}-endpoint create -n {endpoint_name} -f {endpoint}.yml
        working-directory: cli\n"""

    cleanup_yaml = f"""      - name: cleanup endpoint
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          az ml {endpoint_type}-endpoint delete -n {endpoint_name} -y
        working-directory: cli\n"""

    workflow_yaml = create_endpoint_yaml

    if deployments:
        for deployment in deployments:
            deployment = deployment.replace(".yml", "").replace(".yaml", "")
            deployment_yaml = f"""      - name: create deployment
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          cat {deployment}.yml
          az ml {endpoint_type}-deployment create -e {endpoint_name} -f {deployment}.yml
        working-directory: cli\n"""
            workflow_yaml += deployment_yaml

    workflow_yaml += cleanup_yaml

    # write workflow
    with open(f"../.github/workflows/cli-{hyphenated}.yml", "w") as f:
        f.write(workflow_yaml)
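
# Azure ML endpoint names have uniqueness and length constraints, so
# (presumably as a workaround) write_endpoint_workflow derives the name from
# the last 28 characters of the hyphenated path, strips the hyphens, and
# appends four random digits. For example, the hypothetical path
# "endpoints/online/sample/endpoint" hyphenates to
# "endpoints-online-sample-endpoint" and yields something like
# "ointsonlinesampleendpoint4821".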


def write_asset_workflow(asset):
    filename, project_dir, hyphenated = parse_path(asset)
    project_dir = project_dir.replace(os.sep, "/")
    posix_asset = asset.replace(os.sep, "/")
    schedule_hour, schedule_minute = get_schedule_time(filename)
    workflow_yaml = f"""{READONLY_HEADER}
name: cli-{hyphenated}
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{posix_asset}.yml
      - infra/bootstrapping/**
      - .github/workflows/cli-{hyphenated}.yml
      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          bash bootstrapping/bootstrap.sh
        working-directory: infra
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: create asset
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          az ml {asset.split(os.sep)[1]} create -f {posix_asset}.yml
        working-directory: cli\n"""

    # write workflow
    with open(
        f"..{os.sep}.github{os.sep}workflows{os.sep}cli-{hyphenated}.yml", "w"
    ) as f:
        f.write(workflow_yaml)


def write_script_workflow(script):
    filename, project_dir, hyphenated = parse_path(script)
    project_dir = project_dir.replace(os.sep, "/")
    schedule_hour, schedule_minute = get_schedule_time(filename)
    workflow_yaml = f"""{READONLY_HEADER}
name: cli-scripts-{hyphenated}
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{script}.sh
      - infra/bootstrapping/**
      - .github/workflows/cli-scripts-{hyphenated}.yml
      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          bash bootstrap.sh
        working-directory: infra
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: test script
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          set -e;
          bash -x {script}.sh
        working-directory: cli\n"""

    # write workflow
    with open(f"../.github/workflows/cli-scripts-{hyphenated}.yml", "w") as f:
        f.write(workflow_yaml)


def write_schedule_workflow(schedule):
    filename, project_dir, hyphenated = parse_path(schedule)
    project_dir = project_dir.replace(os.sep, "/")
    posix_schedule = schedule.replace(os.sep, "/")
    schedule_hour, schedule_minute = get_schedule_time(filename)
    workflow_yaml = f"""{READONLY_HEADER}
name: cli-schedules-{hyphenated}
on:
  workflow_dispatch:
  schedule:
    - cron: "{schedule_minute} {schedule_hour}/{hours_between_runs} * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/{posix_schedule}.yml
      - infra/bootstrapping/**
      - .github/workflows/cli-schedules-{hyphenated}.yml
      - cli/setup.sh
permissions:
  id-token: write
concurrency:
  group: {GITHUB_CONCURRENCY_GROUP}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          client-id: ${{{{ secrets.OIDC_AZURE_CLIENT_ID }}}}
          tenant-id: ${{{{ secrets.OIDC_AZURE_TENANT_ID }}}}
          subscription-id: ${{{{ secrets.OIDC_AZURE_SUBSCRIPTION_ID }}}}
      - name: bootstrap resources
        run: |
          bash bootstrap.sh
        working-directory: infra
        continue-on-error: false
      - name: setup-cli
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: Eagerly cache access tokens for required scopes
        run: |
          # Workaround for azure-cli's lack of support for ID token refresh
          # Taken from: https://github.com/Azure/login/issues/372#issuecomment-2056289617

          # Management
          az account get-access-token --scope https://management.azure.com/.default --output none

          # ML
          az account get-access-token --scope https://ml.azure.com/.default --output none
      - name: validate readme
        run: |
          python check-readme.py "{GITHUB_WORKSPACE}/cli/{project_dir}"
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: create schedule
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          az ml schedule create -f ./{posix_schedule}.yml --set name="ci_test_{filename}"
        working-directory: cli
      - name: disable schedule
        run: |
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/sdk_helpers.sh";
          source "{GITHUB_WORKSPACE}/infra/bootstrapping/init_environment.sh";
          az ml schedule disable --name ci_test_{filename}
        working-directory: cli\n"""

    # write workflow
    with open(f"../.github/workflows/cli-schedules-{hyphenated}.yml", "w") as f:
        f.write(workflow_yaml)


def get_schedule_time(filename):
    name_hash = int(hashlib.sha512(filename.encode()).hexdigest(), 16)
    schedule_minute = name_hash % 60
    schedule_hour = (name_hash // 60) % hours_between_runs
    return schedule_hour, schedule_minute
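
# get_schedule_time staggers the scheduled runs: the SHA-512 hash of the file
# name deterministically picks a minute (0-59) and an hour offset
# (0 to hours_between_runs - 1), so each generated workflow fires every
# hours_between_runs hours at its own stable time rather than all workflows
# starting at once. For example, a hash that reduces to minute 37 and hour 5
# produces the cron expression "37 5/12 * * *".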


def get_endpoint_name(filename, hyphenated):
    # gets the endpoint name from the .yml file
    with open(filename, "r") as f:
        endpoint_name = yaml.safe_load(f)["name"]
    return endpoint_name


def get_spark_setup_workflow(job, posix_project_dir, filename):
    is_attached = "attached-spark" in job
    is_user_identity = "user-identity" in job
    is_managed_identity = "managed-identity" in job
    is_default_identity = "default-identity" in job
    workflow = f"""      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true\n"""
    if is_managed_identity:
        workflow += f"""      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/{posix_project_dir}
        continue-on-error: true\n"""
    if is_attached:
        workflow += f"""      - name: setup attached spark
        working-directory: cli
        continue-on-error: true"""
    if is_attached and is_user_identity:
        workflow += f"""
        run: |
          bash -x {posix_project_dir}/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml {posix_project_dir}/{filename}.yml\n"""
    if is_attached and is_managed_identity:
        workflow += f"""
        run: |
          bash -x {posix_project_dir}/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml {posix_project_dir}/{filename}.yml\n"""
    if is_attached and is_default_identity:
        workflow += f"""
        run: |
          bash -x {posix_project_dir}/setup-attached-resources.sh resources/compute/attached-spark.yml {posix_project_dir}/{filename}.yml\n"""

    return workflow


# run functions
if __name__ == "__main__":
    # setup argparse; a store_true flag is used because argparse's type=bool
    # treats any non-empty string (including "False") as True
    parser = argparse.ArgumentParser()
    parser.add_argument("--check-readme", action="store_true")
    args = parser.parse_args()

    # call main
    main(args)