template/v2/dirs/etc/sagemaker/sm_pysdk_default_config.py (104 lines of code) (raw):

"""Write the SageMaker Python SDK admin default config from resource metadata.

When run as a script, this reads ``/opt/ml/metadata/resource-metadata.json``,
derives the S3 location, VPC settings and execution role from it, and writes
the resulting defaults to ``/etc/xdg/sagemaker/config.yaml``.
"""
import json
import os
import re

RESOURCE_METADATA_PATH = "/opt/ml/metadata/resource-metadata.json"
PROJECT_S3_PATH = "ProjectS3Path"
SECURITY_GROUP = "SecurityGroup"
PRIVATE_SUBNETS = "PrivateSubnets"
META_DATA = "AdditionalMetadata"
EXECUTION_ROLE_ARN = "ExecutionRoleArn"
CONFIG_FILE_NAME = "config.yaml"
CONFIG_DIR = "/etc/xdg/sagemaker/"


def generate_intelligent_default_config(metadata: dict) -> dict:
    """Build the SageMaker Python SDK default-config mapping.

    Args:
        metadata: Mapping with the keys ``S3Bucket``, ``S3ObjectKeyPrefix``,
            ``SecurityGroupIds``, ``Subnets`` and ``UserRoleArn``.

    Returns:
        A nested dict with ``SchemaVersion`` and ``SageMaker`` sections. Key
        insertion order is significant because the caller dumps it with
        ``sort_keys=False``.

    Raises:
        KeyError: If any of the required keys is missing from *metadata*.
    """
    bucket = metadata["S3Bucket"]
    key_prefix = metadata["S3ObjectKeyPrefix"]
    security_group_ids = metadata["SecurityGroupIds"]
    subnets = metadata["Subnets"]
    role_arn = metadata["UserRoleArn"]
    s3_uri = f"s3://{bucket}/{key_prefix}"

    def vpc_config() -> dict:
        # A new dict per section (as in the original literals); the two lists
        # are the caller's objects, shared rather than copied.
        return {"SecurityGroupIds": security_group_ids, "Subnets": subnets}

    return {
        "SchemaVersion": "1.0",
        "SageMaker": {
            "PythonSDK": {
                "Modules": {
                    "Session": {
                        "DefaultS3Bucket": bucket,
                        "DefaultS3ObjectKeyPrefix": key_prefix,
                    },
                    "RemoteFunction": {
                        "IncludeLocalWorkDir": True,
                        "VpcConfig": vpc_config(),
                    },
                    "NotebookJob": {
                        "RoleArn": role_arn,
                        "S3RootUri": s3_uri,
                        "VpcConfig": vpc_config(),
                    },
                    "Serve": {"S3ModelDataUri": s3_uri},
                }
            },
            "MonitoringSchedule": {
                "MonitoringScheduleConfig": {
                    "MonitoringJobDefinition": {"NetworkConfig": {"VpcConfig": vpc_config()}}
                }
            },
            "AutoMLJob": {"AutoMLJobConfig": {"SecurityConfig": {"VpcConfig": vpc_config()}}},
            "AutoMLJobV2": {"SecurityConfig": {"VpcConfig": vpc_config()}},
            "CompilationJob": {"VpcConfig": vpc_config()},
            "Pipeline": {"RoleArn": role_arn},
            "Model": {
                "VpcConfig": vpc_config(),
                "ExecutionRoleArn": role_arn,
            },
            "ModelPackage": {"ValidationSpecification": {"ValidationRole": role_arn}},
            "ProcessingJob": {
                "NetworkConfig": {"VpcConfig": vpc_config()},
                "RoleArn": role_arn,
            },
            "TrainingJob": {
                "RoleArn": role_arn,
                "VpcConfig": vpc_config(),
            },
        },
    }


def _split_s3_path(s3_path: str) -> tuple:
    """Split ``s3://bucket/prefix`` into ``(bucket, prefix)``.

    Returns empty strings for anything that is not an ``s3://`` URI so the
    caller's empty-value validation reports it, instead of failing here with
    an unrelated ``AttributeError``/``IndexError``.
    """
    match = re.fullmatch(r"s3://([^/]+)(?:/(.*))?", s3_path.strip(), re.DOTALL)
    if match is None:
        return "", ""
    return match.group(1), match.group(2) or ""


def _split_csv(value: str) -> list:
    """Split a comma-separated string, dropping blanks and surrounding whitespace."""
    return [item.strip() for item in value.split(",") if item.strip()]


def extract_metadata(data: dict) -> dict:
    """Derive the config inputs from a parsed resource-metadata document.

    Missing or null fields become empty values rather than raising, so that
    ``find_empty_metadata_keys`` can name every problem at once.

    Args:
        data: Parsed JSON content of the resource-metadata file.

    Returns:
        Mapping accepted by ``generate_intelligent_default_config``.
    """
    additional = data.get(META_DATA) or {}
    bucket, key_prefix = _split_s3_path(additional.get(PROJECT_S3_PATH) or "")
    return {
        # user provided bucket
        "S3Bucket": bucket,
        # ${datazoneEnvironmentDomainId}/${datazoneEnvironmentProjectId}/${datazoneScopeName}/
        "S3ObjectKeyPrefix": key_prefix,
        # TODO: Is this a billing concern if set default
        # 'InstanceType': 'ml.m5.xlarge',
        "SecurityGroupIds": _split_csv(additional.get(SECURITY_GROUP) or ""),
        "Subnets": _split_csv(additional.get(PRIVATE_SUBNETS) or ""),
        "UserRoleArn": data.get(EXECUTION_ROLE_ARN) or "",
    }


def find_empty_metadata_keys(metadata: dict) -> list:
    """Return the keys of *metadata* whose value is an empty string or list."""
    return [key for key, value in metadata.items() if not value]


def main() -> None:
    """Generate the config file; report (but do not propagate) any failure."""
    # Imported here instead of at module level so the pure helpers above can
    # be imported without PyYAML. Kept outside the ``try`` so a missing
    # PyYAML still aborts a script run, exactly as a top-level import would.
    import yaml

    try:
        if not os.path.exists(RESOURCE_METADATA_PATH):
            raise FileNotFoundError("No resource-metadata.json exists on host!")
        with open(RESOURCE_METADATA_PATH, "r", encoding="utf-8") as file:
            data = json.load(file)

        metadata = extract_metadata(data)
        # Not create config file when invalid value exists in metadata
        empty_values = find_empty_metadata_keys(metadata)
        if empty_values:
            raise ValueError(f"There are empty values in the metadata: {empty_values}")
        config = generate_intelligent_default_config(metadata)

        # Write the config YAML file to default location of the admin config file
        with open(os.path.join(CONFIG_DIR, CONFIG_FILE_NAME), "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
    except Exception as e:
        # Deliberately best-effort: a failure here is reported, not raised.
        print(f"Error: {e}, SageMaker PySDK intelligent config file is not valid!")


if __name__ == "__main__":
    main()