in source/lambda/graph-modelling/index.py [0:0]
def run_modelling_job(timestamp,
                      train_input,
                      s3_bucket=S3_BUCKET,
                      train_out_prefix=OUTPUT_PREFIX,
                      train_job_prefix='sagemaker-graph-fraud-model-training',
                      train_source_dir='dgl_fraud_detection',
                      train_entry_point='train_dgl_mxnet_entry_point.py',
                      framework='mxnet',
                      framework_version='1.6.0',
                      xpu='gpu',
                      python_version='py3',
                      instance_type=INSTANCE_TYPE
                      ):
    """Package the training code and submit a SageMaker training job.

    The job name is ``"<train_job_prefix>-<timestamp>"``. The directory
    ``train_source_dir`` is handed to ``tar_and_upload_to_s3`` together with
    the bucket and the key prefix ``<train_out_prefix>/<job name>/source``;
    whatever that helper returns is forwarded to the container as the
    ``sagemaker_submit_directory`` hyperparameter.

    Args:
        timestamp: Suffix appended to ``train_job_prefix`` to form the job name.
        train_input: Value used as the ``S3Uri`` of the single ``train`` channel.
        s3_bucket: Bucket used for the code upload and for the job output path.
        train_out_prefix: Key prefix used for the code upload and job output.
        train_job_prefix: Prefix of the training job name.
        train_source_dir: Directory containing the training source code.
        train_entry_point: Script name passed as ``sagemaker_program``.
        framework: Framework name interpolated into the training image URI.
        framework_version: Framework version interpolated into the image tag.
        xpu: Device flavour interpolated into the image tag (e.g. ``'gpu'``).
        python_version: Python flavour interpolated into the image tag.
        instance_type: Instance type requested for the training job.

    Returns:
        The response returned by ``create_training_job`` on the boto3
        SageMaker client.
    """
    print("Creating SageMaker Training job with inputs from {}".format(train_input))

    sm_client = boto3.client('sagemaker')
    current_region = boto3.session.Session().region_name

    # Training image URI: <registry>/<framework>-training:<version>-<xpu>-<python>.
    image_fields = (current_region, framework, framework_version, xpu, python_version)
    training_image = "763104351884.dkr.ecr.{}.amazonaws.com/{}-training:{}-{}-{}".format(*image_fields)

    job_name = "{}-{}".format(train_job_prefix, timestamp)

    # Upload the training sources under a per-job prefix so each job keeps
    # its own copy of the code it ran with.
    source_key_prefix = os.path.join(train_out_prefix, job_name, 'source')
    submit_directory = tar_and_upload_to_s3(train_source_dir, s3_bucket, source_key_prefix)

    # Settings addressed to the container rather than to the training script
    # (presumably read by the framework container's toolkit -- TODO confirm).
    log_level = 20
    container_settings = {
        'sagemaker_container_log_level': str(log_level),
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_job_name': json.dumps(job_name),
        'sagemaker_program': json.dumps(train_entry_point),
        'sagemaker_region': json.dumps(current_region),
        'sagemaker_submit_directory': json.dumps(submit_directory)
    }

    # Settings for the training script itself.
    script_settings = {
        'nodes': 'features.csv',
        'edges': 'relation*',
        'labels': 'tags.csv',
        'model': 'rgcn',
        'num-gpus': 1,
        'batch-size': 10000,
        'embedding-size': 64,
        'n-neighbors': 1000,
        'n-layers': 2,
        'n-epochs': 10,
        'optimizer': 'adam',
        'lr': 1e-2
    }

    # Every script setting is stringified first and then JSON-encoded, so
    # numbers travel as quoted JSON strings (e.g. 1 -> '"1"').
    hyperparameters = {}
    for setting_name, setting_value in script_settings.items():
        hyperparameters[setting_name] = json.dumps(str(setting_value))
    # Container settings are merged last and therefore win on a key clash.
    hyperparameters.update(container_settings)

    output_location = get_full_s3_path(s3_bucket, train_out_prefix)

    train_channel = {
        "ChannelName": "train",
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": train_input,
                "S3DataDistributionType": "FullyReplicated"
            }
        },
    }

    job_request = {
        'TrainingJobName': job_name,
        "AlgorithmSpecification": {
            "TrainingImage": training_image,
            "TrainingInputMode": "File"
        },
        "RoleArn": ROLE_ARN,
        "OutputDataConfig": {
            "S3OutputPath": output_location
        },
        "ResourceConfig": {
            "InstanceCount": 1,
            "InstanceType": instance_type,
            "VolumeSizeInGB": 30
        },
        "HyperParameters": hyperparameters,
        "StoppingCondition": {
            # 86400 seconds = 24 hours.
            "MaxRuntimeInSeconds": 86400
        },
        "InputDataConfig": [train_channel]
    }

    return sm_client.create_training_job(**job_request)