in source/soca/cluster_manager/add_nodes.py [0:0]
def main(**kwargs):
    """Validate job parameters and create the CloudFormation stack for the job.

    Keyword arguments are the job parameters. Optional parameters that the
    caller did not supply are filled in with defaults (the ``kwargs`` dict is
    updated in place) before the whole set is handed to ``check_config``.

    Returns:
        On success, a dict with ``success`` (True), ``stack_name`` and
        ``compute_node`` (``'job'`` followed by the job id).
        If ``check_config`` returns a mapping containing a ``message`` key,
        that mapping is returned unchanged.
        In every other failure case (validation exception, missing
        configuration value, template build failure, failed dry run, stack
        creation error, or any unexpected exception) the value produced by
        ``return_message`` is returned; no exception is propagated.
    """

    def _failure_details(err):
        # Describe the exception currently being handled: its text, its type,
        # the file name and line number recorded on the traceback, and the
        # job parameters as they stand at the time of the failure.
        exc_type, _exc_obj, exc_tb = sys.exc_info()
        source_file = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        return '{!s}: error:{!s} {!s} {!s} {!s}'.format(
            err, exc_type, source_file, exc_tb.tb_lineno, kwargs)

    try:
        # Defaults for optional job parameters; only applied to keys the
        # caller left out.
        optional_job_parameters = {
            'anonymous_metrics': soca_configuration["DefaultMetricCollection"],
            'force_ri': False,
            'base_os': False,
            'efa_support': False,
            'fsx_lustre': False,
            'fsx_lustre_size': False,
            'fsx_lustre_deployment_type': "SCRATCH_2",
            'fsx_lustre_per_unit_throughput': 200,
            'ht_support': False,
            'keep_ebs': False,
            'root_size': 10,
            'scratch_size': 0,
            'security_groups': False,
            'instance_profile': False,
            'spot_allocation_count': False,
            'spot_allocation_strategy': 'capacity-optimized',
            'spot_price': False,
            'subnet_id': False,
            'system_metrics': False,
            'scratch_iops': 0,
            'stack_uuid': str(uuid.uuid4()),
            'weighted_capacity': False,
        }
        for option_name, option_default in optional_job_parameters.items():
            kwargs.setdefault(option_name, option_default)

        # Validate job parameters
        try:
            params = check_config(**kwargs)
        except Exception as e:
            return return_message('Unable to verify parameters ' + _failure_details(e))

        # check_config signals a validation error through a 'message' entry;
        # hand it back to the caller as-is.
        if 'message' in params.keys():
            return params

        # Force tags if they don't exist. DO NOT DELETE them or the host
        # won't be able to be registered by nodes_manager.py
        tags = params['tags']
        if params['keep_forever'] is True:
            cfn_stack_name = (soca_configuration['ClusterId'] + '-keepforever-'
                              + params['queue'] + '-' + params['stack_uuid'])
            tags['soca:KeepForever'] = 'true'
        else:
            cfn_stack_name = soca_configuration['ClusterId'] + '-job-' + str(params['job_id'])
            tags['soca:KeepForever'] = 'false'

        if int(params['terminate_when_idle']) > 0:
            tags['soca:TerminateWhenIdle'] = params['terminate_when_idle']

        # Caller-provided values for these tags win; only fill the gaps.
        if 'soca:NodeType' not in tags.keys():
            tags['soca:NodeType'] = 'soca-compute-node'
        if 'soca:ClusterId' not in tags.keys():
            tags['soca:ClusterId'] = soca_configuration['ClusterId']
        if 'soca:JobId' not in tags.keys():
            tags['soca:JobId'] = params['job_id']
        if 'Name' not in tags.keys():
            tags['Name'] = cfn_stack_name.replace('_', '-')

        # These parameters will be used to build the cloudformation template.
        # Each entry is (template parameter, job parameter key, default):
        #   - key is None    -> the default is always used and must not be None
        #   - key is present -> the validated job parameter is used when it
        #                       exists, otherwise the default
        parameter_specs = (
            ('AuthProvider', None, soca_configuration['AuthProvider']),
            ('BaseOS', 'base_os', soca_configuration['BaseOS']),
            ('ClusterId', None, soca_configuration['ClusterId']),
            ('ComputeNodeInstanceProfileArn', None, soca_configuration['ComputeNodeInstanceProfileArn']),
            ('CoreCount', 'core_count', None),
            ('DesiredCapacity', 'desired_capacity', None),
            ('Efa', 'efa_support', False),
            ('FileSystemApps', None, soca_configuration['FileSystemApps']),
            ('FileSystemAppsProvider', None, soca_configuration['FileSystemAppsProvider']),
            ('FileSystemData', None, soca_configuration['FileSystemData']),
            ('FileSystemDataProvider', None, soca_configuration['FileSystemDataProvider']),
            ('ESDomainEndpoint', None, soca_configuration['ESDomainEndpoint']),
            ('FSxLustreConfiguration', 'fsx_lustre_configuration', False),
            ('ImageId', 'instance_ami', soca_configuration['CustomAMI']),
            ('CustomIamInstanceProfile', 'instance_profile', False),
            ('InstanceType', 'instance_type', None),
            ('JobId', 'job_id', None),
            ('JobName', 'job_name', None),
            ('JobOwner', 'job_owner', None),
            ('JobProject', 'job_project', None),
            ('JobQueue', 'queue', None),
            ('KeepEbs', 'keep_ebs', False),
            ('KeepForever', 'keep_forever', False),
            ('MetricCollectionAnonymous', 'anonymous_metrics', soca_configuration["DefaultMetricCollection"]),
            ('PlacementGroup', 'placement_group', True),
            ('RootSize', 'root_size', 10),
            ('S3Bucket', None, soca_configuration['S3Bucket']),
            ('S3InstallFolder', None, soca_configuration['S3InstallFolder']),
            ('SchedulerPrivateDnsName', None, soca_configuration['SchedulerPrivateDnsName']),
            ('ScratchSize', 'scratch_size', 0),
            ('AdditionalSecurityGroupIds', 'security_groups', None),
            ('SecurityGroupId', None, soca_configuration['ComputeNodeSecurityGroup']),
            ('SchedulerHostname', None, soca_configuration['SchedulerPrivateDnsName']),
            ('SolutionMetricsLambda', None, soca_configuration['SolutionMetricsLambda']),
            ('SpotAllocationCount', 'spot_allocation_count', False),
            ('SpotAllocationStrategy', 'spot_allocation_strategy', 'capacity-optimized'),
            ('SpotFleetIAMRoleArn', None, soca_configuration['SpotFleetIAMRoleArn']),
            ('SpotPrice', 'spot_price', False),
            ('SSHKeyPair', None, soca_configuration['SSHKeyPair']),
            ('StackUUID', None, params['stack_uuid']),
            ('SubnetId', 'subnet_id', None),
            ('SystemMetrics', 'system_metrics', False),
            ('TerminateWhenIdle', 'terminate_when_idle', 0),
            ('ThreadsPerCore', 'ht_support', False),
            ('Version', None, soca_configuration['Version']),
            ('VolumeTypeIops', 'scratch_iops', 0),
            ('WeightedCapacity', 'weighted_capacity', False),
        )

        cfn_stack_parameters = {}
        for cfn_name, job_key, fallback in parameter_specs:
            if job_key is None:
                # Configuration-only value: there is nothing to fall back on.
                if fallback is None:
                    return return_message('Unable to detect value for ' + cfn_name)
                cfn_stack_parameters[cfn_name] = fallback
            elif job_key in params.keys():
                cfn_stack_parameters[cfn_name] = params[job_key]
            else:
                cfn_stack_parameters[cfn_name] = fallback

        cfn_stack_body = cloudformation_builder.main(**cfn_stack_parameters)
        if cfn_stack_body['success'] is False:
            return return_message(cfn_stack_body['output'])

        # Tags with a falsy value are left out of the stack tags.
        cfn_stack_tags = [
            {'Key': str(tag_name), 'Value': str(tag_value)}
            for tag_name, tag_value in tags.items()
            if tag_value
        ]

        # Dry Run (note: licenses checks is handled by dispatcher.py. This dry
        # run only check for AWS related commands). Only the first entry of
        # SubnetId is passed to the dry run.
        can_launch = can_launch_capacity(
            cfn_stack_parameters['InstanceType'],
            cfn_stack_parameters['DesiredCapacity'],
            cfn_stack_parameters['ImageId'],
            cfn_stack_parameters['SubnetId'][0],
            cfn_stack_parameters['SecurityGroupId'],
        )
        if can_launch is not True:
            return return_message('Dry Run failed: ' + str(can_launch))

        try:
            cloudformation.create_stack(
                StackName=cfn_stack_name,
                TemplateBody=cfn_stack_body['output'],
                Tags=cfn_stack_tags,
            )
            launch_result = {
                'success': True,
                'stack_name': cfn_stack_name,
                'compute_node': 'job' + str(params['job_id']),
            }
            return launch_result
        except Exception as e:
            return return_message(_failure_details(e))

    except Exception as e:
        # Last-resort boundary: report any unexpected failure to the caller
        # instead of raising.
        return return_message(_failure_details(e))