in services/jenkins-autoscaling/lambda_mxnet_ci/autoscaling/handler.py [0:0]
def scaling():  # pragma: no cover
    """
    Main handler, used by the lambda function. Runs one complete auto scaling round.

    The round proceeds in this order:
      1. Configure the log levels of the root logger and of the libraries in use.
      2. Fetch the nodes registered in Jenkins and the uptime of the ec2 instances.
      3. Determine scale-up demand, scale-down candidates, faulty nodes and orphaned instances.
      4. Apply the per-round limits, then scale down, terminate orphaned instances and scale up.

    :return: None
    """
    # Every helper called below has its own unit tests; this function itself has to be
    # integration tested in a live dev environment.
    logging.getLogger().setLevel(_get_log_level('LOGGING_LEVEL', logging.INFO))
    # (logger name, key handed to _get_log_level, default level)
    configurable_loggers = (
        ('botocore', 'LOGGING_LEVEL_BOTOCORE', logging.INFO),
        ('boto3', 'LOGGING_LEVEL_BOTO3', logging.INFO),
        ('urllib3', 'LOGGING_LEVEL_URLLIB3', logging.INFO),
        ('requests', 'LOGGING_LEVEL_REQUESTS', logging.ERROR),
    )
    for logger_name, level_key, default_level in configurable_loggers:
        logging.getLogger(logger_name).setLevel(_get_log_level(level_key, default_level))
    # These two loggers are pinned to a fixed level.
    logging.getLogger('botocore.vendored.requests.packages.urllib3.connectionpool').setLevel(logging.ERROR)
    logging.getLogger('jenkinsapi.node').setLevel(logging.INFO)

    # Don't retry but fail fast
    fail_fast_config = Config(retries={'max_attempts': 0})

    jenkins_server = _get_jenkins_handle()
    session = _get_aws_session()
    ec2 = session.resource('ec2', config=fail_fast_config)

    # Raw 'computer' entries of the Jenkins node listing
    jenkins_nodes = jenkins_server.get_nodes()._data['computer']
    logging.info("Found %d nodes registered in Jenkins.", len(jenkins_nodes))

    # Ec2 instances
    uptime_by_instance = _instance_uptime(ec2_resource=ec2)
    logging.info("Found %d ec2 instances.", len(uptime_by_instance))
    if len(uptime_by_instance) != len(jenkins_nodes):
        logging.warning("nodes and instances don't have the same length.")

    unconnected = _unconnected_instances(
        nodes=jenkins_nodes,
        instance_uptime=uptime_by_instance,
        ec2_resource=ec2,
    )

    pending_items = jenkins_server.get_queue()._data['items']
    upscale_plan = determine_scale_up_nodes(
        queue_items=pending_items,
        nodes=jenkins_nodes,
        unconnected=unconnected,
    )
    downscale_plan = determine_scale_down_nodes(
        nodes_data=jenkins_nodes,
        instance_uptime=uptime_by_instance,
    )

    ############################################
    # Detection of instances and slots to be cleaned up
    faulty_by_label, orphans = _determine_faulty_nodes(
        nodes=jenkins_nodes,
        instance_uptime=uptime_by_instance,
        unconnected_instances=unconnected,
    )
    if faulty_by_label:
        faulty_names = [
            entry['displayName']
            for group in faulty_by_label.values()
            for entry in group
        ]
        logging.warning('Found %d faulty instances: %s', len(faulty_names), faulty_names)
    if orphans:
        logging.error('Found %d orphaned instances: %s', len(orphans), orphans)
    ############################################

    # The per-round limits are applied before the faulty nodes are merged in, so the
    # faulty nodes are added on top of the limited scale-down set.
    upscale_plan = _apply_upscale_limit(limit=NUM_UPSCALES_PER_ROUND, label2num_instances=upscale_plan)
    downscale_plan = _apply_downscale_limit(limit=NUM_DOWNSCALES_PER_ROUND, scale_down_nodes=downscale_plan)
    downscale_plan = _merge_dicts_nested_lists(downscale_plan, faulty_by_label)

    execute_scale_down_logic(
        jenkins_server=jenkins_server,
        ec2_resource=ec2,
        scale_down_nodes=downscale_plan,
    )

    ############
    _terminate_ec2_instances(orphans, ec2)
    ############

    execute_scale_up_logic(
        jenkins_server=jenkins_server,
        ec2_resource=ec2,
        scale_up_nb_nodes=upscale_plan,
    )