in source/soca/cluster_web_ui/scheduled_tasks/manage_dcv_instances_lifecycle.py [0:0]
# PowerShell script executed on the Windows host through SSM. It emits one JSON
# document with the average CPU usage over ~10 seconds (5 samples, 2 seconds apart)
# and the connection count / timestamps reported by `dcv describe-session console`.
_WINDOWS_AUTO_STOP_PROBE_COMMANDS = (
    "$DCV_Describe_Session = Invoke-Expression \"& 'C:\\Program Files\\NICE\\DCV\\Server\\bin\\dcv' describe-session console -j\" | ConvertFrom-Json",
    "$CPUAveragePerformanceLast10Secs = (GET-COUNTER -Counter \"\\Processor(_Total)\\% Processor Time\" -SampleInterval 2 -MaxSamples 5 |select -ExpandProperty countersamples | select -ExpandProperty cookedvalue | Measure-Object -Average).average",
    "$output = @{}",
    "$output[\"CPUAveragePerformanceLast10Secs\"] = $CPUAveragePerformanceLast10Secs",
    "$output[\"DCVCurrentConnections\"] = $DCV_Describe_Session.\"num-of-connections\"",
    "$output[\"DCVCreationTime\"] = $DCV_Describe_Session.\"creation-time\"",
    "$output[\"DCVLastDisconnectTime\"] = $DCV_Describe_Session.\"last-disconnection-time\"",
    "$output | ConvertTo-Json",
)

# 6 polls spaced 20 seconds apart == the "2 minutes timeout" mentioned in the logs.
_WINDOWS_AUTO_STOP_SSM_MAX_POLLS = 6
_WINDOWS_AUTO_STOP_SSM_POLL_SECONDS = 20

# SSM command statuses after which polling can never end in "Success".
_WINDOWS_AUTO_STOP_SSM_TERMINAL_FAILURES = ("Failed", "Cancelled", "TimedOut")


def _windows_auto_stop_wait_for_ssm(instance_id, ssm_command_id):
    """Poll SSM until the probe command succeeds; return True on success, False otherwise."""
    for _ in range(_WINDOWS_AUTO_STOP_SSM_MAX_POLLS):
        commands = client_ssm.list_commands(CommandId=ssm_command_id)['Commands']
        # An empty list is treated as "not finished yet" rather than indexing into it.
        status = commands[0]['Status'] if commands else "Pending"
        if status == "Success":
            return True

        logger.info("windows_auto_stop_instance: SSM command ({}) executed but did not succeed or failed yet. Waiting 20 seconds ... {} ".format(ssm_command_id, commands))
        if status in _WINDOWS_AUTO_STOP_SSM_TERMINAL_FAILURES:
            logger.error("windows_auto_stop_instance: Unable to query DCV for {} with SSM id {}".format(instance_id, ssm_command_id))
            return False
        time.sleep(_WINDOWS_AUTO_STOP_SSM_POLL_SECONDS)

    # Every poll was used without ever observing "Success".
    logger.error("windows_auto_stop_instance: Unable to determine status SSM responses after 2 minutes timeout for {} : {} ".format(ssm_command_id, str(client_ssm.list_commands(CommandId=ssm_command_id))))
    return False


def _windows_auto_stop_probe_session(instance_id):
    """Run the idle probe on one instance via SSM; return the decoded JSON dict, or None on failure."""
    ssm_command_id = None  # stays None when send_command itself fails, so the final log is always safe
    session_info = None

    try:
        check_dcv_session = client_ssm.send_command(InstanceIds=[instance_id],
                                                    DocumentName='AWS-RunPowerShellScript',
                                                    Parameters={"commands": list(_WINDOWS_AUTO_STOP_PROBE_COMMANDS)},
                                                    TimeoutSeconds=30)
        ssm_command_id = check_dcv_session["Command"]["CommandId"]
    except ClientError as err:
        logger.error("windows_auto_stop_instance: Unable to query SSM for {} : {}".format(instance_id, err))
        if "InvalidInstanceId" in str(err):
            logger.error("windows_auto_stop_instance: Instance is not in Running state or SSM daemon is not running. This instance is probably still starting up ...")

    if ssm_command_id is not None and _windows_auto_stop_wait_for_ssm(instance_id, ssm_command_id):
        try:
            ssm_output = client_ssm.get_command_invocation(CommandId=ssm_command_id, InstanceId=instance_id)
            session_info = json.loads(ssm_output["StandardOutputContent"])
        except (ClientError, ValueError) as err:
            # ValueError covers empty / non-JSON standard output from the script.
            logger.error("windows_auto_stop_instance: Unable to read SSM output for {} with SSM id {} : {}".format(instance_id, ssm_command_id, err))

    if session_info is None:
        logger.error("windows_auto_stop_instance: SSM failed for {} with ssm_id {}".format(instance_id, ssm_command_id))
    return session_info


def _windows_auto_stop_apply_action(instance_id, action):
    """Stop (or hibernate when action == "hibernate") one instance and mark its DB session as stopped."""
    stop_arguments = {"InstanceIds": [instance_id]}
    if action == "hibernate":
        stop_arguments["Hibernate"] = True

    # Permission check: the dry run is expected to raise ClientError, with code
    # 'DryRunOperation' meaning the real request would be accepted.
    try:
        client_ec2.stop_instances(DryRun=True, **stop_arguments)
    except ClientError as err:
        if err.response['Error'].get('Code') != 'DryRunOperation':
            logger.error("windows_auto_stop_instance: Unable to {} instance ({}) due to {}".format(action, instance_id, err))
            return
    else:
        # The dry run did not raise, so there is no confirmation to act on.
        return

    try:
        client_ec2.stop_instances(**stop_arguments)
    except ClientError as err:
        logger.error("windows_auto_stop_instance: Unable to {} instance ({}) due to {}".format(action, instance_id, err))
        return
    logger.info("windows_auto_stop_instance: Stopped {}".format(instance_id))

    try:
        check_session = WindowsDCVSessions.query.filter_by(session_instance_id=instance_id, session_state="running", is_active=True).first()
        if check_session:
            check_session.session_state = "stopped"
            db.session.commit()
            logger.info("windows_auto_stop_instance: DB entry updated")
        else:
            logger.error("windows_auto_stop_instance: Instance ({}) has been stopped but could not find associated database entry".format(instance_id))
    except Exception as err:
        # Best effort: the instance is already stopped, so only report the DB failure.
        logger.error("windows_auto_stop_instance: SQL Query error: {}".format(err))


def windows_auto_stop_instance(instances_to_check):
    """Automatically stop or hibernate (when possible) idle Windows DCV instances.

    Each host returned by ``retrieve_host(instances_to_check, "running")`` is probed
    once through SSM. The host is stopped (or hibernated when its
    ``hibernate_enabled`` entry is True) only when all of these hold:

    * the idle window configured for that action is greater than 0,
    * the measured CPU average is below ``config.Config.DCV_IDLE_CPU_THRESHOLD``,
    * DCV reports 0 current connections,
    * the last disconnection (or the session creation when the session was never
      accessed) is older than the idle window, expressed in hours.

    Args:
        instances_to_check: forwarded unchanged to ``retrieve_host``.
            NOTE(review): its exact shape is defined by ``retrieve_host`` - confirm there.

    Returns:
        None. The outcome is reported through the module logger.
    """
    with db.app.app_context():
        logger.info(f"Scheduled Task: windows_auto_stop_instance {instances_to_check}")
        get_host_to_stop = retrieve_host(instances_to_check, "running")
        logger.info("windows_auto_stop_instance: List of Windows DCV hosts subject to stop/hibernate {}".format(get_host_to_stop))

        # One pass per host, so each host is evaluated with its OWN action / idle window.
        for instance_id, instance_data in get_host_to_stop.items():
            if instance_data["hibernate_enabled"] is True:
                action = "hibernate"
                stop_instance_after = config.Config.DCV_WINDOWS_HIBERNATE_IDLE_SESSION
            else:
                action = "stop"
                stop_instance_after = config.Config.DCV_WINDOWS_STOP_IDLE_SESSION

            logger.info("windows_auto_stop_instance: Trying to {} instance {} if idle for more than {} hours and CPU % is below {}".format(action,
                                                                                                                                          instance_id,
                                                                                                                                          stop_instance_after,
                                                                                                                                          config.Config.DCV_IDLE_CPU_THRESHOLD))
            if stop_instance_after <= 0:
                # Nothing is done for this host when its idle window is not positive.
                continue

            logger.info("Checking Instance ID: {}".format(instance_id))
            session_info = _windows_auto_stop_probe_session(instance_id)
            if session_info is None:
                continue
            logger.info(session_info)

            session_cpu_average = session_info["CPUAveragePerformanceLast10Secs"]
            if session_cpu_average >= config.Config.DCV_IDLE_CPU_THRESHOLD:
                logger.info("windows_auto_stop_instance: CPU usage {} is above threshold {} so this host won't be subject to {}.".format(session_cpu_average, config.Config.DCV_IDLE_CPU_THRESHOLD, action))
                continue

            if session_info["DCVCurrentConnections"] != 0:
                logger.info("windows_auto_stop_instance: {} currently has active DCV sessions".format(instance_id))
                continue

            # Fall back to the creation time when the user launched DCV but never accessed it.
            last_activity = session_info["DCVLastDisconnectTime"] or session_info["DCVCreationTime"]
            if not last_activity:
                logger.error("windows_auto_stop_instance: DCV did not report any session timestamp for {}".format(instance_id))
                continue
            last_dcv_disconnect = parse(last_activity)

            # Real UTC "now"; tagging the local wall clock as UTC would skew the idle window.
            current_time = datetime.now(timezone.utc).replace(microsecond=0)
            if (last_dcv_disconnect + timedelta(hours=stop_instance_after)) < current_time:
                logger.info("windows_auto_stop_instance: {} is ready for {}. Last access time {}".format(instance_id, action, last_dcv_disconnect))
                _windows_auto_stop_apply_action(instance_id, action)
            else:
                logger.info("windows_auto_stop_instance: {} NOT ready for {}. Last access time {}".format(instance_id, action, last_dcv_disconnect))