def windows_auto_stop_instance()

in source/soca/cluster_web_ui/scheduled_tasks/manage_dcv_instances_lifecycle.py [0:0]


def _windows_auto_stop_query_session(instance_id):
    """Probe DCV session state and CPU usage on a Windows host through SSM.

    Sends a PowerShell script with ``AWS-RunPowerShellScript``, polls the
    command status up to 6 times (20 seconds apart) and decodes the JSON the
    script prints.

    Args:
        instance_id: EC2 instance id to query.

    Returns:
        A ``(session_info, ssm_command_id)`` tuple. ``session_info`` is the
        decoded dictionary on success and ``None`` on any failure (every
        failure is logged here). ``ssm_command_id`` is ``None`` when the
        command could not even be sent.
    """
    powershell_commands = [
        "$DCV_Describe_Session = Invoke-Expression \"& 'C:\\Program Files\\NICE\\DCV\\Server\\bin\\dcv' describe-session console -j\" | ConvertFrom-Json",
        "$CPUAveragePerformanceLast10Secs = (GET-COUNTER -Counter \"\\Processor(_Total)\\% Processor Time\" -SampleInterval 2 -MaxSamples 5 |select -ExpandProperty countersamples | select -ExpandProperty cookedvalue | Measure-Object -Average).average",
        "$output = @{}",
        "$output[\"CPUAveragePerformanceLast10Secs\"] = $CPUAveragePerformanceLast10Secs",
        "$output[\"DCVCurrentConnections\"] = $DCV_Describe_Session.\"num-of-connections\"",
        "$output[\"DCVCreationTime\"] = $DCV_Describe_Session.\"creation-time\"",
        "$output[\"DCVLastDisconnectTime\"] = $DCV_Describe_Session.\"last-disconnection-time\"",
        "$output | ConvertTo-Json"]

    try:
        check_dcv_session = client_ssm.send_command(InstanceIds=[instance_id],
                                                    DocumentName='AWS-RunPowerShellScript',
                                                    Parameters={"commands": powershell_commands},
                                                    TimeoutSeconds=30)
    except ClientError as err:
        logger.error("windows_auto_stop_instance: Unable to query SSM for {} : {}".format(instance_id, err))
        if "InvalidInstanceId" in str(err):
            logger.error("windows_auto_stop_instance: Instance is not in Running state or SSM daemon is not running. This instance is probably still starting up ...")
        return None, None

    ssm_command_id = check_dcv_session["Command"]["CommandId"]
    commands = []
    for _ in range(6):
        try:
            # Fetch once per poll and reuse the response for both the status check and the log line.
            commands = client_ssm.list_commands(CommandId=ssm_command_id)['Commands']
            check_command_status = commands[0]['Status']
        except (ClientError, IndexError, KeyError) as err:
            logger.error("windows_auto_stop_instance: Unable to retrieve SSM command status for {} : {}".format(ssm_command_id, err))
            return None, ssm_command_id

        if check_command_status == "Success":
            break

        logger.info("windows_auto_stop_instance: SSM command ({}) executed but did not succeed or failed yet. Waiting 20 seconds ... {} ".format(ssm_command_id, commands))
        # These statuses are final: waiting longer cannot turn them into a success.
        if check_command_status in ("Failed", "Cancelled", "TimedOut"):
            logger.error("windows_auto_stop_instance: Unable to query DCV for {} with SSM id {}".format(instance_id, ssm_command_id))
            return None, ssm_command_id
        time.sleep(20)
    else:
        # Only reached when all 6 polls ended without a "Success" status.
        logger.error("windows_auto_stop_instance: Unable to determine status SSM responses after 2 minutes timeout for {} : {} ".format(ssm_command_id, str(commands)))
        return None, ssm_command_id

    try:
        ssm_output = client_ssm.get_command_invocation(CommandId=ssm_command_id, InstanceId=instance_id)
        return json.loads(ssm_output["StandardOutputContent"]), ssm_command_id
    except (ClientError, KeyError, ValueError) as err:
        # ValueError covers json.JSONDecodeError (e.g. empty output when the script printed nothing).
        logger.error("windows_auto_stop_instance: Unable to read SSM output for {} with SSM id {} : {}".format(instance_id, ssm_command_id, err))
        return None, ssm_command_id


def _windows_auto_stop_shutdown(instance_id, action):
    """Stop or hibernate ``instance_id`` and mark its DCV session as stopped in the database.

    A dry run is issued first; the real call is only made when the dry run
    reports ``DryRunOperation``. EC2 and database errors are logged, never
    raised, so one failing host does not abort the scheduled task.

    Args:
        instance_id: EC2 instance id to stop.
        action: ``"hibernate"`` to request hibernation, anything else for a plain stop.
    """
    stop_parameters = {"InstanceIds": [instance_id]}
    if action == "hibernate":
        stop_parameters["Hibernate"] = True

    try:
        client_ec2.stop_instances(DryRun=True, **stop_parameters)
    except ClientError as err:
        if err.response['Error'].get('Code') != 'DryRunOperation':
            logger.error("windows_auto_stop_instance: Unable to {} instance ({}) due to {}".format(action, instance_id, err))
            return
    else:
        # The dry run is expected to answer by raising; without that confirmation
        # the instance is left untouched, as in the previous implementation.
        return

    try:
        client_ec2.stop_instances(**stop_parameters)
    except ClientError as err:
        logger.error("windows_auto_stop_instance: Unable to {} instance ({}) due to {}".format(action, instance_id, err))
        return

    logger.info("windows_auto_stop_instance: Stopped {}".format(instance_id))
    try:
        check_session = WindowsDCVSessions.query.filter_by(session_instance_id=instance_id, session_state="running", is_active=True).first()
        if check_session:
            check_session.session_state = "stopped"
            db.session.commit()
            logger.info("windows_auto_stop_instance: DB entry updated")
        else:
            logger.error("windows_auto_stop_instance: Instance ({}) has been stopped but could not find associated database entry".format(instance_id))
    except Exception as err:
        logger.error("windows_auto_stop_instance: SQL Query error: {}".format(err))


def windows_auto_stop_instance(instances_to_check):
    """Stop or hibernate idle Windows DCV hosts.

    Each running host returned by ``retrieve_host`` is probed once through
    SSM. A host is stopped (or hibernated, when ``hibernate_enabled`` is
    True) only if all of the following hold:

    * the matching idle threshold from ``config.Config`` is greater than 0,
    * its average CPU usage is below ``config.Config.DCV_IDLE_CPU_THRESHOLD``,
    * it has no current DCV connection,
    * the last disconnection (or the session creation, if nobody ever
      connected) is older than the idle threshold, expressed in hours.

    Args:
        instances_to_check: Passed through to ``retrieve_host`` to select the hosts to inspect.

    Returns:
        None. Failures on one host are logged and the remaining hosts are still processed.
    """
    with db.app.app_context():
        logger.info(f"Scheduled Task: windows_auto_stop_instance {instances_to_check}")
        get_host_to_stop = retrieve_host(instances_to_check, "running")
        logger.info("windows_auto_stop_instance: List of Windows DCV hosts subject to stop/hibernate {}".format(get_host_to_stop))
        cpu_threshold = config.Config.DCV_IDLE_CPU_THRESHOLD

        for instance_id, instance_data in get_host_to_stop.items():
            if instance_data["hibernate_enabled"] is True:
                action = "hibernate"
                stop_instance_after = config.Config.DCV_WINDOWS_HIBERNATE_IDLE_SESSION
            else:
                action = "stop"
                stop_instance_after = config.Config.DCV_WINDOWS_STOP_IDLE_SESSION

            logger.info("windows_auto_stop_instance: Trying to {} instance {} if idle for more than {} hours and  CPU % is below {}".format(action,
                                                                                                                                               instance_id,
                                                                                                                                               stop_instance_after,
                                                                                                                                               cpu_threshold))
            if stop_instance_after <= 0:
                # A non-positive threshold leaves this host alone.
                continue

            logger.info("Checking Instance ID: {}".format(instance_id))
            session_info, ssm_command_id = _windows_auto_stop_query_session(instance_id)
            if session_info is None:
                logger.error("windows_auto_stop_instance: SSM failed for {} with ssm_id {}".format(instance_id, ssm_command_id))
                continue

            logger.info(session_info)
            try:
                session_current_connection = session_info["DCVCurrentConnections"]
                session_cpu_average = float(session_info["CPUAveragePerformanceLast10Secs"])
                # An empty/null disconnect time means the user launched DCV but never accessed it;
                # fall back to the session creation time in that case.
                last_dcv_disconnect = parse(session_info["DCVLastDisconnectTime"] or session_info["DCVCreationTime"])
            except (KeyError, TypeError, ValueError) as err:
                logger.error("windows_auto_stop_instance: Unexpected DCV session data for {} : {}".format(instance_id, err))
                continue

            if session_cpu_average >= cpu_threshold:
                logger.info("windows_auto_stop_instance: CPU usage {} is above threshold {} so this host won't be subject to {}.".format(session_cpu_average, cpu_threshold, action))
                continue

            if session_current_connection != 0:
                logger.info("windows_auto_stop_instance: {} currently has active DCV sessions".format(instance_id))
                continue

            # Real UTC "now": relabelling local wall-clock time as UTC skews the idle window
            # by the host's UTC offset.
            current_time = datetime.now(timezone.utc).replace(microsecond=0)
            if last_dcv_disconnect + timedelta(hours=stop_instance_after) >= current_time:
                logger.info("windows_auto_stop_instance: {} NOT ready for {}. Last access time {}".format(instance_id, action, last_dcv_disconnect))
                continue

            logger.info("windows_auto_stop_instance: {} is ready for {}. Last access time {}".format(instance_id, action, last_dcv_disconnect))
            _windows_auto_stop_shutdown(instance_id, action)