def __cleanup_computers()

in ad-joining/register-computer/main.py [0:0]


def __cleanup_computers(request):
    """
        Clean up stale computer accounts and stale managed instance group
        (MIG) groups in Active Directory.

        The caller must be authenticated as the service account named by the
        FUNCTION_IDENTITY setting. The scan starts at the OU named by the
        CUSTOM_OU_ROOT_DN environment variable, or PROJECTS_DN if the former
        is not set, and visits every OU returned by find_ou() for that root.

        Aborts the request with:
          - HTTP_ACCESS_DENIED if the caller is not the expected identity,
          - HTTP_BAD_GATEWAY if connecting to Active Directory fails,
          - HTTP_INTERNAL_SERVER_ERROR if no root OU is configured.

        Returns a JSON response that maps each successfully processed OU DN
        to its "computers" and "groups" cleanup metrics. OUs whose processing
        raised an exception are logged and omitted from the response.
    """

    # Authenticate HTTP request
    auth_info = __authenticate_request(request, False)

    # Authorize the request. The request must be using the same service
    # account as the cloud function in order to be considered legitimate.
    function_identity = __read_required_setting("FUNCTION_IDENTITY")
    caller_email = auth_info.get_email()
    if function_identity != caller_email:
        logging.error("Untrusted caller '%s', expected '%s'", caller_email, function_identity)
        return flask.abort(HTTP_ACCESS_DENIED, description="CALLER_AUTHENTICATION_FAILED")

    # The request is now properly authorized. Identify projects that we can
    # scavenge.
    try:
        ad_connection = __connect_to_activedirectory()
    except Exception:
        logging.exception("Connecting to Active Directory failed")
        return flask.abort(HTTP_BAD_GATEWAY, description="CONNECT_TO_AD_FAILED")

    # Although we verify that we can access the VM instance's project when a VM
    # is joined, this project access might later be revoked. When checking whether
    # a VM instance still exists, we therefore need to be careful in distinguishing
    # between the cases (1) the VM does not exist and (2) the VM is inaccessible.
    # Any exception raised while processing an OU is therefore treated as
    # "inaccessible" and causes the OU to be skipped rather than scavenged.

    # Iterate over all OUs underneath the projects OU or the custom OU, if specified
    projects_root_dn = os.getenv("PROJECTS_DN")
    root_dn = os.getenv("CUSTOM_OU_ROOT_DN", projects_root_dn)

    if not root_dn:
        logging.warning("Cleanup cannot start. Could not find root OU to start the scan from.")
        return flask.abort(HTTP_INTERNAL_SERVER_ERROR, description="BAD_ROOT_OU_CONFIGURATION")

    logging.info("Starting cleanup in OU '%s'", root_dn)
    result = {}
    for ou in ad_connection.find_ou(root_dn):
        # Bind the name before the try block so that the exception handler
        # never reports an unbound name or the DN of the previous OU.
        ou_name = "<unknown>"
        try:
            ou_name = ou.get_dn()

            # Computers are scavenged first, then the groups whose MIGs were removed.
            computer_metrics = __cleanup_stale_computers_in_ou(ad_connection, ou_name)
            group_metrics = __cleanup_stale_groups_in_ou(ad_connection, ou_name)

            result[ou_name] = {
                "computers": computer_metrics,
                "groups": group_metrics
            }
        except Exception as e:
            # We cannot access this project, ignore.
            logging.warning("Skipping OU '%s' as it is inaccessible: %s", ou_name, str(e))

    return flask.jsonify(result)


def __cleanup_stale_computers_in_ou(ad_connection, ou_dn):
    """
        Delete computer accounts (and their DNS records) in the given OU
        whose annotated VM instance can no longer be found.

        Accounts that lack an instance name, project ID or zone annotation
        are ignored. Failures to delete an individual account or DNS record
        are logged and counted; any other exception (for example from the
        instance lookup) propagates to the caller.

        Returns a dict with the keys "stale_accounts", "accounts_deleted",
        "accounts_failed", "dns_records_deleted" and "dns_records_failed".
    """

    # Look up list of computer accounts in the OU
    computer_accounts = ad_connection.find_computer(ou_dn)

    accounts_deleted = 0
    accounts_failed = 0

    dns_records_deleted = 0
    dns_records_failed = 0

    logging.info("Checking for stale computer accounts in OU '%s'", ou_dn)
    for computer in computer_accounts:
        instance_name = computer.get_instance_name()
        project_id = computer.get_project_id()
        zone = computer.get_zone()

        # All three annotations are required to look up the instance. Without
        # this guard, an account lacking a zone would be looked up with an
        # empty zone and could be mistaken for a stale account.
        if not instance_name or not project_id or not zone:
            logging.debug("Ignoring computer account '%s' as it lacks for GCE annotations", computer.get_name())
            continue

        if gcp.project.Project(project_id).get_instance(instance_name, zone):
            # VM instance still exists, fine.
            logging.debug("Skipping computer account '%s' as it has a matching instance '%s' in project '%s'",
                computer.get_name(), instance_name, project_id)
            continue

        logging.info("Computer account '%s' (instance '%s' in project '%s') is stale",
            computer.get_name(), instance_name, project_id)

        # Delete the computer object
        try:
            ad_connection.delete_computer(computer.get_dn())
            accounts_deleted += 1

        except Exception as e:
            logging.error("Failed to delete stale computer account '%s' (instance '%s' in project '%s'): %s",
                computer.get_name(), instance_name, project_id, str(e))
            accounts_failed += 1

        # Delete the DNS record. This is attempted even if deleting the
        # computer object failed.
        try:
            if computer.get_dns_record_dn():
                logging.info("Computer account '%s' has a stale DNS record: %s",
                    computer.get_name(), computer.get_dns_record_dn())
                ad_connection.delete_dns_record(computer.get_dns_record_dn())
                dns_records_deleted += 1

        except ad.domain.NoSuchObjectException:
            # Presumably the record is already gone; neither counted as
            # deleted nor as failed.
            pass

        except Exception as e:
            logging.error("Failed to delete DNS record '%s' for stale computer account '%s' (instance '%s' in project '%s'): %s",
                computer.get_dns_record_dn(), computer.get_name(), instance_name, project_id, str(e))
            dns_records_failed += 1

    logging.info("Done checking for stale computer accounts in OU "
        "'%s' - %d accounts deleted, %d failed to be deleted",
        ou_dn, accounts_deleted, accounts_failed)

    # Gather metrics for response.
    return {
        "stale_accounts": accounts_deleted + accounts_failed,
        "accounts_deleted": accounts_deleted,
        "accounts_failed": accounts_failed,
        "dns_records_deleted": dns_records_deleted,
        "dns_records_failed": dns_records_failed
    }


def __cleanup_stale_groups_in_ou(ad_connection, ou_dn):
    """
        Delete groups in the given OU whose annotated managed instance
        group (MIG) can no longer be found.

        Groups that lack a project ID, or that have neither a zone nor a
        region annotation, are ignored. Failures to delete an individual
        group are logged and counted; any other exception (for example from
        the MIG lookup) propagates to the caller.

        Returns a dict with the keys "stale_accounts", "accounts_deleted"
        and "accounts_failed".
    """

    mig_ad_groups = ad_connection.find_group(ou_dn)

    groups_deleted = 0
    groups_failed = 0

    logging.info("Checking for stale managed instance groups in OU '%s'", ou_dn)
    for mig_ad_group in mig_ad_groups:
        group_name = mig_ad_group.get_name()
        project_id = mig_ad_group.get_project_id()
        zone = mig_ad_group.get_zone()
        region = mig_ad_group.get_region()

        # A MIG is located by project plus either a zone or a region.
        if not project_id or (not zone and not region):
            logging.debug("Ignoring group '%s' as it lacks for GCE annotations", group_name)
            continue

        if gcp.project.Project(project_id).get_managed_instance_group(group_name, zone, region):
            # MIG still exists, fine.
            logging.debug("Skipping group '%s' as it has a matching managed instance group in project '%s'",
                group_name, project_id)
            continue

        logging.info("Group '%s' (project '%s') is stale", group_name, project_id)
        try:
            ad_connection.delete_group(mig_ad_group.get_dn())
            groups_deleted += 1

        except Exception as e:
            # Include the cause, consistent with the computer account error log.
            logging.error("Failed to delete stale group '%s' (project '%s'): %s",
                group_name, project_id, str(e))
            groups_failed += 1

    logging.info("Done checking for stale groups in OU "
        "'%s' - %d groups deleted, %d failed to be deleted",
        ou_dn, groups_deleted, groups_failed)

    # Gather metrics for response. The key names mirror the computer metrics.
    return {
        "stale_accounts": groups_deleted + groups_failed,
        "accounts_deleted": groups_deleted,
        "accounts_failed": groups_failed
    }