rules/customer-fc/ack_cluster_node_monitor_enabled.py (170 lines of code) (raw):
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import json
import logging
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
from aliyunsdkcore.http import protocol_type
"""
该函数周期触发方式为周期执行。
收到周期性触发事件后,查询需要评估的ACK集群列表,遍历集群信息获取集群的节点列表,再遍历节点列表逐个判断是否安装云监控插件,如果节点都已安装云监控插件则视该集群为合规,否则视为不合规。
周期性评估回写评估结果时开启删除无效评估模式,config会自动将本周期产生的评估之外的评估结果进行删除
"""
# Module-wide logger; getLogger() without a name returns the root logger.
logger = logging.getLogger()
# Compliance type values used in evaluation results
COMPLIANCE_TYPE_COMPLIANT = 'COMPLIANT'
COMPLIANCE_TYPE_NON_COMPLIANT = 'NON_COMPLIANT'
COMPLIANCE_TYPE_NOT_APPLICABLE = 'NOT_APPLICABLE'
# Config API region and endpoint. International sites use ap-southeast-1 and
# config.ap-southeast-1.aliyuncs.com
CONFIG_SERVICE_REGION = 'cn-shanghai'
CONFIG_SERVICE_ENDPOINT = 'config.cn-shanghai.aliyuncs.com'
# AccessKey ID and secret handed to AcsClient (masked placeholders here).
# NOTE(review): credentials are hard-coded in source; consider supplying them
# from the function runtime or environment instead - confirm with the owner.
AK = '******'
SK = '******'
# 入口方法
def handler(event, context):
    """Evaluate every ACK cluster and report the results to Config.

    Lists the ACK clusters page by page, asks ``query_nodes_evaluation``
    whether all nodes of each cluster pass the monitor-agent check, builds one
    evaluation per cluster and submits them through ``put_evaluations``.

    Args:
        event: Raw invocation event; it is validated and parsed by
            ``validate_event`` and must carry ``resultToken`` and
            ``invokingEvent`` (``orderingTimestamp`` is read if present).
        context: Runtime context, forwarded unchanged to ``put_evaluations``.

    Returns:
        The list of evaluation dicts that was submitted, or ``None`` when the
        event fails validation.
    """
    evt = validate_event(event)
    if not evt:
        return None
    result_token = evt.get('resultToken')
    ordering_timestamp = evt.get('orderingTimestamp')
    invoking_event = evt.get('invokingEvent')
    account_id = invoking_event.get('accountId')
    client = AcsClient(AK, SK, CONFIG_SERVICE_REGION)
    # ResourceType supported by the config
    resource_type = "ACS::ACK::Cluster"
    # Annotations are identical for every cluster, so serialize them once.
    compliant_annotation = json.dumps({})
    non_compliant_annotation = json.dumps(
        {'configuration': 'Not all nodes installed monitor agent.',
         'desiredValue': 'All nodes installed monitor agent.'})
    evaluations = []
    page_number = 1
    page_size = 50
    cluster_total = 0
    # page_number is the NEXT page to fetch; it is needed only while the
    # reported total exceeds what the previous (page_number - 1) pages hold.
    # Comparing against page_size * page_number would drop the last page.
    while page_number == 1 or cluster_total > page_size * (page_number - 1):
        cluster_page_json = query_cluster_page(client, page_size, page_number)
        clusters = None
        if "clusters" in cluster_page_json:
            clusters = cluster_page_json["clusters"]
        if not clusters:
            # An empty or missing page means there is nothing left to read.
            break
        for cluster in clusters:
            cluster_id = cluster["cluster_id"]
            logger.info(cluster_id)
            is_compliant = query_nodes_evaluation(client, cluster_id)
            evaluations.append({
                'accountId': account_id,
                'complianceResourceId': cluster_id,
                'complianceResourceType': resource_type,
                'orderingTimestamp': ordering_timestamp,
                'complianceType': (COMPLIANCE_TYPE_COMPLIANT if is_compliant
                                   else COMPLIANCE_TYPE_NON_COMPLIANT),
                'annotation': (compliant_annotation if is_compliant
                               else non_compliant_annotation),
                'complianceRegionId': cluster["region_id"],
            })
        page_number = page_number + 1
        # Keep the previous total when the response omits it or reports 0.
        if "page_info" in cluster_page_json and cluster_page_json["page_info"] \
                and "total_count" in cluster_page_json["page_info"] \
                and cluster_page_json["page_info"]["total_count"]:
            cluster_total = cluster_page_json["page_info"]["total_count"]
    put_evaluations(context, result_token, evaluations)
    return evaluations
def query_cluster_page(clt, page_size, page_number):
    """Fetch one page of the ACK cluster list.

    Sends a request for ``/api/v1/clusters`` on ``cs.aliyuncs.com``
    (API version 2015-12-15) with the given paging parameters.

    Args:
        clt: Client object exposing ``do_action_with_exception``.
        page_size: Value sent as the ``page_size`` query parameter.
        page_number: Value sent as the ``page_number`` query parameter.

    Returns:
        The response body decoded as UTF-8 and parsed from JSON.
    """
    list_request = CommonRequest(
        'cs.aliyuncs.com',
        '2015-12-15',
        uri_pattern='/api/v1/clusters'
    )
    paging = (('page_size', page_size), ('page_number', page_number))
    for param_name, param_value in paging:
        list_request.add_query_param(param_name, param_value)
    raw_body = clt.do_action_with_exception(list_request)
    return json.loads(str(raw_body, encoding='utf-8'))
# 查询集群节点列表,并校验是否全部安装云监控插件且状态为运行中
def query_nodes_evaluation(clt, cluster_id):
    """Check the monitor-agent status of every node in one cluster.

    Walks the cluster's node list page by page and passes the instance ids of
    each page to ``query_instances_monitor_status``. Stops at the first page
    that fails the check.

    Args:
        clt: Client object exposing ``do_action_with_exception``.
        cluster_id: Id of the cluster whose nodes are inspected; it is
            concatenated into the request path.

    Returns:
        ``True`` when every fetched page passes the check (including the case
        where the cluster reports no nodes), otherwise the falsy result of the
        failing page.
    """
    compliance_result = True
    page_number = 1
    page_size = 50
    node_total = 0
    # page_number is the NEXT page to fetch; it is needed only while the
    # reported total exceeds what the previous (page_number - 1) pages hold.
    # Comparing against page_size * page_number would skip the last page.
    while page_number == 1 or node_total > page_size * (page_number - 1):
        request = CommonRequest(
            'cs.aliyuncs.com',
            '2015-12-15',
            uri_pattern='/clusters/' + cluster_id + '/nodes'
        )
        request.set_protocol_type(protocol_type.HTTPS)
        request.add_query_param('pageSize', page_size)
        request.add_query_param('pageNumber', page_number)
        response = clt.do_action_with_exception(request)
        json_res = json.loads(str(response, encoding='utf-8'))
        nodes = json_res["nodes"] if "nodes" in json_res else None
        if not nodes:
            # An empty or missing page means there is nothing left to check.
            break
        instance_id_set = {node["instance_id"] for node in nodes}
        compliance_result = query_instances_monitor_status(clt, instance_id_set)
        if not compliance_result:
            # One failing page is enough to make the whole cluster fail.
            return compliance_result
        page_number = page_number + 1
        # Keep the previous total when the response omits it or reports 0.
        if "page" in json_res and json_res["page"] \
                and "total_count" in json_res["page"] \
                and json_res["page"]["total_count"]:
            node_total = json_res["page"]["total_count"]
    return compliance_result
# 批量查询实例列表云监控插件状态是否全为运行中
def query_instances_monitor_status(clt, instance_id_set):
    """Report whether the monitoring agent is running on all given instances.

    Calls ``DescribeMonitoringAgentStatuses`` on ``metrics.aliyuncs.com``
    (version 2019-01-01) with the instance ids joined by commas.

    Args:
        clt: Client object exposing ``do_action_with_exception``.
        instance_id_set: Collection of instance id strings to query.

    Returns:
        ``True`` only when the response lists exactly as many statuses as ids
        were requested and every ``Status`` equals ``"running"``; ``False``
        otherwise, including when the status list is missing or empty.
    """
    instance_id_str = ",".join(instance_id_set)
    request = CommonRequest()
    request.set_protocol_type(protocol_type.HTTPS)
    request.set_domain('metrics.aliyuncs.com')
    request.set_version('2019-01-01')
    request.set_action_name('DescribeMonitoringAgentStatuses')
    request.set_method('GET')
    request.add_query_param('InstanceIds', instance_id_str)
    response = clt.do_action_with_exception(request)
    json_res = json.loads(str(response, encoding='utf-8'))

    status_list = json_res["NodeStatusList"] if "NodeStatusList" in json_res else None
    if not status_list or "NodeStatus" not in status_list:
        return False
    node_statuses = status_list["NodeStatus"]
    if not node_statuses:
        return False
    if len(node_statuses) != len(instance_id_set):
        # Fewer (or more) statuses than requested ids: at least one instance
        # has no reported agent status, so the page cannot pass.
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning('Query monitor list len not equal input: {}'.format(instance_id_str))
        return False
    return all(status["Status"] == "running" for status in node_statuses)
def validate_event(event):
    """Parse the invocation event and verify its required fields.

    Args:
        event: Raw event payload, expected to be JSON text.

    Returns:
        The parsed event dict when it contains ``resultToken``,
        ``ruleParameters`` and ``invokingEvent``; ``None`` when the event is
        empty, cannot be parsed, is not a JSON object, or lacks one of those
        keys. Every rejection is logged.
    """
    if not event:
        logger.error('Event is empty.')
        # Stop here: parsing an empty payload cannot succeed.
        return None
    evt = parse_json(event)
    if evt is None:
        # parse_json already logged the failure (or the payload was JSON null).
        return None
    if not isinstance(evt, dict):
        # The membership checks below require a JSON object.
        logger.error('Event is not a JSON object.')
        return None
    logger.info('Loading event: %s .' % evt)
    required_fields = (
        ('resultToken', 'ResultToken is empty.'),
        ('ruleParameters', 'RuleParameters is empty.'),
        ('invokingEvent', 'InvokingEvent is empty.'),
    )
    for field_name, error_message in required_fields:
        if field_name not in evt:
            logger.error(error_message)
            return None
    return evt
def parse_json(content):
    """Decode JSON text, returning ``None`` instead of raising on failure.

    Args:
        content: JSON document to decode.

    Returns:
        The decoded value, or ``None`` when decoding raises; the failure is
        logged together with the offending content.
    """
    try:
        decoded = json.loads(content)
    except Exception as err:
        logger.error('Parse content:{} to json error:{}.'.format(content, err))
        return None
    return decoded
def put_evaluations(context, result_token, evaluations):
    """Submit evaluation results through the Config ``PutEvaluations`` action.

    The request is sent with ``DeleteMode`` set to ``True``; per the original
    author's note, Config then deletes evaluation records that were not
    produced in the current cycle. Any exception raised while sending is
    logged and swallowed, so the function always returns ``None``.

    Args:
        context: Runtime context; not used by this function.
        result_token: Token sent as the ``ResultToken`` body parameter.
        evaluations: Evaluation list sent as the ``Evaluations`` body
            parameter.
            NOTE(review): the list is handed to the SDK as-is - confirm it is
            serialized into the form the API expects.
    """
    config_client = AcsClient(AK, SK, CONFIG_SERVICE_REGION)
    put_request = CommonRequest()
    put_request.set_domain(CONFIG_SERVICE_ENDPOINT)
    put_request.set_version('2019-01-08')
    put_request.set_action_name('PutEvaluations')
    put_request.set_method('POST')
    body_params = (
        ('ResultToken', result_token),
        ('Evaluations', evaluations),
        # Delete mode: Config removes records not evaluated in this cycle.
        ('DeleteMode', True),
    )
    for param_name, param_value in body_params:
        put_request.add_body_params(param_name, param_value)
    try:
        reply = config_client.do_action_with_exception(put_request)
        logger.info('PutEvaluations with request: {}, response: {}.'.format(put_request, reply))
    except Exception as err:
        logger.error('PutEvaluations error: %s' % err)