services/lambda-pr-status-labeler/pr_status_bot/PRStatusBot.py (214 lines of code) (raw):

#!/usr/bin/env python3

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# -*- coding: utf-8 -*-
import ast
import json
import hmac
import hashlib
import os
import logging
import re
import secret_manager
from github import Github

# Define the constants
# Github labels
PR_WORK_IN_PROGRESS_LABEL = 'pr-work-in-progress'
PR_AWAITING_TESTING_LABEL = 'pr-awaiting-testing'
PR_AWAITING_MERGE_LABEL = 'pr-awaiting-merge'
PR_AWAITING_REVIEW_LABEL = 'pr-awaiting-review'
PR_AWAITING_RESPONSE_LABEL = 'pr-awaiting-response'
WORK_IN_PROGRESS_TITLE_SUBSTRING = 'WIP'

# CI state
FAILURE_STATE = 'failure'
PENDING_STATE = 'pending'
SUCCESS_STATE = 'success'

# Review state
APPROVED_STATE = 'APPROVED'
CHANGES_REQUESTED_STATE = 'CHANGES_REQUESTED'
COMMENTED_STATE = 'COMMENTED'
DISMISSED_STATE = 'DISMISSED'


class GithubObj:
    """Holds a PyGithub client built from a personal access token."""

    def __init__(self, github_personal_access_token=None, apply_secret=True):
        """
        Initializes the Github Object
        :param github_personal_access_token: GitHub authentication token (Personal access token)
        :param apply_secret: when True, the token is loaded from Secrets Manager and
                             replaces the value passed in github_personal_access_token
        """
        self.github_personal_access_token = github_personal_access_token
        if apply_secret:
            self._get_secret()
        self.github_object = self._get_github_object()

    def _get_github_object(self):
        """
        This method returns github object initialized with Github personal access token
        """
        return Github(self.github_personal_access_token)

    def _get_secret(self):
        """
        This method is to get secret value from Secrets Manager
        It stores the "github_personal_access_token" entry of the secret on the instance
        """
        secret = json.loads(secret_manager.get_secret())
        self.github_personal_access_token = secret["github_personal_access_token"]


class PRStatusBot:
    """Labels a pull request according to its CI status and committer reviews."""

    # NOTE: the default for `repo` is evaluated once, when this module is imported.
    def __init__(self, repo=os.environ.get("repo"), github_obj=None, apply_secret=True):
        """
        Initializes the PR Status Bot
        :param repo: GitHub repository that is being referenced
        :param github_obj: client object used for all GitHub calls; it must provide
                           get_repo() and get_organization()
        :param apply_secret: when True, the webhook secret is loaded from Secrets Manager
        """
        self.repo = repo
        self.github_obj = github_obj
        # Set by _is_stale_commit(); _parse_reviews() relies on it.
        self.latest_commit_sha = None
        # Always define the attribute so it exists even when apply_secret is False.
        self.webhook_secret = None
        if apply_secret:
            self._get_secret()

    def _get_secret(self):
        """
        This method is to get secret value from Secrets Manager
        It stores the "webhook_secret" entry of the secret on the instance
        """
        secret = json.loads(secret_manager.get_secret())
        self.webhook_secret = secret["webhook_secret"]

    @staticmethod
    def _parse_event_body(event):
        """
        This method parses the body of the first record of the event
        The body is a Python-literal string; the callers read its 'headers' and 'body' keys
        :param event: The event that carries the webhook delivery
        :return the parsed body
        """
        # NOTE(review): this parses data that has not been signature-checked yet.
        # ast.literal_eval does not execute code, but malformed input raises
        # ValueError/SyntaxError, which the callers do not catch.
        return ast.literal_eval(event["Records"][0]['body'])

    def _secure_webhook(self, event):
        """
        This method will validate the security of the webhook, it confirms that the secret
        of the webhook is matched and that each github event is signed appropriately
        :param event: The github event we want to validate
        :return True if the signature matches, False otherwise
        :raises Exception: if the event carries no X-Hub-Signature header
        """
        # Validating github event is signed
        try:
            message = self._parse_event_body(event)
            git_signed = message['headers']["X-Hub-Signature"]
        except KeyError as err:
            raise Exception("WebHook from GitHub is not signed") from err

        # Strip the algorithm marker only where it is a prefix
        prefix = 'sha1='
        if git_signed.startswith(prefix):
            git_signed = git_signed[len(prefix):]

        # Signing our event with the same secret as what we assigned to github event
        body = message['body']
        secret_sign = hmac.new(key=self.webhook_secret.encode('utf-8'),
                               msg=body.encode('utf-8'),
                               digestmod=hashlib.sha1).hexdigest()

        # Validating signatures match
        # Compare as bytes: compare_digest rejects str arguments with non-ASCII characters
        return hmac.compare_digest(git_signed.encode('utf-8'), secret_sign.encode('utf-8'))

    def _get_pull_request_object(self, pr_number):
        """
        This method returns a PullRequest object based on the PR number
        :param pr_number
        """
        repo = self.github_obj.get_repo(self.repo)
        return repo.get_pull(int(pr_number))

    def _get_commit_object(self, commit_sha):
        """
        This method returns a Commit object based on the SHA of the commit
        :param commit_sha
        """
        repo = self.github_obj.get_repo(self.repo)
        return repo.get_commit(commit_sha)

    def _is_mxnet_committer(self, reviewer):
        """
        This method checks if the Pull Request reviewer is a member of MXNet committers
        It uses the Github API for fetching team members of a repo
        Only a Committer can access [read/write] to Apache MXNet Committer team on Github
        Retrieved the Team ID of the Apache MXNet Committer team on Github using a Committer's credentials
        :param reviewer: the user object of the reviewer
        """
        team = self.github_obj.get_organization('apache').get_team(2413476)
        return team.has_in_members(reviewer)

    def _drop_other_pr_labels(self, pr, desired_label):
        """
        This method removes every 'pr-' label from the PR except the desired one
        Removal is best-effort: a failure is logged and the remaining labels are still processed
        :param pr
        :param desired_label
        """
        labels = pr.get_labels()
        if not labels:
            logging.info('No labels found')
            return

        for label in labels:
            logging.info(f'Label:{label}')
            if label.name.startswith('pr-') and label.name != desired_label:
                try:
                    logging.info(f'Removing {label}')
                    pr.remove_from_labels(label)
                except Exception:
                    # Keep going, but keep the traceback in the logs
                    logging.exception(f'Error while removing the label {label}')

    def _add_label(self, pr, label):
        """
        This method makes the given label the only 'pr-' label on the PR
        Adding is best-effort: a failure is logged and not raised
        :param pr
        :param label
        """
        # drop other PR labels
        self._drop_other_pr_labels(pr, label)

        # check if the PR already has the desired label
        if self._has_desired_label(pr, label):
            logging.info(f'PR {pr.number} already contains the label {label}')
            return

        logging.info(f'BOT Labels: {label}')
        try:
            pr.add_to_labels(label)
        except Exception:
            logging.exception(f'Unable to add label {label}')

    def _has_desired_label(self, pr, desired_label):
        """
        This method returns True if desired label found in PR labels
        """
        return any(desired_label == label.name for label in pr.get_labels())

    def get_review_counts(self, review, approvers, change_requesters, commenters, dismissed):
        """
        This method appends the login of the reviewer to the list that matches the review state
        The lists are modified in place and returned in the order they were passed
        An unknown review state is logged and leaves the lists untouched
        """
        login_lists_by_state = {
            APPROVED_STATE: approvers,
            CHANGES_REQUESTED_STATE: change_requesters,
            COMMENTED_STATE: commenters,
            DISMISSED_STATE: dismissed,
        }
        logins = login_lists_by_state.get(review.state)
        if logins is None:
            logging.error(f'Unknown review state {review.state}')
        else:
            logins.append(review.user.login)
        return approvers, change_requesters, commenters, dismissed

    def _parse_reviews(self, pr):
        """
        This method parses through the reviews of the PR and returns count of 3 states:
        Approved reviews, Comment reviews, Requested Changes reviews
        All these 3 states take into account if there are dismissed reviews.
        Approved review / Requested changes review can be dismissed. If dismissed, then the review doesn't count.
        Note: Only reviews by MXNet Committers are considered.
        Reviews whose commit is not self.latest_commit_sha are skipped, so
        _is_stale_commit() has to run before this method.
        :param pr
        :returns approved_count, requested_changes_count, comment_count (distinct reviewers)
        """
        approvers = []
        change_requesters = []
        commenters = []
        dismissed = []

        for review in pr.get_reviews():
            # continue if the review is for a stale commit
            if review.commit_id != self.latest_commit_sha:
                continue

            # continue if the review is by non-committer
            reviewer = review.user
            if not self._is_mxnet_committer(reviewer):
                logging.info(f'Review is by non-MXNet Committer: {reviewer}. Ignore.')
                continue

            approvers, change_requesters, commenters, dismissed = self.get_review_counts(
                review, approvers, change_requesters, commenters, dismissed)

        # A reviewer with a dismissed review counts neither as approver nor as change requester
        dismissed_logins = set(dismissed)
        approved_count = len(set(approvers) - dismissed_logins)
        requested_changes_count = len(set(change_requesters) - dismissed_logins)
        comment_count = len(set(commenters))
        return approved_count, requested_changes_count, comment_count

    def _label_pr_based_on_status(self, full_build_status_state, pull_request_obj):
        """
        This method checks the CI status of the specific commit of the PR
        and it labels the PR accordingly
        :param full_build_status_state
        :param pull_request_obj
        """
        # pseudo-code
        # if WIP in title or PR is draft or CI failed:
        #   pr-work-in-progress
        # elif CI has not started yet or CI is in progress:
        #   pr-awaiting-testing
        # else: # CI passed checks
        #   if pr has at least one approval and no request changes:
        #       pr-awaiting-merge
        #   elif pr has no review or all reviews have been dismissed/re-requested:
        #       pr-awaiting-review
        #   else: # pr has a review that hasn't been dismissed yet no approval
        #       pr-awaiting-response

        # combined status of PR can be 1 of the 3 potential states
        # https://developer.github.com/v3/repos/statuses/#get-the-combined-status-for-a-specific-reference
        ci_failed = full_build_status_state == FAILURE_STATE
        ci_pending = full_build_status_state == PENDING_STATE

        wip_in_title = WORK_IN_PROGRESS_TITLE_SUBSTRING in pull_request_obj.title
        if wip_in_title:
            logging.info('WIP in PR Title')

        if wip_in_title or pull_request_obj.draft or ci_failed:
            self._add_label(pull_request_obj, PR_WORK_IN_PROGRESS_LABEL)
            return

        if ci_pending:
            self._add_label(pull_request_obj, PR_AWAITING_TESTING_LABEL)
            return

        # CI passed since status=successful
        # parse reviews to assess count of approved/requested changes/commented reviews
        # make sure you take into account dismissed reviews
        approves, request_changes, _comments = self._parse_reviews(pull_request_obj)
        if approves > 0 and request_changes == 0:
            self._add_label(pull_request_obj, PR_AWAITING_MERGE_LABEL)
        elif approves + request_changes == 0:
            # decisive review means approve/request change
            # comment is a non-decisive review
            self._add_label(pull_request_obj, PR_AWAITING_REVIEW_LABEL)
        else:
            self._add_label(pull_request_obj, PR_AWAITING_RESPONSE_LABEL)

    def _get_latest_commit(self, pull_request_obj):
        """
        This method returns the latest commit of the Pull Request
        :param pull_request_obj
        :returns latest_commit
        """
        # the last entry of the commit list, addressed through the commit count of the PR
        return pull_request_obj.get_commits()[pull_request_obj.commits - 1]

    def _is_stale_commit(self, commit_sha, pull_request_obj):
        """
        This method checks if the given commit is stale or not
        As a side effect it stores the SHA of the latest commit in self.latest_commit_sha
        :param commit_sha
        :param pull_request_obj
        :returns boolean
        """
        self.latest_commit_sha = self._get_latest_commit(pull_request_obj).sha
        if commit_sha == self.latest_commit_sha:
            logging.info(f'Current commit {commit_sha} is latest commit of PR {pull_request_obj.number}')
            return False

        logging.info(f'Latest commit of PR {pull_request_obj.number}: {self.latest_commit_sha}')
        logging.info(f'Current status belongs to stale commit {commit_sha}')
        return True

    def _get_full_build_status_from_combined_status(self, commit_obj, combined_status_state):
        """
        Due to staggered build pipelines, combined status isn't reflective of full build status
        i.e. When sanity passes, combined_status_state = Success
        It should be pending since other pipelines aren't successful yet.
        However, combined_status_state only takes into account the pipelines that have been triggered until that point.
        Thus, manually check if combined_status_state and length of combined_statuses
        :param commit_obj
        :param combined_status_state
        :returns PENDING_STATE when the state is success with a single status, else combined_status_state
        """
        combined_status_list = commit_obj.get_combined_status().statuses
        combined_status_length = len(combined_status_list)
        logging.info(f'Combined Status Length: {combined_status_length}')
        if combined_status_state == SUCCESS_STATE and combined_status_length == 1:
            # Only sanity build has passed; rest of the builds haven't been triggered
            return PENDING_STATE
        # Full build has been triggered
        return combined_status_state

    def parse_payload(self, payload):
        """
        This method parses the payload and process it according to the event status
        :param payload: the decoded status event; 'target_url', 'commit', 'context' and 'state' are read
        :returns 1 if the status does not belong to a PR commit, 2 if the PR is closed,
                 None otherwise (stale commit, or the PR has been labeled)
        """
        # CI is run for non-PR commits as well
        # for instance, after PR is merged into the master/v1.x branch
        # we exit in such a case
        # to detect if the status update is for a PR commit or a merged commit
        # we rely on Target_URL in the event payload
        # e.g. http//jenkins.mxnet-ci.amazon-ml.com/job/mxnet-validation/job/sanity/job/PR-18899/1/display/redirect
        # a missing or null target_url is handled like any other non-PR status
        target_url = payload.get('target_url') or ''
        if 'PR' not in target_url:
            logging.info('Status update doesnt belong to a PR commit')
            return 1

        # strip PR number from the target URL
        # use raw string instead of normal string to make regex check pep8 compliant
        pr_match = re.search(r"PR-(\d+)", target_url, re.IGNORECASE)
        if pr_match is None:
            # 'PR' appears in the URL, but not as a PR-<number> job name
            logging.info('Status update doesnt belong to a PR commit')
            return 1
        pr_number = pr_match.group(1)
        logging.info(f'--------- PR : {pr_number} ----------')
        pull_request_obj = self._get_pull_request_object(pr_number)

        # verify PR is open
        # return if PR is closed
        if pull_request_obj.state == 'closed':
            logging.info('PR is closed. No point in labeling')
            return 2

        # CI runs for stale commits
        # return if its status update of a stale commit
        commit_sha = payload['commit']['sha']
        if self._is_stale_commit(commit_sha, pull_request_obj):
            return

        context = payload['context']
        state = payload['state']
        logging.info(f'PR Context: {context}')
        logging.info(f'Context State: {state}')

        commit_obj = self._get_commit_object(commit_sha)
        combined_status_state = commit_obj.get_combined_status().state
        logging.info(f'PR Combined Status State: {combined_status_state}')
        full_build_status_state = self._get_full_build_status_from_combined_status(commit_obj, combined_status_state)
        logging.info(f'PR Full Build Status State: {full_build_status_state}')
        self._label_pr_based_on_status(full_build_status_state, pull_request_obj)

    def parse_webhook_data(self, event):
        """
        This method handles the processing for each PR depending on the appropriate Github event
        information provided by the Github Webhook.
        :param event: The event that carries the webhook delivery
        :raises Exception: if the event has no X-GitHub-Event header, if the signature
                           check fails, or if the payload is not valid JSON
        """
        try:
            message = self._parse_event_body(event)
            github_event = message['headers']["X-GitHub-Event"]
            logging.info(f"github event {github_event}")
        except KeyError as err:
            raise Exception("Not a GitHub Event") from err

        if not self._secure_webhook(event):
            raise Exception("Failed to validate WebHook security")

        try:
            payload = json.loads(message['body'])
        except ValueError as err:
            raise Exception("Decoding JSON for payload failed") from err

        self.parse_payload(payload)