scripts/github_issue_retriever.py (130 lines of code) (raw):

# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
from logging import getLogger

import requests

from bugbug import db
from bugbug.github import Github, IssueDict
from bugbug.utils import extract_private, zstd_compress

logger = getLogger(__name__)


class Retriever:
    """Retrieve the issues of one GitHub repository and store them in the issues DB.

    The heavy lifting (fetching, deleting, downloading) is delegated to a
    ``Github`` helper built from the constructor arguments; this class only
    orchestrates the full or incremental retrieval and the optional
    replacement of moderated public issues with their private counterparts.
    """

    def __init__(
        self,
        owner: str,
        repo: str,
        state: str,
        retrieve_events: bool,
        retrieve_private: bool,
    ):
        """Remember the retrieval settings and build the ``Github`` helper.

        Args:
            owner: GitHub repository owner.
            repo: GitHub repository name.
            state: Issue state filter forwarded to ``Github``.
            retrieve_events: Forwarded to ``Github``; whether to retrieve
                events for each issue.
            retrieve_private: Whether the title/body of auto-closed public
                issues should be replaced with the private issue content.
        """
        self.owner = owner
        self.repo = repo
        self.state = state
        self.retrieve_events = retrieve_events
        self.retrieve_private = retrieve_private

        self.github = Github(
            owner=owner,
            repo=repo,
            state=state,
            retrieve_events=retrieve_events,
        )

    def replace_with_private(
        self, original_data: list[IssueDict]
    ) -> tuple[list[IssueDict], set]:
        """Swap in private title/body for automatically closed public issues.

        Public issues titled exactly "Issue closed." are looked up through the
        private issue reference extracted from their body (moderation workflow
        of the webcompat repository). Matching items are modified in place.

        Args:
            original_data: Issues to inspect; updated items are mutated.

        Returns:
            A pair ``(updated_issues, updated_ids)``: the items whose content
            was replaced and the set of their ``id`` values.

        Raises:
            requests.HTTPError: For any HTTP failure other than status 410,
                which is only logged and causes the issue to be skipped.
        """
        replaced_issues = []
        replaced_ids = set()

        for public_issue in original_data:
            if public_issue["title"] != "Issue closed.":
                continue

            private_ref = extract_private(public_issue["body"])
            if private_ref is None:
                continue

            private_owner, private_repo, private_number = private_ref

            try:
                private_issue = self.github.fetch_issue_by_number(
                    private_owner, private_repo, private_number
                )
            except requests.HTTPError as error:
                # 410 is tolerated: log it and move on to the next issue.
                if error.response.status_code != 410:
                    raise
                logger.info(error)
                continue

            if not private_issue:
                continue

            public_issue["title"] = private_issue["title"]
            public_issue["body"] = private_issue["body"]
            replaced_ids.add(public_issue["id"])
            replaced_issues.append(public_issue)

        return replaced_issues, replaced_ids

    def _update_since(self, last_modified) -> None:
        """Fetch issues changed since ``last_modified`` and overwrite the stored copies."""
        since = last_modified.isoformat()
        logger.info(
            "Retrieving issues modified or created since the last run on %s",
            since,
        )
        changed_issues = self.github.fetch_issues_updated_since_timestamp(since)

        if self.retrieve_private:
            logger.info(
                "Replacing contents of auto closed public issues with private issues content"
            )
            # Items are updated in place, so the return value is not needed here.
            self.replace_with_private(changed_issues)

        changed_ids = {issue["id"] for issue in changed_issues}

        logger.info(
            "Deleting issues that were changed since the last run and saving updates"
        )
        self.github.delete_issues(lambda stored: stored["id"] in changed_ids)
        db.append(self.github.db_path, changed_issues)
        logger.info("Updating finished")

    def _download_all(self) -> None:
        """Download every issue, then optionally substitute private content."""
        logger.info("Retrieving all issues since last_modified is not available")
        self.github.download_issues()

        if not self.retrieve_private:
            return

        logger.info(
            "Replacing contents of auto closed public issues with private issues content"
        )
        stored_issues = list(self.github.get_issues())
        replaced_issues, replaced_ids = self.replace_with_private(stored_issues)

        logger.info("Deleting public issues that were updated and saving updates")
        self.github.delete_issues(lambda stored: stored["id"] in replaced_ids)
        db.append(self.github.db_path, replaced_issues)

    def retrieve_issues(self) -> None:
        """Bring the issues DB up to date and compress it.

        The DB is downloaded first. When its last-modified value is available
        an incremental update is performed; otherwise all issues are
        downloaded. In both cases the DB file is zstd-compressed at the end.
        """
        db.download(self.github.db_path)

        try:
            last_modified = db.last_modified(self.github.db_path)
        except db.LastModifiedNotAvailable:
            last_modified = None

        if last_modified:
            self._update_since(last_modified)
        else:
            self._download_all()

        zstd_compress(self.github.db_path)


def main() -> None:
    """Parse the command-line arguments and run the issue retrieval."""
    parser = argparse.ArgumentParser(description="Retrieve GitHub issues")
    parser.add_argument(
        "--owner",
        type=str,
        required=True,
        help="GitHub repository owner.",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="GitHub repository name.",
    )
    parser.add_argument(
        "--state",
        type=str,
        default="all",
        help="Indicates the state of the issues to return. Can be either open, closed, or all",
    )
    parser.add_argument(
        "--retrieve-events",
        action="store_true",
        help="Whether to retrieve events for each issue.",
    )
    parser.add_argument(
        "--retrieve-private",
        action="store_true",
        help="Whether to retrieve private issue content (only webcompat repository usecase).",
    )

    # Parsing also takes care of printing the help when `--help` is passed.
    options = parser.parse_args()

    Retriever(
        owner=options.owner,
        repo=options.repo,
        state=options.state,
        retrieve_events=options.retrieve_events,
        retrieve_private=options.retrieve_private,
    ).retrieve_issues()


if __name__ == "__main__":
    main()