scripts/create_naughtylist_csv.py (161 lines of code) (raw):

"""Build a CSV "naughty list" of in-production projects on overdue commissions.

The script asks the pluto-core API for commissions whose completion date is
before a cut-off, fetches the projects of each commission, keeps the ones
still marked "In Production" and writes one CSV row per project, grouped by
the owning user.
"""

import argparse
import csv
import sys
import os
import time
import json
import logging
from datetime import datetime
from collections import defaultdict

import requests

# Disable SSL warnings (requests below are sent with verify=False)
requests.packages.urllib3.disable_warnings()

# Page size used when paging through the commission list endpoint.
MAX_RECORDS_PER_PAGE = 100

# Lookup from a project's `workingGroupId` to a display name.
# Ids missing from this table (e.g. 33) are reported as 'Unknown'.
WORKING_GROUPS = {
    1: "Guardian Films",
    2: "Guardian VR",
    3: "Multimedia Audio",
    4: "Multimedia Brand Partnerships",
    5: "Multimedia Central Commissions",
    6: "Multimedia Central Resources",
    7: "Multimedia CiF",
    8: "Multimedia Culture and Life",
    9: "Multimedia Culture and Sport",
    10: "Multimedia Documentaries",
    11: "Multimedia GLabs",
    12: "Multimedia Global Dev",
    13: "Multimedia Investigations",
    14: "Multimedia News",
    15: "Multimedia Returning Series",
    16: "Multimedia Science and Tech",
    17: "Multimedia Social",
    18: "Multimedia Special Projects",
    19: "Multimedia Sport",
    20: "Multimedia US",
    21: "Multimedia YouTube Fashion",
    22: "Multimedia YouTube Football",
    23: "Multimedia YouTube Membership",
    24: "Multimedia YouTube News",
    25: "Multimedia YouTube Tech",
    26: "Multimedia Australia",
    27: "Multimedia Today In Focus",
    28: "Multimedia YouTube First",
    29: "Multimedia Drama",
    30: "Multimedia News Features",
    31: "Multimedia Reactive News and Sport",
    32: "Multimedia YouTube Explainers",
    34: "Multimedia It's Complicated"
}


def setup_argparser() -> argparse.ArgumentParser:
    """Set up the argument parser for the script"""
    argparser = argparse.ArgumentParser(description='Create list of overdue projects')
    argparser.add_argument('-b', '--baseurl', help='Base URL of the environment to run the script against')
    argparser.add_argument('-t', '--timestamp', help='Date to filter records before (yyyy-mm-dd)')
    return argparser


def get_token() -> str:
    """Return the API token stored in the PLUTO_TOKEN environment variable.

    Prints a message and terminates the process with exit status 1 when the
    variable is not set, so calling shells can detect the failure.
    """
    token = os.environ.get("PLUTO_TOKEN")
    if token is None:
        print("No token found. Exiting script...")
        sys.exit(1)
    return token


def create_urls(base_url):
    """Return the (commission list URL, project list URL) pair for `base_url`."""
    commission_list_url = f"{base_url}/pluto-core/api/pluto/commission/list"
    project_list_url = f"{base_url}/pluto-core/api/project/list"
    return commission_list_url, project_list_url


def get_headers(token: str) -> dict:
    """Return the JSON content-type and bearer-authorization request headers."""
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }


def api_put_request(url, headers, json_body, max_retries=5):
    """PUT `json_body` to `url` and return the decoded JSON response.

    The request is sent with certificate verification disabled. A failed
    attempt (any `requests` exception, including a non-2xx status) is retried
    after waiting 2 ** attempt seconds; the exception from the final attempt
    is re-raised to the caller.

    Args:
        url: Endpoint to send the request to.
        headers: Request headers.
        json_body: Already-serialised request body.
        max_retries: Total number of attempts to make.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.RequestException: when the last attempt fails.
    """
    backoff_factor = 2
    for retry in range(max_retries):
        try:
            with requests.put(url, headers=headers, data=json_body, verify=False) as response:
                response.raise_for_status()
                return response.json()
        # HTTPError is a RequestException subclass, so one clause covers both.
        except requests.exceptions.RequestException as e:
            if retry == max_retries - 1:
                raise
            wait_time = backoff_factor ** retry
            print(f"An error occurred: {e}. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)


def get_filtered_commission_records(timestamp, headers, commission_list_url) -> list:
    """Fetch every commission whose completion date is before `timestamp`.

    A first request is used to read the total `count`; the records are then
    collected page by page, MAX_RECORDS_PER_PAGE at a time.

    Args:
        timestamp: Cut-off sent as `completionDateBefore`.
        headers: Request headers.
        commission_list_url: Commission list endpoint.

    Returns:
        The concatenated `result` lists of all pages.

    Raises:
        Exception: when a request still fails after retries; the underlying
            `requests` exception is attached as the cause.
    """
    request_body = {
        "match": "W_CONTAINS",
        "completionDateBefore": timestamp
    }
    json_body = json.dumps(request_body)
    records = []

    try:
        json_content = api_put_request(commission_list_url, headers, json_body)
        total_records = json_content["count"]
        # Ceiling division: a partially filled last page still needs a request.
        total_pages = (total_records + MAX_RECORDS_PER_PAGE - 1) // MAX_RECORDS_PER_PAGE
        start_at = 0

        for page in range(1, total_pages + 1):
            print(f"Loading commission page: {page}")
            response = api_put_request(
                f"{commission_list_url}?startAt={start_at}&length={MAX_RECORDS_PER_PAGE}",
                headers,
                json_body,
            )
            records.extend(response["result"])
            start_at += MAX_RECORDS_PER_PAGE
    except requests.exceptions.RequestException as e:
        print(e)
        raise Exception("An error occurred. Exiting script...") from e

    return records


def get_projects_by_user(records, headers, project_list_url) -> tuple:
    """Group the "In Production" projects of the given commissions by user.

    One project-list request is made per commission. A commission whose
    request fails is reported and skipped rather than aborting the run.

    Args:
        records: Commission records; each must have an 'id' key.
        headers: Request headers.
        project_list_url: Project list endpoint.

    Returns:
        A `(user_projects, user_project_count)` tuple: the first maps a user
        to a list of project summaries (id, title, commission_id, created,
        working_group), the second maps a user to the number of summaries.
    """
    user_projects = defaultdict(list)
    user_project_count = defaultdict(int)

    for record in records:
        commission_id = record['id']
        print(f"Getting projects for commission ID: {commission_id}")
        try:
            json_content = api_put_request(
                project_list_url,
                headers,
                json.dumps({"match": "W_EXACT", "commissionId": commission_id}),
            )
            for project in json_content["result"]:
                if project['status'] == "In Production":
                    user = project['user']
                    working_group_id = project.get('workingGroupId')
                    working_group_name = WORKING_GROUPS.get(working_group_id, 'Unknown')
                    user_projects[user].append({
                        'id': project['id'],
                        'title': project['title'],
                        'commission_id': commission_id,
                        'created': project['created'],
                        'working_group': working_group_name
                    })
                    user_project_count[user] += 1
        except requests.exceptions.RequestException as e:
            print(f"Error getting projects for commission {commission_id}: {e}")
            continue

    return user_projects, user_project_count


def write_naughty_list(user_projects: dict, user_project_count: dict, timestamp: str):
    """Write the per-user project list to `naughty_list_<timestamp>.csv`.

    Rows are emitted through `csv.writer`, so fields containing commas,
    quotes or newlines are quoted according to the usual CSV rules and can be
    read back by any CSV parser. Users with the most projects come first.

    Args:
        user_projects: Mapping of user to a list of project summaries.
        user_project_count: Accepted for interface compatibility; not used,
            the ordering is derived from `user_projects` directly.
        timestamp: Suffix used in the output file name.
    """
    output_file = f"naughty_list_{timestamp}.csv"
    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator="\n")
        # Write headers
        writer.writerow(["user", "project", "commission", "title", "working_group"])

        # Write data
        for user, projects in sorted(user_projects.items(), key=lambda x: len(x[1]), reverse=True):
            for project in projects:
                project_url = f"https://pluto.gnm.int/pluto-core/project/{project['id']}"
                commission_url = f"https://pluto.gnm.int/pluto-core/commission/{project['commission_id']}"
                writer.writerow([
                    user,
                    project_url,
                    commission_url,
                    project['title'],
                    project['working_group'],
                ])

    print(f"\nNaughty list has been written to {output_file}")


def main():
    """Parse the arguments, collect the overdue projects and write the CSV."""
    args = setup_argparser().parse_args()
    baseurl = args.baseurl or "https://local.prexit"
    commission_list_url, project_list_url = create_urls(baseurl)
    token = get_token()
    headers = get_headers(token)
    timestamp = args.timestamp or "2022-01-01"
    timestamp = f"{timestamp}T00:00:00.0Z"

    print(f"Getting commissions before {timestamp}")
    commissions = get_filtered_commission_records(timestamp, headers, commission_list_url)
    print(f"Found {len(commissions)} commissions")

    user_projects, user_project_count = get_projects_by_user(commissions, headers, project_list_url)
    # write_naughty_list reports the real output path itself, so no second
    # message is printed here.
    write_naughty_list(user_projects, user_project_count, timestamp)


if __name__ == "__main__":
    main()