data_extraction_transformation/scripts/extract-bugs-api.py (95 lines of code) (raw):

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Author: Mohamed Bilel Besbes
Date: 2025-01-02
"""
import argparse
import csv
import json
import requests
import pandas as pd


def parse_args():
    parser = argparse.ArgumentParser(description="Fetch bug details from an API and save to a CSV file.")
    parser.add_argument('-a', '--alerts-file', required=True, help="Path to the input CSV alerts file.")
    parser.add_argument('-b', '--bugs-file', required=True, help="Path to the output CSV bugs file.")
    return parser.parse_args()


def fetch_json_from_api(api_url):
    """Fetch and decode a JSON response from the given API URL."""
    try:
        response = requests.get(api_url)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        print(f"Error fetching data from API: {e}")
        return None


def flatten_json(bug):
    """Flatten a nested Bugzilla bug record into a single-level dict."""
    try:
        flat_bug = {
            'id': bug.get('id'),
            'type': bug.get('type'),
            'summary': bug.get('summary'),
            'status': bug.get('status'),
            'priority': bug.get('priority'),
            'severity': bug.get('severity'),
            'creator': bug.get('creator'),
            'assigned_to': bug.get('assigned_to'),
            'creation_time': bug.get('creation_time'),
            'last_change_time': bug.get('last_change_time'),
            'component': bug.get('component'),
            'product': bug.get('product'),
            'resolution': bug.get('resolution'),
            'is_confirmed': bug.get('is_confirmed'),
            'platform': bug.get('platform'),
            'op_sys': bug.get('op_sys'),
            'assigned_to_name': bug.get('assigned_to_detail', {}).get('real_name', None),
            'assigned_to_email': bug.get('assigned_to_detail', {}).get('email', None),
            'cc': ', '.join(bug.get('cc', [])),
            'cc_detail': ', '.join([cc.get('email', '') for cc in bug.get('cc_detail', [])]),
            'flags': ', '.join([flag.get('name', '') for flag in bug.get('flags', [])]),
            'classification': bug.get('classification', None),
            'triage_owner': bug.get('triage_owner', None),
            'comment_count': bug.get('comment_count', None),
            'keywords': ', '.join(bug.get('keywords', [])),
            'depends_on': ', '.join(map(str, bug.get('depends_on', []))),
            'blocks': ', '.join(map(str, bug.get('blocks', []))),
            'duplicates': ', '.join(map(str, bug.get('duplicates', []))),
            'creator_name': bug.get('creator_detail', {}).get('real_name', None),
            'creator_email': bug.get('creator_detail', {}).get('email', None)
        }
        return flat_bug
    except Exception as e:
        print(f"Error flattening bug: {e}")
        return {}


def write_to_csv(bugs, csv_file):
    """Write the list of flattened bug dicts to a CSV file."""
    try:
        with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
            if bugs:
                writer = csv.DictWriter(file, fieldnames=bugs[0].keys())
                writer.writeheader()
                writer.writerows(bugs)
                print(f"Data has been written to {csv_file}")
    except Exception as e:
        print(f"Error writing to CSV: {e}")


def main():
    args = parse_args()
    alerts_file = args.alerts_file
    bugs_file = args.bugs_file

    try:
        # Read the alerts DataFrame
        alerts_df = pd.read_csv(alerts_file)

        # Process the bug IDs
        bug_ids = set(alerts_df['alert_summary_bug_number'].dropna().astype(int).tolist())

        all_bugs = []
        print("INITIATING RESPONSE COLLECTION FROM API")
        for bug_id in bug_ids:
            print(bug_id)
            # Prepare API URL for each bug ID
            api_url = f"https://bugzilla.mozilla.org/rest/bug?id={bug_id}"

            # Fetch JSON data for the bug
            json_data = fetch_json_from_api(api_url)
            if json_data and 'bugs' in json_data:
                for bug in json_data['bugs']:
                    flat_bug = flatten_json(bug)
                    all_bugs.append(flat_bug)
            else:
                print(f"No data found for bug ID {bug_id}.")
        print("DATA COLLECTION DONE")
        print("INITIATING DATA FLATTENING")

        # Write all collected and flattened bugs to CSV
        if all_bugs:
            write_to_csv(all_bugs, bugs_file)
        else:
            print("No bugs to write to the CSV file.")
    except Exception as e:
        print(f"Error in processing: {e}")


if __name__ == "__main__":
    main()