data_extraction_transformation/scripts/extract-bugs-bugbug.py (103 lines of code) (raw):

"""Extract the Bugzilla bugs referenced by performance alerts into a CSV.

The script downloads the bugbug Bugzilla database, keeps only the bugs whose
ID appears in the ``alert_bug_number`` column of the alerts CSV, labels each
of them with bugbug's ``IsPerformanceBug`` feature and writes the result to
``bugs_data.csv`` inside the requested output location.
"""

import argparse
import os
import shutil

import pandas as pd
from bugbug import bugzilla, db, bug_features

from helper import append_strings, get_json, txt_to_list

# Bugzilla attributes copied verbatim into the CSV. Each one is stored under
# the column name 'bug_<attribute>'. The order here is the CSV column order.
BUG_ATTRIBUTES = (
    'id',
    'resolution',
    'type',
    'component',
    'summary',
    'classification',
    'status',
    'creation_time',
    'url',
    'last_change_time',
    'severity',
    'priority',
    'product',
    'is_confirmed',
    'votes',
    'is_open',
    'assigned_to',
    'cf_last_resolved',
    'cf_performance_impact',
    'version',
    'whiteboard',
    'platform',
)

# Column names of the generated CSV, in output order.
CSV_COLUMNS = (
    [f'bug_{attribute}' for attribute in BUG_ATTRIBUTES]
    + ['bug_keywords', 'IsPerformanceBug']
)


def parse_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with ``alert_file`` (path to the alerts CSV) and
        ``output_location`` (directory that receives ``bugs_data.csv``).
    """
    parser = argparse.ArgumentParser(
        description="Extract the Bugzilla bugs associated with alerts into a CSV using bugbug"
    )
    parser.add_argument('-a', '--alert-file', required=True,
                        help="Path to the alerts file")
    parser.add_argument('-o', '--output-location', required=True,
                        help="Path to the directory where bugs_data.csv is written")
    return parser.parse_args()


def process_element(the_json, the_attribute):
    """Return ``the_json[the_attribute]``, or ``""`` when it is unavailable.

    Some Bugzilla attributes are not present on every bug, so a failed lookup
    is reported as an empty string instead of raising.
    """
    try:
        return the_json[the_attribute]
    except (KeyError, TypeError, IndexError):
        # Only lookup failures are treated as "attribute missing"; anything
        # else (e.g. KeyboardInterrupt) must still propagate.
        return ""


def extract_row(json_bug, isperfbug):
    """Build one CSV row (as a dict) from a bug's JSON.

    Args:
        json_bug: mapping describing a single bug. It must contain a
            'keywords' entry; every other attribute is optional.
        isperfbug: label stored in the 'IsPerformanceBug' column.

    Returns:
        dict keyed by the names in ``CSV_COLUMNS``.

    Raises:
        KeyError: if ``json_bug`` has no 'keywords' entry.
    """
    bug_row = {
        f'bug_{attribute}': process_element(json_bug, attribute)
        for attribute in BUG_ATTRIBUTES
    }
    bug_row['bug_keywords'] = append_strings(json_bug['keywords'])
    bug_row['IsPerformanceBug'] = isperfbug
    return bug_row


def _alert_bug_ids(alerts_df):
    """Return the set of bug IDs (as integer strings) referenced by the alerts.

    The 'alert_bug_number' column is parsed as float by pandas as soon as one
    alert has no bug, so the values are normalised to integers before being
    turned into strings. Missing or non-numeric entries are dropped.
    """
    numbers = pd.to_numeric(alerts_df['alert_bug_number'], errors='coerce').dropna()
    return {str(int(number)) for number in numbers.unique()}


def main():
    """Run the extraction end to end and write ``bugs_data.csv``."""
    args = parse_args()
    alert_file = args.alert_file
    output_location = args.output_location

    # Read the alerts first so that a wrong path fails before the (large)
    # bugs database is downloaded.
    alerts_df = pd.read_csv(alert_file)
    # Only bugs that are associated with alerts are kept.
    bugs_ids = _alert_bug_ids(alerts_df)

    # Download the latest version of the bugs data using bugbug.
    db.download(bugzilla.BUGS_DB)

    is_performance_bug = bug_features.IsPerformanceBug()
    rows = []
    for bug in bugzilla.get_bugs():
        if str(bug["id"]) not in bugs_ids:
            continue
        isperfbug = bool(is_performance_bug(bug))
        rows.append(extract_row(bug, isperfbug))

    # Build the frame once; growing it with pd.concat per row is quadratic.
    df = pd.DataFrame(rows, columns=CSV_COLUMNS)
    df.to_csv(os.path.join(output_location, 'bugs_data.csv'), index=False)


if __name__ == "__main__":
    main()