in data_extraction_transformation/scripts/transform-data.py [0:0]
def main():
    """Run the transformation pipeline from the command line.

    Steps, in order:

    1. Parse the CLI arguments (input folder, output folder, alerts CSV).
    2. Publish the status lookup tables and the shared state as module
       globals (presumably consumed by ``process_folder`` and its helpers;
       confirm against the rest of the file).
    3. Load the alerts CSV, record the latest ``push_timestamp`` as
       ``cutoff_date_time``, drop that column and add the
       ``alert_summary_status_general`` column.
    4. For every sub-directory of the input folder, create a same-named
       sub-directory under the output folder and call ``process_folder``.
    5. Print every entry collected in ``problematic_signatures``.
    """
    global problematic_signatures, cutoff_date_time, df_alerts
    global category_mapping, alert_summary_status_mapping, alert_status_mapping

    cli = parse_args()
    input_folder = cli.input_folder
    output_folder = cli.output_folder
    alerts_file = cli.alerts_file

    # Numeric alert-summary status code -> status name.
    alert_summary_status_mapping = {
        0: "untriaged",
        1: "downstream",
        2: "reassigned",
        3: "invalid",
        4: "improvement",
        5: "investigating",
        6: "wontfix",
        7: "fixed",
        8: "backedout",
    }
    # Numeric status code of an individual alert -> status name.
    alert_status_mapping = {
        0: "untriaged",
        1: "downstream",
        2: "reassigned",
        3: "invalid",
        4: "acknowledged",
    }
    # Status name -> coarse category label.
    # NOTE(review): 'TP'/'FP'/'SP' are presumably true positive / false
    # positive / still pending; confirm with the dataset documentation.
    category_mapping = {
        "investigating": "SP",
        "reassigned": "TP",
        "invalid": "FP",
        "improvement": "TP",
        "fixed": "TP",
        "wontfix": "TP",
        "untriaged": "SP",
        "backedout": "TP",
        "downstream": "TP",
        "acknowledged": "TP",
    }
    problematic_signatures = []

    # Maps each project to the sub-folders that hold its data. By default every
    # sub-directory of the input folder is treated as its own project. If the
    # sub-folder names do not match the project names, replace this with an
    # explicit mapping; the loop below handles either form. For example:
    # projects_folders_mapping = {"autoland": ["autoland1", "autoland2", "autoland3", "autoland4"], "firefox-android": ["firefox-android"], "mozilla-beta": ["mozilla-beta"], "mozilla-release": ["mozilla-release"], "mozilla-central": ["mozilla-central"]}
    projects_folders_mapping = {
        entry: [entry]
        for entry in os.listdir(input_folder)
        if os.path.isdir(os.path.join(input_folder, entry))
    }

    df_alerts = pd.read_csv(alerts_file, index_col=False)
    # push_timestamp is interpreted as epoch seconds; only its maximum is kept,
    # the column itself is removed from the alerts table.
    push_times = pd.to_datetime(df_alerts["push_timestamp"], unit="s")
    cutoff_date_time = push_times.max()
    df_alerts = df_alerts.drop(columns=["push_timestamp"])

    # Status code -> status name -> category label, in one pass.
    summary_status = df_alerts["alert_summary_status"].map(alert_summary_status_mapping)
    df_alerts["alert_summary_status_general"] = summary_status.replace(category_mapping)

    os.makedirs(output_folder, exist_ok=True)
    for folders in projects_folders_mapping.values():
        for folder in folders:
            os.makedirs(f"{output_folder}/{folder}", exist_ok=True)
            process_folder(input_folder, output_folder, folder)

    print("####### Problematic signatures #######")
    for signature_path in problematic_signatures:
        print("Signature path:")
        print(signature_path)