in data_extraction_transformation/scripts/extract-timeseries.py [0:0]
def main():
    """Extract performance time series for each project mentioned in an alerts CSV.

    Parses command-line arguments (``output_folder``, ``alerts_file``), reads the
    alerts CSV, and calls ``extract_timeseries`` once per project that appears in
    the alerts, writing results under ``<output_folder>/<project>``.

    Side effects: sets the module-level globals ``filtered_sig_ids`` (signature
    ids present in the alerts file) and ``columns`` (output CSV column names),
    which are presumably read by ``extract_timeseries`` — TODO confirm.
    """
    global filtered_sig_ids
    global columns

    args = parse_args()
    output_folder = args.output_folder
    alerts_df = pd.read_csv(args.alerts_file)

    # Only projects referenced by at least one alert get extracted.
    mentioned_projects = alerts_df['alert_summary_repository'].unique().tolist()
    # Signature ids mentioned in the alerts; used to filter the extraction.
    filtered_sig_ids = alerts_df['signature_id'].unique().tolist()

    # For reference, the full set of known projects (not all of which will
    # necessarily appear in a given alerts file):
    #   try, android-components, application-services, ash, birch, cedar,
    #   ci-admin, ci-admin-try, ci-configuration, ci-configuration-try,
    #   comm-beta, comm-central, comm-esr115, comm-release, elm, fenix,
    #   firefox-ios, firefox-translations-training, focus-android, holly,
    #   jamun, kaios, kaios-try, larch, maple, mozilla-esr115,
    #   mozilla-release, mozilla-vpn-client, mozilla-vpn-client-release,
    #   nss, nss-try, oak, pine, reference-browser, servo-auto,
    #   servo-master, servo-try, staging-android-components, staging-fenix,
    #   staging-firefox-translations-training, staging-focus-android,
    #   taskgraph, toolchains, try-comm-central, webrender

    # Column names of the CSV to be generated through this script.
    columns = [
        "repository_name",
        "signature_id",
        "framework_id",
        "signature_hash",
        "machine_platform",
        "should_alert",
        "has_subtests",
        "extra_options",
        "tags",
        "option_collection_hash",
        "test",
        "suite",
        "lower_is_better",
        "name",
        "parent_signature",
        "repository_id",
        "measurement_unit",
        "application",
        "job_id",
        "entry_id",
        "push_timestamp",
        "value",
        "revision",
        "push_id",
    ]

    for project in mentioned_projects:
        # exist_ok=True already tolerates an existing directory, so the prior
        # os.path.exists() guard was redundant (and a TOCTOU race). Use
        # os.path.join rather than manual '/' concatenation.
        os.makedirs(os.path.join(output_folder, project), exist_ok=True)
        extract_timeseries(output_folder, project)