def main()

in data_extraction_transformation/scripts/extract-timeseries.py [0:0]


def main() -> None:
    """Run the time-series extraction for every repository named in the alerts CSV.

    Reads the command-line arguments through ``parse_args()``, loads the
    alerts file with pandas and then, for each distinct value of the
    ``alert_summary_repository`` column, makes sure a sub-folder of that name
    exists under the output folder before calling ``extract_timeseries`` for
    that repository.

    Side effects:
        Sets the module-level globals ``filtered_sig_ids`` (the distinct
        ``signature_id`` values found in the alerts file) and ``columns``
        (the column names of the CSV generated by this script).
        NOTE(review): ``extract_timeseries`` only receives the output folder
        and the project name, so it presumably reads these two globals --
        confirm against that function.
    """
    global filtered_sig_ids
    global columns

    args = parse_args()
    output_folder = args.output_folder
    alerts_df = pd.read_csv(args.alerts_file)

    # Only the repositories and signatures that actually appear in the alerts
    # file are processed.
    mentioned_projects = alerts_df['alert_summary_repository'].unique().tolist()
    filtered_sig_ids = alerts_df['signature_id'].unique().tolist()

    # For reference, these are all of the projects:
    #   try, android-components, application-services, ash, birch, cedar,
    #   ci-admin, ci-admin-try, ci-configuration, ci-configuration-try,
    #   comm-beta, comm-central, comm-esr115, comm-release, elm, fenix,
    #   firefox-ios, firefox-translations-training, focus-android, holly,
    #   jamun, kaios, kaios-try, larch, maple, mozilla-esr115,
    #   mozilla-release, mozilla-vpn-client, mozilla-vpn-client-release,
    #   nss, nss-try, oak, pine, reference-browser, servo-auto, servo-master,
    #   servo-try, staging-android-components, staging-fenix,
    #   staging-firefox-translations-training, staging-focus-android,
    #   taskgraph, toolchains, try-comm-central, webrender

    # Column names of the CSV to be generated through this script.
    columns = [
        "repository_name",
        "signature_id",
        "framework_id",
        "signature_hash",
        "machine_platform",
        "should_alert",
        "has_subtests",
        "extra_options",
        "tags",
        "option_collection_hash",
        "test",
        "suite",
        "lower_is_better",
        "name",
        "parent_signature",
        "repository_id",
        "measurement_unit",
        "application",
        "job_id",
        "entry_id",
        "push_timestamp",
        "value",
        "revision",
        "push_id",
    ]

    for project in mentioned_projects:
        # exist_ok=True already tolerates a folder that is present, so a
        # separate os.path.exists() check beforehand is unnecessary (and
        # would be racy if the folder appeared in between).
        os.makedirs(os.path.join(output_folder, project), exist_ok=True)
        extract_timeseries(output_folder, project)