data_extraction_transformation/scripts/jsonfy_specific_timeseries.py (55 lines of code) (raw):

"""Convert per-signature timeseries CSV files into JSON documents.

For every regular file in the input folder the script writes
``<signature_id>.json`` to the output folder, where ``signature_id`` is the
part of the file name before the first underscore.  It also maintains a
single ``annotations.json`` that maps each signature id to the positions of
the observations labelled as true alerts.
"""
import argparse
import json
import os

import pandas as pd

# Values of the label column that are counted as a true alert.
_TRUE_ALERT_LABELS = ('TP', 'FN')
# CSV column holding the per-observation alert label.
_LABEL_COLUMN = 'alert_summary_status_general'


def parse_args():
    """Parse the command line.

    Returns:
        argparse.Namespace with ``input_folder`` and ``output_folder``.
    """
    parser = argparse.ArgumentParser(description="JSONfy ONLY specific timeseries CSVs")
    # Both folders are mandatory: without them the script would only fail
    # later with an unhelpful TypeError from os.makedirs / os.listdir.
    parser.add_argument('-i', '--input-folder', required=True,
                        help="Path to the input CSV timeseries folder")
    parser.add_argument('-o', '--output-folder', required=True,
                        help="Path to the output JSON timeseries folder")
    return parser.parse_args()


def _load_existing_annotations(path):
    """Return the annotations already stored at *path*, or ``{}`` if none.

    Raises:
        ValueError: if the file exists but does not contain a JSON object
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not os.path.isfile(path) or os.path.getsize(path) == 0:
        return {}
    with open(path, encoding='utf-8') as handle:
        existing = json.load(handle)
    if not isinstance(existing, dict):
        raise ValueError(f"{path} does not contain a JSON object")
    return existing


def _convert_signature(signature_id, df):
    """Build the JSON document and alert positions for one signature.

    Args:
        signature_id: identifier used for the ``name``/``longname`` fields.
        df: frame with ``push_timestamp``, ``value`` and the label column.

    Returns:
        ``(document, alert_positions)`` where ``alert_positions`` are 0-based
        positions in the timestamp-sorted series.
    """
    # Stable sort keeps tied timestamps in file order; resetting the index
    # makes row labels equal to positions in the sorted series, so the
    # annotation indices line up with "time.index" and "series.raw".
    df = df.sort_values(by='push_timestamp', ascending=True, kind='mergesort')
    df = df.reset_index(drop=True)

    is_alert = df[_LABEL_COLUMN].isin(_TRUE_ALERT_LABELS)
    alert_positions = df.index[is_alert].tolist()

    n_obs = len(df)
    document = {
        "name": signature_id,
        "longname": f"{signature_id} timeseries",
        "n_obs": n_obs,
        "n_dim": 1,
        "time": {
            "type": "string",
            "format": "%Y-%m-%d %H:%M:%S",
            "index": list(range(n_obs)),
            "raw": df['push_timestamp'].tolist(),
        },
        "series": [
            {
                "label": "Timeseries",
                "type": "float",
                "raw": df['value'].tolist(),
            }
        ],
    }
    return document, alert_positions


def main():
    """Convert every CSV in the input folder and update ``annotations.json``.

    Annotations from a previous run into the same output folder are kept and
    merged with the new ones (new entries win on key collisions), so the file
    always holds exactly one valid JSON object.
    """
    args = parse_args()
    input_folder = args.input_folder
    output_folder = args.output_folder
    os.makedirs(output_folder, exist_ok=True)

    annotations_path = os.path.join(output_folder, 'annotations.json')
    annotations = _load_existing_annotations(annotations_path)

    # Sorted for a deterministic processing (and annotation key) order.
    for signature_file in sorted(os.listdir(input_folder)):
        csv_path = os.path.join(input_folder, signature_file)
        if not os.path.isfile(csv_path):
            # Sub-directories cannot be parsed as CSV; skip them.
            continue

        signature_id = signature_file.split("_")[0]
        json_df, indices = _convert_signature(signature_id, pd.read_csv(csv_path))
        annotations[signature_id] = {"1": indices}

        signature_json_path = os.path.join(output_folder, signature_id + ".json")
        with open(signature_json_path, 'w', encoding='utf-8') as file:
            json.dump(json_df, file, indent=4)

    # Rewrite rather than append: appending a second object would leave the
    # file unparseable as JSON.
    with open(annotations_path, 'w', encoding='utf-8') as file:
        json.dump(annotations, file, indent=4)


if __name__ == "__main__":
    main()