cosmos/scripts/converting_log_to_csv.py (76 lines of code) (raw):

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Convert a benchmark status log into a CSV of per-operation latency metrics.

Only log lines containing ``current ops/sec`` are considered. Each bracketed
``[OPERATION: Count=..., Max=..., ...]`` group on such a line becomes one CSV
row. The CSV is written to the current working directory and is named after
the input file with its extension replaced by ``.csv``.

Usage: python converting_log_to_csv.py <FileLocation>
"""

import csv
import os
import sys

# Column order of the generated CSV.
CSV_HEADER = ['Date', 'Time', 'Operation', 'RPS', 'Count',
              'MAX(microsecond)', 'MIN(microsecond)', 'AVG(microsecond)',
              'P90(microsecond)', 'P99(microsecond)', 'P999(microsecond)',
              'P9999(microsecond)']

# Maps the label in front of '=' in the log to the CSV column it fills.
# Labels are matched exactly, so '99.9' can never be mistaken for '99' or
# '99.99' regardless of lookup order.
_METRIC_COLUMNS = {
    'Count': 'Count',
    'Max': 'MAX(microsecond)',
    'Min': 'MIN(microsecond)',
    'Avg': 'AVG(microsecond)',
    '90': 'P90(microsecond)',
    '99': 'P99(microsecond)',
    '99.9': 'P999(microsecond)',
    '99.99': 'P9999(microsecond)',
}


def main():
    """Read the log named by ``sys.argv[1]`` and write ``<basename>.csv``.

    Raises:
        Exception: if no input file was given on the command line.
    """
    if len(sys.argv) < 2:
        raise Exception("Input log file name not provided. Syntax = 'python converting_log_to_csv.py <FileLocation>'")

    path = sys.argv[1]
    # Read everything and close the input BEFORE opening the output: if the
    # input is itself '<name>.csv' in the working directory, both names refer
    # to the same file and opening it for writing would truncate it.
    with open(path, 'r') as input_file:
        lines = input_file.readlines()

    output_name = os.path.splitext(os.path.basename(path))[0] + ".csv"

    with open(output_name, 'w', newline='') as output_csv:
        writer = csv.writer(output_csv)
        writer.writerow(CSV_HEADER)

        for line in lines:
            line = line.strip()
            if "current ops/sec" not in line:
                continue
            # Everything before the first '[' is the shared timestamp and
            # throughput prefix; each following piece is one operation's
            # metrics and is parsed together with that prefix.
            prefix, *operation_segments = line.split('[')
            for segment in operation_segments:
                parse_line_for_formatting(prefix + segment, writer)

    print("Successfully created " + output_name)


def parse_line_for_formatting(line, writer):
    """Parse one prefix-plus-operation log fragment and write it as a CSV row.

    Example input (a single line in practice):
        2022-04-13 16:16:13:684 10 sec: 15743 operations; 1574.14 current ops/sec;
        est completion in 52 minutes READ: Count=14959, Max=613887, Min=1110,
        Avg=12876.19, 90=25807, 99=78143, 99.9=493311, 99.99=609279]

    Args:
        line: the log text, made of three ';'-separated sections (timestamp
            and progress, throughput, operation metrics).
        writer: an object with a ``writerow(list)`` method, e.g. a
            ``csv.writer``.

    Nothing is written when the metrics section mentions ``CLEANUP``.
    Metrics missing from the line are emitted as empty strings.

    Raises:
        IndexError: if ``line`` has fewer than three ';'-separated sections
            or the first section has no space-separated time field.
    """
    sections = line.split(';')

    # '2022-04-13 16:16:13:684 10 sec: 15743 operations'
    timestamp_fields = sections[0].split(' ')
    row = dict.fromkeys(CSV_HEADER, '')
    row['Date'] = timestamp_fields[0]
    # Drop the trailing ':<milliseconds>' component of the time.
    row['Time'] = timestamp_fields[1].rsplit(':', 1)[0]

    # '1574.14 current ops/sec' -> '1574.14'; strip so the numeric column
    # does not carry the space that preceded the unit text.
    row['RPS'] = sections[1].rsplit('current ops/sec', 1)[0].strip()

    # 'est completion in 52 minutes READ: Count=14959, Max=613887, ...]'
    details = sections[2].strip()
    if 'CLEANUP' in details:
        return

    for token in details.split(' '):
        token = token.strip().replace(']', '').replace(',', '')
        if ':' in token:
            # The operation name is the token carrying the colon ('READ:').
            row['Operation'] = token.replace(':', '')
            continue
        label, separator, value = token.partition('=')
        column = _METRIC_COLUMNS.get(label)
        if separator and column is not None:
            row[column] = value

    writer.writerow([row[column] for column in CSV_HEADER])


if __name__ == '__main__':
    main()