# cosmos/scripts/converting_log_to_csv.py (76 lines of code) (raw):
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import csv
import os
import sys
def main():
    """Convert the status lines of a log file into a CSV file.

    The log file path is taken from ``sys.argv[1]``. Only lines containing
    ``"current ops/sec"`` are considered. Each such line is split on ``'['``;
    the text before the first ``'['`` is treated as a shared prefix, and every
    following segment is appended to that prefix and handed to
    ``parse_line_for_formatting`` together with the CSV writer.

    The CSV is written to the current working directory, named after the
    input file with its extension replaced by ``.csv``.

    Raises:
        Exception: if no log file path was given on the command line.
        OSError: if the input cannot be read or the output cannot be written.
    """
    # "< 2" also covers an empty argv, which would otherwise surface as a
    # bare IndexError on the next line.
    if len(sys.argv) < 2:
        raise Exception("Input log file name not provided. Syntax = 'python converting_log_to_csv.py <FileLocation>'")
    path = sys.argv[1]

    # Read the whole log and close it before the output is opened: if the
    # input happens to be "<name>.csv" in the working directory, opening the
    # output first would truncate it before it was read.
    with open(path, 'r') as input_file:
        lines = input_file.readlines()

    filename = os.path.basename(path)
    output_name = os.path.splitext(filename)[0] + ".csv"

    header = ['Date', 'Time', 'Operation', 'RPS', 'Count', 'MAX(microsecond)', 'MIN(microsecond)',
              'AVG(microsecond)', 'P90(microsecond)', 'P99(microsecond)', 'P999(microsecond)',
              'P9999(microsecond)']

    # The context manager closes the CSV even when parsing a line raises.
    with open(output_name, 'w', newline='') as output_csv:
        writer = csv.writer(output_csv)
        writer.writerow(header)
        for line in lines:
            line = line.strip()
            if "current ops/sec" not in line:
                continue
            # First piece is the common prefix; each remaining piece is one
            # bracketed group that becomes its own row.
            prefix, *groups = line.split('[')
            for group in groups:
                parse_line_for_formatting(prefix + group, writer)

    print("Successfully created " + output_name)
def parse_line_for_formatting(line, writer):
    """Parse one status line carrying a single operation group and write it as a CSV row.

    Example input (one line):
        2022-04-13 16:16:13:684 10 sec: 15743 operations; 1574.14 current ops/sec; est completion in 52 minutes READ:
        Count=14959, Max=613887, Min=1110, Avg=12876.19, 90=25807, 99=78143, 99.9=493311, 99.99=609279]

    The written row is, in order: date, time, operation, RPS, count, max, min,
    avg, 90, 99, 99.9 and 99.99 values. Fields that are not found in the line
    are written as empty strings. Nothing is written when the third
    ';'-separated part contains ``CLEANUP``.

    Args:
        line: the status line text, ';'-separated as in the example above.
        writer: any object exposing ``writerow(list)``, e.g. a ``csv.writer``.

    Raises:
        IndexError: if ``line`` has fewer than three ';'-separated parts, or
            its first part has no space-separated second field.
    """
    split_semicolon = line.split(';')

    # '2022-04-13 16:16:13:684 10 sec: 15743 operations'
    timestamp_fields = split_semicolon[0].split(' ')
    date = timestamp_fields[0]
    # Drop the last ':'-separated field of the timestamp (684 in the example).
    time_of_day = timestamp_fields[1].rsplit(':', 1)[0]

    # '1574.14 current ops/sec' -> '1574.14'. The final strip() removes the
    # space that precedes the label, so the CSV cell holds a clean number.
    rps = split_semicolon[1].strip().rsplit('current ops/sec', 1)[0].strip()

    # est completion in 52 minutes READ: Count=14959, Max=613887, Min=1110, Avg=12876.19, 90=25807, 99=78143,
    # 99.9=493311, 99.99=609279]
    third_part = split_semicolon[2].strip()
    if 'CLEANUP' in third_part:
        return

    # Checked in order, first match wins. '99.99=' must precede '99.9=' and
    # '99.9=' must precede '99=' because each later marker is a substring
    # match candidate for the earlier tokens.
    markers = (
        ('Count=', 'count'),
        ('Max=', 'max'),
        ('Min=', 'min'),
        ('Avg=', 'avg'),
        ('99.99=', 'p9999'),
        ('99.9=', 'p999'),
        ('99=', 'p99'),
        ('90=', 'p90'),
    )
    operation = ''
    values = {column: '' for _, column in markers}

    for token in third_part.split(' '):
        token = token.strip().replace(']', '').replace(',', '')
        if ':' in token:
            # The token carrying a colon names the operation, e.g. 'READ:'.
            operation = token.replace(':', '')
            continue
        for marker, column in markers:
            if marker in token:
                values[column] = token.replace(marker, '')
                break

    writer.writerow([date, time_of_day, operation, rps, values['count'], values['max'], values['min'],
                     values['avg'], values['p90'], values['p99'], values['p999'], values['p9999']])
# Run the conversion only when this file is executed directly as a script.
if __name__ == "__main__":
    main()