tools/separate_log_file_runs.py (58 lines of code) (raw):
#!/usr/bin/env python
# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import sys
# When perfkitbenchmarker is run with multiple processes, the log messages
# from each thread are interleaved in a single log file. This program takes
# a log file with interleaved messages and separates them back out, writing
# each thread's messages to its own file.
class LogDeInterlace(object):
  """Hands out one output file per stream index of an interleaved log.

  Output files are created lazily, in the same directory as the root file,
  and are named '<root name>-<index><root extension>'. Use the object as a
  context manager so that every file it opened is closed on exit.
  """

  def __init__(self, root_file_name):
    """Remembers the root file name; no file is opened yet.

    Args:
      root_file_name: path whose directory, base name and extension are
        reused to build the per-index output file names.
    """
    self.root_file_name = root_file_name
    # Maps stream index -> open file object, or None when the output file
    # was not created because a file of that name already existed.
    self.map_index_to_stream = {}

  def _GetRootComponents(self, fullpath):
    """Splits fullpath into a (directory, base name, extension) tuple."""
    path = os.path.dirname(fullpath)
    (filename, extension) = os.path.splitext(os.path.basename(fullpath))
    return (path, filename, extension)

  def _CreateStreamForIndex(self, index):
    """Opens the output file for the given index for writing.

    Args:
      index: stream index appended to the root file's base name.

    Returns:
      The newly opened file object, or None if the target file already
      exists (existing files are never overwritten).
    """
    (path, filename, extension) = self._GetRootComponents(self.root_file_name)
    filename = os.path.join(path,
                            filename + '-' + str(index) + extension)
    if os.path.exists(filename):
      # Single-argument print(...) behaves identically on Python 2 and 3.
      print('Warning file %s already exists. Log will be lost' % filename)
      return None
    print('Creating %s' % filename)
    return open(filename, 'w')

  def GetStreamForIndex(self, index):
    """Returns the stream for index, creating it on first request.

    The result is cached, including a None result, so the "already exists"
    warning is printed at most once per index.

    Args:
      index: stream index to look up.

    Returns:
      An open file object, or None if the output file could not be created.
    """
    if index not in self.map_index_to_stream:
      self.map_index_to_stream[index] = self._CreateStreamForIndex(index)
    return self.map_index_to_stream[index]

  def __enter__(self):
    """Returns self so the instance can be bound in a with statement."""
    return self

  def __exit__(self, types, value, traceback):
    """Closes every stream that was opened; exceptions are not suppressed."""
    # values() exists on both Python 2 and 3, unlike itervalues().
    for file_object in self.map_index_to_stream.values():
      if file_object is not None:
        file_object.close()
def main(argv):
  """Splits the log file named on the command line into per-stream files.

  Every line is written to the stream selected by the most recent line that
  carried a '(<index>/<total>)' marker; lines without a marker stay on the
  current stream. Lines before the first marker, and lines from the
  'Benchmark run statuses:' line onwards, go to stream 0.

  Args:
    argv: the full command line, i.e. [program name, log file name].

  Raises:
    SystemExit: with status 1, after printing usage, when the argument count
      is wrong or the argument is '--help'.
  """
  if len(argv) != 2 or argv[1] == '--help':
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('usage: SeparateLogFileRuns <filename>')
    print('')
    print('Takes a pkb.log which was created from a single invocation of '
          'perfkitbenchmarker running multiple benchmarks. The output '
          'from each thread is written out to its own stream.')
    sys.exit(1)

  input_file = argv[1]
  # the threads are numbered starting at 1 ... so use 0 for beginning
  # and ending stream output
  sentinel_stream = 0

  # Both patterns are loop-invariant, so compile them once up front.
  # matches lines like:
  # 2018-02-13 22:30:41,701 6538b6ae MainThread pgbench(1/9) ...
  # The index needs at least one digit: an empty capture such as '(/9)'
  # would otherwise reach int('') and abort the whole run.
  stream_pattern = re.compile(r'^\d\d\d\d-\d\d-\d\d .*?Thread'
                              r'.*?\((\d+)\/\d*\)')
  # matches lines like (one line):
  # 2018-02-14 17:59:57,297 6538b6ae MainThread pkb.py:856 INFO
  # Benchmark run statuses:
  end_pattern = re.compile(r'^\d\d\d\d-\d\d-\d\d.*'
                           r'Benchmark run statuses:')

  with LogDeInterlace(input_file) as logs:
    with open(input_file) as f:
      current_stream = logs.GetStreamForIndex(sentinel_stream)
      for line in f:
        stream_match = stream_pattern.match(line)
        if stream_match:
          stream_index = int(stream_match.group(1))
          current_stream = logs.GetStreamForIndex(stream_index)
        elif end_pattern.match(line):
          current_stream = logs.GetStreamForIndex(sentinel_stream)
        # A None stream means its output file already existed; those lines
        # are dropped rather than overwriting the existing file.
        if current_stream is not None:
          current_stream.write(line)
# Script entry point: hand the raw command line to main() only when this file
# is executed directly, not when it is imported.
if __name__ == '__main__':
  main(sys.argv)