in src/kudu/scripts/parse_metrics_log.py [0:0]
def main(argv):
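  """Parse one or more Kudu metrics logs (plain or gzip-compressed) given
  on the command line, and print a whitespace-separated time series to
  stdout with one row per sample at roughly GRANULARITY_SECS resolution."""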
  prev_data = None
  aggregated_prev = None
  simple_headers = [header for _, header in SIMPLE_METRICS + RATE_METRICS]
  for _, header in HISTOGRAM_METRICS:
    simple_headers.append(header + "_p50")
    simple_headers.append(header + "_p95")
    simple_headers.append(header + "_p99")
    simple_headers.append(header + "_p999")
    simple_headers.append(header + "_max")
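  # Print the header row: timestamp, cache hit ratio, then one column per
  # simple/rate metric and per histogram percentile.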
print "time cache_hit_ratio", " ".join(simple_headers)
  for path in sorted(argv[1:]):
    if path.endswith(".gz"):
      f = gzip.GzipFile(path)
    else:
      f = open(path)
    for line_number, line in enumerate(f, start=1):
      # Only parse out the "metrics" lines.
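      # A metrics line has five space-separated fields: two that are ignored
      # here, the record type, a microsecond timestamp, and a JSON payload.
      # Lines that don't match this shape are skipped.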
      try:
        (_, _, log_type, ts, metrics_json) = line.split(" ")
      except ValueError:
        continue
      if log_type != "metrics":
        continue
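      # The logged timestamp is in microseconds; convert to seconds.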
      ts = float(ts) / 1000000.0
      prev_ts = prev_data['ts'] if prev_data else 0
      # Enforce that the samples come in time-sorted order.
      if ts <= prev_ts:
        raise Exception("timestamps must be in ascending order (%f <= %f at %s:%d)"
                        % (ts, prev_ts, path, line_number))
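      # Downsample: ignore samples that arrive within GRANULARITY_SECS of
      # the previously processed sample.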
      if prev_data and ts < prev_ts + GRANULARITY_SECS:
        continue
      # Parse the metrics json into a map of the form:
      #   { metric key => { entity id => metric value } }
      data = json_to_map(json.loads(metrics_json))
      data['ts'] = ts
      if prev_data:
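        # A sample may omit metrics (or individual entities) that were
        # present in the previous sample, so carry forward the old values
        # to keep each processed sample complete.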
        # Copy missing metrics from prev_data.
        for m, prev_eid_to_vals in prev_data.iteritems():
          if m == 'ts':
            continue
          # The metric was missing entirely; copy it over.
          if m not in data:
            data[m] = prev_eid_to_vals
          else:
            # If the metric was missing for a specific entity, copy the metric
            # from the previous snapshot.
            for eid, prev_vals in prev_eid_to_vals.iteritems():
              if eid not in data[m]:
                data[m][eid] = prev_vals
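      # Aggregate across entities (see aggregate_metrics), then emit one
      # output row comparing the previous sample against the current one.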
      aggregated_cur = aggregate_metrics(data)
      if prev_data:
        if not aggregated_prev:
          aggregated_prev = aggregate_metrics(prev_data)
        process(aggregated_prev, aggregated_cur)
      prev_data = data
      aggregated_prev = aggregated_cur
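
# Example invocation (hypothetical paths; the script reads the given metrics
# logs in sorted order and writes the resulting time series to stdout):
#   python parse_metrics_log.py metrics.log [more.log.gz ...] > metrics.tsv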