# Excerpt: main() from src/kudu/scripts/parse_metrics_log.py

def main(argv):
  prev_data = None
  aggregated_prev = None

  simple_headers = [header for _, header in SIMPLE_METRICS + RATE_METRICS]
  for _, header in HISTOGRAM_METRICS:
    simple_headers.append(header + "_p50")
    simple_headers.append(header + "_p95")
    simple_headers.append(header + "_p99")
    simple_headers.append(header + "_p999")
    simple_headers.append(header + "_max")

  print "time cache_hit_ratio", " ".join(simple_headers)

  for path in sorted(argv[1:]):
    if path.endswith(".gz"):
      f = gzip.GzipFile(path)
    else:
      f = file(path)
    for line_number, line in enumerate(f, start=1):
      # Only parse out the "metrics" lines.
      try:
        (_, _, log_type, ts, metrics_json) = line.split(" ")
      except ValueError:
        continue
      if log_type != "metrics":
        continue
      ts = float(ts) / 1000000.0
      prev_ts = prev_data['ts'] if prev_data else 0
      # Enforce that the samples come in time-sorted order.
      if ts <= prev_ts:
        raise Exception("timestamps must be in ascending order (%f <= %f at %s:%d)"
                        % (ts, prev_ts, path, line_number))
      if prev_data and ts < prev_ts + GRANULARITY_SECS:
        continue

      # Parse the metrics json into a map of the form:
      #   { metric key => { entity id => metric value } }
      data = json_to_map(json.loads(metrics_json))
      data['ts'] = ts
      if prev_data:
        # Copy missing metrics from prev_data.
        for m, prev_eid_to_vals in prev_data.iteritems():
          if m is 'ts':
            continue
          # The metric was missing entirely; copy it over.
          if m not in data:
            data[m] = prev_eid_to_vals
          else:
            # If the metric was missing for a specific entity, copy the metric
            # from the previous snapshot.
            for eid, prev_vals in prev_eid_to_vals.iteritems():
              if eid not in data[m]:
                data[m][eid] = prev_vals

      aggregated_cur = aggregate_metrics(data)
      if prev_data:
        if not aggregated_prev:
          aggregated_prev = aggregate_metrics(prev_data)
        process(aggregated_prev, aggregated_cur)

      prev_data = data
      aggregated_prev = aggregated_cur