def save_json_results()

in benchmarks/benchmark/tools/profile-generator/container/benchmark_serving.py
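
The function relies on several module-level names that sit outside this excerpt: json, Timestamp (from google.protobuf), google.cloud.exceptions, and a gcs_bucket handle used for the optional upload. A minimal sketch of that surrounding setup, assuming the standard google-cloud-storage client (everything outside the excerpt is an assumption, not the script's actual code):

import argparse
import json

import google.cloud.exceptions
from google.cloud import storage
from google.protobuf.timestamp_pb2 import Timestamp

# Module-level bucket handle: the real script populates this elsewhere
# (e.g. storage.Client().bucket(args.output_bucket)); leaving it as None
# makes save_json_results() skip the GCS upload branch.
gcs_bucket = None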


def save_json_results(args: argparse.Namespace, benchmark_result, server_metrics, model, errors):
  """Writes benchmark results to a local JSON file and, if configured, uploads it to GCS."""
  # Convert the benchmark start time to a protobuf Timestamp for the config block.
  start_dt_proto = Timestamp()
  start_dt_proto.FromDatetime(args.start_datetime)

  final_json = {
    # metrics values are numerical
    "metrics" : {
      # Traffic
      "num_prompts_attempted": benchmark_result['num_prompts_attempted'],
      "num_prompts_succeeded": benchmark_result['num_prompts_succeeded'],
      "request_rate": args.request_rate,
      'server_metrics': {
        **server_metrics
      },
      **benchmark_result,
      **errors,
    },
    # dimensions values are strings
    "dimensions": {
      "date": args.start_datetime.strftime('%Y%m%d-%H%M%S'),
      "backend": args.backend,
      "model_id": model,
      "tokenizer_id": args.tokenizer,
      **(json.loads(args.additional_metadata_metrics_to_save)
         if args.additional_metadata_metrics_to_save else {})
    },
    "config": {
      "model": model,
      "num_models": len(args.models.split(',')),
      "model_server": args.backend,
      "start_time": {
        "seconds" : start_dt_proto.seconds,
        "nanos" : start_dt_proto.nanos
      }
    },
    "summary_stats": {
      "stats": [{
        "request_rate": args.request_rate,
        "request_latency": {
          "mean": benchmark_result["avg_latency"],
          "median": benchmark_result["median_latency"],
          "sd": benchmark_result["sd_latency"],
          "min": benchmark_result["min_latency"],
          "max": benchmark_result["max_latency"],
          "p90": benchmark_result["p90_latency"],
          "p99": benchmark_result["p99_latency"],
        },
        "throughput": {
          "mean": benchmark_result['throughput']
        },
        "input_length": {
          "mean": benchmark_result["avg_input_len"],
          "median": benchmark_result["median_input_len"],
          "sd": benchmark_result["sd_input_len"],
          "min": benchmark_result["min_input_len"],
          "max": benchmark_result["max_input_len"],
          "p90": benchmark_result["p90_input_len"],
          "p99": benchmark_result["p99_input_len"],
        },
        "output_length": {
          "mean": benchmark_result["avg_output_len"],
          "median": benchmark_result["median_output_len"],
          "sd": benchmark_result["sd_output_len"],
          "min": benchmark_result["min_output_len"],
          "max": benchmark_result["max_output_len"],
          "p90": benchmark_result["p90_output_len"],
          "p99": benchmark_result["p99_output_len"],
        },
        "tpot": {
          "mean": benchmark_result["avg_per_output_token_latency"],
          "median": benchmark_result["median_per_output_token_latency"],
          "sd": benchmark_result["sd_per_output_token_latency"],
          "min": benchmark_result["min_per_output_token_latency"],
          "max": benchmark_result["max_per_output_token_latency"],
          "p90": benchmark_result["p90_per_output_token_latency"],
          "p99": benchmark_result["p99_per_output_token_latency"],
        },
        "model_server_metrics" : [{"Name": name, **metrics} for name, metrics in server_metrics.items()]
      }]
    }
  }
  
  # Save to file
  model_without_slash = model.replace("/", "-")
  file_name = (
      f"{args.file_prefix}-{args.backend}-{args.request_rate}qps-"
      f"{args.start_datetime.strftime('%Y%m%d-%H%M%S')}-{model_without_slash}.json"
  )
  with open(file_name, "w", encoding="utf-8") as outfile:
    json.dump(final_json, outfile)
  # Upload the results file to GCS when a bucket handle is configured.
  if gcs_bucket is not None:
    try:
      gcs_bucket.blob(f"{args.output_bucket_filepath}/{file_name}").upload_from_filename(file_name)
      print(f"File {file_name} uploaded to gs://{args.output_bucket}/{args.output_bucket_filepath}")
    except google.cloud.exceptions.NotFound:
      print(f"GCS bucket (gs://{args.output_bucket}) does not exist")