# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import json
import matplotlib.pyplot as plt
import numpy as np

def geomean(data):
    return np.prod(data) ** (1 / len(data))

def generate_query_speedup_chart(baseline, comparison, label1: str, label2: str, benchmark: str, title: str):
    results = []
    for query in range(1, query_count(benchmark)+1):
        a = np.median(np.array(baseline[str(query)]))
        b = np.median(np.array(comparison[str(query)]))
        if a > b:
            speedup = a/b-1
        else:
            speedup = -(1/(a/b)-1)
        results.append(("q" + str(query), round(speedup*100, 0)))

    results = sorted(results, key=lambda x: -x[1])

    queries, speedups = zip(*results)

    # Create figure and axis
    if benchmark == "tpch":
        fig, ax = plt.subplots(figsize=(10, 6))
    else:
        fig, ax = plt.subplots(figsize=(35, 10))

    # Create bar chart
    bars = ax.bar(queries, speedups, color='skyblue')

    # Add text annotations
    for bar, speedup in zip(bars, speedups):
        yval = bar.get_height()
        if yval >= 0:
            ax.text(bar.get_x() + bar.get_width() / 2.0, min(800, yval+5), f'{yval:.0f}%', va='bottom', ha='center', fontsize=8,
                    color='blue', rotation=90)
        else:
            ax.text(bar.get_x() + bar.get_width() / 2.0, yval, f'{yval:.0f}%', va='top', ha='center', fontsize=8,
                    color='blue', rotation=90)

    # Add title and labels
    ax.set_title(label2 + " speedup over " + label1 + " (" + title + ")")
    ax.set_ylabel('Speedup (100% speedup = 2x faster)')
    ax.set_xlabel('Query')

    # Customize the y-axis to handle both positive and negative values better
    ax.axhline(0, color='black', linewidth=0.8)
    min_value = (min(speedups) // 100) * 100
    max_value = ((max(speedups) // 100) + 1) * 100 + 50
    if benchmark == "tpch":
        ax.set_ylim(min_value, max_value)
    else:
        # TODO improve this
        ax.set_ylim(-250, 300)

    # Show grid for better readability
    ax.yaxis.grid(True)

    # Save the plot as an image file
    plt.savefig(f'{benchmark}_queries_speedup.png', format='png')


def generate_query_comparison_chart(results, labels, benchmark: str, title: str):
    queries = []
    benches = []
    for _ in results:
        benches.append([])
    for query in range(1, query_count(benchmark)+1):
        queries.append("q" + str(query))
        for i in range(0, len(results)):
            benches[i].append(np.median(np.array(results[i][str(query)])))

    # Define the width of the bars
    bar_width = 0.3

    # Define the positions of the bars on the x-axis
    index = np.arange(len(queries)) * 1.5

    # Create a bar chart
    if benchmark == "tpch":
        fig, ax = plt.subplots(figsize=(15, 6))
    else:
        fig, ax = plt.subplots(figsize=(35, 6))

    for i in range(0, len(results)):
        bar = ax.bar(index + i * bar_width, benches[i], bar_width, label=labels[i])

    # Add labels, title, and legend
    ax.set_title(title)
    ax.set_xlabel('Queries')
    ax.set_ylabel('Query Time (seconds)')
    ax.set_xticks(index + bar_width / 2)
    ax.set_xticklabels(queries)
    ax.legend()

    # Save the plot as an image file
    plt.savefig(f'{benchmark}_queries_compare.png', format='png')

def generate_summary(results, labels, benchmark: str, title: str):
    timings = []
    for _ in results:
        timings.append(0)

    num_queries = query_count(benchmark)
    for query in range(1, num_queries + 1):
        for i in range(0, len(results)):
            timings[i] += np.median(np.array(results[i][str(query)]))

    # Create figure and axis
    fig, ax = plt.subplots()

    # Add title and labels
    ax.set_title(title)
    ax.set_ylabel(f'Time in seconds to run all {num_queries} {benchmark} queries (lower is better)')

    times = [round(x,0) for x in timings]

    # Create bar chart
    bars = ax.bar(labels, times, color='skyblue')

    # Add text annotations
    for bar in bars:
        yval = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2.0, yval, f'{yval}', va='bottom')  # va: vertical alignment

    plt.savefig(f'{benchmark}_allqueries.png', format='png')

def query_count(benchmark: str):
    if benchmark == "tpch":
        return 22
    elif benchmark == "tpcds":
        return 99
    else:
        raise "invalid benchmark name"

def main(files, labels, benchmark: str, title: str):
    results = []
    for filename in files:
        with open(filename) as f:
            results.append(json.load(f))
    generate_summary(results, labels, benchmark, title)
    generate_query_comparison_chart(results, labels, benchmark, title)
    if len(files) == 2:
        generate_query_speedup_chart(results[0], results[1], labels[0], labels[1], benchmark, title)

if __name__ == '__main__':
    argparse = argparse.ArgumentParser(description='Generate comparison')
    argparse.add_argument('filenames', nargs='+', type=str, help='JSON result files')
    argparse.add_argument('--labels', nargs='+', type=str, help='Labels')
    argparse.add_argument('--benchmark', type=str, help='Benchmark name (tpch or tpcds)')
    argparse.add_argument('--title', type=str, help='Chart title')
    args = argparse.parse_args()
    main(args.filenames, args.labels, args.benchmark, args.title)