#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import sys
from github import Github
import os
import re


def print_pulls(repo_name, title, pulls):
    """Print one Markdown changelog section to stdout.

    Emits a bold ``**title:**`` heading, a blank line, one bullet per pull
    request and a trailing blank line. Nothing is printed when ``pulls`` is
    empty.

    Args:
        repo_name: Repository in ``owner/name`` form; used to build PR URLs.
        title: Heading text for the section.
        pulls: Sequence of ``(pull, commit)`` pairs. ``pull`` must expose
            ``number`` and ``title``; ``commit`` must expose ``author``,
            which is either ``None`` or an object with a ``login``.
    """
    if not pulls:
        return

    print("**{}:**".format(title))
    print()
    for pull, commit in pulls:
        url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
        entry = "- {} [#{}]({})".format(pull.title, pull.number, url)
        # A commit can come back without an author object; omit the
        # attribution for that entry instead of crashing on ``None.login``
        # and losing the whole changelog.
        author = commit.author
        if author is not None:
            entry += " ({})".format(author.login)
        print(entry)
    print()


# Conventional Commits title prefix: "<type>(<scope>)!:". The scope and the
# "!" breaking-change marker are both optional. The scope accepts any text
# without parentheses so that e.g. "feat(python-api):" is recognised too.
_CC_PREFIX_RE = re.compile(r"^([a-z]+)(\([^()]+\))?(!)?:")


def generate_changelog(repo, repo_name, tag1, tag2):
    """Print a categorized Markdown changelog for the PRs between two tags.

    Progress messages go to stderr; the changelog itself is printed to
    stdout via ``print_pulls``. Each pull request is listed at most once per
    section and lands in the first matching category, checked in this order:
    breaking change, bug fix, enhancement, documentation. Every pull
    request, categorized or not, is also listed under "Merged pull requests".

    Args:
        repo: Repository object providing ``compare(base, head)``, whose
            result exposes ``commits``; each commit provides ``get_pulls()``.
        repo_name: Repository in ``owner/name`` form, used for PR links.
        tag1: The previous release tag (base of the comparison).
        tag2: The current release tag (head of the comparison).
    """
    # get a list of commits between two tags
    print(
        f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr
    )
    comparison = repo.compare(tag1, tag2)

    # get the pull requests for these commits
    print("Fetching pull requests", file=sys.stderr)
    seen_numbers = set()  # set membership keeps de-duplication O(1) per PR
    all_pulls = []
    for commit in comparison.commits:
        for pull in commit.get_pulls():
            # there can be multiple commits per PR if squash merge is not
            # being used and in this case we should get all the author
            # names, but for now just keep the first commit seen
            if pull.number in seen_numbers:
                continue
            seen_numbers.add(pull.number)
            all_pulls.append((pull, commit))

    # we split the pulls into categories
    # TODO: make categories configurable
    breaking = []
    bugs = []
    docs = []
    enhancements = []

    # categorize the pull requests based on GitHub labels and on a
    # Conventional Commits prefix in the PR title
    print("Categorizing pull requests", file=sys.stderr)
    for pull, commit in all_pulls:
        cc_type = ""  # fix, feat, docs, chore, ...
        cc_breaking = False
        match = _CC_PREFIX_RE.match(pull.title)
        if match:
            cc_type = match.group(1)
            cc_breaking = match.group(3) == "!"

        labels = {label.name for label in pull.labels}
        entry = (pull, commit)
        if "api change" in labels or cc_breaking:
            breaking.append(entry)
        elif "bug" in labels or cc_type == "fix":
            bugs.append(entry)
        elif "enhancement" in labels or cc_type == "feat":
            enhancements.append(entry)
        elif "documentation" in labels or cc_type == "docs":
            docs.append(entry)

    # produce the changelog content
    print("Generating changelog content", file=sys.stderr)
    print_pulls(repo_name, "Breaking changes", breaking)
    print_pulls(repo_name, "Implemented enhancements", enhancements)
    print_pulls(repo_name, "Fixed bugs", bugs)
    print_pulls(repo_name, "Documentation updates", docs)
    print_pulls(repo_name, "Merged pull requests", all_pulls)


def cli(args=None):
    """Process command line arguments and print the changelog.

    Args:
        args: Optional list of argument strings in the order
            ``project tag1 tag2``. When omitted or empty, ``sys.argv[1:]``
            is used.
    """
    if not args:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "project", help="The project name e.g. apache/arrow-datafusion-python"
    )
    parser.add_argument("tag1", help="The previous release tag")
    parser.add_argument("tag2", help="The current release tag")
    # Parse the resolved list explicitly: a bare ``parse_args()`` reads
    # ``sys.argv`` and would silently ignore a caller-supplied ``args``.
    options = parser.parse_args(args)

    # If GITHUB_TOKEN is unset, ``os.getenv`` returns None and that value is
    # passed to ``Github`` unchanged.
    token = os.getenv("GITHUB_TOKEN")

    g = Github(token)
    repo = g.get_repo(options.project)
    generate_changelog(repo, options.project, options.tag1, options.tag2)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli()
