dev/stats/calculate_statistics_provider_testing_issues.py (169 lines of code) (raw):
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import logging
import os
import re
import textwrap
from pathlib import Path
from typing import TYPE_CHECKING
import rich_click as click
from attr import dataclass
from github import Github
from rich.console import Console
from tabulate import tabulate
if TYPE_CHECKING:
from github.Issue import Issue
# Label carried by the GitHub issues this script collects statistics from.
PROVIDER_TESTING_LABEL = "testing status"

logger = logging.getLogger(__name__)

# Very wide console using the "standard" colour system.
console = Console(color_system="standard", width=400)

# Directory containing this script, and the directory two levels above it
# (presumably the repository root - confirm against the repository layout).
MY_DIR_PATH = Path(os.path.dirname(__file__))
SOURCE_DIR_PATH = MY_DIR_PATH.joinpath(os.pardir, os.pardir)
# Root click command group; sub-commands register themselves with ``@cli.command()``.
# Documented with comments only: click would surface a docstring as help text.
@click.group(context_settings={"help_option_names": ["-h", "--help"], "max_content_width": 500})
def cli():
    pass
# Reusable ``--table`` flag: when set, the command prints one markdown table
# instead of one line per issue.
option_table = click.option(
    "--table",
    is_flag=True,
    help="Print output as markdown table",
)
# Reusable ``--github-token`` option. The value is required, but click also reads
# it from the GITHUB_TOKEN environment variable, so the flag itself may be left out.
option_github_token = click.option(
    "--github-token",
    type=str,
    required=True,
    help=textwrap.dedent(
        """
        GitHub token used to authenticate.
        You can omit it if you have the GITHUB_TOKEN env variable set.
        Can be generated with:
        https://github.com/settings/tokens/new?description=Read%20Write%20issues&scopes=repo"""
    ),
    envvar="GITHUB_TOKEN",
)
@dataclass
class Stats:
    """Testing statistics for one provider-testing issue (or an aggregate of several).

    The percentage helpers use integer (floor) division and return 0 when their
    denominator is zero, so an empty aggregate or an issue without any mentioned
    users can still be rendered instead of raising ``ZeroDivisionError``.
    """

    # Number and title of the GitHub issue.
    issue_number: int
    title: str
    # Number of providers counted in the issue.
    num_providers: int
    # Number of issues to test, and how many of them are marked as tested.
    num_issues: int
    tested_issues: int
    # Link to the issue.
    url: str
    # "@login" handles mentioned in the issue, and handles of users who commented.
    users_involved: set[str]
    users_commented: set[str]

    def percent_tested(self) -> int:
        """Return the percentage of issues marked as tested (0 if there are none)."""
        if self.num_issues == 0:
            return 0
        return 100 * self.tested_issues // self.num_issues

    def num_involved_users_who_commented(self) -> int:
        """Return how many involved users also commented."""
        return len(self.users_involved.intersection(self.users_commented))

    def num_commenting_not_involved(self) -> int:
        """Return how many commenting users were not among the involved ones."""
        return len(self.users_commented - self.users_involved)

    def percent_commented_among_involved(self) -> int:
        """Return the percentage of involved users who commented (0 if nobody is involved)."""
        if not self.users_involved:
            return 0
        return 100 * self.num_involved_users_who_commented() // len(self.users_involved)

    def __str__(self):
        """Return a one-line, human-readable summary of all statistics."""
        return (
            f"#{self.issue_number}: {self.title}: Num providers: {self.num_providers}, "
            f"Issues: {self.num_issues}, Tested {self.tested_issues}, "
            f"Percent Tested: {self.percent_tested()}%, "
            f"Involved users: {len(self.users_involved)}, Commenting users: {len(self.users_commented)}, "
            f"Involved who commented: {self.num_involved_users_who_commented()}, "
            f"Extra people: {self.num_commenting_not_involved()}, "
            f"Percent commented: {self.percent_commented_among_involved()}%, "
            f"URL: {self.url}"
        )
# A mention is "@" followed by a login made of alphanumerics, hyphens or
# underscores that starts and ends with an alphanumeric character. The
# look-behind skips an "@" embedded in a word, such as in an e-mail address.
_GITHUB_MENTION_RE = re.compile(r"(?<!\w)@[A-Za-z0-9](?:[A-Za-z0-9_-]*[A-Za-z0-9])?")


def get_users_from_content(content: str) -> set[str]:
    """Return the distinct ``@login`` mentions found in ``content``.

    Each returned handle keeps its leading "@". A lone "@" is not a mention, and
    punctuation following a mention (``"@user,"``, ``"(@user)"``) is not part of
    the handle, so results can be compared with ``"@" + login`` values.

    :param content: text to scan, e.g. an issue body
    :return: set of mentioned handles; empty when there are none
    """
    return set(_GITHUB_MENTION_RE.findall(content))
def get_users_who_commented(issue: Issue) -> set[str]:
    """Return the ``@login`` handle of every user who commented on ``issue``.

    :param issue: issue whose comments are fetched via ``issue.get_comments()``
    :return: distinct commenter logins, each prefixed with "@"
    """
    return {"@" + remark.user.login for remark in issue.get_comments()}
def get_stats(issue: Issue) -> Stats:
    """Compute testing statistics from the body and comments of ``issue``.

    All counts are plain substring counts over the issue body.

    :param issue: GitHub issue to analyse
    :return: statistics for that single issue
    """
    # An issue created without a description has no body (None); treat it as
    # empty text instead of failing on ``None.count``.
    content = issue.body or ""
    return Stats(
        issue_number=issue.number,
        title=issue.title,
        num_providers=content.count("Provider "),
        # The "- 1" presumably discounts one checkbox in the issue text that is
        # not an item to test - TODO confirm against the issue template.
        num_issues=content.count("- [") - 1,
        # Ticked checkboxes in either case, with the same "- 1" adjustment.
        tested_issues=content.count("[x]") + content.count("[X]") - 1,
        url=issue.html_url,
        users_involved=get_users_from_content(content),
        users_commented=get_users_who_commented(issue),
    )
def stats_to_rows(stats_list: list[Stats]) -> list[tuple]:
    """Convert per-issue statistics into table rows, followed by a "Total" row.

    Each row holds ten cells: a label (a markdown link to the issue, or "Total")
    and nine metric columns. Counts in the total row are sums; its user sets
    are unions over all issues.

    :param stats_list: statistics of the individual issues, in output order
    :return: one row per issue plus the final "Total" row
    """

    def metric_cells(entry: Stats) -> tuple:
        # The nine metric columns shared by issue rows and the total row.
        return (
            entry.num_providers,
            entry.num_issues,
            entry.tested_issues,
            entry.percent_tested(),
            len(entry.users_involved),
            len(entry.users_commented),
            entry.num_involved_users_who_commented(),
            entry.num_commenting_not_involved(),
            entry.percent_commented_among_involved(),
        )

    summary = Stats(
        issue_number=0,
        title="",
        num_providers=0,
        num_issues=0,
        tested_issues=0,
        url="",
        users_involved=set(),
        users_commented=set(),
    )
    table_rows: list[tuple] = []
    for entry in stats_list:
        # Fold this issue into the running totals before emitting its row.
        summary.num_providers += entry.num_providers
        summary.num_issues += entry.num_issues
        summary.tested_issues += entry.tested_issues
        summary.users_involved.update(entry.users_involved)
        summary.users_commented.update(entry.users_commented)
        issue_link = f"[{entry.issue_number}]({entry.url})"
        table_rows.append((issue_link, *metric_cells(entry)))
    table_rows.append(("Total", *metric_cells(summary)))
    return table_rows
# Command: walk all closed "testing status" issues of apache/airflow (oldest
# first) and print their statistics, either one line per issue as they are
# fetched or, with --table, as one GitHub-flavoured markdown table at the end.
# Documented with comments only: click would surface a docstring as help text.
@option_github_token
@option_table
@cli.command()
def provide_stats(github_token: str, table: bool):
    repo = Github(github_token).get_repo("apache/airflow")
    issues = repo.get_issues(labels=[PROVIDER_TESTING_LABEL], state="closed", sort="created", direction="asc")
    collected: list[Stats] = []
    for issue in issues:
        issue_stats = get_stats(issue)
        if table:
            # Table mode needs every row (and the totals) before printing.
            collected.append(issue_stats)
        else:
            print(issue_stats)
    if not table:
        return
    column_names = (
        "Issue",
        "Num Providers",
        "Num Issues",
        "Tested Issues",
        "Tested (%)",
        "Involved",
        "Commenting",
        "Involved who commented",
        "Extra people",
        "User response (%)",
    )
    print(tabulate(stats_to_rows(collected), headers=column_names, tablefmt="github"))
# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()