scripts/fetch_contributors_and_team.py (79 lines of code) (raw):
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations
# under the License.
#
"""
fetch_contributors_and_team.py
This script fetches:
1. Committers on the repository since a hardcoded fork date (only those who are also members of the team below are kept).
2. Members of a specific GitHub team.
Output:
- contributors_and_team.json: Contains contributors and team members with the following fields:
- login: GitHub username.
- avatar_url: URL to the user's GitHub avatar.
Requirements:
- A valid GitHub API token with `repo` and `read:org` permissions.
The token must be set as an environment variable `GITHUB_TOKEN`.
Prerequisites:
- Python 3.7 or later.
- Modules: `requests`.
Usage:
1. Ensure you have `requests` installed (`pip install requests`).
2. Set the environment variable `GITHUB_TOKEN` with a valid token:
- export GITHUB_TOKEN="your_github_personal_access_token"
3. Run the script:
- ./fetch_contributors_and_team.py
4. The output file `contributors_and_team.json` will be saved in the `cloudberry-site/static` directory.
"""
import os
import requests
import json
import sys
# The GitHub API token comes from the environment; stop right away when it is
# missing or empty.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
if not GITHUB_TOKEN:
    raise ValueError("Please set the GITHUB_TOKEN environment variable.")

# Organization, repository and team queried by this script
OWNER = "apache"
REPO = "cloudberry"
TEAM = "cloudberry-committers"

# Fork point (UTC); passed as the `since` filter when listing commits
FORK_DATE = "2022-02-09T22:31:43Z"

# GitHub REST endpoints built from the identifiers above
COMMITS_URL = f"https://api.github.com/repos/{OWNER}/{REPO}/commits"
TEAM_MEMBERS_URL = f"https://api.github.com/orgs/{OWNER}/teams/{TEAM}/members"

# Authorization header sent with every API request
HEADERS = {
    "Authorization": f"Bearer {GITHUB_TOKEN}",
}

# Output lands in the `static` directory one level above this script's directory
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
STATIC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "../static"))

# Create the output directory up front; an existing directory is not an error
os.makedirs(STATIC_DIR, exist_ok=True)
def fetch_committers(team_members, timeout=30):
    """
    Collect the logins of team members who committed since the fork date.

    Pages through the repository commits endpoint (``COMMITS_URL``), 100
    commits per page, filtered with ``since=FORK_DATE``. For every commit
    the login of its top-level ``committer`` object is recorded when that
    login is contained in ``team_members``.

    Args:
        team_members: Collection of GitHub logins. Committers that are not
            in this collection are ignored.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error. Defaults to 30.

    Returns:
        set: Logins of the matching committers. When a page request answers
        with a non-200 status, the error is printed and the logins gathered
        so far are returned.
    """
    print(f"Fetching committers since fork date: {FORK_DATE}...")
    committers = set()
    params = {"per_page": 100, "page": 1, "since": FORK_DATE}

    while True:
        # The trailing "\r" returns the cursor to the start of the line so the
        # next progress message overwrites this one.
        sys.stdout.write(f"Processing page {params['page']} of committers...\r")
        sys.stdout.flush()

        # An explicit timeout keeps a stalled connection from blocking the
        # script indefinitely (requests applies no timeout by default).
        response = requests.get(
            COMMITS_URL, headers=HEADERS, params=params, timeout=timeout
        )
        if response.status_code != 200:
            print(f"\nError fetching committers: {response.status_code}, {response.text}")
            break

        commits = response.json()
        if not commits:
            # An empty page marks the end of the paginated results.
            break

        for commit in commits:
            # `committer` may be missing or null; treat both as "no login".
            username = (commit.get("committer") or {}).get("login")
            if username and username in team_members:
                committers.add(username)

        params["page"] += 1

    # Finish the progress line that was being rewritten with "\r".
    sys.stdout.write("\n")
    print(f"Found {len(committers)} unique committers.")
    return committers
def fetch_team_members(timeout=30):
    """
    Fetch the logins of the members of the GitHub team named by ``TEAM``.

    Pages through ``TEAM_MEMBERS_URL`` 100 members at a time until an empty
    page is returned.

    Args:
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error. Defaults to 30.

    Returns:
        set: Logins of the team members. When a page request answers with a
        non-200 status, the error is printed and the logins gathered so far
        (possibly none) are returned.
    """
    print("Fetching team members...")
    team_members = set()
    params = {"per_page": 100, "page": 1}

    while True:
        # An explicit timeout keeps a stalled connection from blocking the
        # script indefinitely (requests applies no timeout by default).
        response = requests.get(
            TEAM_MEMBERS_URL, headers=HEADERS, params=params, timeout=timeout
        )
        if response.status_code != 200:
            print(f"Error fetching team members: {response.status_code}, {response.text}")
            break

        members = response.json()
        if not members:
            # An empty page marks the end of the paginated results.
            break

        for member in members:
            username = member.get("login")
            if username:
                team_members.add(username)

        params["page"] += 1

    print(f"Found {len(team_members)} unique team members.")
    return team_members
def save_combined_data(committers, team_members):
    """
    Merge committers with team members and write the result as JSON.

    The merged logins are sorted and each one becomes an object with a
    `login` and an `avatar_url` field. The list is written, indented by four
    spaces, to `contributors_and_team.json` inside `STATIC_DIR`.
    """
    records = []
    for login in sorted(committers.union(team_members)):
        records.append(
            {
                "login": login,
                "avatar_url": f"https://avatars.githubusercontent.com/{login}",
            }
        )

    destination = os.path.join(STATIC_DIR, "contributors_and_team.json")
    with open(destination, "w") as handle:
        json.dump(records, handle, indent=4)

    print(f"Combined data saved to {destination}")
if __name__ == "__main__":
    # Script entry point: fetch the team first, because the committer scan
    # filters against the team's logins.
    print("Starting GitHub user processing...")
    members = fetch_team_members()
    matching_committers = fetch_committers(members)
    # Write the merged, de-duplicated user list to the static directory.
    save_combined_data(matching_committers, members)
    print("Processing complete.")