asfyaml/dataobjects.py (146 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import pathlib
import subprocess

import asfyaml.mappings as mappings

DEFAULT_BRANCH = "refs/heads/main"
UNKNOWN_BRANCH = "--unknown-branch--"
GIT_CMD = "/usr/bin/git"

# COMMIT_FIELDS represent the data points we collect from commits when iterating over the change-sets.
# Each entry is (attribute name set on Commit, git pretty-format placeholder).
COMMIT_FIELDS = [
    ("commit", "%h"),
    ("parents", "%p"),
    ("tree", "%t"),
    ("author", "%aN <%ae>"),
    ("authored", "%ad"),
    ("author_name", "%aN"),
    ("author_email", "%ae"),
    ("committer", "%cN <%ce>"),
    ("committer_email", "%ce"),
    ("committed", "%cd"),
    ("committed_unix", "%ct"),
    ("ref_names", "%d"),
    ("subject", "%s"),
    ("body", "%B"),
]


def gitcmd(*args):
    """Run a git command and return its standard output as a string.

    :param args: Arguments passed to the git binary at ``GIT_CMD``, one per positional argument.
    :returns: The decoded stdout of the command. If git exits with a non-zero status, the
        captured stderr is printed and an empty string is returned instead of raising.
    """
    xargs = [GIT_CMD, *args]
    try:
        rv = subprocess.check_output(xargs, stderr=subprocess.PIPE, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        # Best-effort: report what git complained about and hand back an empty result.
        print(e.stderr)
        rv = ""
    return rv


class Committer:
    """Simple info class for committer (pusher) of code"""

    def __init__(self, username):
        #: str: The ASF user id of the person that pushed this commit, for instance :samp:`humbedooh`
        self.username = username
        #: str: The ASF email address of the person that pushed this commit, for instance
        #: :samp:`humbedooh@apache.org`.
        self.email = f"{username}@apache.org"


class Commit:
    """A single commit, populated by running ``git show`` on its sha.

    Every key in ``COMMIT_FIELDS`` becomes an attribute of the instance (``commit``, ``parents``,
    ``author``, ``subject``, ``body``, ...). In addition ``stats`` holds the ``--stat`` summary,
    and ``committer_uname`` / ``committer_domain`` hold the two sides of the committer email.
    """

    def __init__(self, ref, sha):
        """Load the commit metadata for ``sha``.

        :param ref: The ref update this commit belongs to; stored unchanged as ``self.ref``.
        :param sha: The commit id handed to ``git show``.
        """
        self.ref = ref
        self.sha = sha

        # Ask git for all fields in one go, each one terminated by a NUL byte so that
        # multi-line values (such as the body) can be split apart safely.
        placeholders = r"%x00".join([s for _, s in COMMIT_FIELDS])
        fmt = f"--format=format:{placeholders}%x00"
        args = ["show", "--stat=75", fmt, self.sha]
        parts = gitcmd(*args).split("\x00")

        # Whatever follows the final NUL is the --stat output; drop its empty lines.
        self.stats = "\n".join(filter(None, parts.pop(-1).splitlines()))
        for pos, (key, _) in enumerate(COMMIT_FIELDS):
            setattr(self, key, parts[pos])
        self.committed_unix = int(self.committed_unix)

        email_parts = self.committer_email.split("@")
        self.committer_uname = email_parts[0]
        self.committer_domain = email_parts[1] if len(email_parts) > 1 else ""

    def __cmp__(self, other):
        # NOTE(review): Python 3 never calls __cmp__ for comparisons, and this returns an
        # equality boolean rather than a negative/zero/positive ordering. Kept unchanged for
        # any explicit callers; confirm whether rich comparisons are wanted instead.
        return self.committed_unix == other.committed_unix

    @property
    def is_merge(self):
        """Returns True if this commit has more than one parent"""
        return len(self.parents.split()) > 1

    @property
    def files(self):
        """Returns the list of file names reported by ``git show --name-only`` for this commit"""
        files = gitcmd("show", "--name-only", "--format=format:", self.sha)
        return [line.strip() for line in files.splitlines() if line.strip()]

    def diff(self, fname):
        """Returns the output of ``git show`` for this commit limited to ``fname``, left-stripped"""
        args = ["show", "--format=format:", self.sha, "--", fname]
        return gitcmd(*args).lstrip()


class ChangeSet:
    """A single ref update: the ref name plus the sha it moved from and the sha it moved to."""

    def __init__(self, name, oldsha, newsha):
        self.name = name
        self.oldsha = oldsha
        self.newsha = newsha

    @property
    def created(self):
        """Returns True if this tag or branch was just created"""
        return self.oldsha == ("0" * 40)

    @property
    def deleted(self):
        """Returns True if this branch or tag was deleted"""
        return self.newsha == ("0" * 40)

    @property
    def is_tag(self):
        """Returns True if this ref is a tag rather than a branch"""
        return self.name.startswith("refs/tags/")

    @property
    def is_branch(self):
        """Returns True if this ref is a branch rather than a tag"""
        return self.name.startswith("refs/heads/")

    @property
    def is_rewrite(self):
        """Returns true if this is a history rewrite"""
        return self.merge_base != self.oldsha

    @property
    def commits(self, num=None, reverse=False):
        """Lists all commits in this ref update as Commit objects

        This is a generator accessed as a property, so ``num`` and ``reverse`` can never be
        supplied by a caller and always take their default values.
        """
        # Deleted refs have no commits.
        if self.deleted:
            return

        args = ["rev-list"]
        if num is not None:
            args += ["-n", str(num)]
        if reverse:
            args.append("--reverse")

        if self.created:
            # Only report commits that aren't reachable from any other branch. The ref
            # listing is only needed for newly created refs, so it is only fetched here.
            for r in gitcmd("for-each-ref", "--format=%(refname)").splitlines():
                refname = r.strip()
                if refname == self.name:
                    continue
                if refname.startswith("refs/heads/"):
                    args.append("^%s" % refname)
            args.append(self.newsha)
        else:
            args.append("%s..%s" % (self.oldsha, self.newsha))

        for line in gitcmd(*args).splitlines():
            sha = line.strip()
            yield Commit(self, sha)

    @property
    def merge_base(self):
        """finds the best common ancestor(s) between two commits to use in a three-way merge."""
        # A created or deleted ref has an all-zero side, so there is nothing to compare.
        if ("0" * 40) in (self.oldsha, self.newsha):
            return "0" * 40
        sha = gitcmd("merge-base", self.oldsha, self.newsha)
        return sha.strip()


class Repository:
    """Simple class that holds information about the repository (and branch) being processed.

    :parameter path: The filesystem path to the .git directory for this repository.

    Example::

        import dataobjects
        repo = dataobjects.Repository("/x1/repos/asf/tomcat/tomcat9.git")
        assert repo.is_private is False, "This should not be a private repo!"
        website = f"https://{repo.hostname}.apache.org/"
    """

    def __init__(self, path, reflog="", org_id: str = "apache"):
        #: str|Pathlike: The filesystem path to this repository directory
        self.path = pathlib.Path(path)
        #: str: The name of this repository (sans the .git part), for instance :samp:`whimsy-website`.
        self.name = self.path.name.removesuffix(".git")
        #: str: Ref update log, if found. Follows standard git syntax, one entry per line
        # with: "oldsha newsha refname". Used for populating self.changesets
        self._reflog = reflog or ""
        #: str: The GitHub organization this repository belongs to, by default `apache`.
        self.org_id = org_id

    @property
    def is_private(self):
        """Set to True if the repository is a private repository, False if it is public"""
        return "private" in self.path.parts

    @property
    def project(self):
        """Returns the LDAP name of the project owning this repository, for instance httpd or openoffice"""
        match = mappings.REPO_RE.match(self.name)
        if match:
            return match.group(1)
        return "infrastructure"  # Weird repo name, default to infra owning it.

    @property
    def hostname(self):
        """Returns the hostname for the project. httpd for httpd, but whimsical for whimsy."""
        return mappings.LDAP_TO_HOSTNAME.get(self.project, self.project)

    @property
    def default_branch(self):
        """Returns the default branch for this repository.

        The name is read from the ``HEAD`` file inside the repository directory; when that
        file does not exist, the branch named by ``DEFAULT_BRANCH`` is returned instead.
        """
        head_path = os.path.join(self.path, "HEAD")
        if os.path.isfile(head_path):
            # Use a context manager so the file handle is closed deterministically.
            with open(head_path) as head_file:
                hb = head_file.read().removeprefix("ref: refs/heads/").strip()
        else:
            hb = DEFAULT_BRANCH.removeprefix("refs/heads/")
        return hb

    @property
    def changesets(self):
        """Yields a ChangeSet for each ref update seen in this push. Each ChangeSet can have several commits bundled"""
        for line in self._reflog.splitlines():
            # Blank lines carry no ref update; skip them rather than failing to unpack.
            if not line.strip():
                continue
            oldsha, newsha, name = line.split(None, 2)
            yield ChangeSet(name.strip(), oldsha, newsha)