in tools/dependency_summary.py [0:0]
def _license_group_key(info):
    """Return the key under which a dependency's license text is deduplicated."""
    license_id = info["license"]
    if license_id in ("MPL-2.0", "Apache-2.0") or license_id.startswith("EXT-"):
        # We know these licenses to have shared license text, sometimes differing
        # on e.g. punctuation details, so the license id alone is the key.
        # XXX TODO: should check this more explicitly to ensure they contain the expected text.
        return license_id
    # Other license texts typically include copyright notices that we can't dedupe,
    # except on whitespace: strip all whitespace before hashing.
    normalized = "".join(info["license_text"].split())
    return license_id + ":" + hashlib.sha256(normalized.encode("utf8")).hexdigest()


def _canonical_license_text(group_key, sorted_deps):
    """Pick the single license text that represents a whole group of dependencies."""
    if group_key != "Apache-2.0":
        return sorted_deps[0]["license_text"]
    # As a bit of a hack, we need to find a copy of the "canonical" apache license text
    # that still has the copyright placeholders in it, and no project-specific additions.
    for dep in sorted_deps:
        candidate = dep["license_text"]
        if "[yyyy]" in candidate and "NSS" not in candidate:
            return candidate
    raise RuntimeError("Could not find appropriate apache license text")


def group_dependencies_for_printing(deps):
    """Group dependencies by license text and return the groups in print order.

    This is a helper function to group and sort our various dependencies,
    so that they're always printed in sensible, consistent order and we
    don't unnecessarily repeat any license text.

    Args:
        deps: iterable of dependency-info mappings. Each must provide the keys
            "name", "id", "license" and "license_text"; "license_url" is
            optional.

    Returns:
        A list of dicts, one per group, with the keys "title", "dependencies"
        (the group's members sorted by name, then id), "license",
        "license_text_hash", "license_text" and "license_url". Groups are
        ordered by license preference, then by their dependency names.

    Raises:
        RuntimeError: if an "Apache-2.0" group contains no license text that
            still has the "[yyyy]" placeholder and no "NSS" addition.
    """
    # Group by shared license text where possible.
    deps_by_group_key = collections.defaultdict(list)
    for info in deps:
        deps_by_group_key[_license_group_key(info)].append(info)

    # Add summary information for each group.
    groups = []
    for group_key, members in deps_by_group_key.items():
        # Sort by name and then by full package id, to produce a stable total order
        # that makes sense to humans and handles multiple versions of the same package.
        sorted_deps = sorted(members, key=lambda d: (d["name"], d["id"]))
        license_id = sorted_deps[0]["license"]
        # Find single canonical license text for the group, which is the whole
        # point of grouping.
        license_text = _canonical_license_text(group_key, sorted_deps)
        # Make a nice human-readable description for the group.
        # For some licenses we don't want to list all the deps in the title.
        if license_id in ("MPL-2.0", "Apache-2.0"):
            title = make_license_title(license_id)
        else:
            title = make_license_title(license_id, sorted_deps)
        groups.append(
            {
                "title": title,
                "dependencies": sorted_deps,
                "license": license_id,
                "license_text_hash": group_key,
                "license_text": license_text,
                "license_url": sorted_deps[0].get("license_url"),
            }
        )

    # List groups in the order in which we prefer their license, then in alphabetical order
    # of the dependency names. This ensures a convenient and stable ordering.
    def sort_key(group):
        names = [d["name"] for d in group["dependencies"]]
        for rank, preferred in enumerate(LICENSES_IN_PREFERENCE_ORDER):
            if group["license"] == preferred:
                return (rank, names)
        # Licenses not in the preference list sort after every listed one.
        # Using len() rather than the leftover loop index keeps this well-defined
        # even when the preference list is empty.
        return (len(LICENSES_IN_PREFERENCE_ORDER), names)

    groups.sort(key=sort_key)
    return groups