def group_dependencies_for_printing()

in tools/dependency_summary.py [0:0]


def group_dependencies_for_printing(deps):
    """Group dependencies by shared license text and return the groups in print order.

    This is a helper function to group and sort our various dependencies,
    so that they're always printed in sensible, consistent order and we
    don't unnecessarily repeat any license text.

    Args:
        deps: iterable of dependency-info dicts. Each dict is read for the keys
            "license", "name" and "id"; "license_text" is read for any license
            that is not grouped purely by its identifier, and for the first
            (sorted) member of every group; "license_url" is optional.

    Returns:
        A list of group dicts with the keys "title", "dependencies" (sorted by
        name, then id), "license", "license_text_hash", "license_text" and
        "license_url". Groups are ordered by the position of their license in
        LICENSES_IN_PREFERENCE_ORDER (unlisted licenses last), then by the
        list of dependency names in the group.

    Raises:
        RuntimeError: if no dependency in the "Apache-2.0" group carries a
            license text containing the "[yyyy]" placeholder and not
            containing "NSS".
    """
    # Group by shared license text where possible.
    deps_by_license_text_hash = collections.defaultdict(list)
    for info in deps:
        license_id = info["license"]
        if license_id in ("MPL-2.0", "Apache-2.0") or license_id.startswith("EXT-"):
            # We know these licenses to have shared license text, sometimes differing on e.g. punctuation details.
            # XXX TODO: should check this more explicitly to ensure they contain the expected text.
            license_text_hash = license_id
        else:
            # Other license texts typically include copyright notices that we can't dedupe, except on whitespace.
            squashed_text = "".join(info["license_text"].split())
            license_text_hash = (
                license_id + ":" + hashlib.sha256(squashed_text.encode("utf8")).hexdigest()
            )
        deps_by_license_text_hash[license_text_hash].append(info)

    # Add summary information for each group.
    groups = []
    for license_text_hash, unsorted_deps in deps_by_license_text_hash.items():
        # Sort by name and then by full package id, to produce a stable total order
        # that makes sense to humans and handles multiple versions of the same package.
        # (Bound to a new name so the `deps` argument is not clobbered mid-function.)
        group_deps = sorted(unsorted_deps, key=lambda d: (d["name"], d["id"]))

        # Find single canonical license text for the group, which is the whole point of grouping.
        license_id = group_deps[0]["license"]
        if license_text_hash != "Apache-2.0":
            license_text = group_deps[0]["license_text"]
        else:
            # As a bit of a hack, we need to find a copy of the "canonical" apache license text
            # that still has the copyright placeholders in it, and no project-specific additions.
            for dep in group_deps:
                license_text = dep["license_text"]
                if "[yyyy]" in license_text and "NSS" not in license_text:
                    break
            else:
                raise RuntimeError("Could not find appropriate apache license text")

        # Make a nice human-readable description for the group.
        # For some licenses we don't want to list all the deps in the title.
        if license_id in ("MPL-2.0", "Apache-2.0"):
            title = make_license_title(license_id)
        else:
            title = make_license_title(license_id, group_deps)

        groups.append(
            {
                "title": title,
                "dependencies": group_deps,
                "license": license_id,
                "license_text_hash": license_text_hash,
                "license_text": license_text,
                "license_url": group_deps[0].get("license_url"),
            }
        )

    # List groups in the order in which we prefer their license, then in alphabetical order
    # of the dependency names. This ensures a convenient and stable ordering.
    #
    # The rank table is built once up front. Unlisted licenses get a rank one past the
    # end of the preference list; computing that from the list length (rather than from
    # a leaked loop variable) keeps this working even when the preference list is empty.
    preferred_licenses = list(LICENSES_IN_PREFERENCE_ORDER)
    unlisted_rank = len(preferred_licenses)
    preference_rank = {}
    for rank, preferred in enumerate(preferred_licenses):
        # First occurrence wins, matching a front-to-back scan of the list.
        preference_rank.setdefault(preferred, rank)

    def sort_key(group):
        rank = preference_rank.get(group["license"], unlisted_rank)
        return (rank, [d["name"] for d in group["dependencies"]])

    groups.sort(key=sort_key)
    return groups