def url_stats()

in lib/ramble/spack/cmd/url.py [0:0]


def url_stats(args):
    """Print a summary table of how package versions and resources are fetched.

    Iterates over every package returned by ``spack.repo.path.all_packages()``,
    builds a fetcher for each of its versions, collects the fetchers of its
    resources, and tallies them by fetch type, URL scheme, checksum algorithm
    and git reference kind.  Versions for which no fetcher can be built
    (``fs.InvalidArgsError`` or ``fs.FetcherConflict``) are skipped.

    Args:
        args: parsed command-line arguments; only ``args.show_issues`` is
            read.  When it is truthy, the URLs that use md5 checksums or
            plain ``http`` are listed per package after the table.
    """
    # dictionary of issue type -> package -> descriptions
    issues = defaultdict(lambda: defaultdict(list))

    class UrlStats:
        """Counters describing one group of fetchers (versions or resources)."""

        def __init__(self):
            self.total = 0
            self.schemes = defaultdict(int)
            self.checksums = defaultdict(int)
            self.url_type = defaultdict(int)
            self.git_type = defaultdict(int)

        def add(self, pkg_name, fetcher):
            """Count ``fetcher`` and record any issues found against ``pkg_name``."""
            self.total += 1

            url_type = fetcher.url_attr
            self.url_type[url_type or 'no code'] += 1

            if url_type == 'url':
                digest = getattr(fetcher, 'digest', None)
                if digest:
                    algo = crypto.hash_algo_for_digest(digest)
                else:
                    algo = 'no checksum'
                self.checksums[algo] += 1

                if algo == "md5":
                    issues["md5 hashes"][pkg_name].append(fetcher.url)

                # parse out the URL scheme (https/http/ftp/etc.)
                urlinfo = urllib.parse.urlparse(fetcher.url)
                self.schemes[urlinfo.scheme] += 1

                if urlinfo.scheme == "http":
                    issues["http urls"][pkg_name].append(fetcher.url)

            elif url_type == 'git':
                # only the first reference kind that is set is counted
                if getattr(fetcher, 'commit', None):
                    self.git_type['commit'] += 1
                elif getattr(fetcher, 'branch', None):
                    self.git_type['branch'] += 1
                elif getattr(fetcher, 'tag', None):
                    self.git_type['tag'] += 1
                else:
                    self.git_type['no ref'] += 1

    npkgs = 0
    version_stats = UrlStats()
    resource_stats = UrlStats()

    for pkg in spack.repo.path.all_packages():
        npkgs += 1

        for v in pkg.versions:
            try:
                fetcher = fs.for_package_version(pkg, v)
            except (fs.InvalidArgsError, fs.FetcherConflict):
                continue
            version_stats.add(pkg.name, fetcher)

        for resources in pkg.resources.values():
            for resource in resources:
                resource_stats.add(pkg.name, resource.fetcher)

    # print a nice summary table
    tty.msg("URL stats for %d packages:" % npkgs)

    def print_line():
        """Print a horizontal rule as wide as the table."""
        print("-" * 62)

    def percentage(count, total):
        """Return ``count`` as a percentage of ``total``.

        A group may contain no fetchers at all (e.g. no package declares a
        resource); report 0.0 in that case instead of dividing by zero.
        """
        if not total:
            return 0.0
        return count / total * 100

    def print_stat(indent, name, stat_name=None):
        """Print one table row.

        With ``stat_name`` omitted only the (indented) label is printed, which
        is used for sub-headings.  Otherwise ``stat_name`` selects the counter
        dictionary on both ``UrlStats`` objects and ``name`` the key within it.
        """
        width = 20 - indent
        fmt = " " * indent
        fmt += "%%-%ds" % width
        if stat_name is None:
            print(fmt % name)
            return

        fmt += "%12d%8.1f%%%12d%8.1f%%"
        v = getattr(version_stats, stat_name).get(name, 0)
        r = getattr(resource_stats, stat_name).get(name, 0)
        print(fmt % (name,
                     v, percentage(v, version_stats.total),
                     r, percentage(r, resource_stats.total)))

    print_line()
    print("%-20s%12s%9s%12s%9s" % ("stat", "versions", "%", "resources", "%"))
    print_line()
    print_stat(0, "url", "url_type")

    # Rows are sorted so the table is identical from run to run, matching the
    # handling of the url and git types below.
    print_stat(4, "schemes")
    schemes = set(version_stats.schemes) | set(resource_stats.schemes)
    for scheme in sorted(schemes):
        print_stat(8, scheme, "schemes")

    print_stat(4, "checksums")
    checksums = set(version_stats.checksums) | set(resource_stats.checksums)
    for checksum in sorted(checksums):
        print_stat(8, checksum, "checksums")
    print_line()

    # every fetch type other than url and git gets a section of its own
    types = set(version_stats.url_type) | set(resource_stats.url_type)
    types -= {"url", "git"}
    for url_type in sorted(types):
        print_stat(0, url_type, "url_type")
        print_line()

    print_stat(0, "git", "url_type")
    git_types = set(version_stats.git_type) | set(resource_stats.git_type)
    for git_type in sorted(git_types):
        print_stat(4, git_type, "git_type")
    print_line()

    if args.show_issues:
        total_issues = sum(
            len(descriptions)
            for pkg_issues in issues.values()
            for descriptions in pkg_issues.values()
        )
        print()
        tty.msg("Found %d issues." % total_issues)
        for issue_type, pkgs in issues.items():
            tty.msg("Package URLs with %s" % issue_type)
            for pkg, pkg_issues in pkgs.items():
                color.cprint("    @*C{%s}" % pkg)
                for issue in pkg_issues:
                    print("      %s" % issue)