def find_nightly_builds(start_day, end_day=None)

in pylib/mozautomation/mozautomation/releasescraper.py


import collections
import datetime

from concurrent import futures

import requests

# NIGHTLY_ARCHIVE_URL, RE_NIGHTLY_MONTH_ENTRY, RE_ARCHIVE_FILENAMES,
# NIGHTLY_IGNORE_PLATFORMS, INVALID_NIGHTLY_URLS, get_session,
# match_archive_build_file, and get_build_from_archive_file are defined
# elsewhere in releasescraper.py.

def find_nightly_builds(start_day, end_day=None):
    """Find all Nightly builds for a given date range.

    This function opens a pool of HTTP connections and spiders the
    appropriate sites for references to builds in the specified date range.
    If ``end_day`` is not set, it defaults to today's date in UTC.

    This function should be able to find Nightly builds back to at least
    April 2010.

    This function is a generator of dicts describing each found build. Dicts
    have the following keys:

    channel
       The release channel. Always ``nightly``.
    platform
       The build platform. e.g. ``linux64`` or ``win32``.
    build_id
       The build ID. Looks like a timestamp. Should be unique per
       (platform, app_version).
    app_version
       The application version string.
    revision
       Mercurial revision the build was produced from. May be 12 or 40
       characters. See ``ensure_full_revision()`` for how to normalize this
       to the full hash.
    day
       A ``datetime.date`` corresponding to the day of the build. Timezone is
       undefined.
    archive_url
       Where build artifacts can be obtained.
    """

    if not end_day:
        end_day = datetime.datetime.utcnow().date()

    session = get_session()

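    # Size the worker pool to match requests' default connection pool size.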
    with futures.ThreadPoolExecutor(requests.adapters.DEFAULT_POOLSIZE) as e:
        day = start_day

        # Load monthly pages to find links to builds.
        months = set()
        while day <= end_day:
            months.add(day.strftime("%Y/%m").encode("utf-8"))
            day += datetime.timedelta(days=1)

        month_fs = []
        for month in sorted(months):
            url = b"%s/%s/" % (NIGHTLY_ARCHIVE_URL, month)
            month_fs.append(e.submit(session.get, url))

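        # Parse each month index as it completes, bucketing the scraped
        # (build, path) links by calendar day.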
        builds_by_day = collections.defaultdict(list)
        for f in futures.as_completed(month_fs):
            r = f.result()
            if r.status_code != 200:
                continue

            for m in RE_NIGHTLY_MONTH_ENTRY.finditer(r.content):
                groups = m.groupdict()
                day = datetime.date(
                    int(groups["year"]), int(groups["month"]), int(groups["day"])
                )
                builds_by_day[day].append((groups["build"], groups["path"]))

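        # Fetch the per-day directory listing for each mozilla-central build.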
        build_fs = []

        for day, builds in sorted(builds_by_day.items()):
            for build, path in builds:
                if build != b"mozilla-central":
                    continue

                url = b"%s/%s/%s" % (
                    NIGHTLY_ARCHIVE_URL,
                    day.strftime("%Y/%m").encode("utf-8"),
                    path,
                )
                build_fs.append(e.submit(session.get, url))

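        # Scan each day's listing for archive files and fetch the matching
        # build artifacts.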
        release_fs = []

        for f in futures.as_completed(build_fs):
            r = f.result()
            # We found a link. So index should exist.
            assert r.status_code == 200

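            # Look for recognizable build files in this directory listing.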
            found_build = False
            for m in RE_ARCHIVE_FILENAMES.finditer(r.content):
                info = match_archive_build_file(r.url, m)
                if not info:
                    continue

                found_build = True

                if info[b"platform"] in NIGHTLY_IGNORE_PLATFORMS:
                    continue

                assert info[b"path"].startswith(b"/pub/firefox/nightly/")
                normpath = info[b"path"][len(b"/pub/firefox/nightly/") :]
                url = b"%s/%s" % (NIGHTLY_ARCHIVE_URL, normpath)

                release_fs.append((info[b"platform"], e.submit(session.get, url)))

            if not found_build:
                # This could be a bug in this script. Filter out special cases
                # that are known failures and emit warnings for remaining.
                if r.url in INVALID_NIGHTLY_URLS:
                    continue

                if all(
                    b"_test" in m.group("path")
                    for m in RE_ARCHIVE_FILENAMES.finditer(r.content)
                ):
                    continue

                if all(
                    m.group("path").endswith(b".txt.gz")
                    for m in RE_ARCHIVE_FILENAMES.finditer(r.content)
                ):
                    continue

                print("no build info for %s" % r.url)
                for m in RE_ARCHIVE_FILENAMES.finditer(r.content):
                    print("\t%s" % m.group("path").decode("utf-8", "replace"))
                continue

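        # Resolve the artifact fetches, yielding one parsed build per file.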
        try:
            for platform, f in release_fs:
                r = f.result()

                if r.status_code != 200:
                    print("HTTP %s from %s" % (r.status_code, r.url))
                    continue

                build = get_build_from_archive_file(platform, r)
                if build:
                    yield build
        except Exception:
            # Cancel all pending futures so we abort immediately.
            for platform, f in release_fs:
                f.cancel()

            raise
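

A minimal usage sketch (the dates are illustrative; assumes ``mozautomation``
is importable and the nightly archive is reachable)::

    import datetime

    from mozautomation.releasescraper import find_nightly_builds

    start = datetime.date(2018, 1, 1)
    end = datetime.date(2018, 1, 7)

    # Each yielded dict carries the keys described in the docstring above
    # (channel, platform, build_id, app_version, revision, day, archive_url).
    for build in find_nightly_builds(start, end_day=end):
        print(build)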