pylib/mozautomation/mozautomation/releasescraper.py
def find_nightly_builds(start_day, end_day=None):
"""Find all Nightly builds for a given date range.
This function opens a pool of HTTP sockets and spiders appropriate
sites for references to builds for the date range specified. If no
``end_day`` is set, defaults to UTC today.
This function should be able to find Nightly builds back to at least
April 2010.
This function is a generator of dicts describing each found build. Dicts
have the following keys:
channel
The release channel. Always ``nightly``.
platform
The build platform. e.g. ``linux64`` or ``win32``.
build_id
The build ID. Looks like a timestamp. Should be unique per
(platform, app_version).
app_version
The application version string.
revision
Mercurial revision build was produced from. May be 12 or 40 characters.
See ``ensure_full_revision()`` for how to normalize this to the full
hash.
day
A ``datetime.date`` corresponding to the day of the build. Timezone is
undefined.
archive_url
Where build artifacts can be obtained.
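
    A minimal usage sketch (illustrative only; assumes this module is
    importable as ``mozautomation.releasescraper``)::

        import datetime

        from mozautomation.releasescraper import find_nightly_builds

        for build in find_nightly_builds(datetime.date(2018, 1, 1),
                                          datetime.date(2018, 1, 2)):
            print(build)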
"""
    if not end_day:
        end_day = datetime.datetime.utcnow().date()
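
    # A single HTTP session is shared by every request submitted to the pool.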
    session = get_session()

    with futures.ThreadPoolExecutor(requests.adapters.DEFAULT_POOLSIZE) as e:
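        # The worker count matches requests' default connection pool size so
        # each in-flight request can hold a pooled connection.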
        day = start_day

        # Load monthly pages to find links to builds.
        months = set()
        while day <= end_day:
            months.add(day.strftime("%Y/%m").encode("utf-8"))
            day += datetime.timedelta(days=1)

        month_fs = []
        for month in sorted(months):
            url = b"%s/%s/" % (NIGHTLY_ARCHIVE_URL, month)
            month_fs.append(e.submit(session.get, url))
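
        # Parse each month index page and group discovered (build, path) links by day.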
        builds_by_day = collections.defaultdict(list)

        for f in futures.as_completed(month_fs):
            r = f.result()
            if r.status_code != 200:
                continue

            for m in RE_NIGHTLY_MONTH_ENTRY.finditer(r.content):
                groups = m.groupdict()
                day = datetime.date(
                    int(groups["year"]), int(groups["month"]), int(groups["day"])
                )
                builds_by_day[day].append((groups["build"], groups["path"]))
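
        # Fetch the directory listing for each mozilla-central nightly build.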
        build_fs = []

        for day, builds in sorted(builds_by_day.items()):
            for build, path in builds:
                if build != b"mozilla-central":
                    continue

                url = b"%s/%s/%s" % (
                    NIGHTLY_ARCHIVE_URL,
                    day.strftime("%Y/%m").encode("utf-8"),
                    path,
                )
                build_fs.append(e.submit(session.get, url))
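
        # Scan each build directory listing for recognized archive files and
        # request the ones we care about.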
        release_fs = []

        for f in futures.as_completed(build_fs):
            r = f.result()
            # We found a link, so the index should exist.
            assert r.status_code == 200

            found_build = False

            for m in RE_ARCHIVE_FILENAMES.finditer(r.content):
                info = match_archive_build_file(r.url, m)
                if not info:
                    continue

                found_build = True

                if info[b"platform"] in NIGHTLY_IGNORE_PLATFORMS:
                    continue

                assert info[b"path"].startswith(b"/pub/firefox/nightly/")
                normpath = info[b"path"][len(b"/pub/firefox/nightly/") :]

                url = b"%s/%s" % (NIGHTLY_ARCHIVE_URL, normpath)
                release_fs.append((info[b"platform"], e.submit(session.get, url)))

            if not found_build:
                # This could be a bug in this script. Filter out special cases
                # that are known failures and emit warnings for the rest.
                if r.url in INVALID_NIGHTLY_URLS:
                    continue

                if all(
                    b"_test" in m.group("path")
                    for m in RE_ARCHIVE_FILENAMES.finditer(r.content)
                ):
                    continue

                if all(
                    m.group("path").endswith(b".txt.gz")
                    for m in RE_ARCHIVE_FILENAMES.finditer(r.content)
                ):
                    continue

                print("no build info for %s" % r.url)
                for m in RE_ARCHIVE_FILENAMES.finditer(r.content):
                    print(b"\t%s" % m.group("path"))

                continue
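
        # Resolve the artifact requests and yield metadata for each build found.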
        try:
            for platform, f in release_fs:
                r = f.result()
                if r.status_code != 200:
                    print("HTTP %s from %s" % (r.status_code, r.url))
                    continue

                build = get_build_from_archive_file(platform, r)
                if build:
                    yield build
        except Exception:
            # Cancel all pending futures so we abort immediately.
            for platform, f in release_fs:
                f.cancel()
            raise