in bugbug/code_search/searchfox_download.py [0:0]
def _find_cached_folder(commit_hash: str) -> "str | None":
    """Return the first folder in SEARCHFOX_STORAGE_DATA whose name starts with commit_hash.

    Returns the joined path, or None when no folder name matches.
    """
    for folder in os.listdir(SEARCHFOX_STORAGE_DATA):
        if folder.startswith(commit_hash):
            return os.path.join(SEARCHFOX_STORAGE_DATA, folder)
    return None


def _download_and_extract(zip_response, zip_path: str) -> None:
    """Stream zip_response into zip_path/searchfox.zip and unzip it in place.

    zip_path is created here and must not exist yet (os.makedirs raises
    FileExistsError otherwise). If anything fails after the folder has been
    created, the folder is removed again so that a half-written download is
    never picked up later by the prefix-based cache lookup.
    """
    # Function-scope import: the module's import block is not visible from
    # this chunk, so the dependency is kept local to the helper.
    import shutil

    os.makedirs(zip_path)
    try:
        with open(os.path.join(zip_path, "searchfox.zip"), "wb") as f:
            # The archive is large (see TODO below), so read it in 1 MiB chunks.
            for chunk in zip_response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)

        # TODO: This uses unzip, we might want to use Python instead. However, the archive
        # is a highly-compressed 300 MB file, that needs to be decompressed efficiently.
        subprocess.check_call(["unzip", "searchfox.zip"], cwd=zip_path)
    except BaseException:
        # Do not leave a partial folder behind: its name would match the
        # commit-hash prefix and be returned as if it were complete.
        shutil.rmtree(zip_path, ignore_errors=True)
        raise


def fetch(commit_hash: str) -> str:
    """Return the local folder holding the searchfox data for commit_hash.

    If a folder whose name starts with commit_hash already exists in
    SEARCHFOX_STORAGE_DATA, its path is returned without any network access.
    Otherwise the searchfox task for the commit is looked up in the index,
    its "target.mozsearch-index.zip" artifact is downloaded into a new folder
    named "<moz_source_stamp>_<os>" and unpacked with the external "unzip"
    tool, and the folder lookup is repeated.

    Args:
        commit_hash: Commit hash (or prefix) used both for the index lookup
            and for matching folder names.

    Returns:
        Path of the matching folder inside SEARCHFOX_STORAGE_DATA.

    Raises:
        SearchfoxDataNotAvailable: If the index entry, target.json or the
            zip artifact request does not succeed.
        subprocess.CalledProcessError: If "unzip" exits with a non-zero status.
        AssertionError: If no folder matches commit_hash after downloading.
    """
    cached = _find_cached_folder(commit_hash)
    if cached is not None:
        return cached

    # https://firefox-ci-tc.services.mozilla.com/tasks/index/gecko.v2.mozilla-central.pushdate.2023.06.01.20230601042516.firefox/linux64-searchfox-debug
    index_url_template = "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.commit_hash.%s.firefox.%s-searchfox-debug"
    artifact_url_template = "https://firefoxci.taskcluster-artifacts.net/%s/0/%s"

    # target_oses = ['linux64', 'win64', 'macosx64', 'android-armv7']
    target_oses = ["linux64"]

    session = get_session("searchfox")
    # Same User-Agent on every request, including the artifact download.
    headers = {"User-Agent": get_user_agent()}

    for index_os in target_oses:
        index_url = index_url_template % (commit_hash, index_os)
        # A third command-line argument, when present, replaces the index URL.
        if len(sys.argv) > 2:
            index_url = sys.argv[2]

        index_response = session.get(index_url, headers=headers)
        if not index_response.ok:
            raise SearchfoxDataNotAvailable("Searchfox task not indexed")

        task_id = index_response.json()["taskId"]

        target_json_url = artifact_url_template % (task_id, "public/build/target.json")
        target_json_response = session.get(target_json_url, headers=headers)
        if not target_json_response.ok:
            raise SearchfoxDataNotAvailable("Searchfox artifact not present")

        # Presumably the full revision hash, so the final prefix lookup by
        # commit_hash finds the new folder -- TODO confirm.
        rev = target_json_response.json()["moz_source_stamp"]

        zip_url = artifact_url_template % (
            task_id,
            "public/build/target.mozsearch-index.zip",
        )
        zip_path = os.path.join(SEARCHFOX_STORAGE_DATA, "%s_%s" % (rev, index_os))

        zip_response = session.get(zip_url, headers=headers, stream=True)
        try:
            if not zip_response.ok:
                raise SearchfoxDataNotAvailable("Searchfox data no longer available")
            _download_and_extract(zip_response, zip_path)
        finally:
            # A streamed response holds its connection until it is closed.
            zip_response.close()

        # TODO: When using only specific parts of searchfox data, such as syntax data,
        # it can be beneficial to filter the remaining data out just once.
        # for (path, dirs, files) in os.walk(SEARCHFOX_STORAGE_DATA):
        #     for file in files:
        #         fp_file = os.path.join(path, file)
        #         with open(fp_file, 'r') as fd:
        #             lines = fd.readlines()
        #         lines = [x for x in lines if '"syntax"' in x]
        #         with open(fp_file, 'w') as fd:
        #             fd.writelines(lines)

    downloaded = _find_cached_folder(commit_hash)
    if downloaded is not None:
        return downloaded

    # Raised explicitly instead of `assert False`, which `python -O` strips
    # (the function would then fall through and return None).
    raise AssertionError(
        "No searchfox folder matching %r found after download" % (commit_hash,)
    )