in utils/taskcluster_downloader.py [0:0]
def download_logs(group_id, output):
    """Download the Marian portion of training logs for a Taskcluster task group.

    Every task in the group whose state is "completed" or "running" and whose
    ``kind`` tag contains "train" or "finetune" (but not "vocab") has its
    ``public/logs/live.log`` artifact streamed. Lines are kept starting from
    the first one that contains "[marian]", with the "[task ...Z] " prefix
    stripped, and are written to ``<output>/<label>.log`` where ``<label>`` is
    the task's ``label`` tag with "/" replaced by "_".

    Args:
        group_id: Taskcluster task group ID to inspect.
        output: Directory the log files are written to; created on demand
            when the first matching task is found.

    Side effects:
        Performs network requests, writes files under ``output`` and prints
        progress messages. Prints a notice when no matching task was found.
    """
    options = {"rootUrl": TC_MOZILLA}
    queue = taskcluster.Queue(options=options)
    group: Any = queue.listTaskGroup(group_id)

    # Compiled once: the same pattern is applied to every retained log line.
    task_prefix = re.compile(r"\[task .*Z\] ")

    task_found = False
    for task in group["tasks"]:
        if task["status"]["state"] not in ("completed", "running"):
            continue

        # Tasks without a "kind" tag are skipped instead of raising KeyError.
        label = task["task"].get("tags", {}).get("kind", "")
        if ("train" not in label and "finetune" not in label) or "vocab" in label:
            continue

        task_found = True
        task_id = task["status"]["taskId"]
        task_obj: Any = queue.task(task_id)
        task_obj_label = task_obj["tags"]["label"].replace("/", "_")

        os.makedirs(output, exist_ok=True)
        output_path = os.path.join(output, f"{task_obj_label}.log")

        url = queue.buildUrl("getLatestArtifact", task_id, "public/logs/live.log")
        print(f"Downloading {url}")

        log_lines = []
        start_writing = False
        # The context manager closes the streamed response (and releases its
        # connection) even when reading the body is interrupted.
        with requests.get(url, stream=True, timeout=5) as resp:
            try:
                for line in resp.iter_lines():
                    # Tolerate stray non-UTF-8 bytes rather than aborting the
                    # whole download on a single malformed line.
                    line_str = line.decode("utf-8", errors="replace")
                    if "[marian]" in line_str:
                        start_writing = True
                    if start_writing:
                        log_lines.append(task_prefix.sub("", line_str))
            except requests.exceptions.ConnectionError as exc:
                # Best effort: keep whatever was received before the
                # connection dropped, but make the truncation visible.
                print(f"Connection lost while reading {url}, keeping partial log: {exc}")

        print(f"Writing to {output_path}")
        # Explicit encoding so the result does not depend on the locale.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(log_lines))

    if not task_found:
        print(f"No logs were found for {group_id}")