in tracking/translations_parser/cli/experiments.py [0:0]
def main() -> None:
    """Publish every training run found below the requested directory.

    The directory and the publication mode are read from ``get_args()``.
    All ``train.log`` files are collected recursively and grouped by their
    grandparent folder, whose last two path components are used as the
    project and the group names. Each log is handed to ``parse_experiment``
    (together with its evaluation folder when that folder exists), then
    ``WandB.publish_group_logs`` is called once per group with the names of
    the runs that were parsed without raising.

    A problem on a single folder or file is logged and that item is skipped,
    so the remaining experiments are still handled.
    """
    args = get_args()
    directory = args.directory
    mode = args.mode

    def group_folder(log_path: Path) -> Path:
        # First parent folder correspond to the run name, second one is the group
        return log_path.parent.parent

    # Ignore files with a different name than "train.log"
    train_files = sorted(directory.glob("**/train.log"))
    logger.info(f"Reading {len(train_files)} train.log data")

    prefix = os.path.commonprefix([path.parts for path in train_files])
    # Move on top of the main models (Snakemake) or logs (Taskcluster) folder
    if "models" in prefix:
        prefix = prefix[: prefix.index("models")]
    if "logs" in prefix:
        prefix = prefix[: prefix.index("logs")]

    # groupby() only merges consecutive items, and ordering by full path can
    # place a more deeply nested train.log between two runs of the same group.
    # The sort is stable, so files keep their path order inside each group.
    train_files.sort(key=group_folder)

    for path, files in groupby(train_files, group_folder):
        logger.info(f"Parsing folder {path.resolve()}")

        # The unpacking below needs at least two components; a shallow
        # (e.g. relative) folder is skipped instead of aborting the whole run.
        if len(path.parts) < 2:
            logger.error(f"Skip folder {path} as it has no project and group parent folders.")
            continue
        *_, project, group = path.parts

        if mode == ExperimentMode.TASKCLUSTER:
            if len(group) < 22:
                logger.error(
                    f"Skip folder {group} as it cannot contain a task group ID (too few caracters)."
                )
                continue
            # First five characters of the trailing 22-character chunk
            # (presumably the task group ID, as the message above suggests).
            suffix = f"_{group[-22:-17]}"
        else:
            # Use the full experiment name as a suffix for old Snakemake experiments
            suffix = f"_{group}"

        # Publish a run for each file inside that group
        published_runs = []
        for file in files:
            run_folder = file.parent.name
            tag = f"train-{run_folder}"
            try:
                name = parse_task_label(tag).model
            except ValueError:
                logger.error(f"Invalid tag extracted from file @{file}: {tag}")
                continue
            logger.info(f"Handling training task {name}")

            # Also publish metric files when available.
            # Built from parts rather than a "/" join, which doubled the
            # leading slash when the prefix starts at the filesystem root.
            metrics_path = Path(*prefix, "models", project, group, "evaluation", run_folder)
            metrics_dir = metrics_path if metrics_path.is_dir() else None
            if metrics_dir is None:
                logger.warning(f"Evaluation metrics files not found for {name}.")

            try:
                parse_experiment(
                    project=project,
                    group=group,
                    name=name,
                    suffix=suffix,
                    logs_file=file,
                    metrics_dir=metrics_dir,
                    mode=mode,
                )
            except Exception as e:
                # Best effort: one failing run must not prevent the others
                # from being published.
                logger.error(f"An exception occured parsing training file {file}: {e}")
            else:
                published_runs.append(name)

        # Try to publish related log files to the group on a last run named "group_logs"
        logger.info(
            f"Publishing '{project}/{group}' evaluation metrics and files (fake run 'group_logs')"
        )
        WandB.publish_group_logs(
            logs_parent_folder=[*prefix, "logs"],
            project=project,
            group=group,
            suffix=suffix,
            existing_runs=published_runs,
            # Compare with the member itself, like the Taskcluster check above;
            # a plain Enum member never equals its raw ``.value``.
            snakemake=(mode == ExperimentMode.SNAKEMAKE),
        )