def main()

in tracking/translations_parser/cli/experiments.py [0:0]


def main() -> None:
    """Publish every experiment found below the directory given on the CLI.

    Recursively collects the ``train.log`` files under ``args.directory`` and
    groups them by their grand-parent folder, whose last two path components
    are used as ``project`` and ``group``. For each group:

    * a run suffix is derived from the group folder name: characters
      ``[-22:-17]`` of the name in Taskcluster mode, the full name otherwise;
    * ``parse_experiment`` is called once per ``train.log`` file, with the
      ``models/<project>/<group>/evaluation/<run>`` folder as ``metrics_dir``
      when that folder exists (``None`` otherwise);
    * ``WandB.publish_group_logs`` is called with the names of the runs whose
      parsing did not raise.

    A failure on a single folder or file is logged and skipped so that the
    remaining experiments are still processed.
    """
    args = get_args()
    directory = args.directory
    mode = args.mode

    # Ignore files with a different name than "train.log"
    train_files = sorted(directory.glob("**/train.log"))

    logger.info(f"Reading {len(train_files)} train.log data")
    # Common leading path components (a tuple of parts) shared by all files
    prefix = os.path.commonprefix([path.parts for path in train_files])

    # Move on top of the main models (Snakemake) or logs (Taskcluster) folder
    if "models" in prefix:
        prefix = prefix[: prefix.index("models")]
    if "logs" in prefix:
        prefix = prefix[: prefix.index("logs")]

    def group_folder(path: Path) -> Path:
        # First parent folder correspond to the run name, second one is the group
        return path.parent.parent

    # groupby() only merges *consecutive* items sharing a key, so the files must
    # be ordered by that same key. Sorting by full path is not enough when
    # train.log files live at different depths. The sort is stable, so files
    # keep their path order inside each group.
    train_files.sort(key=group_folder)

    for path, files in groupby(train_files, group_folder):
        logger.info(f"Parsing folder {path.resolve()}")
        if len(path.parts) < 2:
            # Both a project and a group component are required below
            logger.error(f"Skip folder {path} as it has no parent project folder.")
            continue
        *_, project, group = path.parts
        if mode == ExperimentMode.TASKCLUSTER:
            if len(group) < 22:
                logger.error(
                    f"Skip folder {group} as it cannot contain a task group ID (too few caracters)."
                )
                continue
            # Characters [-22:-17] of the folder name, i.e. the first 5 of its last 22
            suffix = f"_{group[-22:-17]}"
        else:
            # Use the full experiment name as a suffix for old Snakemake experiments
            suffix = f"_{group}"

        # Publish a run for each file inside that group
        published_runs = []
        for file in files:
            run_folder = file.parent.name
            tag = f"train-{run_folder}"
            try:
                name = parse_task_label(tag).model
            except ValueError:
                logger.error(f"Invalid tag extracted from file @{path}: {tag}")
                continue
            logger.info(f"Handling training task {name}")

            # Also publish metric files when available.
            # Build the path from its parts: joining them with "/" would double
            # the leading separator, as the root part of an absolute path is
            # already "/".
            metrics_path = Path(*prefix, "models", project, group, "evaluation", run_folder)
            metrics_dir = metrics_path if metrics_path.is_dir() else None
            if metrics_dir is None:
                logger.warning(f"Evaluation metrics files not found for {name}.")

            try:
                parse_experiment(
                    project=project,
                    group=group,
                    name=name,
                    suffix=suffix,
                    logs_file=file,
                    metrics_dir=metrics_dir,
                    mode=mode,
                )
            except Exception as e:
                # Best effort: one broken training log must not abort the others
                logger.error(f"An exception occured parsing training file {file}: {e}")
            else:
                published_runs.append(name)

        # Try to publish related log files to the group on a last run named "group_logs"
        logger.info(
            f"Publishing '{project}/{group}' evaluation metrics and files (fake run 'group_logs')"
        )
        WandB.publish_group_logs(
            logs_parent_folder=[*prefix, "logs"],
            project=project,
            group=group,
            suffix=suffix,
            existing_runs=published_runs,
            # Compare against the enum member, consistently with the Taskcluster
            # check above: a plain Enum member never equals its raw `.value`.
            snakemake=(mode == ExperimentMode.SNAKEMAKE),
        )