def main()

in src/datatrove/tools/jobs_status.py [0:0]


def main():
    """
    Print the completion status of every job logging folder found under `path`.

    Every directory directly under `path` whose name starts with `log_prefix` is treated as a candidate
    logging folder. For each one, the total number of tasks is read from the `world_size` entry of its
    `executor.json`, and a rank counts as completed when a `completions/{rank:05d}` entry is listed for
    that folder. One line is logged per job (jobs without incomplete tasks are skipped when
    `hide_complete` is set), followed by a summary over all the jobs that could be inspected.

    Folders without a usable `executor.json` (missing, not valid JSON, or without a positive integer
    `world_size`) are reported in red and left out of the summary.
    """
    args = parser.parse_args()
    console = Console()

    main_folder = get_datafolder(args.path)
    logging_dirs = [
        name
        for name, info in main_folder.glob(f"{args.log_prefix}*", detail=True, maxdepth=1).items()
        if info["type"] == "directory"
    ]
    logger.remove()

    complete_jobs = 0
    incomplete_jobs = 0

    complete_tasks = 0
    incomplete_tasks = 0

    for path in logging_dirs:
        logging_dir = get_datafolder(main_folder.resolve_paths(path))
        if not logging_dir.isfile("executor.json"):
            console.log(
                f'Could not find "executor.json" in the given directory ({path}). Are you sure it is a '
                "logging folder?",
                style="red",
            )
            continue

        with logging_dir.open("executor.json", "rt") as f:
            try:
                executor_config = json.load(f)
            except json.JSONDecodeError:
                # A corrupt file must not abort the scan of the remaining folders.
                executor_config = None
        world_size = executor_config.get("world_size") if isinstance(executor_config, dict) else None
        # Only a positive integer is usable: it feeds range() below and is used as a divisor.
        if not isinstance(world_size, int) or world_size <= 0:
            console.log(
                f"Could not get the total number of tasks in {path}, please try relaunching the run.",
                style="red",
            )
            continue

        with console.status("Fetching list of incomplete tasks"):
            completion_files = set(logging_dir.list_files("completions"))
            incomplete = {
                rank for rank in range(world_size) if f"completions/{rank:05d}" not in completion_files
            }

        # Count completed ranks against world_size rather than the raw number of listed files, so that
        # unrelated or out-of-range entries under "completions" cannot push the count above world_size.
        n_incomplete = len(incomplete)
        n_completed = world_size - n_incomplete

        complete_tasks += n_completed
        incomplete_tasks += n_incomplete

        if incomplete:
            emoji = "❌"
            incomplete_jobs += 1
        else:
            emoji = "✅"
            complete_jobs += 1

        if incomplete or not args.hide_complete:
            console.log(
                f"{emoji} {path + ':': <50}{n_completed}/{world_size} ({n_completed / world_size:.0%}) completed tasks."
            )

    total_jobs = complete_jobs + incomplete_jobs
    if total_jobs > 0:
        # Every counted job has world_size >= 1, so total_tasks is never zero here.
        total_tasks = complete_tasks + incomplete_tasks
        console.log(
            f"Summary: {complete_jobs}/{total_jobs} ({complete_jobs / total_jobs:.0%}) jobs completed, {complete_tasks}/{total_tasks} ({complete_tasks / total_tasks:.0%}) tasks completed."
        )
    else:
        console.log("No jobs found.")