def parse_config_from_log()

in extra_scripts/experiment_spreadsheet_from_logs.py [0:0]


def _text_between(text: str, start: str, end: str) -> str:
    """Return what follows the first *start* in *text*, cut at the next *end*.

    Raises:
        IndexError: if *start* does not occur in *text*.
    """
    return text.split(start)[1].split(end)[0]


def parse_config_from_log(log_path: str) -> dict:
    """Extract the printed config and final metrics from a training log.

    The log is scanned line by line for:

    * the config dictionary, which starts on the line containing
      ``{'CHECKPOINT': `` and ends on the line containing ``'VERBOSE': ``
      (only the first complete printing is read);
    * the last line containing ``WORLD_SIZE:``;
    * the last line containing ``loss:``;
    * the last line mentioning each accuracy meter
      (``train_accuracy_list_meter`` / ``test_accuracy_list_meter``);
    * the last ``[ep: N]`` marker.

    Lines that belong to the config printing itself are not scanned for
    world size, loss or accuracy.

    Args:
        log_path: Path of the log file to read.

    Returns:
        An ``OrderedDict`` holding the parsed config plus, when they could be
        parsed, the keys ``latest_epoch`` (int), ``WORLD_SIZE`` (int),
        ``final_train_loss`` (str, as written in the log) and
        ``final_{train,test}_accuracy_top_{1,5}`` (float). If the config text
        cannot be evaluated, a plain dict containing only the extra keys is
        returned. If no config is found at all, an empty dict is returned.
        Parsing problems are reported with ``print`` rather than raised.

    Raises:
        OSError: if the log file cannot be opened.
    """
    # Log prefix that precedes the config on its first line.
    config_start_split_on = r"hydra_config.py: \d*: "
    # Text that opens the config dictionary.
    config_start = "{'CHECKPOINT': "
    # Text found on the final line of the config dictionary.
    config_end = "'VERBOSE': "

    # World size from the config is not reliable; the value logged on the
    # WORLD_SIZE line is used instead.
    world_size_string = "WORLD_SIZE:"
    world_size_btwn = ("WORLD_SIZE:\t", "\n")

    train_loss_str = "loss:"
    loss_string_btwn = ("loss: ", ";")

    epoch_string = "[ep: "
    epoch_pattern = re.compile(r"(?<=\[ep: )\d{1,5}(?=\])")

    accuracy_markers = {
        "train": "train_accuracy_list_meter",
        "test": "test_accuracy_list_meter",
    }

    config_lines = []
    # True while the lines being read belong to the config printing.
    store_line = False
    # Some logs print the config more than once; only the first is read.
    config_finished = False

    # Only the most recent occurrence of each metric line is needed.
    world_size_line = ""
    last_loss_line = ""
    last_accuracy_line = {}
    latest_epoch = 0

    with open(log_path) as reader:
        for line in reader:
            if not store_line:
                if world_size_string in line:
                    world_size_line = line
                if train_loss_str in line:
                    last_loss_line = line
                for partition, marker in accuracy_markers.items():
                    if marker in line:
                        last_accuracy_line[partition] = line
            if not config_finished:
                if config_start in line:
                    store_line = True
                    pieces = re.split(config_start_split_on, line, maxsplit=1)
                    if len(pieces) > 1:
                        line = pieces[1]
                    else:
                        # No logger prefix on this line: keep everything from
                        # the opening of the dictionary instead of failing.
                        line = line[line.index(config_start):]
                if store_line:
                    config_lines.append(line)
                    # The end marker only counts once the config has started;
                    # a stray match earlier in the log must not stop the
                    # search for the real config.
                    if config_end in line:
                        store_line = False
                        config_finished = True
            if epoch_string in line:
                epoch = epoch_pattern.search(line)
                if epoch:
                    latest_epoch = int(epoch.group(0))

    config_text = "".join(config_lines)
    if not config_text:
        print("No information parsed from log file")
        return {}

    # Parse into dict
    try:
        config = collections.OrderedDict(ast.literal_eval(config_text))
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        print("Unable to parse dictionary")
        config = {}

    # Add latest epoch to config
    config["latest_epoch"] = latest_epoch

    # Parse world size from string
    try:
        config["WORLD_SIZE"] = int(_text_between(world_size_line, *world_size_btwn))
    except (IndexError, ValueError):
        print("Unable to parse world size")

    # The loss is kept as the string found in the log.
    try:
        config["final_train_loss"] = _text_between(last_loss_line, *loss_string_btwn)
    except IndexError:
        print("Unable to parse final training loss")

    for partition in accuracy_markers:
        if partition not in last_accuracy_line:
            continue
        try:
            meter_values = last_accuracy_line[partition].split("value")[1]
            for top_string in ["top_1", "top_5"]:
                acc = meter_values.split(top_string)[1]
                acc = acc.split("0: ")[1].split("}")[0]
                config[f"final_{partition}_accuracy_{top_string}"] = float(acc)
        except (IndexError, ValueError):
            print(f"Unable to parse final {partition} accuracy")

    return config