in extra_scripts/experiment_spreadsheet_from_logs.py [0:0]
def parse_config_from_log(log_path: str) -> dict:
    """Extract the run config and final metrics from a training log file.

    The log is scanned line by line for:

    * the config dump, which starts on the line containing
      ``{'CHECKPOINT': `` and ends on the line containing ``'VERBOSE': ``.
      Only the first dump in the log is read;
    * the last line containing ``WORLD_SIZE:``;
    * the last line containing ``loss:``;
    * the last line mentioning each accuracy meter (train / test);
    * the last ``[ep: N]`` epoch marker.

    Metric lines are ignored while the config dump is being captured.

    Args:
        log_path: Path of the log file to read.

    Returns:
        The parsed config (an ``OrderedDict``, or a plain ``dict`` when the
        config text could not be evaluated) with these keys added when they
        could be parsed: ``latest_epoch`` (int, always added),
        ``WORLD_SIZE`` (int), ``final_train_loss`` (str, as written in the
        log) and ``final_{train,test}_accuracy_top_{1,5}`` (float).
        An empty ``dict`` is returned when no config dump was found.

    Raises:
        OSError: If ``log_path`` cannot be opened. Parse failures are not
            raised; they are reported with ``print`` and the affected key is
            left out.
    """
    # Logger prefix that precedes the config dump on its first line.
    config_prefix_regex = re.compile(r"hydra_config.py: \d*: ")
    # String at start of config
    config_start = "{'CHECKPOINT': "
    # String on final line of config
    config_end = "'VERBOSE': "
    config_lines = []
    # Set once the first config dump has been read completely, so that a
    # config printed again later in the log is not read a second time.
    config_finished = False

    # The world size is taken from its own log line; the value parsed here
    # overwrites whatever WORLD_SIZE the config dump contained.
    world_size_string = "WORLD_SIZE:"
    world_size_btwn = ("WORLD_SIZE:\t", "\n")
    world_size_line = None

    train_loss_str = "loss:"
    loss_string_btwn = ("loss: ", ";")
    last_loss_line = None

    latest_epoch = 0
    epoch_regex = re.compile(r"(?<=\[ep: )\d{1,5}(?=\])")

    # Partition name -> marker identifying that partition's accuracy line.
    accuracy_markers = {
        "train": "train_accuracy_list_meter",
        "test": "test_accuracy_list_meter",
    }
    # Only the most recent line per partition is needed.
    last_accuracy_line = {}

    def _between(text, bounds):
        """Return the part of *text* after bounds[0] and before bounds[1]."""
        return text.split(bounds[0])[1].split(bounds[1])[0]

    # Undecodable bytes are replaced rather than raised: one stray byte in a
    # log should not prevent the rest of it from being parsed.
    with open(log_path, encoding="utf-8", errors="replace") as reader:
        capturing = False
        for line in reader:
            if not capturing:
                if world_size_string in line:
                    world_size_line = line
                if train_loss_str in line:
                    last_loss_line = line
                for partition, marker in accuracy_markers.items():
                    if marker in line:
                        last_accuracy_line[partition] = line
                if not config_finished and config_start in line:
                    capturing = True
                    pieces = config_prefix_regex.split(line, maxsplit=1)
                    if len(pieces) > 1:
                        line = pieces[1]
                    else:
                        # No logger prefix on this line: keep everything
                        # from the start of the dict onwards.
                        line = line[line.index(config_start):]
            if capturing:
                config_lines.append(line)
                # The end marker only counts while a dump is being captured;
                # a stray match earlier in the log must not end the search.
                if config_end in line:
                    capturing = False
                    config_finished = True
            epoch_match = epoch_regex.search(line)
            if epoch_match:
                latest_epoch = int(epoch_match.group(0))

    config_text = "".join(config_lines)
    if not config_text:
        print("No information parsed from log file")
        return {}

    # Parse into dict
    try:
        config = collections.OrderedDict(ast.literal_eval(config_text))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        print("Unable to parse dictionary")
        config = {}

    # Add latest epoch to config
    config["latest_epoch"] = latest_epoch

    # Parse world size from string. AttributeError covers the case where no
    # WORLD_SIZE line was seen (world_size_line is still None).
    try:
        config["WORLD_SIZE"] = int(_between(world_size_line, world_size_btwn))
    except (AttributeError, IndexError, ValueError):
        print("Unable to parse world size")

    # The loss is stored as the raw text found in the log, not as a float.
    try:
        config["final_train_loss"] = _between(last_loss_line, loss_string_btwn)
    except (AttributeError, IndexError):
        print("Unable to parse final training loss")

    for partition, accuracy_line in last_accuracy_line.items():
        try:
            meter_values = accuracy_line.split("value")[1]
            for top_string in ["top_1", "top_5"]:
                acc = meter_values.split(top_string)[1]
                acc = acc.split("0: ")[1].split("}")[0]
                config[f"final_{partition}_accuracy_{top_string}"] = float(acc)
        except (IndexError, ValueError):
            print(f"Unable to parse final {partition} accuracy")

    return config