in hiplot/fetchers.py [0:0]
def _find_fairseq_log(log_dir: Path) -> Path:
    """Return the first existing candidate log file inside *log_dir*.

    Well-known file names are tried first, then any ``*.log`` directly in
    the directory, then any ``*.log`` in its ``slurm_logs`` sub-directory.

    Raises:
        hip.ExperimentFetcherDoesntApply: if none of the candidates is a file.
    """
    candidates = [log_dir / name for name in ("train.log", "process.out", "process_0.out")]
    # glob order depends on the file system: sort each group so that the
    # selected file is reproducible when several logs are present.
    candidates += [Path(p) for p in sorted(glob.glob(str(log_dir / "*.log")))]
    candidates += [Path(p) for p in sorted(glob.glob(str(log_dir / "slurm_logs" / "*.log")))]
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    raise hip.ExperimentFetcherDoesntApply("No log file found")


def _parse_fairseq_namespace(line: str) -> tp.Dict[str, tp.Any]:
    """Turn a ``Namespace(key=value, ...)`` line into a ``{key: value}`` dict.

    format: Namespace(activation_dropout=0.1, activation_fn='relu', ...)
    Ideally we want to do: `eval("dict(activation_dropout=0.1, activation_fn='relu', ...)")`
    But as it's user input, we want to have something safe, so the line is
    parsed with `ast` and each keyword value goes through `ast.literal_eval`.
    (it's still possible to crash the python interpreter with a too complex
    string due to stack depth limitations)

    Errors from `ast.parse` / `ast.literal_eval` (malformed line, value that
    is not a Python literal) are not caught here and propagate to the caller.
    """
    node = ast.parse(line)
    return {
        kw.arg: ast.literal_eval(kw.value)
        for kw in node.body[0].value.keywords  # type: ignore
    }


def load_fairseq(uri: str) -> hip.Experiment:
    """Build an experiment from a fairseq training log.

    Args:
        uri: ``fairseq://<path>`` where ``<path>`` is either a log file or a
            directory in which a log file is searched for.

    Returns:
        An experiment with one datapoint per epoch found in the log, sorted
        by epoch. Every datapoint holds the arguments parsed from the
        ``Namespace(...)`` line merged with the metrics of that epoch (metrics
        win on key conflicts). Each datapoint's ``from_uid`` is set to the
        ``uid`` of the datapoint preceding it.

    Raises:
        hip.ExperimentFetcherDoesntApply: if `uri` does not start with
            ``fairseq://``, or if the path is a directory that contains no
            candidate log file.

    Errors from reading or parsing the log (unreadable file, invalid JSON,
    datapoint without an ``"epoch"`` key, ...) are not caught and propagate.
    """
    # pylint:disable=too-many-locals
    # pylint:disable=too-many-branches
    # pylint:disable=too-many-statements
    PREFIX = 'fairseq://'
    if not uri.startswith(PREFIX):
        raise hip.ExperimentFetcherDoesntApply()
    train_log = Path(uri[len(PREFIX):])
    if train_log.is_dir():
        train_log = _find_fairseq_log(train_log)

    datapoints: tp.List[tp.Dict[str, tp.Any]] = []
    params: tp.Dict[str, tp.Any] = {}
    # Optional log prefix, eg "2020-03-08 16:48:16 | INFO | "
    logs_prefix_re = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \| [A-Z]* \| )")
    for raw_line in train_log.read_text(encoding="utf-8").split('\n'):
        # Strip log prefix
        prefix_match = logs_prefix_re.match(raw_line)
        line = raw_line if prefix_match is None else raw_line[prefix_match.end():]

        if line.startswith('Namespace('):
            # Arguments: Namespace(...)
            # If the log holds several such lines, the last one is kept.
            params = _parse_fairseq_namespace(line)
        elif line.startswith("valid | {"):
            # Results in JSON format
            # valid | {"epoch": 33, "valid_loss": "0.723", "valid_ppl": "1.65", ...}
            json_string = line.split('|', 1)[-1].lstrip()
            datapoints.append(json.loads(json_string))
        elif line.startswith('| epoch '):
            # For older version of fairseq
            values = _load_fairseq_metrics_inline(line)
            if datapoints and datapoints[-1]['epoch'] == values['epoch']:
                # Several inline lines can describe the same epoch: merge them
                # into a single datapoint.
                datapoints[-1].update(values)
            else:
                datapoints.append(values)

    datapoints = [{
        **params,
        **metrics,  # overrides 'learning rate' for instance
    } for metrics in datapoints]
    datapoints.sort(key=lambda d: float(d["epoch"]))
    xp = hip.Experiment.from_iterable(datapoints)
    # Chain consecutive epochs: each datapoint points at the previous one.
    for dp, next_dp in zip(xp.datapoints, xp.datapoints[1:]):
        next_dp.from_uid = dp.uid
    return xp