def load_fairseq()

in hiplot/fetchers.py [0:0]
51 lines of code
19 McCabe index (conditional complexity)

def load_fairseq(uri: str) -> hip.Experiment:
    """Build an experiment from a fairseq training log.

    ``uri`` must start with ``fairseq://`` followed by the path of either a
    log file or a directory. For a directory, the first existing file among
    ``train.log``, ``process.out``, ``process_0.out``, then ``*.log`` and
    ``slurm_logs/*.log`` (matches of each pattern in sorted order) is read.

    Three kinds of lines are recognized, after stripping an optional
    ``"YYYY-MM-DD HH:MM:SS | LEVEL | "`` prefix:

    * ``Namespace(...)``: keyword arguments are parsed as Python literals and
      used as parameters (the last such line wins);
    * ``valid | {...}``: the JSON payload becomes a new datapoint;
    * ``| epoch ...``: parsed by ``_load_fairseq_metrics_inline`` and merged
      into the previous datapoint when both have the same ``epoch``,
      appended as a new datapoint otherwise.

    Every datapoint is merged with the parameters (datapoint values take
    precedence), datapoints are sorted by ``float(epoch)``, and each
    datapoint's ``from_uid`` is set to the ``uid`` of the one before it.

    Raises:
        hip.ExperimentFetcherDoesntApply: if ``uri`` does not start with
            ``fairseq://``, or if it points to a directory in which no
            candidate log file exists.
    """
    # pylint:disable=too-many-locals
    # pylint:disable=too-many-branches
    # pylint:disable=too-many-statements
    PREFIX = 'fairseq://'
    if not uri.startswith(PREFIX):
        raise hip.ExperimentFetcherDoesntApply()
    train_log = Path(uri[len(PREFIX):])
    if train_log.is_dir():
        # Escape the directory part so that glob metacharacters in its name
        # ("[", "*", "?") are matched literally instead of as a pattern.
        escaped_dir = Path(glob.escape(str(train_log)))
        candidates = [train_log / name for name in ("train.log", "process.out", "process_0.out")]
        for pattern in (escaped_dir / "*.log", escaped_dir / "slurm_logs" / "*.log"):
            # glob.glob returns matches in arbitrary order: sort them so the
            # selected log file does not depend on the filesystem.
            candidates.extend(Path(match) for match in sorted(glob.glob(str(pattern))))
        for candidate in candidates:
            if candidate.is_file():
                train_log = candidate
                break
        else:
            raise hip.ExperimentFetcherDoesntApply("No log file found")
    lines = train_log.read_text(encoding="utf-8").split('\n')

    datapoints: tp.List[tp.Dict[str, tp.Any]] = []
    params: tp.Dict[str, tp.Any] = {}
    # Compiled once, outside of the per-line loop.
    logs_prefix_re = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \| [A-Z]* \| )")
    for line in lines:
        # Strip log prefix
        # eg "2020-03-08 16:48:16 | INFO | "
        prefix_match = logs_prefix_re.match(line)
        if prefix_match is not None:
            line = line[prefix_match.end():]
        # Arguments: Namespace(...)
        if line.startswith('Namespace('):
            # format: Namespace(activation_dropout=0.1, activation_fn='relu', ...)
            # Ideally we want to do: `eval("dict(activation_dropout=0.1, activation_fn='relu', ...)")`
            # But as it's user input, we want to have something safe.
            # (it's still possible to crash the python interpreter with a too complex string due to stack depth limitations)
            node = ast.parse(line)
            params = {
                kw.arg: ast.literal_eval(kw.value)
                for kw in node.body[0].value.keywords  # type: ignore
            }
            continue
        # Results in JSON format
        # valid | {"epoch": 33, "valid_loss": "0.723", "valid_ppl": "1.65", ...}
        if line.startswith("valid | {"):
            json_string = line.split('|', 1)[-1].lstrip()
            datapoints.append(json.loads(json_string))
            # Such a line can never also be an old-style "| epoch " line.
            continue
        # For older version of fairseq
        if line.startswith('| epoch '):
            values = _load_fairseq_metrics_inline(line)
            if datapoints and datapoints[-1]['epoch'] == values['epoch']:
                datapoints[-1].update(values)
            else:
                datapoints.append(values)
    datapoints = [{
        **params,
        **metrics,  # overrides 'learning rate' for instance
    } for metrics in datapoints]
    datapoints.sort(key=lambda d: float(d["epoch"]))
    xp = hip.Experiment.from_iterable(datapoints)
    # Chain consecutive datapoints: each one points back to its predecessor.
    for dp, next_dp in zip(xp.datapoints, xp.datapoints[1:]):
        next_dp.from_uid = dp.uid
    return xp