def load_metadata()

in datasets/iamdb.py [0:0]


def _strip_wordsep(text, wordsep):
    """Remove whole leading/trailing occurrences of ``wordsep`` from ``text``.

    Unlike ``str.strip(wordsep)``, which treats its argument as a *set of
    characters*, this only removes complete copies of the separator, so a
    multi-character separator never eats legitimate text characters.
    """
    if not wordsep:
        return text
    width = len(wordsep)
    while text.startswith(wordsep):
        text = text[width:]
    while text.endswith(wordsep):
        text = text[:-width]
    return text


def load_metadata(data_path, wordsep, use_words=False):
    """Parse ``words.txt`` or ``lines.txt`` under ``data_path`` into per-form records.

    Each non-comment, non-blank row is split on whitespace. The first field is
    a dash-separated identifier; fields from index 8 onward form the
    transcription; four consecutive integer fields (starting at index 3 for
    words, 4 for lines) form the bounding box.

    Args:
        data_path: Directory containing ``words.txt`` / ``lines.txt``.
        wordsep: String substituted (literally) for every run of ``|`` and
            every whitespace character in the transcription. Leading and
            trailing separators are removed from the result.
        use_words: If true, read ``words.txt`` and skip rows whose second
            field is ``"err"``; otherwise read ``lines.txt``.

    Returns:
        A ``collections.defaultdict(list)`` mapping the form key (first two
        dash-separated components of the row identifier) to a list of dicts
        with keys ``"key"`` (first three identifier components), ``"box"``
        (tuple of four ints; presumably x, y, width, height -- confirm against
        the dataset format description) and ``"text"``.

    Raises:
        OSError: If the metadata file cannot be opened.
        ValueError: If a bounding-box field is not an integer.
    """
    forms = collections.defaultdict(list)
    filename = "words.txt" if use_words else "lines.txt"
    # The word file has one fewer column before the box than the line file.
    box_idx = 3 if use_words else 4
    sep_pattern = re.compile(r"\|+|\s")

    with open(os.path.join(data_path, filename), "r") as fid:
        for raw in fid:
            fields = raw.split()
            # Skip blank lines (which would otherwise raise IndexError
            # below) and comment lines.
            if not fields or fields[0].startswith("#"):
                continue
            # skip word segmentation errors
            if use_words and fields[1] == "err":
                continue

            text = " ".join(fields[8:])
            # remove garbage tokens:
            text = text.replace("#", "")
            # swap word sep from | to wordsep; a callable replacement keeps
            # wordsep literal even if it contains backslashes.
            text = sep_pattern.sub(lambda _match: wordsep, text)
            text = _strip_wordsep(text, wordsep)

            id_parts = fields[0].split("-")
            form_key = "-".join(id_parts[:2])
            line_key = "-".join(id_parts[:3])
            box = tuple(int(val) for val in fields[box_idx : box_idx + 4])
            forms[form_key].append(
                {
                    "key": line_key,
                    "box": box,
                    "text": text,
                }
            )
    return forms