def vectorize()

in docker/app.py [0:0]


def vectorize():
    content = request.args.get('q')
    lang = request.args.get('lang')
    embedding = ''
    if lang is None or not lang:
        lang = "en"
    # encoder
    model_dir = Path(__file__).parent / "LASER" / "models"
    encoder_path = model_dir / "bilstm.93langs.2018-12-26.pt"
    bpe_codes_path = model_dir / "93langs.fcodes"
    print(f' - Encoder: loading {encoder_path}')
    encoder = SentenceEncoder(encoder_path,
                              max_sentences=None,
                              max_tokens=12000,
                              sort_kind='mergesort',
                              cpu=True)
    with tempfile.TemporaryDirectory() as tmp:
        tmpdir = Path(tmp)
        ifname = tmpdir / "content.txt"
        bpe_fname = tmpdir / 'bpe'
        bpe_oname = tmpdir / 'out.raw'
        with ifname.open("w") as f:
            f.write(content)
        if lang != '--':
            tok_fname = tmpdir / "tok"
            Token(str(ifname),
                  str(tok_fname),
                  lang=lang,
                  romanize=True if lang == 'el' else False,
                  lower_case=True,
                  gzip=False,
                  verbose=True,
                  over_write=False)
            ifname = tok_fname
        BPEfastApply(str(ifname),
                     str(bpe_fname),
                     str(bpe_codes_path),
                     verbose=True, over_write=False)
        ifname = bpe_fname
        EncodeFile(encoder,
                   str(ifname),
                   str(bpe_oname),
                   verbose=True,
                   over_write=False,
                   buffer_size=10000)
        dim = 1024
        X = np.fromfile(str(bpe_oname), dtype=np.float32, count=-1)
        X.resize(X.shape[0] // dim, dim)
        embedding = X
    body = {'content': content, 'embedding': embedding.tolist()}
    return jsonify(body)