def marian_best_bleu()

in pipeline/translate/extract_best.py [0:0]


def marian_best_bleu(args, score_function):
    """Write the best-scoring n-best hypothesis for every reference line.

    The n-best stream is consumed in lockstep with the references: all
    consecutive n-best lines whose leading index equals the 0-based position
    of the current reference are treated as its hypotheses. The n-best lines
    therefore have to be grouped by index, in the same order as the
    references.

    Args:
        args: object providing
            ``references`` - iterable of reference lines,
            ``nbest`` - iterator of n-best lines formatted as
            ``"<idx> ||| <text>[ ||| ...]"`` (it is advanced with ``next``),
            ``debpe`` - when truthy, ``"@@ "`` BPE joiners are removed from
            references and hypotheses before scoring and writing,
            ``debug`` - when truthy, per-reference scores go to stderr,
            ``output`` - object with ``write`` receiving one line per
            reference.
        score_function: callable invoked as
            ``score_function(ref_token_lists, hyp_tokens)``; the hypothesis
            with the highest returned value wins (the first one on ties).

    Raises:
        ValueError: if a reference has no hypotheses in the n-best stream
            (stream exhausted early, or indices skipped / out of order), or
            if an n-best index is not an integer.
    """
    # Compiled once; the same substitution is applied to every line.
    bpe_joiner = re.compile(r"@@ +")

    # Look-ahead buffer: the first n-best line that belongs to a later
    # reference is kept here until that reference is processed.
    prev_line = None
    for i, ref_line in enumerate(args.references):
        refs = ref_line.strip().split("\n")
        if args.debpe:
            refs = [bpe_joiner.sub("", r) for r in refs]

        texts = []
        while True:
            if prev_line:
                # CTranslate2 can output empty text, for example:
                # 10181 ||| .GDFMAKERPROJECTファイルを開くには?
                # 10181 ||| .GDMAKERPROJECTファイルを開くには?
                # 10181 ||| .GDFMAKERPROJECTファイルを開くには。
                # 10181 |||
                # Marian also outputs scores, for example:
                # 0 ||| Реформа, направленная на выдвижение условий, идет слишком медленно. ||| F0= -9.21191 F1= -11.53 ||| -1.22059
                fields = prev_line.rstrip("\n").split(" ||| ")
                if len(fields) == 1:
                    # handle "10181 |||": no " ||| " separator is left once
                    # the text is empty, so take the index token by itself
                    fields = fields[0].split()[0], ""

                if int(fields[0]) != i:
                    # Belongs to another reference; leave it in prev_line.
                    break
                texts.append(fields[1])

            prev_line = next(args.nbest, None)
            if not prev_line:
                # n-best stream exhausted
                break

        if not texts:
            # Without this check max() below fails with an opaque
            # "empty sequence" error that hides which input is misaligned.
            raise ValueError(
                "no n-best hypotheses for reference {} "
                "(pending n-best line: {!r})".format(i, prev_line)
            )

        if args.debpe:
            texts = [bpe_joiner.sub("", t) for t in texts]
        refs = [r.split() for r in refs]
        scores = [score_function(refs, t.split()) for t in texts]
        # list.index returns the first maximum, so ties keep n-best order.
        best_txt = texts[scores.index(max(scores))]

        args.output.write("{}\n".format(best_txt))
        if args.debug:
            sys.stderr.write("{}: {}\n".format(i, scores))

        # Progress marker every 100k references.
        if i % 100000 == 0 and i > 0:
            sys.stderr.write("[{}]\n".format(i))