def main()

in learn_bpe.py [0:0]


def main():
    """Command-line entry point: parse arguments and run ``learn_bpe``.

    The standard streams are re-wrapped with UTF-8 ``codecs`` readers/writers,
    and every named input/output file is reopened through ``codecs.open`` with
    UTF-8 encoding before being handed to ``learn_bpe``.  Handles that refer
    to stdin/stdout are passed through untouched.

    Every file reopened here is closed again once ``learn_bpe`` returns or
    raises, so the output is flushed to disk deterministically and no
    descriptors are left dangling.
    """
    # Full compatibility with original implementation.
    # I do not know exactly why this is different to the standard file objects,
    # but some special utf-8 symbols are handled differently if the codecs call
    # is not present.
    sys.stderr = codecs.getwriter('UTF-8')(sys.stderr.buffer)
    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout.buffer)
    sys.stdin = codecs.getreader('UTF-8')(sys.stdin.buffer)

    arg_parser = create_arg_parser()
    args = arg_parser.parse_args()

    # Handles opened by this function; these (and only these) are closed in
    # the ``finally`` clause below.  stdin/stdout are never closed here.
    reopened = []
    try:
        # Full compatibility with original implementation
        for i, handle in enumerate(args.input):
            if handle.name != '<stdin>':
                # Release the handle created during argument parsing before
                # replacing it with the codecs-based one.
                handle.close()
                args.input[i] = codecs.open(handle.name, encoding='utf-8')
                reopened.append(args.input[i])
        if args.output.name != '<stdout>':
            args.output.close()
            args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
            reopened.append(args.output)

        learn_bpe(args.input, args.output, args.symbols,
                  probabilistic=args.probabilistic,
                  frac_stopping=args.frac_stopping,
                  frac_stopping_average_n=args.frac_stopping_average,
                  min_frequency=args.min_frequency,
                  is_dict=args.dict_input,
                  total_symbols=args.total_symbols,
                  verbose=args.verbose,
                  )
    finally:
        for handle in reopened:
            handle.close()