def main()

in src/target_filter.py [0:0]


def main():
    """Command-line entry point for target-side gender filtering of a parallel corpus.

    Parses the command-line options, selects the morphological filterer that
    corresponds to the requested target language, runs its ``target_filter``
    method on the source/target input files, and reports the total and kept
    line counts on stderr.

    When an output path is not given, it defaults to the matching input path
    with ``.target_filtered`` appended.

    Raises:
        NotImplementedError: If no filterer is registered for the requested
            target language.
    """
    # --- command-line interface ---
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--source-input', '-s', type=str, required=True,
        help='Source input file for target-filtering. Should be parallel to `target_input` and '
             'contain one sentence per line.',
    )
    cli.add_argument(
        '--target-input', '-t', type=str, required=True,
        help='Target input file for target-filtering. Should be parallel to `source_input` and '
             'contain one sentence per line.',
    )
    cli.add_argument(
        '--target-language', '-l', type=str, required=True, choices=SUPPORTED_LANGUAGES,
        help='Two-letter code for target language.',
    )
    cli.add_argument(
        '--gender', '-g', type=str, required=True, choices=SUPPORTED_GENDERS,
        help="Gender to filter for on the target side.",
    )
    # The output options are optional; argparse leaves them as None when omitted.
    cli.add_argument(
        '--source-output', '-so', type=str,
        help='Output file for the source side of the target-filtered corpus. '
             'Default: `[source_input].target_filtered`.',
    )
    cli.add_argument(
        '--target-output', '-to', type=str,
        help='Output file for the target side of the target_filtered corpus. '
             'Default: `[target_input].target_filtered`.',
    )
    opts = cli.parse_args()

    # Fall back to "<input>.target_filtered" only when the option was not supplied at all.
    src_out = opts.source_output if opts.source_output is not None else f'{opts.source_input}.target_filtered'
    tgt_out = opts.target_output if opts.target_output is not None else f'{opts.target_input}.target_filtered'
    lang = opts.target_language

    # A sentence pair is kept only if every morphologically gendered word on the target side
    # agrees with the requested gender; all other pairs are dropped. Each table entry is a
    # zero-argument factory so that only the filterer for the selected language is constructed.
    filterer_factories = {
        "de": lambda: GermanMorphFilterer(),
        "fr": lambda: SpacyMorphFilterer(lang=lang),
        "it": lambda: SpacyMorphFilterer(lang=lang),
        "he": lambda: HebrewMorphFilterer(),
        "ru": lambda: RussianMorphFilterer(),
    }
    make_filterer = filterer_factories.get(lang)
    if make_filterer is None:
        raise NotImplementedError(f'Unrecognized language {lang}. Supported languages: {SUPPORTED_LANGUAGES}')
    filterer = make_filterer()

    count_total, count_kept = filterer.target_filter(
        opts.source_input, opts.target_input, src_out, tgt_out, opts.gender,
    )

    # Summary goes to stderr.
    sys.stderr.write(f'Read {count_total} lines from {opts.source_input} and {opts.target_input}\n')
    sys.stderr.write(f'Wrote {count_kept} lines to {src_out} and {tgt_out} for gender {opts.gender}\n')