sockeye/train.py [357:403]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            source_vocab_paths = data_info.source_vocabs
            target_vocab_paths = data_info.target_vocabs

        else:
            # Load or create vocabs.
            # Build one vocab path per input stream: the primary vocab path comes first, followed by
            # one entry per factor file. A factor without a matching entry in the provided factor
            # vocab list gets None as its path.
            source_factor_vocab_paths = [
                None if idx >= len(args.source_factor_vocabs) else args.source_factor_vocabs[idx]
                for idx, _factor in enumerate(args.source_factors)
            ]
            source_vocab_paths = [args.source_vocab, *source_factor_vocab_paths]

            target_factor_vocab_paths = [
                None if idx >= len(args.target_factor_vocabs) else args.target_factor_vocabs[idx]
                for idx, _factor in enumerate(args.target_factors)
            ]
            target_vocab_paths = [args.target_vocab, *target_factor_vocab_paths]

            # The training data is passed as a single shard: [primary file, factor files...].
            source_vocabs, target_vocabs = vocab.load_or_create_vocabs(
                shard_source_paths=[[args.source, *args.source_factors]],
                shard_target_paths=[[args.target, *args.target_factors]],
                source_vocab_paths=source_vocab_paths,
                source_factor_vocab_same_as_source=args.source_factors_share_embedding,
                target_vocab_paths=target_vocab_paths,
                target_factor_vocab_same_as_target=args.target_factors_share_embedding,
                shared_vocab=shared_vocab,
                num_words_source=num_words_source,
                num_words_target=num_words_target,
                word_min_count_source=word_min_count_source,
                word_min_count_target=word_min_count_target,
                pad_to_multiple_of=args.pad_vocab_to_multiple_of,
            )

        # The number of factor embedding sizes must equal the number of factor files, unless every
        # factor combine mode is sum or average, in which case the count check is bypassed.
        check_condition(all(combine in (C.FACTORS_COMBINE_SUM, C.FACTORS_COMBINE_AVERAGE)
                            for combine in args.source_factors_combine)
                        or len(args.source_factors) == len(args.source_factors_num_embed),
                        "Number of source factor data (%d) differs from provided source factor dimensions (%d)" % (
                            len(args.source_factors), len(args.source_factors_num_embed)))
        # Same rule for the target side; the message names the target factor dimensions.
        check_condition(all(combine in (C.FACTORS_COMBINE_SUM, C.FACTORS_COMBINE_AVERAGE)
                            for combine in args.target_factors_combine)
                        or len(args.target_factors) == len(args.target_factors_num_embed),
                        "Number of target factor data (%d) differs from provided target factor dimensions (%d)" % (
                            len(args.target_factors), len(args.target_factors_num_embed)))

        # Absolute paths of the training files: primary file first, then the factor files.
        sources = [args.source] + args.source_factors
        sources = [str(os.path.abspath(s)) for s in sources]
        targets = [args.target] + args.target_factors
        targets = [str(os.path.abspath(t)) for t in targets]

        # Training and validation data must provide the same number of streams per side.
        # Each message reports exactly the two counts that its condition compares.
        check_condition(len(sources) == len(validation_sources),
                        'Training and validation data must have the same number of source factors, '
                        'but found %d and %d.' % (len(sources), len(validation_sources)))
        check_condition(len(targets) == len(validation_targets),
                        'Training and validation data must have the same number of target factors, '
                        'but found %d and %d.' % (len(targets), len(validation_targets)))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



sockeye/train_pt.py [335:381]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            source_vocab_paths = data_info.source_vocabs
            target_vocab_paths = data_info.target_vocabs

        else:
            # Load or create vocabs.
            # Build one vocab path per input stream: the primary vocab path comes first, followed by
            # one entry per factor file. A factor without a matching entry in the provided factor
            # vocab list gets None as its path.
            source_factor_vocab_paths = [
                None if idx >= len(args.source_factor_vocabs) else args.source_factor_vocabs[idx]
                for idx, _factor in enumerate(args.source_factors)
            ]
            source_vocab_paths = [args.source_vocab, *source_factor_vocab_paths]

            target_factor_vocab_paths = [
                None if idx >= len(args.target_factor_vocabs) else args.target_factor_vocabs[idx]
                for idx, _factor in enumerate(args.target_factors)
            ]
            target_vocab_paths = [args.target_vocab, *target_factor_vocab_paths]

            # The training data is passed as a single shard: [primary file, factor files...].
            source_vocabs, target_vocabs = vocab.load_or_create_vocabs(
                shard_source_paths=[[args.source, *args.source_factors]],
                shard_target_paths=[[args.target, *args.target_factors]],
                source_vocab_paths=source_vocab_paths,
                source_factor_vocab_same_as_source=args.source_factors_share_embedding,
                target_vocab_paths=target_vocab_paths,
                target_factor_vocab_same_as_target=args.target_factors_share_embedding,
                shared_vocab=shared_vocab,
                num_words_source=num_words_source,
                num_words_target=num_words_target,
                word_min_count_source=word_min_count_source,
                word_min_count_target=word_min_count_target,
                pad_to_multiple_of=args.pad_vocab_to_multiple_of,
            )

        # The number of factor embedding sizes must equal the number of factor files, unless every
        # factor combine mode is sum or average, in which case the count check is bypassed.
        check_condition(all(combine in (C.FACTORS_COMBINE_SUM, C.FACTORS_COMBINE_AVERAGE)
                            for combine in args.source_factors_combine)
                        or len(args.source_factors) == len(args.source_factors_num_embed),
                        "Number of source factor data (%d) differs from provided source factor dimensions (%d)" % (
                            len(args.source_factors), len(args.source_factors_num_embed)))
        # Same rule for the target side; the message names the target factor dimensions.
        check_condition(all(combine in (C.FACTORS_COMBINE_SUM, C.FACTORS_COMBINE_AVERAGE)
                            for combine in args.target_factors_combine)
                        or len(args.target_factors) == len(args.target_factors_num_embed),
                        "Number of target factor data (%d) differs from provided target factor dimensions (%d)" % (
                            len(args.target_factors), len(args.target_factors_num_embed)))

        # Absolute paths of the training files: primary file first, then the factor files.
        sources = [args.source] + args.source_factors
        sources = [str(os.path.abspath(s)) for s in sources]
        targets = [args.target] + args.target_factors
        targets = [str(os.path.abspath(t)) for t in targets]

        # Training and validation data must provide the same number of streams per side.
        # Each message reports exactly the two counts that its condition compares.
        check_condition(len(sources) == len(validation_sources),
                        'Training and validation data must have the same number of source factors, '
                        'but found %d and %d.' % (len(sources), len(validation_sources)))
        check_condition(len(targets) == len(validation_targets),
                        'Training and validation data must have the same number of target factors, '
                        'but found %d and %d.' % (len(targets), len(validation_targets)))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



