def feature_data()

in ludwig/features/text_feature.py [0:0]


    def feature_data(column, metadata, preprocessing_parameters):
        char_data = build_sequence_matrix(
            sequences=column,
            inverse_vocabulary=metadata['char_str2idx'],
            tokenizer_type=preprocessing_parameters['char_tokenizer'],
            length_limit=metadata['char_max_sequence_length'],
            padding_symbol=metadata['char_pad_symbol'],
            padding=preprocessing_parameters['padding'],
            unknown_symbol=metadata['char_unk_symbol'],
            lowercase=preprocessing_parameters['lowercase'],
            tokenizer_vocab_file=preprocessing_parameters[
                'char_vocab_file'
            ],
            pretrained_model_name_or_path=preprocessing_parameters[
                'pretrained_model_name_or_path'
            ]

        )
        word_data = build_sequence_matrix(
            sequences=column,
            inverse_vocabulary=metadata['word_str2idx'],
            tokenizer_type=preprocessing_parameters['word_tokenizer'],
            length_limit=metadata['word_max_sequence_length'],
            padding_symbol=metadata['word_pad_symbol'],
            padding=preprocessing_parameters['padding'],
            unknown_symbol=metadata['word_unk_symbol'],
            lowercase=preprocessing_parameters['lowercase'],
            tokenizer_vocab_file=preprocessing_parameters[
                'word_vocab_file'
            ],
            pretrained_model_name_or_path=preprocessing_parameters[
                'pretrained_model_name_or_path'
            ]
        )

        return char_data, word_data