in bert_layer.py [0:0]
def preprocess_bert_input(X, y, max_seq_length, tokenizer, categories):
    """Convert raw inputs into BERT feature arrays with one-hot labels.

    The inputs are first wrapped by ``convert_text_to_examples`` and then
    turned into features by ``convert_examples_to_features``.

    Args:
        X: Input texts, forwarded unchanged to ``convert_text_to_examples``.
        y: Labels matching ``X``, forwarded unchanged to
            ``convert_text_to_examples``.
        max_seq_length: Maximum sequence length handed to
            ``convert_examples_to_features``.
        tokenizer: Tokenizer handed to ``convert_examples_to_features``.
        categories: Number of classes; labels are expected to be integers
            in ``range(categories)``.

    Returns:
        A tuple ``(input_ids, input_masks, segment_ids, labels)``. When the
        first label is ``None`` (inference), ``labels`` is returned exactly
        as produced by ``convert_examples_to_features``; otherwise it is the
        dense one-hot encoding with one column per category.
    """
    examples = convert_text_to_examples(X, y)
    (
        input_ids,
        input_masks,
        segment_ids,
        labels,
    ) = convert_examples_to_features(
        tokenizer, examples, max_seq_length=max_seq_length
    )

    # At inference time there are no labels (the label is None), so there is
    # nothing to encode.
    if labels[0][0] is None:
        return (input_ids, input_masks, segment_ids, labels)

    # NOTE(review): scikit-learn 1.2 renamed ``sparse`` to ``sparse_output``
    # and later releases drop the old keyword -- confirm the pinned version
    # before upgrading.
    enc = preprocessing.OneHotEncoder(
        categories=[range(categories)], sparse=False
    )

    # Go through an ndarray so that plain Python lists are accepted as well,
    # and shape the labels as a single feature column for the encoder.
    col = np.array(labels)
    col = col.reshape(len(col), 1)

    # Fit and transform on the same column so both steps see one shape.
    labels = enc.fit_transform(col)
    return (input_ids, input_masks, segment_ids, labels)