in code/train_deploy.py [0:0]
def input_fn(request_body, request_content_type):
    """An input_fn that deserializes a JSON request body into padded
    token-ID tensors and the matching attention mask."""
    if request_content_type == "application/json":
        data = json.loads(request_body)
        print("================ input sentences ===============")
        print(data)
        if isinstance(data, str):
            data = [data]
        elif isinstance(data, list) and len(data) > 0 and isinstance(data[0], str):
            pass
        else:
            raise ValueError(
                "Unsupported input type. Input must be a string or a "
                "non-empty list of strings. Got {}".format(data)
            )
        # encoded = [tokenizer.encode(x, add_special_tokens=True) for x in data]
        # encoded = tokenizer(data, add_special_tokens=True)
        # For backward compatibility, encode one sentence at a time; see
        # https://github.com/huggingface/transformers/issues/5580
        input_ids = [tokenizer.encode(x, add_special_tokens=True) for x in data]
        print("================ encoded sentences ==============")
        print(input_ids)
        # Pad (and truncate) every sentence to MAX_LEN tokens.
        padded = torch.zeros(len(input_ids), MAX_LEN)
        for i, p in enumerate(input_ids):
            padded[i, :min(len(p), MAX_LEN)] = torch.tensor(p[:MAX_LEN])
        # Attention mask: 1 for real tokens, 0 for padding.
        mask = (padded != 0)
        print("================= padded input and attention mask ================")
        print(padded, '\n', mask)
        return padded.long(), mask.long()

    raise ValueError("Unsupported content type: {}".format(request_content_type))