in utils_ranking.py
import logging

from transformers import InputFeatures

logger = logging.getLogger(__name__)

# `tokenizer` and `max_length` are expected to be module-level globals,
# initialized elsewhere (e.g., in a worker initializer) before this runs.


def process_one(examples):
    """Tokenize (text_a, text_b) pairs and wrap them as InputFeatures."""
    output = []
    for ex_index, example in enumerate(examples):
        inputs = tokenizer.encode_plus(
            example.text_a,
            example.text_b,
            add_special_tokens=True,
            max_length=max_length,
            padding='max_length',
            truncation='only_second')
        # Log the first few features of the first query for inspection
        # (guid format assumed to be '<prefix>_<query id>_...').
        if example.guid.split('_')[1] == '0' and ex_index < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s", example.guid)
            logger.info("input_ids: %s", " ".join(str(x) for x in inputs["input_ids"]))
            logger.info("input_tokens: %s", " ".join(tokenizer.convert_ids_to_tokens(inputs["input_ids"])))
            logger.info("attention_mask: %s", " ".join(str(x) for x in inputs["attention_mask"]))
            logger.info("token_type_ids: %s", " ".join(str(x) for x in inputs["token_type_ids"]))
        output.append(
            InputFeatures(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                token_type_ids=inputs["token_type_ids"],
                label=0  # placeholder label; ranking scores come from the model at inference
            )
        )
    return output
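
For context, here is a minimal sketch of how process_one might be driven. The checkpoint name, max_length value, and the InputExample construction below are illustrative assumptions, not taken from the source; in practice the globals would likely be set by a worker initializer rather than at module scope.

# Hypothetical driver (names and values below are assumptions for illustration):
from transformers import BertTokenizer, InputExample

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint
max_length = 128  # assumed maximum sequence length

examples = [
    InputExample(guid="dev_0_0", text_a="what is python",
                 text_b="Python is a programming language.", label=None),
]
features = process_one(examples)
print(features[0].input_ids[:10])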