in src/engine/step4/model_dev/t5_evaluation.py [0:0]
import torch


def do_inference(args):
"""
Get inferences of input questions based on the given args (for parallel inference).
Args:
args(tuple): Tuple model, dataframe and gpu device number.
Returns:
The list of the inferred queries templates.
"""
    model_test, df, device = args
    try:
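        # Assumption: the caller has already placed model_test on cuda:{device};
        # the model is moved back to the CPU in the finally block below.
        # Prefix each question with the T5 text-to-SQL task instruction.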
        proc_input = [
            f"translate English to SQL: {input_text}"
            for input_text in df["unfolded_questions"]
        ]
        batch_size = 16
        model_test.eval()
        queries = []
        total = df.shape[0]
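        # Inference only: disable gradient tracking to save memory.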
        with torch.no_grad():
            for i in range(0, len(proc_input), batch_size):
                print(f"{i}/{total} done")
                batch = proc_input[i : i + batch_size]
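                # Tokenize the batch, padding/truncating to the fixed input length.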
                inputs = model_test.tokenizer.batch_encode_plus(
                    batch,
                    max_length=model_test.hparams.max_input_length,
                    padding="max_length",
                    truncation=True,
                    return_tensors="pt",
                )
                inputs = inputs.to(f"cuda:{device}")
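                # Beam search with a repetition penalty to discourage degenerate output.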
                output = model_test.model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_length=model_test.hparams.max_output_length,
                    num_beams=2,
                    repetition_penalty=2.5,
                    length_penalty=1.0,
                )
                queries.extend(output.cpu().numpy().tolist())
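        # Decode token ids to text, strip special tokens, and map square
        # brackets back to the angle brackets used by the query templates
        # (the brackets are presumably a tokenizer-safe encoding of "<"/">").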
        queries = model_test.tokenizer.batch_decode(queries)
        queries = [
            sql.replace("<pad>", "")
            .replace("</s>", "")
            .replace("[", "<")
            .replace("]", ">")
            .strip()
            for sql in queries
        ]
    finally:
        # Release the GPU even if inference fails part-way through.
        model_test.to("cpu")
    return queries
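

# A minimal usage sketch (an assumption, not part of the original pipeline):
# shard the dataframe across GPUs and run do_inference with one model copy
# per device. `load_model` and `df` are hypothetical placeholders.
#
#     from concurrent.futures import ThreadPoolExecutor
#
#     n_gpus = torch.cuda.device_count()
#     shards = [df.iloc[i::n_gpus] for i in range(n_gpus)]
#     models = [load_model().to(f"cuda:{gpu}") for gpu in range(n_gpus)]
#     with ThreadPoolExecutor(max_workers=n_gpus) as pool:
#         results = pool.map(do_inference, zip(models, shards, range(n_gpus)))
#     queries = [q for shard_queries in results for q in shard_queries]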