in scripts/batch_eval_KB_completion.py [0:0]
def batchify(data, batch_size):
    """Split samples into length-sorted batches.

    Samples are ordered by the total number of whitespace-separated tokens
    across their ``"masked_sentences"`` so that each batch groups together
    sentences of similar length. The sort is stable, so samples with equal
    length keep their relative input order.

    Args:
        data: Iterable of dict-like samples; each must provide a
            ``"masked_sentences"`` entry holding a list of strings.
        batch_size: Maximum number of samples per batch (int). Values
            below 1 are treated as 1, i.e. one sample per batch.

    Returns:
        A tuple ``(list_samples_batches, list_sentences_batches, msg)``:
        the batches of samples, the parallel batches holding each sample's
        ``"masked_sentences"``, and a message string (always empty here).

    Raises:
        KeyError: If a sample has no ``"masked_sentences"`` entry.
    """
    msg = ""

    # Sort to group together sentences with similar length.
    ordered = sorted(
        data, key=lambda k: len(" ".join(k["masked_sentences"]).split())
    )

    # Clamp the stride so a non-positive batch_size yields single-sample
    # batches instead of an invalid range step.
    step = max(1, batch_size)

    # Slicing naturally produces a shorter final batch and never an empty one.
    list_samples_batches = [
        ordered[start:start + step] for start in range(0, len(ordered), step)
    ]
    list_sentences_batches = [
        [sample["masked_sentences"] for sample in batch]
        for batch in list_samples_batches
    ]
    return list_samples_batches, list_sentences_batches, msg