in scripts/setfit/run_fewshot_multilingual.py [0:0]
def eval_setfit(train_data, test_data, model, loss_class, num_epochs, metric):
    """Fine-tune ``model`` with ``loss_class`` on the few-shot train split, then score it.

    When ``loss_class`` is ``None`` the model is evaluated as-is (no fine-tuning).
    Returns whatever ``compute_metrics`` returns for the given ``metric``.
    NOTE(review): reads the script-level ``args.batch_size`` global — confirm ``args``
    is populated before this is called.
    """
    texts_train, labels_train = train_data["text"], train_data["label"]
    texts_test, labels_test = test_data["text"], test_data["label"]

    # No loss configured: skip the sentence-transformers adaptation entirely.
    if loss_class is None:
        return compute_metrics(texts_train, labels_train, texts_test, labels_test, metric)

    batch_size = args.batch_size
    label_batched_losses = (
        losses.BatchAllTripletLoss,
        losses.BatchHardTripletLoss,
        losses.BatchSemiHardTripletLoss,
        losses.BatchHardSoftMarginTripletLoss,
        SupConLoss,
    )
    if loss_class in label_batched_losses:
        # These losses need label-aware batches, so sample via SentenceLabelDataset.
        examples = [
            InputExample(texts=[text], label=lab)
            for text, lab in zip(texts_train, labels_train)
        ]
        sampler = SentenceLabelDataset(examples)
        # The sampler may yield fewer items than the requested batch size.
        batch_size = min(args.batch_size, len(sampler))
        train_dataloader = DataLoader(sampler, batch_size=batch_size, drop_last=True)
        if loss_class is losses.BatchHardSoftMarginTripletLoss:
            # Soft-margin variant takes no explicit margin.
            train_loss = loss_class(
                model=model,
                distance_metric=BatchHardTripletLossDistanceFunction.cosine_distance,
            )
        elif loss_class is SupConLoss:
            train_loss = loss_class(model=model)
        else:
            train_loss = loss_class(
                model=model,
                distance_metric=BatchHardTripletLossDistanceFunction.cosine_distance,
                margin=0.25,
            )
        train_steps = num_epochs * len(train_dataloader)
    else:
        # Pairwise losses: accumulate one round of generated sentence pairs per epoch,
        # so a single pass over the dataloader already covers all epochs.
        pairs = []
        for _ in range(num_epochs):
            pairs = sentence_pairs_generation(np.array(texts_train), np.array(labels_train), pairs)
        train_dataloader = DataLoader(pairs, shuffle=True, batch_size=batch_size)
        train_loss = loss_class(model)
        train_steps = len(train_dataloader)

    print(f"{len(texts_train)} train samples in total, {train_steps} train steps with batch size {batch_size}")
    warmup_steps = math.ceil(train_steps * 0.1)  # 10% of the steps used for warmup
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=1,
        steps_per_epoch=train_steps,
        warmup_steps=warmup_steps,
        show_progress_bar=False,
    )
    return compute_metrics(texts_train, labels_train, texts_test, labels_test, metric)