in src/engine/step4/model_dev/utils/model.py [0:0]
def train_dataloader(self):
    """
    Build the training DataLoader and (re)create the LR scheduler.

    The dataset is obtained from ``get_dataset`` for the ``"train"`` split,
    limited to ``self.n_obs["train"]`` samples. As a side effect, a
    scheduler produced by ``get_linear_schedule_with_warmup`` for
    ``self.opt`` is stored on ``self.lr_scheduler``.

    Args:
        None
    Returns:
        DataLoader object wrapping the training dataset.
    """
    sample_limit = self.n_obs["train"]
    dataset = get_dataset(
        tokenizer=self.tokenizer,
        data_split="train",
        num_samples=sample_limit,
        args=self.hparams,
    )

    loader = DataLoader(
        dataset,
        batch_size=self.hparams.train_batch_size,
        drop_last=True,
        shuffle=True,
        prefetch_factor=4,
        num_workers=96,
    )

    # Samples consumed per forward pass across all devices (at least one).
    global_batch = self.hparams.train_batch_size * max(1, self.hparams.n_gpu)
    # Floor-divide twice: first into batches, then into optimizer steps.
    batches_per_epoch = len(loader.dataset) // global_batch
    updates_per_epoch = (
        batches_per_epoch // self.hparams.gradient_accumulation_steps
    )
    # The float() conversion makes the total a float, not an int.
    total_updates = updates_per_epoch * float(self.hparams.num_train_epochs)

    self.lr_scheduler = get_linear_schedule_with_warmup(
        self.opt,
        num_warmup_steps=self.hparams.warmup_steps,
        num_training_steps=total_updates,
    )
    return loader