def prep()

in torchbenchmark/e2e_models/hf_bert/__init__.py


    def prep(self, hf_args):
        # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
        accelerator = Accelerator(fp16=(self.tb_args.fp16 == "amp"))
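        # wait_for_everyone() is a cross-process barrier: every rank reaches this point before data preparation starts.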
        accelerator.wait_for_everyone()
        raw_datasets = prep_dataset(hf_args)
        num_labels, label_list, is_regression = prep_labels(hf_args, raw_datasets)
        # Load pretrained model and tokenizer
        #
        # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
        # download model & vocab.
        config = AutoConfig.from_pretrained(hf_args.model_name_or_path, num_labels=num_labels, finetuning_task=hf_args.task_name)
        tokenizer = AutoTokenizer.from_pretrained(hf_args.model_name_or_path, use_fast=not hf_args.use_slow_tokenizer)
        model = AutoModelForSequenceClassification.from_pretrained(
            hf_args.model_name_or_path,
            from_tf=bool(".ckpt" in hf_args.model_name_or_path),
            config=config,
        )
        train_dataset, eval_dataset, self.mnli_eval_dataset = preprocess_dataset(
            hf_args, config, model, tokenizer, raw_datasets, num_labels, label_list, is_regression, accelerator)
        # DataLoaders creation:
        if hf_args.pad_to_max_length:
            # If padding was already done to max length, we use the default data collator that will just convert everything
            # to tensors.
            self.data_collator = default_data_collator
        else:
            # Otherwise, `DataCollatorWithPadding` will apply dynamic padding for us (by padding to the maximum length of
            # the samples passed). When using mixed precision, we add `pad_to_multiple_of=8` to pad all tensors to multiple
            # of 8s, which will enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
            self.data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=(8 if accelerator.use_fp16 else None))
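            # Illustration: with pad_to_multiple_of=8, a batch whose longest sequence is 45 tokens is padded to length 48.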

        train_dataloader = DataLoader(
            train_dataset, shuffle=True, collate_fn=self.data_collator, batch_size=hf_args.per_device_train_batch_size)
        eval_dataloader = DataLoader(eval_dataset, collate_fn=self.data_collator, batch_size=hf_args.per_device_eval_batch_size)

        # Optimizer
        # Split weights in two groups, one with weight decay and the other not.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": hf_args.weight_decay,
            },
            {
                "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
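        # For a BERT checkpoint, e.g. `bert.encoder.layer.0.attention.output.LayerNorm.weight` lands in the no-decay
        # group, while `bert.encoder.layer.0.attention.output.dense.weight` keeps weight decay (illustrative names).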
        optimizer = AdamW(optimizer_grouped_parameters, lr=hf_args.learning_rate)

        # Prepare everything with our `accelerator`.
        model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
            model, optimizer, train_dataloader, eval_dataloader
        )

        # Note -> the training dataloader needs to be prepared before we grab its length below, because
        # `accelerator.prepare` shards it across processes, so its per-process length is shorter in multi-process runs.

        # Scheduler and math around the number of training steps.
        num_update_steps_per_epoch = math.ceil(len(train_dataloader) / hf_args.gradient_accumulation_steps)
        if hf_args.max_train_steps is None:
            hf_args.max_train_steps = hf_args.num_train_epochs * num_update_steps_per_epoch
        else:
            hf_args.num_train_epochs = math.ceil(hf_args.max_train_steps / num_update_steps_per_epoch)
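        # Illustrative numbers: with 1000 batches per epoch and gradient_accumulation_steps=4,
        # num_update_steps_per_epoch = ceil(1000 / 4) = 250, so 3 epochs give max_train_steps = 750.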

        lr_scheduler = get_scheduler(
            name=hf_args.lr_scheduler_type,
            optimizer=optimizer,
            num_warmup_steps=hf_args.num_warmup_steps,
            num_training_steps=hf_args.max_train_steps,
        )
        # Setup metrics
        # Get the metric function
        if hf_args.task_name is not None:
            self.metric = load_metric("glue", hf_args.task_name)
        else:
            self.metric = load_metric("accuracy")
        # Setup class members
        self.hf_args = hf_args
        self.is_regression = is_regression
        self.model = model
        self.optimizer = optimizer
        self.train_dataloader = train_dataloader
        self.eval_dataloader = eval_dataloader
        self.lr_scheduler = lr_scheduler
        self.accelerator = accelerator
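
A minimal sketch (not part of the source) of how the members set up by prep() are typically consumed afterwards.
The method name train_one_epoch and its call sites are assumptions for illustration, following the standard
HF Accelerate training-loop pattern; it assumes torch is already imported in the module.

    def train_one_epoch(self):  # hypothetical driver method, for illustration only
        self.model.train()
        for step, batch in enumerate(self.train_dataloader):
            outputs = self.model(**batch)
            # Scale the loss so accumulated gradients average out over the accumulation window
            loss = outputs.loss / self.hf_args.gradient_accumulation_steps
            self.accelerator.backward(loss)
            if step % self.hf_args.gradient_accumulation_steps == 0 or step == len(self.train_dataloader) - 1:
                self.optimizer.step()
                self.lr_scheduler.step()
                self.optimizer.zero_grad()

        # Evaluation: gather predictions from all processes before feeding the metric
        self.model.eval()
        for batch in self.eval_dataloader:
            with torch.no_grad():
                outputs = self.model(**batch)
            predictions = outputs.logits.squeeze() if self.is_regression else outputs.logits.argmax(dim=-1)
            self.metric.add_batch(
                predictions=self.accelerator.gather(predictions),
                references=self.accelerator.gather(batch["labels"]),
            )
        return self.metric.compute()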