in src/open-r1-multimodal/src/open_r1/trainer/vllm_grpo_trainer.py [0:0]
def __init__(
self,
model: Union[str, PreTrainedModel],
reward_funcs: Union[RewardFunc, list[RewardFunc]],
args: GRPOConfig = None,
train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
eval_dataset: Optional[
Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]
] = None,
processing_class: Optional[PreTrainedTokenizerBase] = None,
reward_processing_classes: Optional[
Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]
] = None,
callbacks: Optional[list[TrainerCallback]] = None,
optimizers: tuple[
Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]
] = (None, None),
peft_config: Optional["PeftConfig"] = None,
# qwen2-vl related params
max_pixels: Optional[int] = 12845056,
min_pixels: Optional[int] = 3136,
attn_implementation: str = "flash_attention_2",