in trl/trainer/grpo_trainer.py
def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None:
    mode = "train" if self.model.training else "eval"
    metrics = {key: sum(val) / len(val) for key, val in self._metrics[mode].items()}  # average the metrics

    # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs`
    # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format.
    if mode == "eval":
        metrics = {f"eval_{key}": val for key, val in metrics.items()}

    logs = {**logs, **metrics}
    super().log(logs, start_time)
    self._metrics[mode].clear()
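
    # Log a sample of the collected prompts/completions, but only from the main process to avoid duplicates.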
    if self.accelerator.is_main_process and self.log_completions:
        if is_rich_available():
            print_prompt_completions_sample(
                self._textual_logs["prompt"],
                self._textual_logs["completion"],
                self._textual_logs["rewards"],
                self._textual_logs["advantages"],
                self.state.global_step,
                self.num_completions_to_print,
            )
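        # When reporting to Weights & Biases, also log the same samples as a wandb.Table.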
        if self.args.report_to and "wandb" in self.args.report_to and wandb.run is not None:
            import pandas as pd

            table = {
                "step": [str(self.state.global_step)] * len(self._textual_logs["prompt"]),
                "prompt": self._textual_logs["prompt"],
                "completion": self._textual_logs["completion"],
                **self._textual_logs["rewards"],
                "advantage": self._textual_logs["advantages"],
            }
            df = pd.DataFrame(table)
            if self.wandb_log_unique_prompts:
                df = df.drop_duplicates(subset=["prompt"])
            wandb.log({"completions": wandb.Table(dataframe=df)})