in trl/trainer/grpo_trainer.py [0:0]
def _sync_fsdp_params_to_vllm(self, module: nn.Module, prefix: str = "", visited=None):
"""Memory-efficient post-order traversal of FSDP modules to extract full parameters and sync with vLLM."""
if visited is None:
visited = set()
for child_name, child_module in module.named_children():
child_prefix = f"{prefix}.{child_name}" if prefix else child_name
self._sync_fsdp_params_to_vllm(
child_module, prefix=child_prefix, visited=visited
) # recurse into the child
if isinstance(module, FSDP):
with FSDP.summon_full_params(module, recurse=False, writeback=False):
for param_name, param in module.named_parameters():
full_name = f"{prefix}.{param_name}" if prefix else param_name
for extra in ("_fsdp_wrapped_module.", "_checkpoint_wrapped_module."):
full_name = full_name.replace(extra, "")
if full_name in visited:
continue # skip FSDP subtrees already traversed
visited.add(full_name)
if self.vllm_mode == "server" and self.accelerator.is_main_process:
self.vllm_client.update_named_param(full_name, param.data)
elif self.vllm_mode == "colocate":
llm_model = self.llm.llm_engine.model_executor.driver_worker.model_runner.model
llm_model.load_weights([(full_name, param.data)])