in inference/model.py [0:0]
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Apply this row-parallel linear layer to ``x``.

    The local product is computed with ``linear(x, self.weight)``. When
    ``world_size`` is greater than 1, that result is passed to
    ``dist.all_reduce`` so it is combined across ranks in place. The bias,
    if the layer has one, is added only after the reduction.

    Args:
        x (torch.Tensor): Input tensor.

    Returns:
        torch.Tensor: The layer output after the optional all-reduce and
            bias addition.
    """
    out = linear(x, self.weight)

    # A single-process run has nothing to combine, so skip the collective.
    if world_size > 1:
        dist.all_reduce(out)

    bias = self.bias
    if bias is None:
        return out

    # In-place add, performed after the reduce so the bias is applied once
    # to the combined result rather than once per rank's partial result.
    out += bias
    return out