# tzrec/ops/jagged_tensors.py
# Copyright (c) 2025, Alibaba Group;
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# We use the jagged_tensors ops from generative-recommenders as a starting point:
# https://github.com/facebookresearch/generative-recommenders
# Thanks for their public work.

from typing import Optional, Tuple

import torch
from torch.fx._symbolic_trace import is_fx_tracing

from tzrec.ops import Kernel
from tzrec.ops.pytorch.pt_jagged_tensors import (
pytorch_concat_2D_jagged,
pytorch_hstu_concat_l2_embeddings,
pytorch_hstu_split_l2_embeddings,
pytorch_jagged_dense_bmm_broadcast_add,
pytorch_split_2D_jagged,
)
from tzrec.ops.triton.triton_jagged_tensors import (
triton_concat_2D_jagged,
triton_jagged_dense_bmm_broadcast_add,
triton_split_2D_jagged,
)
torch.fx.wrap("triton_concat_2D_jagged")
torch.fx.wrap("triton_split_2D_jagged")
def concat_2D_jagged(
values_left: torch.Tensor,
values_right: torch.Tensor,
max_len_left: int,
max_len_right: int,
offsets_left: Optional[torch.Tensor],
offsets_right: Optional[torch.Tensor],
kernel: Kernel = Kernel.PYTORCH,
) -> torch.Tensor:
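    """Concatenate two 2D jagged tensors along the sequence dimension.

    For each batch element, the rows of ``values_left`` come first, followed
    by the rows of ``values_right``. Per-batch row boundaries are given by
    ``offsets_left`` / ``offsets_right``; if an offsets tensor is None, that
    side is treated as dense with ``max_len_left`` / ``max_len_right`` rows
    per batch.
    """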
if not is_fx_tracing():
torch._assert(values_left.dim() == 2, "values_left must be 2D")
torch._assert(values_right.dim() == 2, "values_right must be 2D")
        torch._assert(
            values_right.shape[1] == values_left.shape[1],
            f"values_left shape[1] must equal values_right shape[1]: "
            f"{values_left.shape[1]} vs {values_right.shape[1]}",
        )
if kernel == Kernel.TRITON:
return triton_concat_2D_jagged(
values_left=values_left,
values_right=values_right,
max_len_left=max_len_left,
max_len_right=max_len_right,
offsets_left=offsets_left,
offsets_right=offsets_right,
)
else:
return pytorch_concat_2D_jagged(
values_left=values_left,
values_right=values_right,
max_len_left=max_len_left,
max_len_right=max_len_right,
offsets_left=offsets_left,
offsets_right=offsets_right,
)
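

# A minimal usage sketch for concat_2D_jagged, with illustrative shapes and the
# default PyTorch kernel (not taken from the original tests):
#
#   offsets_left = torch.tensor([0, 2, 3])   # per-batch left lengths [2, 1]
#   offsets_right = torch.tensor([0, 1, 3])  # per-batch right lengths [1, 2]
#   values_left = torch.randn(3, 8)          # 3 = sum of left lengths, D = 8
#   values_right = torch.randn(3, 8)         # 3 = sum of right lengths
#   out = concat_2D_jagged(
#       values_left=values_left,
#       values_right=values_right,
#       max_len_left=2,
#       max_len_right=2,
#       offsets_left=offsets_left,
#       offsets_right=offsets_right,
#   )
#   # out has 6 rows, laid out per batch:
#   # [left_0 (2 rows), right_0 (1 row), left_1 (1 row), right_1 (2 rows)]
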
def split_2D_jagged(
max_seq_len: int,
values: torch.Tensor,
max_len_left: Optional[int] = None,
max_len_right: Optional[int] = None,
offsets_left: Optional[torch.Tensor] = None,
offsets_right: Optional[torch.Tensor] = None,
kernel: Kernel = Kernel.PYTORCH,
) -> Tuple[torch.Tensor, torch.Tensor]:
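    """Split a 2D jagged tensor into left and right parts per batch.

    The inverse of ``concat_2D_jagged``: for each batch element, the leading
    rows go to the left output and the remaining rows to the right output.
    At least one of ``offsets_left`` / ``offsets_right`` must be provided;
    for a side without offsets, ``max_len_left`` / ``max_len_right`` gives
    its fixed per-batch length.
    """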
if not is_fx_tracing():
torch._assert(values.dim() == 2, "values must be 2D")
        torch._assert(
            offsets_left is not None or offsets_right is not None,
            "offsets_left and offsets_right must not both be None",
        )
if offsets_left is None:
torch._assert(
max_len_left is not None,
"max_len_left must be provided when offsets_left is None",
)
if offsets_right is None:
torch._assert(
max_len_right is not None,
"max_len_right must be provided when offsets_right is None",
)
if offsets_left is not None and offsets_right is not None:
torch._assert(
offsets_left.shape[0] == offsets_right.shape[0],
"offsets_left shape[0] must be equal to offsets_right shape[0]",
)
if kernel == Kernel.TRITON:
return triton_split_2D_jagged(
max_seq_len=max_seq_len,
values=values,
max_len_left=max_len_left,
max_len_right=max_len_right,
offsets_left=offsets_left,
offsets_right=offsets_right,
)
else:
return pytorch_split_2D_jagged(
max_seq_len=max_seq_len,
values=values,
max_len_left=max_len_left,
max_len_right=max_len_right,
offsets_left=offsets_left,
offsets_right=offsets_right,
)
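

# A minimal round-trip sketch for split_2D_jagged, continuing the illustrative
# concat_2D_jagged example above (default PyTorch kernel assumed): splitting the
# concatenated output with the same offsets recovers both inputs.
#
#   left, right = split_2D_jagged(
#       max_seq_len=3,          # max combined (left + right) length per batch
#       values=out,
#       offsets_left=offsets_left,
#       offsets_right=offsets_right,
#   )
#   # left == values_left and right == values_right, row for row
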
def hstu_split_l2_embeddings(
max_seq_len: int,
x: torch.Tensor,
minus_l2_offsets: torch.Tensor,
l2_offsets: torch.Tensor,
contextual_seq_len: int,
kernel: Kernel = Kernel.PYTORCH,
) -> Tuple[torch.Tensor, torch.Tensor]:
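    """Split HSTU embeddings into minus-l2 and l2 parts.

    ``x`` is a 2D jagged tensor whose per-batch lengths are the sum of the
    ``minus_l2_offsets`` and ``l2_offsets`` lengths; the first
    ``contextual_seq_len`` rows of each sequence are routed to the l2 part.
    """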
if kernel == Kernel.TRITON:
return triton_split_2D_jagged(
max_seq_len=max_seq_len,
values=x,
max_len_left=None,
max_len_right=None,
offsets_left=minus_l2_offsets,
offsets_right=l2_offsets,
n_prefix_to_right=contextual_seq_len,
)
else:
return pytorch_hstu_split_l2_embeddings(
max_seq_len=max_seq_len,
x=x,
minus_l2_offsets=minus_l2_offsets,
l2_offsets=l2_offsets,
contextual_seq_len=contextual_seq_len,
)


def hstu_concat_l2_embeddings(
max_minus_l2_len: int,
minus_l2_x: torch.Tensor,
minus_l2_offsets: torch.Tensor,
max_l2_len: int,
l2_x: torch.Tensor,
l2_offsets: torch.Tensor,
contextual_seq_len: int,
kernel: Kernel = Kernel.PYTORCH,
) -> torch.Tensor:
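    """Concatenate minus-l2 and l2 embeddings back into one jagged tensor.

    The inverse of ``hstu_split_l2_embeddings``: for each batch element, the
    first ``contextual_seq_len`` rows of the output are taken from the l2
    side before the remaining minus-l2 and l2 rows are concatenated.
    """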
if kernel == Kernel.TRITON:
return triton_concat_2D_jagged(
values_left=minus_l2_x,
values_right=l2_x,
max_len_left=max_minus_l2_len,
max_len_right=max_l2_len,
offsets_left=minus_l2_offsets,
offsets_right=l2_offsets,
n_prefix_from_right=contextual_seq_len,
)
else:
return pytorch_hstu_concat_l2_embeddings(
contextual_seq_len=contextual_seq_len,
max_minus_l2_len=max_minus_l2_len,
minus_l2_x=minus_l2_x,
minus_l2_offsets=minus_l2_offsets,
max_l2_len=max_l2_len,
l2_x=l2_x,
l2_offsets=l2_offsets,
)
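

# Note: hstu_concat_l2_embeddings is the inverse of hstu_split_l2_embeddings.
# Splitting x with (minus_l2_offsets, l2_offsets, contextual_seq_len) and then
# concatenating the two halves back with the same offsets reproduces x, with
# the contextual_seq_len prefix rows routed to/from the l2 side.
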
def jagged_dense_bmm_broadcast_add(
max_seq_len: int,
seq_offsets: torch.Tensor,
jagged: torch.Tensor,
dense: torch.Tensor,
bias: torch.Tensor,
kernel: Kernel = Kernel.PYTORCH,
) -> torch.Tensor:
"""Computing out = jagged x dense + bias.
jagged has shape (sum_B(M_i), K), dense has shape (B, K, N), and bias has
shape (B, N), out has shape (sum_B(M_i), N)
"""
if not is_fx_tracing():
_, K = jagged.shape
B, _, N = dense.shape
torch._assert(dense.shape[1] == K, "wrong dense shape[1]")
torch._assert(seq_offsets.shape[0] == B + 1, "wrong seq_offsets shape[0]")
torch._assert(bias.shape[0] == B, "wrong bias shape[0]")
torch._assert(bias.shape[1] == N, "wrong bias shape[1]")
if kernel == Kernel.TRITON:
return triton_jagged_dense_bmm_broadcast_add(
max_seq_len=max_seq_len,
seq_offsets=seq_offsets,
jagged=jagged,
dense=dense,
bias=bias,
)
else:
return pytorch_jagged_dense_bmm_broadcast_add(
max_seq_len=max_seq_len,
seq_offsets=seq_offsets,
jagged=jagged,
dense=dense,
bias=bias,
)
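

if __name__ == "__main__":
    # A minimal smoke test for jagged_dense_bmm_broadcast_add with illustrative
    # shapes, using the default PyTorch kernel: B = 2 sequences of lengths
    # [2, 3], K = 4, N = 5.
    seq_offsets = torch.tensor([0, 2, 5])
    jagged = torch.randn(5, 4)    # (sum_B(M_i), K)
    dense = torch.randn(2, 4, 5)  # (B, K, N)
    bias = torch.randn(2, 5)      # (B, N)
    out = jagged_dense_bmm_broadcast_add(
        max_seq_len=3,
        seq_offsets=seq_offsets,
        jagged=jagged,
        dense=dense,
        bias=bias,
    )
    # Each sequence is multiplied by its batch's dense matrix, then that
    # batch's bias row is broadcast-added over the sequence's rows.
    assert out.shape == (5, 5)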