in kernels/fmha/smem_tile.h [210:221]
// Step the read offset forward to the next buffer of this tile.
//
// Does nothing when BUFFERS_PER_TILE is not greater than 1. Otherwise:
//   - if smem_read_offset_ has reached BYTES_PER_TILE_INC_BOUNDARY, that
//     amount is subtracted from it (presumably rewinding to the first
//     buffer -- confirm against the definition of the constant);
//   - else the offset is advanced by BYTES_PER_BUFFER.
inline __device__ void move_to_next_read_buffer() {
    if( BUFFERS_PER_TILE > 1 ) {
        // True once the offset has reached the rewind threshold.
        const bool past_boundary = this->smem_read_offset_ >= BYTES_PER_TILE_INC_BOUNDARY;
        if( past_boundary ) {
            this->smem_read_offset_ -= BYTES_PER_TILE_INC_BOUNDARY;
        } else {
            this->smem_read_offset_ += BYTES_PER_BUFFER;
        }
    }
}