in kernels/fmha/smem_tile.h [1599:1605]
// Step the read pointer (smem_read_buffer_) to the next buffer of the tile.
//
// Nothing happens unless the tile has more than one buffer. Otherwise the
// pointer's offset from smem_ is compared against ROWS_PER_TILE_INC_BOUNDARY:
//   - offset >= boundary : rewind the pointer by ROWS_PER_TILE_INC_BOUNDARY;
//   - offset <  boundary : advance the pointer by ROWS.
// NOTE(review): presumably ROWS_PER_TILE_INC_BOUNDARY marks the start of the
// last buffer so the rewind wraps back to the first one -- confirm against the
// constant's definition elsewhere in the class.
inline __device__ void move_to_next_read_buffer() {
    // Single-buffer tiles: the read pointer never moves.
    if( BUFFERS_PER_TILE <= 1 ) {
        return;
    }

    if( (smem_read_buffer_ - smem_) >= ROWS_PER_TILE_INC_BOUNDARY ) {
        // At (or past) the boundary: step back by the boundary amount.
        this->smem_read_buffer_ -= ROWS_PER_TILE_INC_BOUNDARY;
    } else {
        // Still below the boundary: step forward by one buffer's worth (ROWS).
        this->smem_read_buffer_ += ROWS;
    }
}