xformers/sparse/csr_tensor.py (4 lines):
    - line 150: # TODO: check if need to return this or not
    - line 195: # TODO add bias here
    - line 282: # TODO this is not always true, but is a fast approximation for now
    - line 429: # TODO: check this

experimental/ragged_inference/triton_v2_ragged_qk_dotprod.py (3 lines):
    - line 23: # TODO: tune these
    - line 153: # TODO: link to a drawing of what these tensors are
    - line 321: # TODO: flag use zeros for garbage

experimental/ragged_inference/garbage_pad_ragged_acts.py (3 lines):
    - line 101: # TODO: flag use zeros for garbage
    - line 170: # TODO: Build LUT
    - line 179: # TODO: Add the QK dotprod to get scores

xformers/components/attention/nystrom.py (2 lines):
    - line 96: # TODO: update defaults for use_razavi_pinverse and inv_iterations
    - line 128: # TODO: should be able to not have to pass in num_heads

xformers/triton/k_dropout.py (1 line):
    - line 179: seed = SEEDS + col_id  # FIXME index the seed properly

xformers/factory/model_factory.py (1 line):
    - line 237: # TODO: pass in key and value independently.

xformers/components/attention/core.py (1 line):
    - line 192: # TODO assume we have (N, S, hs) instead of (B, nh, S, hs), with N = B x nh

xformers/components/attention/ortho.py (1 line):
    - line 108: # FIXME: Should we still accept a mask in that case ?

xformers/components/attention/_sputnik_sparse.py (1 line):
    - line 12: # TODO: this is here for BC

xformers/components/attention/csrc/cpu/matmul.cpp (1 line):
    - line 17: int64_t grain_size = 128;  // TODO: tune this

xformers/triton/k_layer_norm.py (1 line):
    - line 243: # FIXME: @lefaudeux tensor shape changes are not well handled, see shape3

xformers/components/attention/csrc/autograd/matmul.cpp (1 line):
    - line 39: // TODO: compute grad only if they require grad

xformers/components/attention/csrc/cuda/spmm.cu (1 line):
    - line 829: // TODO investigate misaligned address errors in values ptr
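As context for the autograd TODO above ("compute grad only if they require grad" in csrc/autograd/matmul.cpp), the usual PyTorch fix is to gate each backward computation on `needs_input_grad`. The sketch below is a generic Python illustration of that pattern, assuming a plain dense matmul; the `GatedMatMul` class and its shapes are hypothetical and not the actual xformers C++ implementation.

    import torch

    class GatedMatMul(torch.autograd.Function):
        """Toy custom matmul showing the gradient-gating pattern."""

        @staticmethod
        def forward(ctx, a, b):
            ctx.save_for_backward(a, b)
            return a @ b

        @staticmethod
        def backward(ctx, grad_out):
            a, b = ctx.saved_tensors
            grad_a = grad_b = None
            # Only spend compute on gradients that will actually be used.
            if ctx.needs_input_grad[0]:
                grad_a = grad_out @ b.transpose(-2, -1)
            if ctx.needs_input_grad[1]:
                grad_b = a.transpose(-2, -1) @ grad_out
            return grad_a, grad_b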