faiss/gpu/impl/PQCodeLoad.cuh (12 lines): - line 86: // FIXME: this is a non-coalesced, unaligned, non-vectorized load - line 92: // FIXME: this is also slow, since we have to recover the - line 129: // FIXME: this is a non-coalesced, unaligned, non-vectorized load - line 157: // FIXME: this is a non-coalesced, unaligned, non-vectorized load - line 174: // FIXME: this is a non-coalesced, unaligned, 2-vectorized load - line 195: // FIXME: this is a non-coalesced, unaligned, non-vectorized load - line 214: // FIXME: this is a non-coalesced load - line 232: // FIXME: this is a non-coalesced, unaligned, 2-vectorized load - line 259: // FIXME: this is a non-coalesced load - line 283: // FIXME: this is a non-coalesced, unaligned, 2-vectorized load - line 316: // FIXME: this is a non-coalesced load - line 346: // FIXME: this is a non-coalesced load faiss/gpu/utils/MergeNetworkBlock.cuh (4 lines): - line 66: // FIXME: is this a CUDA 9 compiler bug? - line 97: // FIXME: is this a CUDA 9 compiler bug? - line 153: // FIXME: is this a CUDA 9 compiler bug? - line 189: // FIXME: is this a CUDA 9 compiler bug? faiss/gpu/impl/PQCodeDistances-inl.cuh (4 lines): - line 103: // FIXME: investigate loading separately, so we don't need this - line 137: // FIXME: try always making this centroid id 0 so we can - line 621: // FIXME: tune - line 631: // FIXME: probably impractical for large # of dims? faiss/gpu/GpuIndexIVF.cu (4 lines): - line 56: // FIXME: inherit our same device - line 104: // FIXME: inherit our same device - line 108: // FIXME: 2 different float16 options? - line 111: // FIXME: 2 different float16 options? faiss/gpu/utils/MergeNetworkWarp.cuh (4 lines): - line 272: // FIXME: compiler doesn't like this expression? compiler bug? - line 302: // FIXME: compiler doesn't like this expression? compiler bug? - line 359: // FIXME: compiler doesn't like this expression? compiler bug? - line 389: // FIXME: compiler doesn't like this expression? compiler bug? faiss/gpu/impl/BroadcastSum.cu (3 lines): - line 32: // FIXME: if we have exact multiples, don't need this - line 141: // FIXME: if we have exact multiples, don't need this - line 216: // FIXME: speed up faiss/gpu/impl/IVFAppend.cu (3 lines): - line 98: // FIXME: there could be overflow here, but where should we check this? - line 288: // FIXME: stride with threads instead of single thread - line 366: // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? faiss/gpu/impl/PQScanMultiPassPrecomputed.cu (2 lines): - line 54: // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? - line 609: // FIXME: we should adjust queryTileSize to deal with this, since faiss/python/__init__.py (2 lines): - line 1134: # TODO: once deprecated classes are removed, remove the dict and just use .lower() below - line 1182: # TODO check class name faiss/gpu/StandardGpuResources.cpp (2 lines): - line 437: // FIXME: as of CUDA 11, a memory allocation error appears to be - line 458: // FIXME: as of CUDA 11, a memory allocation error appears to be faiss/gpu/impl/PQScanMultiPassNoPrecomputed-inl.cuh (2 lines): - line 48: // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? - line 592: // FIXME: we should adjust queryTileSize to deal with this, since faiss/gpu/impl/IVFPQ.cu (2 lines): - line 322: // FIXME: why are we doing this? - line 575: // FIXME: we might ultimately be calling this function with inputs faiss/utils/partitioning.cpp (2 lines): - line 183: // FIXME avoid a second pass over the array to sample the threshold - line 821: /// FIXME when MSB of uint16 is set faiss/IndexIVFPQ.cpp (2 lines): - line 199: // TODO: parallelize? - line 318: // TODO: parallelize? faiss/gpu/impl/IVFFlatScan.cu (2 lines): - line 69: // FIXME: why does getLaneId() not work when we write out below!?!?! - line 401: // FIXME: we should adjust queryTileSize to deal with this, since faiss/utils/simdlib_neon.h (2 lines): - line 10: // TODO: Support big endian (currently supporting only little endian) - line 490: // TODO find a better name faiss/gpu/GpuIndexIVFPQ.cu (2 lines): - line 245: // FIXME jhj convert to _n version - line 317: // FIXME: GPUize more of this faiss/IndexIVFPQFastScan.cpp (2 lines): - line 260: // TODO should not need stable - line 265: // TODO parallelize faiss/gpu/utils/HostTensor-inl.cuh (2 lines): - line 215: // FIXME: type-specific abs() - line 220: // FIXME: type-specific abs faiss/gpu/GpuIndexFlat.cu (2 lines): - line 254: // FIXME jhj: kernel for copy - line 272: // FIXME jhj: kernel for copy faiss/utils/simdlib.h (1 line): - line 30: // FIXME: make a SSE version faiss/gpu/utils/StackDeviceMemory.cpp (1 line): - line 70: // FIXME: make sure there are no outstanding memory allocations? faiss/invlists/OnDiskInvertedLists.cpp (1 line): - line 561: // TODO shrink global storage if needed faiss/IndexIVF.cpp (1 line): - line 1158: : dis > radius; // TODO templatize to remove this test faiss/IndexShards.cpp (1 line): - line 150: // FIXME: assumes that nothing is currently running on the sub-indexes, which is faiss/IndexIVFAdditiveQuantizer.cpp (1 line): - line 202: // TODO find a way to provide the nprobes together to do a matmul contrib/torch_utils.py (1 line): - line 398: # FIXME: no rev_swig_ptr equivalent for torch.Tensor, just convert faiss/gpu/GpuIndexIVFFlat.cu (1 line): - line 158: // FIXME: GPUize more of this contrib/exhaustive_search.py (1 line): - line 217: # TODO: all result manipulations are in python, should move to C++ if perf faiss/utils/hamming-inl.h (1 line): - line 59: nbit -= 8; // TODO remove nbit faiss/utils/simdlib_avx2.h (1 line): - line 212: // TODO find a better name faiss/gpu/utils/MatrixMult-inl.cuh (1 line): - line 51: // FIXME: some weird CUDA 11 bug? where cublasSgemmEx on faiss/python/python_callbacks.cpp (1 line): - line 52: // TODO check nb of bytes written faiss/gpu/utils/DeviceUtils.cu (1 line): - line 125: // FIXME: what to use for managed memory? faiss/gpu/GpuDistance.cu (1 line): - line 139: // FIXME: convert to int32_t everywhere? faiss/gpu/utils/Limits.cuh (1 line): - line 21: // FIXME: faiss CPU uses +/-FLT_MAX instead of +/-infinity faiss/gpu/GpuIndexIVFScalarQuantizer.cu (1 line): - line 204: // FIXME: GPUize more of this faiss/impl/ScalarQuantizer.cpp (1 line): - line 595: // TODO just do a qucikselect faiss/impl/index_write.cpp (1 line): - line 71: * TODO: in this file, the read functions that encouter errors may faiss/gpu/utils/Tensor-inl.cuh (1 line): - line 552: // FIXME: maybe also consider offset in bytes? multiply by sizeof(T)? faiss/utils/simdlib_emulated.h (1 line): - line 242: // TODO find a better name faiss/gpu/impl/Distance.cu (1 line): - line 107: // FIXME: optimize with a dedicated kernel faiss/gpu/impl/IVFFlat.cu (1 line): - line 256: // FIXME: we might ultimately be calling this function with inputs faiss/gpu/impl/IVFInterleaved.cuh (1 line): - line 68: // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? faiss/IndexReplicas.cpp (1 line): - line 158: // FIXME: assumes that nothing is currently running on the sub-indexes, which is faiss/gpu/impl/IVFBase.cu (1 line): - line 455: // FIXME: really this can be into pinned memory and a true async faiss/gpu/utils/ThrustAllocator.cuh (1 line): - line 49: // FIXME: we cannot use temporary memory for new requests because faiss/gpu/impl/Distance.cuh (1 line): - line 21: /// FIXME: the output distances must fit in GPU memory faiss/IVFlib.cpp (1 line): - line 53: // TODO: check as thoroughfully for other index types faiss/impl/simd_result_handlers.h (1 line): - line 130: if (with_id_map) { // FIXME test on q_map instead faiss/gpu/impl/L2Select.cu (1 line): - line 45: // FIXME: if we have exact multiples, don't need this faiss/gpu/GpuIndex.cu (1 line): - line 37: // FIXME: parameterize based on algorithm need faiss/impl/AdditiveQuantizer.cpp (1 line): - line 261: // TODO: make tree of partial sums