Path Lines of Code bench/BenchUtils.h 415 bench/EmbeddingSpMDM8BitBenchmark.cc 282 bench/EmbeddingSpMDMBenchmark.cc 293 bench/EmbeddingSpMDMNBitBenchmark.cc 278 bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc 309 bench/RowwiseAdagradBenchmark.cc 189 bench/RowwiseAdagradFusedBenchmark.cc 162 bench/SparseAdagradBenchmark.cc 190 cmake/modules/FindMKL.cmake 252 fbgemm_gpu/bench/merge_embeddings_benchmark.py 440 fbgemm_gpu/codegen/embedding_backward_split_cpu_approx_template.cpp 191 fbgemm_gpu/codegen/embedding_backward_split_cpu_template.cpp 340 fbgemm_gpu/codegen/embedding_backward_split_indice_weights_template.cu 274 fbgemm_gpu/codegen/embedding_forward_quantized_split_template.cu 917 fbgemm_gpu/codegen/embedding_forward_split_cpu.cpp 526 fbgemm_gpu/fbgemm_gpu/split_embedding_inference_converter.py 145 fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py 1963 fbgemm_gpu/include/fbgemm_gpu/fbgemm_cuda_utils.cuh 1948 fbgemm_gpu/include/fbgemm_gpu/quantize_ops_utils.h 55 fbgemm_gpu/include/fbgemm_gpu/sparse_ops_utils.h 214 fbgemm_gpu/setup.py 144 fbgemm_gpu/src/cumem_utils.cu 305 fbgemm_gpu/src/cumem_utils_host.cpp 34 fbgemm_gpu/src/jagged_tensor_ops.cu 1150 fbgemm_gpu/src/jagged_tensor_ops_cpu.cpp 967 fbgemm_gpu/src/merge_pooled_embeddings_gpu.cpp 341 fbgemm_gpu/src/permute_pooled_embedding_ops.cu 74 fbgemm_gpu/src/quantize_ops.cu 666 fbgemm_gpu/src/sparse_ops.cu 1953 fbgemm_gpu/src/split_embeddings_cache_cuda.cu 1803 include/fbgemm/Fbgemm.h 804 include/fbgemm/FbgemmEmbedding.h 234 include/fbgemm/QuantUtils.h 224 src/EmbeddingSpMDM.cc 1251 src/EmbeddingSpMDMNBit.cc 1170 src/ExecuteKernelU8S8.cc 486 src/FbgemmI64.cc 409 src/FbgemmI8Depthwise2DAvx2-inl.h 656 src/FbgemmI8Spmdm.cc 296 src/FbgemmSparseDenseVectorInt8Avx512.cc 217 src/GroupwiseConv.cc 938 src/PackAMatrix.cc 153 src/PackAWithIm2Col.cc 645 src/PackAWithRowOffset.cc 201 src/PackWeightMatrixForGConv.cc 189 src/RowWiseSparseAdagradFused.cc 823 src/SparseAdagrad.cc 842 src/Utils.cc 397