int main()

in faiss/gpu/perf/PerfIVFPQAdd.cpp [32:160]


int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    cudaProfilerStop();

    int dim = FLAGS_dim;
    int numCentroids = FLAGS_centroids;
    int bytesPerVec = FLAGS_bytes_per_vec;
    int bitsPerCode = FLAGS_bits_per_code;

    faiss::gpu::StandardGpuResources res;

    // IndexIVFPQ will complain, but just give us enough to get through this
    int numTrain = 4 * numCentroids;
    std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);

    faiss::IndexFlatL2 coarseQuantizer(dim);
    faiss::IndexIVFPQ cpuIndex(
            &coarseQuantizer, dim, numCentroids, bytesPerVec, bitsPerCode);
    if (FLAGS_time_cpu) {
        cpuIndex.train(numTrain, trainVecs.data());
    }

    faiss::gpu::GpuIndexIVFPQConfig config;
    config.device = 0;
    config.indicesOptions = (faiss::gpu::IndicesOptions)FLAGS_index;

    faiss::gpu::GpuIndexIVFPQ gpuIndex(
            &res,
            dim,
            numCentroids,
            bytesPerVec,
            bitsPerCode,
            faiss::METRIC_L2,
            config);

    if (FLAGS_time_gpu) {
        gpuIndex.train(numTrain, trainVecs.data());
        if (FLAGS_reserve_memory) {
            size_t numVecs = (size_t)FLAGS_batches * (size_t)FLAGS_batch_size;
            gpuIndex.reserveMemory(numVecs);
        }
    }

    cudaDeviceSynchronize();
    CUDA_VERIFY(cudaProfilerStart());

    float totalGpuTime = 0.0f;
    float totalCpuTime = 0.0f;

    for (int i = 0; i < FLAGS_batches; ++i) {
        if (!FLAGS_per_batch_time) {
            if (i % 10 == 0) {
                printf("Adding batch %d\n", i + 1);
            }
        }

        auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);

        if (FLAGS_time_gpu) {
            faiss::gpu::CpuTimer timer;
            gpuIndex.add(FLAGS_batch_size, addVecs.data());
            CUDA_VERIFY(cudaDeviceSynchronize());
            auto time = timer.elapsedMilliseconds();

            totalGpuTime += time;

            if (FLAGS_per_batch_time) {
                printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
                       i + 1,
                       FLAGS_batch_size,
                       time,
                       time / (float)FLAGS_batch_size);
            }
        }

        if (FLAGS_time_cpu) {
            faiss::gpu::CpuTimer timer;
            cpuIndex.add(FLAGS_batch_size, addVecs.data());
            auto time = timer.elapsedMilliseconds();

            totalCpuTime += time;

            if (FLAGS_per_batch_time) {
                printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
                       i + 1,
                       FLAGS_batch_size,
                       time,
                       time / (float)FLAGS_batch_size);
            }
        }
    }

    CUDA_VERIFY(cudaProfilerStop());

    int total = FLAGS_batch_size * FLAGS_batches;

    if (FLAGS_time_gpu) {
        printf("%d dim, %d centroids, %d x %d encoding\n"
               "GPU time to add %d vectors (%d batches, %d per batch): "
               "%.3f ms (%.3f us per)\n",
               dim,
               numCentroids,
               bytesPerVec,
               bitsPerCode,
               total,
               FLAGS_batches,
               FLAGS_batch_size,
               totalGpuTime,
               totalGpuTime * 1000.0f / (float)total);
    }

    if (FLAGS_time_cpu) {
        printf("%d dim, %d centroids, %d x %d encoding\n"
               "CPU time to add %d vectors (%d batches, %d per batch): "
               "%.3f ms (%.3f us per)\n",
               dim,
               numCentroids,
               bytesPerVec,
               bitsPerCode,
               total,
               FLAGS_batches,
               FLAGS_batch_size,
               totalCpuTime,
               totalCpuTime * 1000.0f / (float)total);
    }

    return 0;
}