Status GdrMemoryManager::Init()

in tensorflow_networking/gdr/gdr_memory_manager.cc [184:266]


Status GdrMemoryManager::Init() {
  // Resolves the local RDMA listening address, creates and configures the
  // passive (server-side) endpoint, and instruments the CPU/GPU allocators so
  // that every allocation is registered as an RDMA-capable memory region.
  // Returns OK on success, or Unavailable/Unimplemented on setup failure.
  rdma_addrinfo* addrinfo;
  rdma_addrinfo hints = {};
  hints.ai_port_space = RDMA_PS_TCP;
  hints.ai_flags = RAI_PASSIVE;  // We are the listening (server) side.
  if (rdma_getaddrinfo(const_cast<char*>(host_.c_str()),
                       const_cast<char*>(port_.c_str()), &hints, &addrinfo)) {
    return errors::Unavailable(strerror(errno), ": ", "cannot resolve rdma://",
                               host_, ":", port_);
  }

  // Queue-pair capabilities inherited by endpoints accepted off this id:
  // reliable connection, deep receive queue, minimal send resources.
  ibv_qp_init_attr init_attr = {};
  init_attr.qp_type = IBV_QPT_RC;
  init_attr.cap.max_recv_wr = 1024;
  init_attr.cap.max_send_wr = 1;
  init_attr.cap.max_recv_sge = 1;
  init_attr.cap.max_send_sge = 1;

  // Create listening endpoint.
  rdma_cm_id* id;
  if (rdma_create_ep(&id, addrinfo, nullptr, &init_attr)) {
    // Preserve errno: rdma_freeaddrinfo may clobber it before we format
    // the error message. Previously this path leaked `addrinfo`.
    const int create_errno = errno;
    rdma_freeaddrinfo(addrinfo);
    return errors::Unavailable(strerror(create_errno), ": ",
                               "cannot bind to rdma://", host_, ":", port_);
  }
  listening_.reset(id);
  rdma_freeaddrinfo(addrinfo);

  // Listen without backlog.
  if (rdma_listen(listening_.get(), 0)) {
    return errors::Unavailable(strerror(errno), ": ",
                               "cannot listen on rdma://", host_, ":", port_);
  }
  LOG(INFO) << "RDMA server is listening on " << host_ << ":" << port_;

  // A wildcard bind that is not tied to a specific device leaves `verbs`
  // null; we need a concrete device context below (NUMA lookup, MR ops).
  if (listening_->verbs == nullptr) {
    return errors::Unimplemented(
        "Unsupported address ", host_, ":", port_,
        " as it does not bind to a particular RDMA device");
  }

  // Put the event channel in non-blocking mode so connection events can be
  // polled. Check F_GETFL too: OR-ing O_NONBLOCK into -1 would be wrong.
  int flags = fcntl(listening_->channel->fd, F_GETFL, 0);
  if (flags == -1 ||
      fcntl(listening_->channel->fd, F_SETFL, flags | O_NONBLOCK)) {
    return errors::Unavailable(strerror(errno), ": ",
                               "cannot set server to non-blocking mode");
  }

  numa_node_ = TryToReadNumaNode(listening_->verbs->device);

  // Register/deregister host memory with the RDMA device as the CPU
  // allocators hand it out and reclaim it.
  SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node,
                                               size_t num_bytes) {
    VLOG(2) << "Registering RDMA capable memory region on numa_node "
            << numa_node;
    InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node));
  };
  SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node,
                                              size_t num_bytes) {
    VLOG(2) << "De-registering RDMA capable memory region on numa_node "
            << numa_node;
    EvictMemoryRegion(ptr, num_bytes);
  };
  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
  LOG(INFO) << "Instrumenting CPU allocator(s)";

  // GPU host (pinned) memory on every NUMA node must also be registered.
  for (int numa_idx = 0; numa_idx < port::NUMANumNodes(); ++numa_idx) {
    GPUProcessState::singleton()->AddGpuHostAllocVisitor(numa_idx,
                                                         alloc_visitor);
    GPUProcessState::singleton()->AddGpuHostFreeVisitor(numa_idx, free_visitor);
  }

  // With GPUDirect RDMA available, device memory itself can be registered,
  // but only for the GPU attached to this device's NUMA node.
  if (IsGDRAvailable()) {
    SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id,
                                                      size_t num_bytes) {
      VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id;
      InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
    };
    GPUProcessState::singleton()->AddGPUAllocVisitor(numa_node_,
                                                     cuda_alloc_visitor);
    LOG(INFO) << "Instrumenting GPU allocator for NUMA " << numa_node_;
  }

  return Status::OK();
}