include/nccl_ofi_sendrecv.h (95 lines of code) (raw):
/*
* Copyright (c) 2023-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef NCCL_OFI_SENDRECV_H_
#define NCCL_OFI_SENDRECV_H_
#include <rdma/fabric.h>
#include "nccl_ofi.h"
#include "nccl_ofi_freelist.h"
#include "nccl_ofi_log.h"
/* This is the initial value of mr_key. At key deregistration time,
 * it is used to check whether a key was actually generated and
 * therefore needs to be freed.
 */
#define MR_KEY_INIT_VALUE FI_KEY_NOTAVAIL
/*
 * States recorded in the 'state' member of a sendrecv request.
 * The numeric values are spelled out explicitly; CREATED is zero.
 */
enum nccl_net_ofi_sendrecv_req_state {
	NCCL_OFI_SENDRECV_REQ_CREATED   = 0,
	NCCL_OFI_SENDRECV_REQ_PENDING   = 1,
	NCCL_OFI_SENDRECV_REQ_COMPLETED = 2,
	NCCL_OFI_SENDRECV_REQ_ERROR     = 3,
};
typedef enum nccl_net_ofi_sendrecv_req_state nccl_net_ofi_sendrecv_req_state_t;
/*
 * Direction recorded in the 'direction' member of a sendrecv request.
 * Zero is the invalid direction, so a zero-initialized value compares
 * equal to NCCL_OFI_SENDRECV_INVALID_DIRECTION.
 */
enum nccl_net_ofi_sendrecv_req_direction {
	NCCL_OFI_SENDRECV_INVALID_DIRECTION = 0,
	NCCL_OFI_SENDRECV_SEND              = 1,
	NCCL_OFI_SENDRECV_RECV              = 2,
};
typedef enum nccl_net_ofi_sendrecv_req_direction nccl_net_ofi_sendrecv_req_direction_t;
/*
 * Memory registration handle of the sendrecv protocol: a registration
 * key paired with a pointer to the libfabric memory region object.
 */
struct nccl_net_ofi_sendrecv_mr_handle {
	/* Memory registration key; MR_KEY_INIT_VALUE is its initial value */
	uint64_t mr_key;

	/* libfabric memory region object */
	struct fid_mr *mr;
};
typedef struct nccl_net_ofi_sendrecv_mr_handle nccl_net_ofi_sendrecv_mr_handle_t;
/*
 * Listen communicator of the sendrecv protocol.
 */
struct nccl_net_ofi_sendrecv_listen_comm {
	/* Base listen communicator. It has to stay the first member
	 * so that pointers to this struct and pointers to its base
	 * struct can be cast to each other. */
	nccl_net_ofi_listen_comm_t base;

	uint64_t tag;
	struct fid_ep *local_ep;
	fi_addr_t local_ep_addr;
	bool accepted;

	/* Temporary state saved while the receive communicator object
	 * is being created */
	save_comm_state_t state;

	/* Connecting peer information (nccl_ofi_connection_info_t),
	 * referenced through a freelist element */
	nccl_ofi_freelist_elem_t *conn_info;
};
typedef struct nccl_net_ofi_sendrecv_listen_comm nccl_net_ofi_sendrecv_listen_comm_t;
/*
 * Send communicator of the sendrecv protocol.
 */
struct nccl_net_ofi_sendrecv_send_comm {
	/* Base send communicator. It has to stay the first member so
	 * that pointers to this struct and pointers to its base struct
	 * can be cast to each other. */
	nccl_net_ofi_send_comm_t base;

	uint64_t num_inflight_reqs;

	/* NOTE(review): judging by the name, the freelist that request
	 * objects are taken from -- confirm against the implementation */
	nccl_ofi_freelist_t *nccl_ofi_reqs_fl;

	uint64_t tag;
	fi_addr_t remote_ep;
	fi_addr_t local_ep_addr;
	struct fid_ep *local_ep;

	/* Connecting peer information (nccl_ofi_connection_info_t),
	 * referenced through a freelist element */
	nccl_ofi_freelist_elem_t *conn_info;
};
typedef struct nccl_net_ofi_sendrecv_send_comm nccl_net_ofi_sendrecv_send_comm_t;
/*
 * Metadata describing the dummy buffer used for flush.
 */
struct nccl_net_ofi_sendrecv_flush_buffer {
	void *host_buffer;
	size_t size;

	/* Memory registration handle of the local buffer */
	nccl_net_ofi_sendrecv_mr_handle_t *mr_handle;
};
typedef struct nccl_net_ofi_sendrecv_flush_buffer nccl_net_ofi_sendrecv_flush_buffer_t;
/*
 * Receive communicator of the sendrecv protocol.
 */
struct nccl_net_ofi_sendrecv_recv_comm {
	/* Base receive communicator. It has to stay the first member
	 * so that pointers to this struct and pointers to its base
	 * struct can be cast to each other. */
	nccl_net_ofi_recv_comm_t base;

	uint64_t num_inflight_reqs;

	/* NOTE(review): judging by the name, the freelist that request
	 * objects are taken from -- confirm against the implementation */
	nccl_ofi_freelist_t *nccl_ofi_reqs_fl;

	uint64_t tag;
	fi_addr_t remote_ep;
	fi_addr_t local_ep_addr;
	struct fid_ep *local_ep;

	/* Flush buffer metadata, embedded by value */
	nccl_net_ofi_sendrecv_flush_buffer_t flush_buff;
};
typedef struct nccl_net_ofi_sendrecv_recv_comm nccl_net_ofi_sendrecv_recv_comm_t;
/**
 * @brief	Sendrecv Endpoint
 *
 * The sendrecv endpoint implements the nccl_net_ofi_ep_t interface
 * for the sendrecv protocol, which communicates through libfabric's
 * fi_tsend and fi_trecv.
 */
struct nccl_net_ofi_sendrecv_ep {
	/* Base endpoint interface. It gives access to the sendrecv
	 * endpoint's functions such as sendrecv_listen() and
	 * sendrecv_connect(): when the endpoint is constructed, the
	 * constructor assigns these functions to the member functions
	 * of the abstract nccl_net_ofi_ep_t endpoint 'base'.
	 *
	 * It has to stay the first member so that pointers to this
	 * struct and pointers to its base struct can be cast to each
	 * other. */
	nccl_net_ofi_ep_t base;

	/* Current available tag ID */
	uint64_t tag;

	/* Copy of the device's max_tag.
	 * NOTE(review): presumably cached here so the device
	 * information does not have to be read -- confirm. */
	uint64_t max_tag;

	/* libfabric endpoint handle used for communication */
	struct fid_ep *ofi_ep;

	/* Address vector handle */
	struct fid_av *av;

	/* Completion queue handle */
	struct fid_cq *cq;

	/* Free list for control messages */
	nccl_ofi_freelist_t *conn_msg_fl;
};
typedef struct nccl_net_ofi_sendrecv_ep nccl_net_ofi_sendrecv_ep_t;
/*
 * Sendrecv Domain
 *
 * Container for the libfabric domain. Given how the util cq
 * implementation works, the domain is the threading boundary for
 * most libfabric providers.
 */
struct nccl_net_ofi_sendrecv_domain {
	/* Base domain; declared first, like the base members of the
	 * other sendrecv structs */
	nccl_net_ofi_domain_t base;

	/* Access domain handle */
	struct fid_domain *domain;
};
typedef struct nccl_net_ofi_sendrecv_domain nccl_net_ofi_sendrecv_domain_t;
/**
 * @brief	Sendrecv Device
 *
 * Device implementation of the sendrecv protocol.
 *
 * The sendrecv device implements the nccl_net_ofi_device_t interface
 * for the sendrecv protocol, which communicates through libfabric's
 * fi_tsend and fi_trecv. Internally, the device maintains per-thread
 * sendrecv endpoints so that threads do not contend over an
 * endpoint's libfabric resources. Access to endpoints is protected
 * via locks, and the lifetime of resources is maintained with a
 * reference counter.
 */
struct nccl_net_ofi_sendrecv_device {
	/* Base device interface. It gives access to the sendrecv
	 * device's functions such as sendrecv_get_properties(),
	 * sendrecv_get_ep(), and sendrecv_release_ep(): when the
	 * device is constructed, the constructor assigns these
	 * functions to the member functions of the abstract
	 * nccl_net_ofi_device_t device 'base'.
	 *
	 * It has to stay the first member so that pointers to this
	 * struct and pointers to its base struct can be cast to each
	 * other. */
	nccl_net_ofi_device_t base;

	/* Device provider */
	struct fi_info *info;

	/* Maximum supported tag ID */
	uint64_t max_tag;

	/* Provider name. Device did not obtain ownership. */
	char *prov_name;

	// TODO: So far, device resources are not released and device
	// memory is not freed. These actions should include closing
	// fabric, domain, and cq as well as freeing prov_name.

	/* Fabric handle */
	struct fid_fabric *fabric;
};
typedef struct nccl_net_ofi_sendrecv_device nccl_net_ofi_sendrecv_device_t;
/*
 * Request object of the sendrecv protocol.
 */
struct nccl_net_ofi_sendrecv_req {
	/* Base request; declared first, like the base members of the
	 * other sendrecv structs */
	nccl_net_ofi_req_t base;

	/* Communicator this request is associated with */
	nccl_net_ofi_comm_t *comm;

	/* Context associated with this request */
	nccl_net_ofi_context_t ctx;

	/* ID of the device this request is associated with */
	int dev_id;

	/* Number of receives associated with this request */
	int num_recvs;

	/* Size of the completed request */
	size_t size;

	/* Current state of the request */
	nccl_net_ofi_sendrecv_req_state_t state;

	/* Direction of the request */
	nccl_net_ofi_sendrecv_req_direction_t direction;

	/* Back-pointer to the freelist element (for cleanup) */
	nccl_ofi_freelist_elem_t *elem;
};
typedef struct nccl_net_ofi_sendrecv_req nccl_net_ofi_sendrecv_req_t;
/*
 * Plugin object of the sendrecv protocol.
 */
typedef struct nccl_net_ofi_sendrecv_plugin {
	/* Base plugin; declared first, like the base members of the
	 * other sendrecv structs */
	nccl_net_ofi_plugin_t base;

	/* NOTE(review): judging by the name, the libfabric provider
	 * list (struct fi_info) this plugin was set up with --
	 * confirm against the implementation */
	struct fi_info *provider_list;
} nccl_net_ofi_sendrecv_plugin_t;
/**
 * @brief	Initialize plugin with sendrecv protocol structures
 *
 * @param	provider_filter
 *		Provider filter string. NOTE(review): how the filter is
 *		interpreted is defined by the implementation, not by
 *		this header -- confirm.
 * @param	plugin_p
 *		Pointer to a plugin pointer. NOTE(review): presumably
 *		the output location for the created plugin -- confirm.
 * @return	int status. NOTE(review): presumably 0 on success and
 *		non-zero on error -- confirm against the implementation.
 */
int nccl_net_ofi_sendrecv_init(const char *provider_filter, nccl_net_ofi_plugin_t **plugin_p);
#endif // End NCCL_OFI_SENDRECV_H_