include/nccl_ofi_tracepoint.h (93 lines of code) (raw):

/* * Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef NCCL_OFI_TRACEPOINT_H_ #define NCCL_OFI_TRACEPOINT_H_ #include "config.h" #include "tracing_impl/nvtx.h" #include "tracing_impl/lttng.h" /***** SENDRECV PROTOCOL *****/ #define NCCL_OFI_TRACE_SEND_SENDRECV(dev, size, comm, msg_seq_num, request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send, dev, size, comm, msg_seq_num, request, nccl_req); \ } while (0) #define NCCL_OFI_TRACE_RECV_SENDRECV(dev, comm, size, request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Recv, dev, comm, size, request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_FLUSH_SENDRECV(request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Flush, request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_COMPLETIONS_SENDRECV(dev,request,ctx) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, ProcessCompletions, dev,request,ctx); \ } while(0) /***** RDMA PROTOCL *****/ #define NCCL_OFI_TRACE_SEND(dev, size, comm, msg_seq_num, request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send, dev, size, comm, msg_seq_num, request, nccl_req); \ NCCL_OFI_TRACE_SEND_NVTX(dev, size, comm, msg_seq_num, request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_SEND_END(dev, comm, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, SendEnd, dev, comm, request); \ NCCL_OFI_TRACE_SEND_END_NVTX(request); \ } while(0) #define NCCL_OFI_TRACE_EAGER_SEND_START(dev, rail_id, size, comm, msg_seq_num, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_eager_start, dev, rail_id, size, comm, msg_seq_num, request); \ NCCL_OFI_TRACE_EAGER_SEND_START_NVTX(dev, rail_id, size, comm, msg_seq_num, request); \ } while(0) #define NCCL_OFI_TRACE_EAGER_SEND_COMPLETE(dev, rail_id, comm, msg_seq_num, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_eager_complete, dev, rail_id, comm, msg_seq_num, request); \ NCCL_OFI_TRACE_EAGER_SEND_COMPLETE_NVTX(dev, rail_id, comm, msg_seq_num, request); \ } while (0) #define NCCL_OFI_TRACE_SEND_CTRL_RECV(dev, rail_id, comm, msg_seq_num) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_ctrl_recv, dev, rail_id, comm, msg_seq_num); \ NCCL_OFI_TRACE_SEND_CTRL_RECV_NVTX(dev, rail_id, comm, msg_seq_num); \ } while (0) #define NCCL_OFI_TRACE_SEND_CTRL_START(dev, rail_id, comm, req, msg_seq_num) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_ctrl_start, dev, rail_id, comm, req, msg_seq_num); \ NCCL_OFI_TRACE_SEND_CTRL_START_NVTX(dev, rail_id, comm, req, msg_seq_num); \ } while (0); #define NCCL_OFI_TRACE_SEND_CTRL_END(dev, rail_id, comm, req, msg_seq_num) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_ctrl_end, dev, rail_id, comm, req, msg_seq_num); \ NCCL_OFI_TRACE_SEND_CTRL_END_NVTX(dev, rail_id, comm, req, msg_seq_num); \ } while (0); #define NCCL_OFI_TRACE_SEND_WRITE_SEG_START(dev, rail_id, size, comm, msg_seq_num, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_write_segment_start, dev, rail_id, size, comm, msg_seq_num, request); \ NCCL_OFI_TRACE_SEND_WRITE_SEG_START_NVTX(dev, rail_id, size, comm, msg_seq_num, request); \ } while(0) #define NCCL_OFI_TRACE_SEND_WRITE_SEG_COMPLETE(dev, rail_id, comm, msg_seq_num, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Send_write_segment_complete, dev, rail_id, comm, msg_seq_num, request); \ NCCL_OFI_TRACE_SEND_WRITE_SEG_COMPLETE_NVTX(dev, rail_id, comm, msg_seq_num, request); \ } while(0) #define NCCL_OFI_TRACE_RECV(dev, comm, size, request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Recv, dev, comm, size, request, nccl_req); \ NCCL_OFI_TRACE_RECV_NVTX(dev, comm, size, request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_RECV_END(dev, comm, request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, RecvEnd, dev, comm, request); \ NCCL_OFI_TRACE_RECV_END_NVTX(request); \ } while(0) #define NCCL_OFI_TRACE_RECV_SEGMENT_COMPLETE(dev, rail_id, comm, size, request, msg_seq_num) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Recv_segment_complete, dev, rail_id, comm, size, request, msg_seq_num); \ NCCL_OFI_TRACE_RECV_SEGMENT_COMPLETE_NVTX(dev, rail_id, size, request, msg_seq_num); \ } while(0) #define NCCL_OFI_TRACE_EAGER_RECV(dev, rail_id, comm, msg_seq_num) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Eager_recv, dev, rail_id, comm, msg_seq_num); \ NCCL_OFI_TRACE_EAGER_RECV_NVTX(dev, rail_id, comm, msg_seq_num); \ } while(0) #define NCCL_OFI_TRACE_COMPLETIONS(dev,request,ctx) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, ProcessCompletions, dev,request,ctx); \ } while(0) #define NCCL_OFI_TRACE_FLUSH(request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Flush, request, nccl_req); \ NCCL_OFI_TRACE_FLUSH_NVTX(request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_READ(request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Read, request, nccl_req); \ NCCL_OFI_TRACE_READ_NVTX(request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_WRITE(request, nccl_req) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Write, request, nccl_req); \ NCCL_OFI_TRACE_WRITE_NVTX(request, nccl_req); \ } while(0) #define NCCL_OFI_TRACE_PENDING_INSERT(request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Pending_queue_insert, request); \ NCCL_OFI_TRACE_PENDING_INSERT_NVTX(request); \ } while(0) #define NCCL_OFI_TRACE_PENDING_REMOVE(request) do { \ lttng_ust_tracepoint(nccl_ofi_plugin, Pending_queue_remove, request); \ NCCL_OFI_TRACE_PENDING_REMOVE_NVTX(request); \ } while(0) #endif /* NCCL_OFI_TRACEPOINT_H_ */