in net/tcp/tcp_send_buffered.c [434:1184]
/****************************************************************************
 * Name: psock_send_eventhandler
 *
 * Description:
 *   Device-poll callback for buffered TCP send.  Depending on the event
 *   flags it:
 *     - releases or trims write buffers covered by an incoming ACK,
 *     - detects duplicate ACKs and schedules a fast retransmit or a
 *       SACK-driven retransmit (when those features are configured),
 *     - tears down the send state when the connection is lost,
 *     - moves un-ACKed segments back to the write queue on TCP_REXMIT,
 *     - hands at most one new segment from the head of write_q to the
 *       device.
 *
 * Input Parameters:
 *   dev    - The device that is polling.  Events coming from any device
 *            other than conn->dev are ignored.
 *   pvpriv - The TCP connection (struct tcp_conn_s *) registered with the
 *            callback.
 *   flags  - Set of TCP_* / NETDEV_* event flags describing this poll.
 *
 * Returned Value:
 *   The event flags, possibly modified: TCP_POLL is cleared after a segment
 *   has been handed to the device, and TCP_REXMIT is set when SACK options
 *   are parsed in response to duplicate ACKs.
 *
 ****************************************************************************/

static uint16_t psock_send_eventhandler(FAR struct net_driver_s *dev,
                                        FAR void *pvpriv, uint16_t flags)
{
  FAR struct tcp_conn_s *conn = pvpriv;
#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
  struct tcp_ofoseg_s ofosegs[TCP_SACK_RANGES_MAX];
  uint8_t nsacks = 0;
#endif
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
  uint32_t rexmitno = 0;
#endif

  /* The connection is passed in as the private callback argument. */

  DEBUGASSERT(conn != NULL);

  /* The TCP socket is connected and, hence, should be bound to a device.
   * Make sure that the polling device is the one that we are bound to.
   */

  DEBUGASSERT(conn->dev != NULL);
  if (dev != conn->dev)
    {
      return flags;
    }

  ninfo("flags: %04x\n", flags);

  /* The TCP_ACKDATA, TCP_REXMIT and TCP_DISCONN_EVENTS flags are expected to
   * appear here strictly one at a time, except for the FIN + ACK case.
   */

  DEBUGASSERT((flags & TCP_ACKDATA) == 0 ||
              (flags & TCP_REXMIT) == 0);
  DEBUGASSERT((flags & TCP_DISCONN_EVENTS) == 0 ||
              (flags & TCP_REXMIT) == 0);

  /* If this packet contains an acknowledgment, then update the count of
   * acknowledged bytes.
   */

  if ((flags & TCP_ACKDATA) != 0)
    {
      FAR struct tcp_wrbuffer_s *wrb;
      FAR struct tcp_hdr_s *tcp;
      FAR sq_entry_t *entry;
      FAR sq_entry_t *next;
      uint32_t ackno;

      /* Get the offset address of the TCP header */

#ifdef CONFIG_NET_IPv4
#ifdef CONFIG_NET_IPv6
      if (conn->domain == PF_INET)
#endif
        {
          tcp = TCPIPv4BUF;
        }
#endif /* CONFIG_NET_IPv4 */

#ifdef CONFIG_NET_IPv6
#ifdef CONFIG_NET_IPv4
      else
#endif
        {
          tcp = TCPIPv6BUF;
        }
#endif /* CONFIG_NET_IPv6 */

      /* Get the ACK number from the TCP header */

      ackno = tcp_getsequence(tcp->ackno);
      ninfo("ACK: ackno=%" PRIu32 " flags=%04x\n", ackno, flags);

      /* Look at every write buffer in the unacked_q.  The unacked_q
       * holds write buffers that have been entirely sent, but which
       * have not yet been ACKed.
       */

      for (entry = sq_peek(&conn->unacked_q); entry; entry = next)
        {
          uint32_t lastseq;

          /* Check if some or all of this write buffer has been ACKed.
           * Fetch the successor first: the entry may be unlinked below.
           */

          next = sq_next(entry);
          wrb = (FAR struct tcp_wrbuffer_s *)entry;

          /* If the ACKed sequence number is greater than the start
           * sequence number of the write buffer, then some or all of
           * the write buffer has been ACKed.
           */

          if (TCP_SEQ_GT(ackno, TCP_WBSEQNO(wrb)))
            {
              /* Get the sequence number at the end of the data */

              lastseq = TCP_WBSEQNO(wrb) + TCP_WBPKTLEN(wrb);
              ninfo("ACK: wrb=%p seqno=%" PRIu32
                    " lastseq=%" PRIu32 " pktlen=%u ackno=%" PRIu32 "\n",
                    wrb, TCP_WBSEQNO(wrb), lastseq, TCP_WBPKTLEN(wrb),
                    ackno);

              /* Has the entire buffer been ACKed? */

              if (TCP_SEQ_GTE(ackno, lastseq))
                {
                  ninfo("ACK: wrb=%p Freeing write buffer\n", wrb);

                  /* Yes... Remove the write buffer from ACK waiting queue */

                  sq_rem(entry, &conn->unacked_q);

                  /* And return the write buffer to the pool of free
                   * buffers
                   */

                  tcp_wrbuffer_release(wrb);

                  /* Notify any waiters if the write buffers have been
                   * drained.
                   */

                  psock_writebuffer_notify(conn);
                }
              else
                {
                  unsigned int trimlen;

                  /* No, then just trim the ACKed bytes from the beginning
                   * of the write buffer.  This will free up some I/O
                   * buffers that can be reused while we are still sending
                   * the last buffers in the chain.
                   */

                  trimlen = TCP_SEQ_SUB(ackno, TCP_WBSEQNO(wrb));
                  if (trimlen > TCP_WBSENT(wrb))
                    {
                      /* More data has been ACKed than we have sent? */

                      trimlen = TCP_WBSENT(wrb);
                    }

                  ninfo("ACK: wrb=%p trim %u bytes\n", wrb, trimlen);

                  TCP_WBTRIM(wrb, trimlen);
                  TCP_WBSEQNO(wrb) += trimlen;
                  TCP_WBSENT(wrb) -= trimlen;

                  /* Set the new sequence number for what remains */

                  ninfo("ACK: wrb=%p seqno=%" PRIu32 " pktlen=%u\n",
                        wrb, TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb));
                }
            }
          else if (ackno == TCP_WBSEQNO(wrb))
            {
              /* The ACK points exactly at the start of this un-ACKed
               * buffer: treat it as a duplicate ACK candidate.
               */

#ifdef CONFIG_NET_TCP_CC_NEWRENO
              if (conn->dupacks >= TCP_FAST_RETRANSMISSION_THRESH)
#else
              /* Duplicate ACK? Retransmit data if needed */

              if (++TCP_WBNACK(wrb) == TCP_FAST_RETRANSMISSION_THRESH)
#endif
                {
                  /* Fast retransmission has been triggered */

                  if ((flags & TCP_NEWDATA) != 0)
                    {
                      /* The current receive data needs to be handled by
                       * following tcp_recvhandler or tcp_data_event.  Notify
                       * driver to send the message and mark it as rexmit.
                       */

                      TCP_WBNACK(wrb) = 0;
                      conn->timeout = true;
                      netdev_txnotify_dev(conn->dev);
                      return flags;
                    }

#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
                  /* Only look for SACK blocks when SACK is enabled on the
                   * connection and the data offset exceeds the basic
                   * header (0x50), i.e. the header carries options.
                   */

                  if ((conn->flags & TCP_SACK) &&
                      (tcp->tcpoffset & 0xf0) > 0x50)
                    {
                      /* Parse s-ack from tcp options */

                      nsacks = parse_sack(conn, tcp, ofosegs);
                      flags |= TCP_REXMIT;
                    }
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
                  else
#endif
#endif
                    {
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
                      /* Do fast retransmit: remember which sequence number
                       * to resend; the resend itself happens further below.
                       */

                      rexmitno = ackno;
#ifndef CONFIG_NET_TCP_CC_NEWRENO
                      /* Reset counter */

                      TCP_WBNACK(wrb) = 0;
#endif
#endif
                    }

#ifdef CONFIG_NET_TCP_CC_NEWRENO
                  conn->dupacks = 0;
#endif
                }
            }
        }

      /* A special case is the head of the write_q which may be partially
       * sent and so can still have un-ACKed bytes that could get ACKed
       * before the entire write buffer has even been sent.
       */

      wrb = (FAR struct tcp_wrbuffer_s *)sq_peek(&conn->write_q);
      if (wrb && TCP_WBSENT(wrb) > 0 && TCP_SEQ_GT(ackno, TCP_WBSEQNO(wrb)))
        {
          uint32_t nacked;

          /* Number of bytes that were ACKed */

          nacked = TCP_SEQ_SUB(ackno, TCP_WBSEQNO(wrb));
          if (nacked > TCP_WBSENT(wrb))
            {
              /* More data has been ACKed than we have sent? ASSERT? */

              nacked = TCP_WBSENT(wrb);
            }

          ninfo("ACK: wrb=%p seqno=%" PRIu32
                " nacked=%" PRIu32 " sent=%u ackno=%" PRIu32 "\n",
                wrb, TCP_WBSEQNO(wrb), nacked, TCP_WBSENT(wrb), ackno);

          /* Trim the ACKed bytes from the beginning of the write buffer. */

          TCP_WBTRIM(wrb, nacked);
          TCP_WBSEQNO(wrb) += nacked;
          TCP_WBSENT(wrb) -= nacked;

          ninfo("ACK: wrb=%p seqno=%" PRIu32 " pktlen=%u sent=%u\n",
                wrb, TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb), TCP_WBSENT(wrb));
        }
    }

  /* Check for a loss of connection */

  if ((flags & TCP_DISCONN_EVENTS) != 0)
    {
      ninfo("Lost connection: %04x\n", flags);

      /* We could get here recursively through the callback actions of
       * tcp_lost_connection().  So don't repeat that action if we have
       * already been disconnected.
       */

      if (_SS_ISCONNECTED(conn->sconn.s_flags))
        {
          /* Report not connected */

          tcp_lost_connection(conn, conn->sndcb, flags);
        }

      /* Free write buffers and terminate polling */

      psock_lost_connection(conn, !!(flags & NETDEV_DOWN));
      return flags;
    }

#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
  /* A non-zero rexmitno means the duplicate-ACK handling above requested a
   * fast retransmit of the segment starting at that sequence number.
   */

  if (rexmitno != 0)
    {
      FAR struct tcp_wrbuffer_s *wrb;
      FAR sq_entry_t *entry;
      FAR sq_entry_t *next;
      size_t sndlen;
      int ret;

      /* According to RFC 6298 (5.4), retransmit the earliest segment
       * that has not been acknowledged by the TCP receiver.
       */

      for (entry = sq_peek(&conn->unacked_q); entry; entry = next)
        {
          wrb = (FAR struct tcp_wrbuffer_s *)entry;
          next = sq_next(entry);

          if (rexmitno != TCP_WBSEQNO(wrb))
            {
              continue;
            }

          /* Reconstruct the length of the earliest segment to be
           * retransmitted.
           */

          sndlen = TCP_WBPKTLEN(wrb);

          if (sndlen > conn->mss)
            {
              sndlen = conn->mss;
            }

          /* As we are retransmitting, the sequence number is expected
           * already set for this write buffer.
           */

          DEBUGASSERT(TCP_WBSEQNO(wrb) != (unsigned)-1);

#ifdef NEED_IPDOMAIN_SUPPORT
          /* If both IPv4 and IPv6 support are enabled, then we will need to
           * select which one to use when generating the outgoing packet.
           * If only one domain is selected, then the setup is already in
           * place and we need do nothing.
           */

          tcp_ip_select(conn);
#endif
          /* Then set-up to send that amount of data. (this won't actually
           * happen until the polling cycle completes).
           */

          tcp_setsequence(conn->sndseq, TCP_WBSEQNO(wrb));

#ifdef CONFIG_NET_JUMBO_FRAME
          if (sndlen <= conn->mss)
            {
              /* alloc iob */

              netdev_iob_prepare_dynamic(dev, sndlen + tcpip_hdrsize(conn));
            }
#endif

          ret = devif_iob_send(dev, TCP_WBIOB(wrb), sndlen,
                               0, tcpip_hdrsize(conn));
          if (ret <= 0)
            {
              /* Nothing was queued; give up for this poll. */

              return flags;
            }

#ifdef CONFIG_NET_TCP_CC_NEWRENO
          /* After Fast retransmitted, set ssthresh to the maximum of
           * the unacked and the 2*SMSS, and enter to Fast Recovery.
           * ssthresh = max (FlightSize / 2, 2*SMSS) referring to rfc5681
           * cwnd=ssthresh + 3*SMSS referring to rfc5681
           */

          if (conn->flags & TCP_INFT)
            {
              tcp_cc_update(conn, NULL);
            }
#endif

          /* Reset the retransmission timer. */

          tcp_update_retrantimer(conn, conn->rto);

          /* Continue waiting */

          return flags;
        }
    }
#endif

#ifdef CONFIG_NET_TCP_SELECTIVE_ACK
  /* Check if we are being asked to retransmit s-ack data */

  if (nsacks > 0)
    {
      FAR struct tcp_wrbuffer_s *wrb;
      FAR sq_entry_t *entry;
      FAR sq_entry_t *next;
      uint32_t right;
      int i;

      /* Dump s-ack edge */

      for (i = 0, right = 0; i < nsacks; i++)
        {
          ninfo("TCP SACK [%d]"
                "[%" PRIu32 " : %" PRIu32 " : %" PRIu32 "]\n",
                i, ofosegs[i].left, ofosegs[i].right,
                TCP_SEQ_SUB(ofosegs[i].right, ofosegs[i].left));
        }

      /* For each un-ACKed buffer, walk the SACK ranges; "right" tracks the
       * right edge of the previous range (0 before the first one).
       */

      for (entry = sq_peek(&conn->unacked_q); entry; entry = next)
        {
          wrb = (FAR struct tcp_wrbuffer_s *)entry;
          next = sq_next(entry);

          for (i = 0, right = 0; i < nsacks; i++)
            {
              /* Wrb seqno out of s-ack edge ? do retransmit ! */

              if (TCP_SEQ_LT(TCP_WBSEQNO(wrb), ofosegs[i].left) &&
                  TCP_SEQ_GTE(TCP_WBSEQNO(wrb), right))
                {
                  ninfo("TCP REXMIT "
                        "[%" PRIu32 " : %" PRIu32 " : %u]\n",
                        TCP_WBSEQNO(wrb),
                        TCP_SEQ_ADD(TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb)),
                        TCP_WBPKTLEN(wrb));
                  sq_rem(entry, &conn->unacked_q);
                  retransmit_segment(conn, (FAR void *)entry);
                  break;
                }

              right = ofosegs[i].right;
            }
        }

#ifdef CONFIG_NET_TCP_CC_NEWRENO
      /* After Fast retransmitted, set ssthresh to the maximum of
       * the unacked and the 2*SMSS, and enter to Fast Recovery.
       * ssthresh = max (FlightSize / 2, 2*SMSS) referring to rfc5681
       * cwnd=ssthresh + 3*SMSS referring to rfc5681
       */

      if (conn->flags & TCP_INFT)
        {
          tcp_cc_update(conn, NULL);
        }
#endif
    }
  else
#endif

  /* Check if we are being asked to retransmit data */

  if ((flags & TCP_REXMIT) != 0)
    {
      FAR struct tcp_wrbuffer_s *wrb;
      FAR sq_entry_t *entry;

      ninfo("REXMIT: %04x\n", flags);

      /* Is there a partially sent write buffer at the head of the
       * write_q?  Has anything been sent from that write buffer?
       */

      wrb = (FAR struct tcp_wrbuffer_s *)sq_peek(&conn->write_q);
      ninfo("REXMIT: wrb=%p sent=%u\n", wrb, wrb ? TCP_WBSENT(wrb) : 0);

      if (wrb != NULL && TCP_WBSENT(wrb) > 0)
        {
          FAR struct tcp_wrbuffer_s *tmp;

          /* Increment the retransmit count on this write buffer. */

          if (++TCP_WBNRTX(wrb) >= TCP_MAXRTX)
            {
              nwarn("WARNING: Expiring wrb=%p nrtx=%u\n",
                    wrb, TCP_WBNRTX(wrb));

              /* The maximum retry count has been exhausted.  Remove the
               * write buffer at the head of the queue.
               */

              tmp = (FAR struct tcp_wrbuffer_s *)sq_remfirst(&conn->write_q);
              DEBUGASSERT(tmp == wrb);
              UNUSED(tmp);

              /* And return the write buffer to the free list */

              tcp_wrbuffer_release(wrb);

              /* Notify any waiters if the write buffers have been
               * drained.
               */

              psock_writebuffer_notify(conn);

              /* NOTE expired is different from un-ACKed, it is designed to
               * represent the number of segments that have been sent,
               * retransmitted, and un-ACKed, if expired is not zero, the
               * connection will be closed.
               *
               * field expired can only be updated at TCP_ESTABLISHED state
               */

              conn->expired++;
            }
          else
            {
              uint16_t sent;

              sent = TCP_WBSENT(wrb);

              ninfo("REXMIT: wrb=%p sent=%u, "
                    "conn tx_unacked=%" PRIu32 " sent=%" PRIu32 "\n",
                    wrb, TCP_WBSENT(wrb), conn->tx_unacked, conn->sent);

              /* Yes.. Reset the number of bytes sent from the write
               * buffer.  The connection counters are reduced by the same
               * amount, saturating at zero.
               */

              if (conn->tx_unacked > sent)
                {
                  conn->tx_unacked -= sent;
                }
              else
                {
                  conn->tx_unacked = 0;
                }

              if (conn->sent > sent)
                {
                  conn->sent -= sent;
                }
              else
                {
                  conn->sent = 0;
                }

              TCP_WBSENT(wrb) = 0;
            }
        }

      /* Move all segments that have been sent but not ACKed to the write
       * queue again.  Note, the un-ACKed segments are put at the head of
       * the write_q so they can be resent as soon as possible.
       */

      while ((entry = sq_remlast(&conn->unacked_q)) != NULL)
        {
          retransmit_segment(conn, (FAR void *)entry);
        }
    }

#if CONFIG_NET_SEND_BUFSIZE > 0
  /* Notify the send buffer available if wrbbuffer drained */

  tcp_sendbuffer_notify(conn);
#endif /* CONFIG_NET_SEND_BUFSIZE */

  /* Check if the outgoing packet is available (it may have been claimed
   * by a sendto event serving a different thread).
   */

  if (dev->d_sndlen > 0)
    {
      /* Another thread has beat us sending data, wait for the next poll */

      return flags;
    }

  /* We get here if (1) not all of the data has been ACKed, (2) we have been
   * asked to retransmit data, (3) the connection is still healthy, and (4)
   * the outgoing packet is available for our use.  In this case, we are
   * now free to send more data to receiver -- UNLESS the buffer contains
   * unprocessed incoming data or window size is zero.  In that event, we
   * will have to wait for the next polling cycle.
   */

  if ((conn->tcpstateflags & TCP_ESTABLISHED) &&
      ((flags & TCP_NEWDATA) == 0) &&
      (flags & (TCP_POLL | TCP_REXMIT | TCP_ACKDATA)) &&
      !(sq_empty(&conn->write_q)) &&
      conn->snd_wnd > 0)
    {
      FAR struct tcp_wrbuffer_s *wrb;
      uint32_t predicted_seqno;
      uint32_t seq;
      uint32_t snd_wnd_edge;
      size_t sndlen;

      /* Peek at the head of the write queue (but don't remove anything
       * from the write queue yet).  We know from the above test that
       * the write_q is not empty.
       */

      wrb = (FAR struct tcp_wrbuffer_s *)sq_peek(&conn->write_q);
      DEBUGASSERT(wrb);

      /* Set the sequence number for this segment.  If we are
       * retransmitting, then the sequence number will already
       * be set for this write buffer.
       */

      if (TCP_WBSEQNO(wrb) == (unsigned)-1)
        {
          TCP_WBSEQNO(wrb) = conn->isn + conn->sent;
        }

      /* Get the amount of data that we can send in the next packet.
       * We will send either the remaining data in the buffer I/O
       * buffer chain, or as much as will fit given the MSS and current
       * window size.
       */

      seq = TCP_WBSEQNO(wrb) + TCP_WBSENT(wrb);

#ifdef CONFIG_NET_TCP_CC_NEWRENO
      snd_wnd_edge = conn->snd_wl2 + MIN(conn->snd_wnd, conn->cwnd);
#else
      snd_wnd_edge = conn->snd_wl2 + conn->snd_wnd;
#endif
      if (TCP_SEQ_LT(seq, snd_wnd_edge))
        {
          uint32_t remaining_snd_wnd;
          int ret;

          sndlen = TCP_WBPKTLEN(wrb) - TCP_WBSENT(wrb);
          if (sndlen > conn->mss)
            {
              sndlen = conn->mss;
            }

          remaining_snd_wnd = TCP_SEQ_SUB(snd_wnd_edge, seq);
          if (sndlen > remaining_snd_wnd)
            {
              sndlen = remaining_snd_wnd;
            }

          /* Normally CONFIG_IOB_THROTTLE should ensure that we have enough
           * iob space available for copying the data to a packet buffer.
           * If it doesn't, a deadlock could happen where the iobs are used
           * by queued TX data and cannot be released because a full-sized
           * packet gets refused by devif_iob_send().  Detect this situation
           * and send tiny TCP packets until we manage to free up some space.
           * We do not want to exhaust all of the remaining iobs by sending
           * the maximum size packet that would fit.
           */

          if (sndlen > iob_navail(false) * CONFIG_IOB_BUFSIZE)
            {
              nwarn("Running low on iobs, limiting packet size\n");

              /* Only ever shrink the segment here.  When no iob is
               * available and sndlen is already smaller than one iob,
               * assigning CONFIG_IOB_BUFSIZE unconditionally would grow
               * sndlen beyond the unsent data and the send window that
               * were applied just above.
               */

              if (sndlen > CONFIG_IOB_BUFSIZE)
                {
                  sndlen = CONFIG_IOB_BUFSIZE;
                }
            }

          ninfo("SEND: wrb=%p seq=%" PRIu32 " pktlen=%u sent=%u sndlen=%zu "
                "mss=%u snd_wnd=%" PRIu32 " seq=%" PRIu32
                " remaining_snd_wnd=%" PRIu32 "\n",
                wrb, TCP_WBSEQNO(wrb), TCP_WBPKTLEN(wrb), TCP_WBSENT(wrb),
                sndlen, conn->mss,
                (uint32_t)conn->snd_wnd, seq, remaining_snd_wnd);

          /* The TCP stack updates sndseq on receipt of ACK *before*
           * this function is called.  In that case sndseq will point
           * to the next unacknowledged byte (which might have already
           * been sent).  We will overwrite the value of sndseq here
           * before the packet is sent.
           */

          tcp_setsequence(conn->sndseq, TCP_WBSEQNO(wrb) + TCP_WBSENT(wrb));

#ifdef NEED_IPDOMAIN_SUPPORT
          /* If both IPv4 and IPv6 support are enabled, then we will need to
           * select which one to use when generating the outgoing packet.
           * If only one domain is selected, then the setup is already in
           * place and we need do nothing.
           */

          tcp_ip_select(conn);
#endif
          /* Then set-up to send that amount of data with the offset
           * corresponding to the amount of data already sent. (this
           * won't actually happen until the polling cycle completes).
           */

#ifdef CONFIG_NET_JUMBO_FRAME
          if (sndlen <= conn->mss)
            {
              /* alloc iob */

              netdev_iob_prepare_dynamic(dev, sndlen + tcpip_hdrsize(conn));
            }
#endif

          ret = devif_iob_send(dev, TCP_WBIOB(wrb), sndlen,
                               TCP_WBSENT(wrb), tcpip_hdrsize(conn));
          if (ret <= 0)
            {
              /* Nothing was queued; leave all counters untouched. */

              return flags;
            }

          /* Remember how much data we send out now so that we know
           * when everything has been acknowledged.  Just increment
           * the amount of data sent.  This will be needed in sequence
           * number calculations.
           */

          conn->tx_unacked += sndlen;
          conn->sent       += sndlen;

          /* Below prediction will become true,
           * unless retransmission occurrence
           */

          predicted_seqno = tcp_getsequence(conn->sndseq) + sndlen;

          if (TCP_SEQ_GT(predicted_seqno, conn->sndseq_max))
            {
              conn->sndseq_max = predicted_seqno;
            }

          ninfo("SEND: wrb=%p nrtx=%u tx_unacked=%" PRIu32
                " sent=%" PRIu32 "\n",
                wrb, TCP_WBNRTX(wrb), conn->tx_unacked, conn->sent);

          /* Increment the count of bytes sent from this write buffer */

          TCP_WBSENT(wrb) += sndlen;

          ninfo("SEND: wrb=%p sent=%u pktlen=%u\n",
                wrb, TCP_WBSENT(wrb), TCP_WBPKTLEN(wrb));

          /* Remove the write buffer from the write queue if the
           * last of the data has been sent from the buffer.
           */

          DEBUGASSERT(TCP_WBSENT(wrb) <= TCP_WBPKTLEN(wrb));
          if (TCP_WBSENT(wrb) >= TCP_WBPKTLEN(wrb))
            {
              FAR struct tcp_wrbuffer_s *tmp;

              ninfo("SEND: wrb=%p Move to unacked_q\n", wrb);

              tmp = (FAR struct tcp_wrbuffer_s *)sq_remfirst(&conn->write_q);
              DEBUGASSERT(tmp == wrb);
              UNUSED(tmp);

              /* Put the I/O buffer chain in the un-acked queue; the
               * segment is waiting for ACK again
               */

              psock_insert_segment(wrb, &conn->unacked_q);
            }

          /* Only one data can be sent by low level driver at once,
           * tell the caller stop polling the other connection.
           */

          flags &= ~TCP_POLL;
        }
    }
  else
    {
      /* Not able to send on this poll; hand over to the zero-window
       * probe logic.
       */

      tcp_set_zero_probe(conn, flags);
    }

  /* Continue waiting */

  return flags;
}