TCP报文丢失判断 && SACK下的重传
主要看RTO超时、SACK、RACK等情况下的报文丢失判断
此次看的内核版本是linux-5.10
RTO超时标记丢失报文
在RTO超时处理中,tp_sk进入TCP_CA_Loss状态,由函数tcp_timeout_mark_lost标记套接口丢失报文。
/* Enter Loss state. The implementation logic is essentially the same
 * across kernel versions.
 *
 * Used by the RTO timeout handling: marks lost packets through
 * tcp_timeout_mark_lost(), records prior_ssthresh/prior_cwnd and asks the
 * congestion-control ops for a new ssthresh when that has not been done
 * inside this window, sets cwnd to packets-in-flight + 1, switches the
 * congestion state to TCP_CA_Loss, sets high_seq to snd_nxt and decides
 * whether F-RTO is used.
 */
void tcp_enter_loss(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
/* Sampled here, before the state is switched to TCP_CA_Loss below. */
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
tcp_timeout_mark_lost(sk);
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
/* Remember the pre-loss values before they are overwritten. */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->prior_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
/* Timeout in disordered state after receiving substantial DUPACKs
 * suggests that the degree of reordering is over-estimated.
 */
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
net->ipv4.sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
 * loss recovery is underway except recurring timeout(s) on
 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
 */
tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
函数tcp_timeout_mark_lost:如果重传队列首部的报文已被SACK确认(它本该由累积ACK确认),说明对端丢弃了其OFO(乱序)队列,即发生了SACK reneging,此时清除重传队列中所有报文的SACK确认标记。
随后遍历套接口的重传队列:在未发生reneging且启用RACK的情况下,除队首报文外,依据RACK算法尚未超时的报文不做标记;其余报文调用函数tcp_mark_skb_lost进行丢失标记。
/* If we detect SACK reneging, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 *
 * Walks the retransmit queue of sk from its head and marks skbs lost via
 * tcp_mark_skb_lost(); with RACK enabled and no reneging, skbs other than
 * the head whose RACK timeout has not expired yet are skipped.
 */
static void tcp_timeout_mark_lost(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
bool is_reneg; /* is receiver reneging on SACKs? */
head = tcp_rtx_queue_head(sk);
// The head of the retransmit queue is tagged as SACKed although it should
// have been covered by the cumulative ACK: the peer dropped its OFO queue.
is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
/* Mark SACK reneging until we recover from this loss event. */
tp->is_sack_reneg = 1;
} else if (tcp_is_reno(tp)) {
tcp_reset_reno_sack(tp);
}
skb = head;
skb_rbtree_walk_from(skb) {
if (is_reneg)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
else if (tcp_is_rack(sk) && skb != head &&
tcp_rack_skb_timeout(tp, skb, 0) > 0) // per RACK this skb has not timed out yet, so it is not marked
continue; /* Don't mark recently sent ones lost yet */
// Otherwise mark this retransmit-queue skb as lost.
tcp_mark_skb_lost(sk, skb);
}
tcp_verify_left_out(tp);
tcp_clear_all_retrans_hints(tp);
}
tcp_mark_skb_lost函数,如果此报文被重传过,既然已经丢失,清除其重传状态位TCPCB_SACKED_RETRANS,并且将tp_sk重传计数retrans_out减去报文数量。
重传报文再度丢失的情况下,sacked状态位为:(TCPCB_LOST | TCPCB_EVER_RETRANS),没有标志位TCPCB_SACKED_RETRANS。
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
__u8 sacked = TCP_SKB_CB(skb)->sacked;
struct tcp_sock *tp = tcp_sk(sk);
if (sacked & TCPCB_SACKED_ACKED)
return;
//用于设置下一次重传时,应使用的重传报文skb(保存在retransmit_skb_hint)
tcp_verify_retransmit_hint(tp, skb);
if (sacked & TCPCB_LOST) {
if (sacked & TCPCB_SACKED_RETRANS) {
/* Account for retransmits that are lost again
重传报文再度丢失的情况下,sacked状态位为:(TCPCB_LOST | TCPCB_EVER_RETRANS),没有标志位TCPCB_SACKED_RETRANS。*/
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
tcp_skb_pcount(skb));
tcp_notify_skb_loss_event(tp, skb);
}
} else {
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tcp_notify_skb_loss_event(tp, skb);
}
}
TCP快速重传触发后Reno标记丢失报文
tcp_ack中,如果我们发现该ACK所携带的信息是 可疑的, 那么逻辑就会进入到进一步的筛选判断中,以最终抉择 是不是要让该TCP连接的拥塞状态机从Open切换到Disorder或者Recovery状态。
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
// Author's note: !(flag & FLAG_NOT_DUP) is false as soon as the ACK carries
// data, so on this criterion alone such an ACK is not treated as dubious.
// Consequently, with SACK disabled on a bidirectional connection where the
// peer piggybacks some data on every segment, fast retransmit would never be
// triggered locally.
#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
// Author's note: an ACK that carries SACK information, or an explicit
// congestion indication, is always treated as dubious.
...
static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
{
return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}
参考:以前的拥塞控制文章
在tcp_fastretrans_alert函数中,
- 如果套接口处于TCP_CA_Recovery拥塞状态,且未能施行拥塞撤销操作(报文确实已经丢失;撤销操作参见函数tcp_try_undo_partial(接收到对原始报文的确认)和tcp_try_undo_dsack(全部重传被DSACK确认)),则进行丢包标记,由函数tcp_identify_packet_loss完成。
- 如果处于TCP_CA_Loss拥塞状态,先调用tcp_process_loss处理,随后由函数tcp_identify_packet_loss执行丢包标记;之后只有当状态已恢复为TCP_CA_Open(拥塞撤销成功),或者检测到重传报文再次丢失(FLAG_LOST_RETRANS)时,才继续执行default分支的处理,否则直接返回。
- 套接口拥塞状态不等于TCP_CA_Recovery或TCP_CA_Loss,由tcp_identify_packet_loss函数执行丢包标记
/* Process an event, which can update packets-in-flight not trivially.
 * Main goal of this function is to calculate new estimate for left_out,
 * taking into account both packets sitting in receiver's buffer and
 * packets lost by network.
 *
 * Besides that it updates the congestion state when packet loss or ECN
 * is detected. But it does not reduce the cwnd, it is done by the
 * congestion control later.
 *
 * It does _not_ decide what to send, it is made in function
 * tcp_xmit_retransmit_queue().
 *
 * @sk:            socket being processed
 * @prior_snd_una: handed to tcp_try_undo_partial() on a partial ACK
 * @num_dupack:    non-zero forces do_lost; also handed to
 *                 tcp_add_reno_sack() and tcp_process_loss()
 * @ack_flag:      in/out ACK flags; read once into the local 'flag' and
 *                 passed by pointer to tcp_identify_packet_loss()
 * @rexmit:        out; set to REXMIT_LOST when the function runs to its
 *                 end, and also passed to tcp_process_loss()
 */
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
int num_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
bool ece_ack = flag & FLAG_ECE;
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
tcp_force_fast_retransmit(sk));
/* With no packets outstanding (packets_out == 0) there cannot be any SACKed segments (sacked_out). */
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
/* Now state machine starts.
 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
if (ece_ack)
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs.
 * Author's note: this checks for a bogus SACK, i.e. whether the cumulative
 * ACK now points at data that had already been SACKed. If it does, the
 * SACK state we recorded does not reflect the receiver's real state: the
 * receiver is presumably badly congested or buggy and has discarded
 * segments it had SACKed earlier, so the situation is handled like a
 * retransmission timeout and the data has to be retransmitted. */
if (tcp_check_sack_reneging(sk, flag))
return;
/* C. Check consistency of the current state.
 * Author's note: sanity check on the left-out accounting versus the number
 * of packets sent (helper not shown here). */
tcp_verify_left_out(tp);
/* D. Check state exit conditions. State can be terminated
 * when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
WARN_ON(tp->retrans_out != 0);
tp->retrans_stamp = 0;/* clear the send time of the first retransmission of the previous episode */
} else if (!before(tp->snd_una, tp->high_seq)) {// i.e. tp->snd_una >= tp->high_seq
switch (icsk->icsk_ca_state) {
case TCP_CA_CWR:
/* CWR is to be held something *above* high_seq
 * is ACKed for CWR bit to reach receiver. */
if (tp->snd_una != tp->high_seq) {
tcp_end_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_Open);
}
break;
case TCP_CA_Recovery:
/* In TCP_CA_Recovery an ACK has arrived that covers everything below
 * high_seq (SND.UNA >= high_seq). high_seq recorded SND.NXT at the time
 * the congestion episode started, so the peer has received all data sent
 * before then; an undo of the congestion state is attempted. */
if (tcp_is_reno(tp))// presumably true when SACK is not in use on this connection
tcp_reset_reno_sack(tp);// presumably resets sacked_out to 0
if (tcp_try_undo_recovery(sk))// try to undo Recovery; return straight away on success
return;
tcp_end_cwnd_reduction(sk);// finish the cwnd reduction
break;
}
}
/* E. Process state. */
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {// this ACK did not advance SND.UNA
if (tcp_is_reno(tp))// presumably true when SACK is not in use
tcp_add_reno_sack(sk, num_dupack, ece_ack);/* without SACK the dupacks are counted in sacked_out; presumably also checks for reordering */
} else {
/* In TCP_CA_Recovery the ACK advanced SND.UNA (FLAG_SND_UNA_ADVANCED) but
 * did not cover everything up to high_seq: a partial ACK, so
 * tcp_try_undo_partial() is tried. */
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_force_fast_retransmit(sk);
}
/* Reached when the ACK did not advance SND.UNA, or when the partial undo
 * above did not happen: try the DSACK-based undo via tcp_try_undo_dsack(). */
if (tcp_try_undo_dsack(sk)) {
tcp_try_keep_open(sk);
return;
}
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
/* Stop here unless the state went back to Open or a retransmission
 * was detected as lost again. */
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
fallthrough;
default:
if (tcp_is_reno(tp)) {// presumably true when SACK is not in use
if (flag & FLAG_SND_UNA_ADVANCED)// SND.UNA advanced, so the dupack count is reset
tcp_reset_reno_sack(tp);
tcp_add_reno_sack(sk, num_dupack, ece_ack);// record the dupack(s) in sacked_out
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
tcp_identify_packet_loss(sk, ack_flag);
// Decide whether to leave Disorder for Recovery; if not, see whether the
// state can go (back) to Open.
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
}
/* MTU probe failure: don't reduce cwnd */
if (icsk->icsk_ca_state < TCP_CA_CWR &&
icsk->icsk_mtup.probe_size &&
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
tp->snd_cwnd++;
tcp_simple_retransmit(sk);
return;
}
/* Otherwise enter Recovery state */
tcp_enter_recovery(sk, ece_ack);
fast_rexmit = 1;
}
/* Without RACK, loss marking is done by the scoreboard update. */
if (!tcp_is_rack(sk) && do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
*rexmit = REXMIT_LOST;
}
对于Reno/NewReno-TCP,由函数tcp_newreno_mark_lost执行丢包标记。启用RACK算法的TCP,由函数tcp_rack_mark_lost标记丢包;
TCP协议中默认都是支持SACK、FACK、D-SACK的,用 sysctl -a | grep ack 就可以查看。但具体到一个连接的支持情况还要看三次握手:只要双方都支持了SACK,FACK就会自动打开;只要有一方不支持SACK,连接就处于最基本的Reno方式的拥塞控制。FACK和SACK都是很好的东西,是对传统Reno的改进。
/* Run the loss-marking algorithm that applies to @sk.
 *
 * Does nothing when the retransmit queue is empty. Reno connections use
 * tcp_newreno_mark_lost(); otherwise, when RACK is enabled,
 * tcp_rack_mark_lost() is used and FLAG_LOST_RETRANS is set in @ack_flag
 * if retrans_out went down, i.e. some retransmissions were marked lost.
 */
static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 retrans_before;

	if (tcp_rtx_queue_empty(sk))
		return;

	if (unlikely(tcp_is_reno(tp))) {
		tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
		return;
	}

	if (!tcp_is_rack(sk))
		return;

	retrans_before = tp->retrans_out;
	tcp_rack_mark_lost(sk);
	if (tp->retrans_out < retrans_before)
		*ack_flag |= FLAG_LOST_RETRANS;
}
SACK下的重传
前面介绍SACK选项的时候说过,SACK可以把接收端序列号空间中的空洞反映给发送端,因此发送端可以更充分地了解接收端的情况,从而进行更好的重传恢复。这个过程有时也叫做advanced loss recovery。
/* NewReno (RFC6582) loss marking for connections without SACK.
 *
 * The head of the retransmit queue is marked lost when either
 *  a) the state is below TCP_CA_Recovery and sacked_out (for Reno this
 *     is the dupack count) has reached the reordering level, or
 *  b) the state is TCP_CA_Recovery and @snd_una_advanced is set, i.e.
 *     an ACK acknowledged new data during fast recovery.
 * The number of lost packets cannot be known, so exactly one MSS-sized
 * packet is marked: a multi-segment head is fragmented first.
 */
void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced)
{
	const u8 state = inet_csk(sk)->icsk_ca_state;
	struct tcp_sock *tp = tcp_sk(sk);
	bool start_recovery, partial_ack;
	struct sk_buff *head;
	u32 mss;

	start_recovery = state < TCP_CA_Recovery &&
			 tp->sacked_out >= tp->reordering;
	partial_ack = state == TCP_CA_Recovery && snd_una_advanced;
	if (!start_recovery && !partial_ack)
		return;

	head = tcp_rtx_queue_head(sk);
	/* Head already marked: nothing more to do for this event. */
	if (TCP_SKB_CB(head)->sacked & TCPCB_LOST)
		return;

	mss = tcp_skb_mss(head);
	if (tcp_skb_pcount(head) > 1 && head->len > mss)
		tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, head, mss, mss,
			     GFP_ATOMIC);

	tcp_mark_skb_lost(sk, head);
}
RACK基本思想:如果发送端收到的确认包中的SACK选项确认收到了一个数据包,那么在这个数据包之前发送的数据包要么是在传输过程中发生了乱序,要么是发生了丢包。RACK并不记录数据被(s)ACK的时间,而是在收到ACK的时候,记录被该ACK确认的数据包的发送时间,在这些发送时间中取Recent,即最晚发送的。RACK的思想是,记录这个Recent (s)ACK所确认数据包的发送时间T.rack,然后给定一个时间窗口reo_wnd,在时间T.rack-reo_wnd之前发送的未被确认的数据包均被标记为LOST,然后这些数据包会被交给发送逻辑去发送。这非常符合常理。
RACK可以修复丢包而不用等一个比较长的RTO超时,RACK可以用于快速恢复也可以用于超时恢复,既可以利用初传的数据包探测丢包也可以利用重传的数据包探测丢包,而且可以探测初传丢包也可以探测重传丢包,因此RACK是一个适应多种场景的丢包恢复机制。
启用RACK算法的TCP,由函数tcp_rack_mark_lost标记丢包
对于启用RACK算法的TCP,由函数tcp_rack_mark_lost标记丢包,完成之后,如果之前的重传报文数量大于当前的重传数量,表明丢失了部分重传报文,设置FLAG_LOST_RETRANS标志。
tcp_rack_mark_lost函数,由子函数tcp_rack_detect_loss标记丢失报文,并且设置重传队列中报文超时的定时器ICSK_TIME_REO_TIMEOUT。
函数tcp_rack_detect_loss负责依据RACK算法标记丢失报文,即如果发送时间靠后的报文已经被确认(ACK或者SACK),那么之前的未确认报文认为已经丢失。为抵御乱序的情况,RACK在确认报文和丢失报文之间设置了一定的时间差值。遍历tsorted时间排序的报文链表,从最早发送的报文开始,如果其已经被标记为丢失,但是还没有重传,不进行处理。如果报文剩余时间小于等于0,表明已经超时,由函数tcp_mark_skb_lost进行标记,否则,如果报文剩余时间大于0,计算超时时长,返回给调用函数设置定时器。
/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
 *
 * Marks a packet lost, if some packet sent later has been (s)acked.
 * The underlying idea is similar to the traditional dupthresh and FACK
 * but they look at different metrics:
 *
 * dupthresh: 3 OOO packets delivered (packet count)
 * FACK: sequence delta to highest sacked sequence (sequence space)
 * RACK: sent time delta to the latest delivered packet (time domain)
 *
 * The advantage of RACK is it applies to both original and retransmitted
 * packet and therefore is robust against tail losses. Another advantage
 * is being more resilient to reordering by simply allowing some
 * "settling delay", instead of tweaking the dupthresh.
 *
 * When tcp_rack_detect_loss() detects some packets are lost and we
 * are not already in the CA_Recovery state, either tcp_rack_reo_timeout()
 * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
 * make us enter the CA_Recovery state.
 *
 * @sk:          socket whose time-sorted sent queue is scanned
 * @reo_timeout: out; reset to 0, then raised to the largest positive
 *               remaining time of the skbs that are not yet considered lost
 */
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *n;
u32 reo_wnd;
*reo_timeout = 0;
reo_wnd = tcp_rack_reo_wnd(sk);/* reordering window to apply */
list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
tcp_tsorted_anchor) {/* walk the time-sorted sent queue */
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
s32 remaining;
/* Skip ones marked lost but not yet retransmitted */
if ((scb->sacked & TCPCB_LOST) &&
!(scb->sacked & TCPCB_SACKED_RETRANS))
continue;/* already marked lost and still waiting to be retransmitted */
/* Stop once every skb sent before the most recently (s)acked one has been examined. */
if (!tcp_skb_sent_after(tp->rack.mstamp,
tcp_skb_timestamp_us(skb),
tp->rack.end_seq, scb->end_seq))
break;
/* A packet is lost if it has not been s/acked beyond
 * the recent RTT plus the reordering window.
 * Author's note: the skb is considered lost when its send timestamp plus
 * the most recent RTT plus the reordering window is earlier than the
 * current TCP time.
 * remaining <= 0: the skb is judged lost; remaining > 0: extra time that
 * still has to be waited for. */
remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
if (remaining <= 0) {
tcp_mark_skb_lost(sk, skb);
/* Take the skb off the time-sorted list once it is marked. */
list_del_init(&skb->tcp_tsorted_anchor);
} else {
/* Record maximum wait time; the caller uses it for the REO timer */
*reo_timeout = max_t(u32, *reo_timeout, remaining);
}
}
}
SACK标记丢失报文
tcp_fastretrans_alert函数中对丢包(do_lost)的判断,如果接收到dupack,或者对端SACK序号块确认的最高序号,超出SND.UNA加上乱序级别的值,认为套接口发生了丢包。另外,对于TCP_CA_Recovery拥塞状态的套接口,如果接收到的ACK报文(dupack)未能推进SND.UNA,并且Partial-Recovery未能实行,对于Reno-TCP(无SACK)或者tcp_force_fast_retransmit为真,设置丢包变量do_lost。
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
int num_dupack, int *ack_flag, int *rexmit)
{
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
tcp_force_fast_retransmit(sk));
...
/* E. Process state. */
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
...
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
tcp_force_fast_retransmit(sk);
}
if (!tcp_is_rack(sk) && do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
*rexmit = REXMIT_LOST;
对于未启用RACK算法的情况,如果判断发生丢包,使用函数tcp_update_scoreboard处理。
TCP套接口协商了SACK(非Reno/NewReno),并且SACK确认的报文数量大于乱序级别,即认为最早发送的报文已经丢失,sacked_upto表示丢失的报文中所包含的SACK确认报文的数量。或者fast_rexmit为真,仅将重传队列头部的首个报文标记为丢失。
/* Account newly detected lost packet(s).
 *
 * Only acts on SACK connections. When sacked_out has reached the
 * reordering level, the surplus (sacked_out - reordering) is passed to
 * tcp_mark_head_lost() as the SACKed-packet budget; otherwise, if
 * @fast_rexmit is set, only the head of the queue is marked.
 */
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int surplus;

	if (!tcp_is_sack(tp))
		return;

	/* SACKed packets in excess of the current reordering level. */
	surplus = tp->sacked_out - tp->reordering;
	if (surplus >= 0) {
		tcp_mark_head_lost(sk, surplus, 0);
		return;
	}

	if (fast_rexmit)
		tcp_mark_head_lost(sk, 1, 1);
}
/* It's reordering when higher sequence was delivered (i.e. sacked) before
 * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
 * distance is approximated in full-mss packet distance ("reordering").
 *
 * @sk:      socket whose reordering estimate is updated
 * @low_seq: the lower sequence that was delivered late
 * @ts:      not used by this abridged excerpt
 *
 * NOTE(review): this is a shortened excerpt; the upstream function
 * presumably also does statistics bookkeeping after the update — confirm
 * against the kernel tree before relying on it.
 */
static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
				      const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const u32 mss = tp->mss_cache;
	u32 fack, metric;

	fack = tcp_highest_sack_seq(tp);
	/* Nothing SACKed above low_seq: there is no distance to measure,
	 * and the subtraction below would wrap.
	 */
	if (!before(low_seq, fack))
		return;

	metric = fack - low_seq;
	/* Only ever raise the estimate; "&& mss" keeps the division safe
	 * when mss_cache is 0.
	 */
	if ((metric > tp->reordering * mss) && mss)
		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
}
tp->reordering表示乱序级别:SACK确认的最高序号(fack)与较低的迟到序号(low_seq)之间的距离,按MSS向上取整后折算成的报文段(pkt)个数;而tp->sacked_out表示已被SACK确认的报文数量。
对于fast_rexmit的设置,在函数tcp_fastretrans_alert中,只有在函数末尾调用tcp_enter_recovery进入TCP_CA_Recovery状态时才会置为1:即在default分支中,tcp_time_to_recover函数检测到需要进行快速恢复,并且不属于MTU探测失败的情况。
能够走到default分支的情况有两种:套接口的拥塞状态既不是TCP_CA_Recovery也不是TCP_CA_Loss;或者拥塞状态为TCP_CA_Loss,经tcp_process_loss处理后状态已恢复为TCP_CA_Open,或者检测到重传报文再次丢失(FLAG_LOST_RETRANS)。
/* Detect loss in event "A" above by marking head of queue up as lost.
 * For RFC3517 SACK, a segment is considered lost if it
 * has at least tp->reordering SACKed seqments above it; "packets" refers to
 * the maximum SACKed segments to pass before reaching this limit.
 *
 * @sk:        socket whose retransmit queue is walked
 * @packets:   SACKed-packet budget; the walk stops once the running count
 *             of SACKed packets exceeds it
 * @mark_head: when non-zero, at most one skb is handled per call
 *
 * The position reached and the running count are cached in
 * lost_skb_hint / lost_cnt_hint so a later call resumes from there.
 */
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int cnt;
/* Use SACK to deduce losses of new sequences sent during recovery */
const u32 loss_high = tp->snd_nxt;
WARN_ON(packets > tp->packets_out);
skb = tp->lost_skb_hint;
if (skb) {
// Loss marking ran before: resume from the saved skb and saved count.
/* Head already handled? */
// When only one skb is to be marked (mark_head set) and the saved skb
// starts after SND.UNA, the single requested mark has already been done.
if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
cnt = tp->lost_cnt_hint;
} else {// otherwise start the walk at the head of the retransmit queue
skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
skb_rbtree_walk_from(skb) {
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
// Stop when the current skb ends beyond the highest sequence that may be
// declared lost (loss_high).
if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
break;
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
cnt += tcp_skb_pcount(skb);
// Stop once the SACKed packets counted so far exceed the requested budget
// 'packets' (the caller passes tp->sacked_out - tp->reordering, or 1).
if (cnt > packets)
break;
if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
tcp_mark_skb_lost(sk, skb);
if (mark_head)
break;
}
tcp_verify_left_out(tp);
}
/*
tcp_mark_skb_lost:如果报文没有被标记过丢失(TCPCB_LOST),也没有被SACK确认(TCPCB_SACKED_ACKED),
则为其设置TCPCB_LOST标志,并且更新lost_out丢包统计。函数tcp_verify_retransmit_hint用于更新retransmit_skb_hint
重传报文指针,其中记录的为首个应当重传的报文。
*/
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
2023-02-24 dpdk代码总体思路
2021-02-24 内核协议栈 netfilter中tproxy新版本对比