tcp cubic代码分析

https://www.cnblogs.com/mylinuxer/p/5146142.html

 

/* (Line-number gutter from the original blog page removed; the annotated TCP CUBIC source follows.) */
/*
 * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
 * Home page:
 *      http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
 * This is from the implementation of CUBIC TCP in
 * Sangtae Ha, Injong Rhee and Lisong Xu,
 *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
 *  in ACM SIGOPS Operating System Review, July 2008.
 * Available from:
 *  http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
 *
 * CUBIC integrates a new slow start algorithm, called HyStart.
 * The details of HyStart are presented in
 *  Sangtae Ha and Injong Rhee,
 *  "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
 * Available from:
 *  http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
 *
 * All testing results are available from:
 * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
 *
 * Unless CUBIC is enabled and congestion window is large
 * this behaves the same as the original Reno.
 */
 
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>
 
#define BICTCP_BETA_SCALE    1024    /* Scale factor beta calculation
                     * max_cwnd = snd_cwnd * beta
                     */
#define    BICTCP_HZ        10    /* BIC HZ 2^10 = 1024 */

/* Two methods of hybrid slow start.
 * Both run independently at the same time, and slow start exits as soon
 * as either of them detects an exit point:
 *   1. ACK train length
 *   2. Delay increase
 * These two bits are OR-ed into bictcp.found and compared against the
 * hystart_detect module parameter.
 */
#define HYSTART_ACK_TRAIN    0x1
#define HYSTART_DELAY        0x2

/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES    8

/* Delay-increase threshold.
 * HYSTART_DELAY_THRESH(x) clamps x to [HYSTART_DELAY_MIN, HYSTART_DELAY_MAX],
 * i.e. to [2 << 3, 16 << 3].  hystart_update() passes delay_min >> 4, so the
 * threshold is delay_min / 16 bounded to that range.
 * NOTE(review): the original annotation here said delay_min is NOT scaled by
 * 8 and quoted the bounds as 2 ms / 16 ms; bictcp_acked() stores delays as
 * usecs_to_jiffies(rtt_us) << 3, so the values look like jiffies scaled by 8
 * (equal to ms only when HZ == 1000) -- confirm.
 */
#define HYSTART_DELAY_MIN    (2U<<3)
#define HYSTART_DELAY_MAX    (16U<<3)
#define HYSTART_DELAY_THRESH(x)    clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
 
/* Tunables (exposed as module parameters further down in this file). */

/* Non-zero enables the fast-convergence branch of bictcp_recalc_ssthresh(). */
static int fast_convergence __read_mostly = 1;
static int beta __read_mostly = 717;    /* = 717/1024 (BICTCP_BETA_SCALE) */
/* With beta = 717, bictcp_recalc_ssthresh() yields
 *   ssthresh      = snd_cwnd * 717 / 1024          (about 0.70 * snd_cwnd)
 *   last_max_cwnd = snd_cwnd * (1024 + 717) / 2048 (about 0.85 * snd_cwnd)
 * when fast convergence applies.
 * The original annotation adds that BIC uses beta = 819 and therefore larger
 * values for both, so CUBIC reaches its plateau earlier; the BIC figures
 * cannot be checked from this file.
 */



/* Only used by bictcp_init(), and only when hystart is off and this is non-zero. */
static int initial_ssthresh __read_mostly;
/* Cubic scaling constant; cubictcp_register() treats it as bic_scale/1024. */
static int bic_scale __read_mostly = 41;
/* Non-zero enables the "TCP friendly" section of bictcp_update(). */
static int tcp_friendliness __read_mostly = 1;



/* Master switch for hybrid slow start (HyStart). */
static int hystart __read_mostly = 1;
/* Which HyStart detectors may end slow start:
 * 1: packet-train  2: delay  3: both packet-train and delay.
 * Both are enabled by default, hence the value 3.
 */
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
/* bictcp_acked() only runs hystart_update() once snd_cwnd >= this value. */
static int hystart_low_window __read_mostly = 16;

/* Scale factors precomputed once in cubictcp_register(). */
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
static u64 cube_factor __read_mostly;
 
/* Module parameter declarations.
 * Note parameters that are used for precomputing scale factors are read-only.
 * Each MODULE_PARM_DESC string is the user-visible description of the
 * parameter; the hystart_detect description previously misspelled "hybrid".
 */
module_param(fast_convergence, int, 0644);
MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
module_param(beta, int, 0644);
MODULE_PARM_DESC(beta, "beta for multiplicative increase");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
module_param(bic_scale, int, 0444);
MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
module_param(hystart, int, 0644);
MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
module_param(hystart_detect, int, 0644);
MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
         " 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
 
/* BIC TCP Parameters
 *
 * Per-connection CUBIC state.  cubictcp_register() asserts at build time
 * that this struct fits in ICSK_CA_PRIV_SIZE.
 *
 * Annotation carried over from the original author: two counters matter for
 * window growth.  tcp_sock->snd_cwnd_cnt counts segments acknowledged in the
 * current window, and bictcp->cnt is the threshold it is compared against to
 * decide when snd_cwnd may grow in congestion avoidance.  (That comparison
 * happens in tcp_cong_avoid_ai(), which is not part of this file -- verify.)
 */
struct bictcp {
    u32    cnt;        /* controls cwnd growth: increase cwnd by 1 after this many ACKs */

    u32 last_max_cwnd;    /* last maximum snd_cwnd */
    u32    loss_cwnd;    /* congestion window at last loss */
    u32    last_cwnd;    /* the last snd_cwnd seen by bictcp_update() */
    u32    last_time;    /* time when updated last_cwnd */
    u32    bic_origin_point;/* origin point of bic function (the new Wmax): max(last_max_cwnd, cwnd) at epoch start */
    u32    bic_K;        /* time to origin point from the beginning of the current epoch, i.e. W(bic_K) = Wmax */
    u32    delay_min;    /* min delay; smallest value stored by bictcp_acked() (jiffies << 3) */
    u32    epoch_start;    /* beginning of an epoch; 0 means no epoch is running */
    u32    ack_cnt;    /* number of acks counted in the current epoch */
    u32    tcp_cwnd;    /* estimated tcp cwnd (what a Reno-style flow would have) */
#define ACK_RATIO_SHIFT    4
    u16    delayed_ack;    /* estimate the ratio of Packets/ACKs << 4 */
    u8    sample_cnt;    /* number of samples to decide curr_rtt */
    u8    found;        /* the exit point is found? (HYSTART_* bits) */
    u32    round_start;    /* beginning of each round (one round per RTT) */
    u32    end_seq;    /* end_seq of the round */
    u32    last_jiffies;    /* last time when the ACK spacing is close (within 2 ms, see hystart_update()) */
    u32    curr_rtt;    /* the minimum rtt of current round, taken over the first samples */
};
 
/*
 * Return the CUBIC growth state to its initial values.
 *
 * Used by bictcp_init() and by bictcp_state() on entry to TCP_CA_Loss.
 * The per-round HyStart fields (round_start, end_seq, last_jiffies,
 * curr_rtt, sample_cnt) are left alone; bictcp_hystart_reset() owns those.
 */
static inline void bictcp_reset(struct bictcp *ca)
{
    /* Packets-per-ACK estimate restarts at 2, in ACK_RATIO_SHIFT fixed point. */
    ca->delayed_ack = 2 << ACK_RATIO_SHIFT;

    /* Forget any HyStart exit point that was detected earlier. */
    ca->found = 0;

    /* Window history. */
    ca->last_max_cwnd = 0;
    ca->loss_cwnd = 0;
    ca->last_cwnd = 0;
    ca->last_time = 0;
    ca->tcp_cwnd = 0;

    /* Cubic curve parameters and epoch bookkeeping. */
    ca->bic_origin_point = 0;
    ca->bic_K = 0;
    ca->epoch_start = 0;
    ca->ack_cnt = 0;
    ca->cnt = 0;

    /* Minimum delay is learned again from scratch. */
    ca->delay_min = 0;
}
 
static inline void bictcp_hystart_reset(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
 
    ca->round_start = ca->last_jiffies = jiffies;//记录时间戳
    ca->end_seq = tp->snd_nxt;//记录待发送的下一个序列号
    ca->curr_rtt = 0;
    ca->sample_cnt = 0;
 
    //bictcp_hystart_reset中并没有对ca->found置0。
    //也就是说,只有在初始化时、LOSS状态时、开启hystart的慢启动时。
    //HyStart才会派上用场,其它时间并不使用.
}
 
/*
 * .init hook: set up the per-connection CUBIC state.
 *
 * With HyStart enabled the first measurement round is started.  Otherwise,
 * a non-zero initial_ssthresh module parameter is installed as the
 * connection's slow start threshold.
 */
static void bictcp_init(struct sock *sk)
{
    struct bictcp *ca = inet_csk_ca(sk);

    bictcp_reset(ca);

    if (hystart) {
        bictcp_hystart_reset(sk);
        return;
    }

    if (initial_ssthresh)
        tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
 
/* Integer cube root of a 64-bit value.
 *
 * A table lookup on the leading bits gives a first estimate, which is then
 * refined by a single Newton-Raphson step.
 * Avg err ~= 0.195%
 */
static u32 cubic_root(u64 a)
{
    /*
     * cbrt(x) MSB values for x MSB values in [0..63].
     * Precomputed then refined by hand - Willy Tarreau
     *
     * For x in [0..63],
     *   v = cbrt(x << 18) - 1
     *   cbrt(x) = (v[x] + 10) >> 6
     */
    static const u8 v[] = {
        /* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
        /* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
        /* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
        /* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
        /* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
        /* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
        /* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
        /* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
    };
    u32 nbits, third, idx, est, corr;

    nbits = fls64(a);

    /* a in [0..63]: the table alone is precise enough */
    if (nbits < 7)
        return ((u32)v[(u32)a] + 35) >> 6;

    /* 84/256 is roughly 1/3: about one third of the bit length of a */
    third = ((nbits * 84) >> 8) - 1;

    /* leading bits of a, used as the table index */
    idx = (a >> (third * 3));

    est = ((u32)(((u32)v[idx] + 10) << third)) >> 6;

    /*
     * Newton-Raphson iteration
     *                         2
     * x    = ( 2 * x  +  a / x  ) / 3
     *  k+1          k         k
     *
     * The code divides by x * (x - 1) rather than x * x, and replaces the
     * division by 3 with a multiply by 341 and a shift by 10 (341/1024).
     */
    corr = (u32)div64_u64(a, (u64)est * (u64)(est - 1));
    est = 2 * est + corr;
    est = ((est * 341) >> 10);

    return est;
}
 
/*
 * Compute congestion window to use.
 *
 * Recomputes ca->cnt, the number of ACKs after which cwnd is increased by
 * one.  bictcp_cong_avoid() calls this while snd_cwnd > snd_ssthresh and
 * hands the resulting ca->cnt to tcp_cong_avoid_ai().
 *
 * @ca:   per-connection CUBIC state
 * @cwnd: current congestion window, in packets
 *
 * The elapsed time t is kept in 64 bits.  As a u32 the "<< BICTCP_HZ"
 * conversion wrapped once an epoch lasted longer than 2^22 jiffies, which
 * made t (and therefore the cubic target) jump back to small values.
 */
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
    /* delta: window difference, bic_target: window predicted by the curve */
    u32 delta, bic_target, max_cnt;
    /* t: time since epoch start in bictcp_HZ units, offs: |t - K| */
    u64 offs, t;

    ca->ack_cnt++;    /* count the number of ACKs */

    /* Same window as last time and at most HZ/32 ticks later: keep ca->cnt */
    if (ca->last_cwnd == cwnd &&
        (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
        return;

    ca->last_cwnd = cwnd;
    ca->last_time = tcp_time_stamp;

    /* epoch_start == 0 (set on loss / reset) means a new epoch begins now */
    if (ca->epoch_start == 0) {
        ca->epoch_start = tcp_time_stamp;    /* record the beginning of an epoch */
        ca->ack_cnt = 1;            /* start counting */
        ca->tcp_cwnd = cwnd;            /* syn with cubic */

        /* The curve's origin (Wmax) is max(last_max_cwnd, cwnd) */
        if (ca->last_max_cwnd <= cwnd) {
            ca->bic_K = 0;
            ca->bic_origin_point = cwnd;
        } else {
            /* Compute new K based on
             * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
             */
            ca->bic_K = cubic_root(cube_factor
                           * (ca->last_max_cwnd - cwnd));
            ca->bic_origin_point = ca->last_max_cwnd;
        }
    }

    /* cubic function - calc*/
    /* calculate c * time^3 / rtt,
     *  while considering overflow in calculation of time^3
     * (so time^3 is done by using 64 bit)
     * and without the support of division of 64bit numbers
     * (so all divisions are done by using 32 bit)
     *  also NOTE the unit of those variables
     *      time  = (t - K) / 2^bictcp_HZ
     *      c = bic_scale >> 10 == 0.04
     * rtt  = (srtt >> 3) / HZ
     * !!! The following code does not have overflow problems,
     * if the cwnd < 1 million packets !!!
     */

    /* Elapsed jiffies since the epoch began, plus the minimum delay
     * (delay_min is stored scaled by 8), widened to 64 bits before the
     * shift so the unit conversion cannot wrap.
     */
    t = (u32)(tcp_time_stamp - ca->epoch_start);
    t += ca->delay_min >> 3;

    /* change the unit from HZ to bictcp_HZ */
    t <<= BICTCP_HZ;
    do_div(t, HZ);

    /* offs = |t - K| */
    if (t < ca->bic_K)        /* origin point not reached yet */
        offs = ca->bic_K - t;
    else                /* already past the origin point */
        offs = t - ca->bic_K;

    /* c/rtt * (t-K)^3, i.e. delta = |W(t) - W(K)| */
    delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);

    if (t < ca->bic_K)                                    /* below origin*/
        bic_target = ca->bic_origin_point - delta;
    else                                                    /* above origin*/
        bic_target = ca->bic_origin_point + delta;

    /* cubic function - calc bictcp_cnt*/
    if (bic_target > cwnd) {
        /* the further cwnd is below the target, the smaller cnt gets */
        ca->cnt = cwnd / (bic_target - cwnd);
    } else {
        /* cwnd is already at or above the target */
        ca->cnt = 100 * cwnd;              /* very small increment*/
    }

    /* TCP Friendly: if the cubic curve is slower than a Reno-style flow,
     * lower cnt so that cwnd follows the estimated Reno window instead.
     * ca->tcp_cwnd grows by one for every delta = cwnd * beta_scale / 8
     * ACKs counted since the epoch began.
     */
    if (tcp_friendliness) {
        u32 scale = beta_scale;
        delta = (cwnd * scale) >> 3;    /* ACKs needed per tcp_cwnd++ */
        while (ca->ack_cnt > delta) {        /* update tcp cwnd */
            ca->ack_cnt -= delta;
            ca->tcp_cwnd++;
        }

        if (ca->tcp_cwnd > cwnd){    /* if bic is slower than tcp */
            delta = ca->tcp_cwnd - cwnd;
            max_cnt = cwnd / delta;
            if (ca->cnt > max_cnt)
                ca->cnt = max_cnt;
        }
    }

    /* Compensate for delayed ACKs: delayed_ack is packets per ACK << 4 */
    ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
    if (ca->cnt == 0)            /* cannot be zero */
        ca->cnt = 1;    /* smallest allowed value of cnt */
}
 
/*
 * .cong_avoid hook: grow the congestion window on an incoming ACK.
 *
 * Nothing happens unless tcp_is_cwnd_limited() reports that the sender is
 * limited by the congestion window.  Above ssthresh the cubic pacing count
 * is refreshed and applied; at or below ssthresh the connection slow
 * starts, beginning a new HyStart round whenever the ACK passes the end of
 * the current one.
 */
static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
    struct bictcp *ca = inet_csk_ca(sk);
    struct tcp_sock *tp = tcp_sk(sk);

    if (!tcp_is_cwnd_limited(sk, in_flight))
        return;

    /* Congestion avoidance: snd_cwnd > ssthresh */
    if (tp->snd_cwnd > tp->snd_ssthresh) {
        bictcp_update(ca, tp->snd_cwnd);    /* refresh ca->cnt */
        tcp_cong_avoid_ai(tp, ca->cnt);
        return;
    }

    /* Slow start: snd_cwnd <= ssthresh */
    if (hystart && after(ack, ca->end_seq))
        bictcp_hystart_reset(sk);
    tcp_slow_start(tp);
}
 
 
//每次发生拥塞状态切换时,就会重新计算慢启动阈值
//做了两件事:重赋值last_max_cwnd、返回新的慢启动阈值
static u32 bictcp_recalc_ssthresh(struct sock *sk)
{//论文说这个函数在Packet loss时调用
    const struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
 
    ca->epoch_start = 0;    /* 发生拥塞状态切换,标志一个epoch结束   end of epoch */
 
    /* Wmax and fast convergence */
    //当一个新的TCP流加入到网络,
    //网络中已有TCP流需要放弃自己带宽,
    //给新的TCP流提供一定的上升空间。
    //为提高已有TCP流所释放的带宽而引入快速收敛机制。
    if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
        //snd_cwnd<last_max_cwnd
        //表示已有TCP流所经历的饱和点因为可用带宽改变而正在降低。
        //然后,通过进一步降低Wmax让已有流释放更多带宽。
        //这种行为有效地延长已有流增大其窗口的时间,
        //因为降低后的Wmax强制已有流更早进入平稳状态。
        //这允许新流有更多的时间来赶上其窗口尺寸。
        ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
            / (2 * BICTCP_BETA_SCALE); //last_max_cwnd = 0.9 * snd_cwnd
    else
        ca->last_max_cwnd = tp->snd_cwnd;
 
    ca->loss_cwnd = tp->snd_cwnd;
 
    //修改snd_ssthresh,即max(0.7*snd_cwnd,2)
    return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 
}
 
static u32 bictcp_undo_cwnd(struct sock *sk)
{
    struct bictcp *ca = inet_csk_ca(sk);
 
    return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
}
 
/*
 * .set_state hook.  Only the transition to TCP_CA_Loss matters: the CUBIC
 * state is reset and a fresh HyStart round is started.
 */
static void bictcp_state(struct sock *sk, u8 new_state)
{
    if (new_state != TCP_CA_Loss)
        return;

    bictcp_reset(inet_csk_ca(sk));
    bictcp_hystart_reset(sk);
}
 
/*
 * HyStart: look for a slow start exit point.
 *
 * @sk:    socket
 * @delay: latest delay sample, in the same scaled unit as ca->delay_min
 *
 * Does nothing once an enabled detector (hystart_detect) has already
 * fired.  When a detector fires, snd_ssthresh is set to the current
 * snd_cwnd.
 */
static void hystart_update(struct sock *sk, u32 delay)
{
    struct bictcp *ca = inet_csk_ca(sk);
    struct tcp_sock *tp = tcp_sk(sk);
    u32 now;

    if (ca->found & hystart_detect)
        return;

    now = jiffies;

    /* first detection parameter - ack-train detection:
     * ACKs no more than 2 ms apart form a train; the train is long enough
     * once the time since the round began reaches delay_min >> 4.
     */
    if (now - ca->last_jiffies <= msecs_to_jiffies(2)) {
        ca->last_jiffies = now;
        if (now - ca->round_start >= (ca->delay_min >> 4))
            ca->found |= HYSTART_ACK_TRAIN;
    }

    if (ca->sample_cnt >= HYSTART_MIN_SAMPLES) {
        /* second detection parameter - delay increase:
         * enough samples were taken, compare the round's minimum RTT
         * against delay_min plus the clamped threshold.
         */
        if (ca->curr_rtt > ca->delay_min +
            HYSTART_DELAY_THRESH(ca->delay_min >> 4))
            ca->found |= HYSTART_DELAY;
    } else {
        /* obtain the minimum delay of more than sampling packets */
        if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
            ca->curr_rtt = delay;

        ca->sample_cnt++;
    }

    /*
     * Either one of two conditions are met,
     * we exit from slow start immediately.
     */
    if (ca->found & hystart_detect)
        tp->snd_ssthresh = tp->snd_cwnd;
}
 
/* Upper bound for bictcp.delayed_ack (packets per ACK, << ACK_RATIO_SHIFT).
 * Keeps the estimate inside its u16 field so it can never wrap to zero,
 * which would make the division by delayed_ack in bictcp_update() fault.
 */
#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)

/* Track delayed acknowledgment ratio using sliding window
 * ratio = (15*ratio + sample) / 16
 *
 * .pkts_acked hook.  Besides the ratio it maintains ca->delay_min and,
 * while in slow start with a large enough window, runs the HyStart exit
 * detection (which may lower snd_ssthresh).
 *
 * @sk:     socket
 * @cnt:    number of packets covered by this ACK
 * @rtt_us: RTT sample in microseconds, negative when none is available
 */
static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
{
    const struct inet_connection_sock *icsk = inet_csk(sk);
    const struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
    u32 delay;

    if (icsk->icsk_ca_state == TCP_CA_Open) {
        /* Work in 32 bits and clamp before storing into the u16 field */
        u32 ratio = ca->delayed_ack;

        ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
        ratio += cnt;

        ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
    }

    /* Some calls are for duplicates without timestamps */
    if (rtt_us < 0)
        return;

    /* Discard delay samples right after fast recovery.
     * epoch_start == 0 means no epoch is running; without that check the
     * signed difference is negative whenever the high bit of
     * tcp_time_stamp is set, and every sample would be thrown away.
     */
    if (ca->epoch_start &&
        (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
        return;

    /* Delay is kept in jiffies scaled by 8; never store 0 */
    delay = usecs_to_jiffies(rtt_us) << 3;
    if (delay == 0)
        delay = 1;

    /* first time call or link delay decreases */
    if (ca->delay_min == 0 || ca->delay_min > delay)
        ca->delay_min = delay;

    /* hystart triggers when cwnd is larger than some threshold:
     * only while still in slow start (snd_cwnd <= snd_ssthresh) and once
     * snd_cwnd has reached hystart_low_window.  hystart_update() decides
     * whether to leave slow start.
     */
    if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
        tp->snd_cwnd >= hystart_low_window)
        hystart_update(sk, delay);
}
 
/* Congestion control operations table registered under the name "cubic". */
static struct tcp_congestion_ops cubictcp = {

    .init        = bictcp_init,


    /* Per the original annotation, the ssthresh hook is called from
     * tcp_fastretrans_alert(), tcp_enter_cwr(), tcp_enter_frto() and
     * tcp_enter_loss(), i.e. apparently on every congestion state change
     * (callers are outside this file -- verify).
     * Within this file snd_ssthresh is also written by bictcp_init() and
     * hystart_update().
     */
    .ssthresh    = bictcp_recalc_ssthresh,

    /* Per the original annotation: when an ACK for sent data arrives,
     * tcp_ack() -> tcp_cong_avoid() -> bictcp_cong_avoid() adjusts the
     * congestion window snd_cwnd.
     */
    .cong_avoid    = bictcp_cong_avoid,

    .set_state    = bictcp_state,

    /* Per the original annotation: called from tcp_undo_cwr() to revert a
     * window reduction that turned out to be a misjudgment.
     */
    .undo_cwnd    = bictcp_undo_cwnd,

    /* Per the original annotation: pkts_acked is reached via
     * tcp_ack() --> tcp_clean_rtx_queue().
     */
    .pkts_acked     = bictcp_acked,

    .owner        = THIS_MODULE,
    .name        = "cubic",
};
 
/*
 * Module init: precompute the scaling factors used on the per-packet path
 * and register the "cubic" congestion control.
 *
 * Returns whatever tcp_register_congestion_control() returns.
 */
static int __init cubictcp_register(void)
{
    u32 c_over_rtt;

    /* The private state must fit into the space reserved per socket */
    BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);

    /* Precompute a bunch of the scaling factors that are used per-packet
     * based on SRTT of 100ms
     */

    /* With the default beta of 717: 8*(1024+717)/3/(1024-717) == 15 */
    beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);

    /* 1024*c/rtt; 41 * 10 == 410 with the default bic_scale */
    c_over_rtt = bic_scale * 10;
    cube_rtt_scale = c_over_rtt;

    /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
     *  so K = cubic_root( (wmax-cwnd)*rtt/c )
     * the unit of K is bictcp_HZ=2^10, not HZ
     *
     *  c = bic_scale >> 10
     *  rtt = 100ms
     *
     * The original comment states the code was designed and tested for
     * cwnd < 1 million packets and RTT < 100 seconds.
     */

    /* Start from 2^(10 + 3*BICTCP_HZ) == 2^40 ... */
    cube_factor = 1ull << (10+3*BICTCP_HZ);

    /* ... and divide by bic_scale and by constant Srtt (100ms) */
    do_div(cube_factor, c_over_rtt);

    return tcp_register_congestion_control(&cubictcp);
}
 
/* Module exit: remove the "cubic" congestion control registered at init. */
static void __exit cubictcp_unregister(void)
{
    tcp_unregister_congestion_control(&cubictcp);
}
 
/* Module entry/exit points and metadata. */
module_init(cubictcp_register);
module_exit(cubictcp_unregister);

MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
MODULE_VERSION("2.3");

 

posted @   张同光  阅读(190)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
历史上的今天:
2021-01-27 socket、sock、sk_buff、net_device 关系图
点击右上角即可分享
微信分享提示