tcp cubic代码分析

https://www.cnblogs.com/mylinuxer/p/5146142.html

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

/*
 * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
 * Home page:
 *      http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
 * This is from the implementation of CUBIC TCP in
 * Sangtae Ha, Injong Rhee and Lisong Xu,
 *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
 *  in ACM SIGOPS Operating System Review, July 2008.
 * Available from:
 *  http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
 *
 * CUBIC integrates a new slow start algorithm, called HyStart.
 * The details of HyStart are presented in
 *  Sangtae Ha and Injong Rhee,
 *  "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
 * Available from:
 *  http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
 *
 * All testing results are available from:
 * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
 *
 * Unless CUBIC is enabled and congestion window is large
 * this behaves the same as the original Reno.
 */
 
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>
 
#define BICTCP_BETA_SCALE    1024    /* Scale factor beta calculation
                     * max_cwnd = snd_cwnd * beta
                     * (beta is stored as an integer fraction of 1024)
                     */
#define    BICTCP_HZ        10    /* BIC HZ 2^10 = 1024 */

/* Two methods of hybrid slow start.
 * Both run independently at the same time, and slow start exits as soon as
 * either of them detects an exit point:
 *   1. ACK train length
 *   2. Delay increase
 * The two values below are bit flags; they are OR-ed into bictcp.found and
 * tested against the hystart_detect module parameter.
 */

#define HYSTART_ACK_TRAIN    0x1
#define HYSTART_DELAY        0x2
/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES    8
/* Bounds of the delay-increase threshold used by hystart_update().
 * Delays in this file are stored as usecs_to_jiffies(rtt_us) << 3 (see
 * bictcp_acked()), so the bounds are 2 and 16 jiffies in that scaled unit.
 * hystart_update() passes delay_min >> 4, i.e. 1/16 of the minimum delay,
 * which HYSTART_DELAY_THRESH() clamps into [HYSTART_DELAY_MIN,
 * HYSTART_DELAY_MAX].
 * Original annotation: the threshold is 2ms when delay_min <= 32ms,
 * delay_min/16 in between, and 16ms when delay_min >= 256ms
 * (presumably assuming HZ == 1000 -- TODO confirm).
 */
#define HYSTART_DELAY_MIN    (2U<<3)
#define HYSTART_DELAY_MAX    (16U<<3)
#define HYSTART_DELAY_THRESH(x)    clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
 
/* Non-zero enables the fast-convergence reduction of last_max_cwnd in
 * bictcp_recalc_ssthresh().
 */
static int fast_convergence __read_mostly = 1;
static int beta __read_mostly = 717;    /* = 717/1024 (BICTCP_BETA_SCALE) */
/* With beta = 717, bictcp_recalc_ssthresh() yields
 *   ssthresh      = snd_cwnd * 717 / 1024          (about 0.7  * snd_cwnd)
 *   last_max_cwnd = snd_cwnd * (1024 + 717) / 2048 (about 0.85 * snd_cwnd)
 * the latter only when fast convergence applies.
 * Original annotation: BIC uses beta = 819 instead (about 0.8 and 0.95
 * respectively), so CUBIC settles at its plateau earlier -- not verifiable
 * from this file.
 */



static int initial_ssthresh __read_mostly;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;



/* On/off switch for hybrid slow start (HyStart). */
static int hystart __read_mostly = 1;
/* Which HyStart detectors are honoured:
 *   1: packet-train   2: delay   3: both packet-train and delay
 * Both are enabled by default, hence the value 3.
 */
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
/* Lower bound on snd_cwnd for HyStart: bictcp_acked() only calls
 * hystart_update() once snd_cwnd >= hystart_low_window.
 */
static int hystart_low_window __read_mostly = 16;

/* Scale factors precomputed once in cubictcp_register(). */
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
static u64 cube_factor __read_mostly;
 
/* Note parameters that are used for precomputing scale factors are read-only */
/* Each tunable is exported as a module parameter; mode 0644 makes it
 * writable at runtime, 0444 (bic_scale) makes it read-only.
 */
module_param(fast_convergence, int, 0644);
MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
module_param(beta, int, 0644);
MODULE_PARM_DESC(beta, "beta for multiplicative increase");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
module_param(bic_scale, int, 0444);
MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
module_param(hystart, int, 0644);
MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
module_param(hystart_detect, int, 0644);
/* Description typo fixed: "hyrbrid" -> "hybrid". */
MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
         " 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
 
/* BIC TCP Parameters
 *
 * Per-connection CUBIC state, obtained through inet_csk_ca(sk).
 * cubictcp_register() asserts at build time that it fits in
 * ICSK_CA_PRIV_SIZE.
 */
struct bictcp {
    u32    cnt;        /* paces snd_cwnd growth: increase cwnd by 1 after ACKs */
    /* Original annotation: two counters matter here. tcp_sock->snd_cwnd_cnt
     * counts the segments acknowledged within the current window, while
     * bictcp->cnt is the core of CUBIC: in congestion avoidance the window
     * grows once snd_cwnd_cnt catches up with cnt (the comparison itself is
     * done in tcp_cong_avoid_ai(), which is not part of this file).
     */

    u32 last_max_cwnd;    /* last maximum snd_cwnd */
    u32    loss_cwnd;    /* congestion window at last loss */
    u32    last_cwnd;    /* the last snd_cwnd */
    u32    last_time;    /* time when updated last_cwnd */
    u32    bic_origin_point;/* origin point of bic function: the new Wmax plateau, the larger of last_max_cwnd and snd_cwnd */
    u32    bic_K;        /* time to origin point from the beginning of the current epoch, i.e. W(bic_K) = Wmax */
    u32    delay_min;    /* min delay (smallest delay sample seen; stored << 3) */
    u32    epoch_start;    /* beginning of an epoch; 0 means no epoch is running */
    u32    ack_cnt;    /* number of acks counted in the current epoch */
    u32    tcp_cwnd;    /* estimated tcp cwnd (what a Reno-style flow would have) */
#define ACK_RATIO_SHIFT    4
    u16    delayed_ack;    /* estimate the ratio of Packets/ACKs << 4 */
    u8    sample_cnt;    /* number of samples to decide curr_rtt */
    u8    found;        /* the exit point is found? (HYSTART_* bit flags) */
    u32    round_start;    /* beginning of each round */
    u32    end_seq;    /* end_seq of the round */
    u32    last_jiffies;    /* last time when the ACK spacing is close (within 2ms, see hystart_update()) */
    u32    curr_rtt;    /* the minimum rtt of current round, taken over the collected samples */
};
 
/*
 * Put the CUBIC part of the per-connection state back to its initial values.
 *
 * Used by bictcp_init() and by bictcp_state() on entry to TCP_CA_Loss.
 * The per-round HyStart fields (round_start, end_seq, last_jiffies, curr_rtt,
 * sample_cnt) are not touched here; bictcp_hystart_reset() handles those.
 */
static inline void bictcp_reset(struct bictcp *ca)
{
    /* Window history. */
    ca->last_max_cwnd = 0;
    ca->loss_cwnd = 0;
    ca->last_cwnd = 0;
    ca->last_time = 0;

    /* Cubic curve and epoch bookkeeping. */
    ca->epoch_start = 0;
    ca->bic_origin_point = 0;
    ca->bic_K = 0;
    ca->cnt = 0;

    /* Reno-equivalent estimate and ACK accounting. */
    ca->tcp_cwnd = 0;
    ca->ack_cnt = 0;
    ca->delayed_ack = 2 << ACK_RATIO_SHIFT;    /* start from 2 packets per ACK */

    /* Delay tracking and HyStart exit flags. */
    ca->delay_min = 0;
    ca->found = 0;
}
 
static inline void bictcp_hystart_reset(struct sock *sk)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
 
    ca->round_start = ca->last_jiffies = jiffies;//记录时间戳
    ca->end_seq = tp->snd_nxt;//记录待发送的下一个序列号
    ca->curr_rtt = 0;
    ca->sample_cnt = 0;
 
    //bictcp_hystart_reset中并没有对ca->found置0。
    //也就是说，只有在初始化时、LOSS状态时、开启hystart的慢启动时。
    //HyStart才会派上用场，其它时间并不使用.
}
 
/*
 * .init hook: reset the CUBIC state for a connection.
 *
 * With HyStart enabled a HyStart round is started; otherwise a non-zero
 * initial_ssthresh module parameter is applied to snd_ssthresh.
 */
static void bictcp_init(struct sock *sk)
{
    bictcp_reset(inet_csk_ca(sk));

    if (hystart) {
        bictcp_hystart_reset(sk);
        return;
    }

    /* HyStart is off: fall back to the configured initial threshold. */
    if (initial_ssthresh)
        tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
 
/* calculate the cubic root of x using a table lookup followed by one
 * Newton-Raphson iteration.
 * Avg err ~= 0.195%
 *
 * @a: 64-bit value whose cube root is wanted.
 * Returns the approximate integer cube root of @a.
 */
static u32 cubic_root(u64 a)
{
    u32 x, b, shift;
    /*
     * cbrt(x) MSB values for x MSB values in [0..63].
     * Precomputed then refined by hand - Willy Tarreau
     *
     * For x in [0..63],
     *   v = cbrt(x << 18) - 1
     *   cbrt(x) = (v[x] + 10) >> 6
     */
    static const u8 v[] = {
        /* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
        /* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
        /* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
        /* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
        /* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
        /* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
        /* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
        /* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
    };

    /* b = position of the highest set bit of a (0 when a == 0) */
    b = fls64(a);
    if (b < 7) {
        /* a in [0..63]: answer straight from the table */
        return ((u32)v[(u32)a] + 35) >> 6;
    }

    /* 84/256 is roughly 1/3, so b becomes about (bit length / 3) - 1 */
    b = ((b * 84) >> 8) - 1;
    /* scale a down by 2^(3*b) to obtain the table index */
    shift = (a >> (b * 3));

    /* initial estimate: table value scaled back up by 2^b */
    x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;

    /*
     * Newton-Raphson iteration
     *                         2
     * x    = ( 2 * x  +  a / x  ) / 3
     *  k+1          k         k
     *
     * Note: the code below divides by x * (x - 1) rather than x * x, and
     * implements the final division by 3 as a multiplication by 341/1024.
     */
    x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
    x = ((x * 341) >> 10);
    return x;
}
 
/*
 * Compute congestion window to use.
 *
 * Called from bictcp_cong_avoid() when snd_cwnd > snd_ssthresh. It does not
 * change the window itself; it recomputes ca->cnt, which the caller hands to
 * tcp_cong_avoid_ai().
 *
 * @ca:   per-connection CUBIC state
 * @cwnd: current congestion window (tp->snd_cwnd)
 */
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
    u64 offs;    /* time distance |t - K| */
    /* delta: window difference, bic_target: window predicted by the cubic
     * curve, t: elapsed time in the epoch
     */
    u32 delta, t, bic_target, max_cnt;

    ca->ack_cnt++;    /* count the number of ACKs */

    if (ca->last_cwnd == cwnd &&    /* window unchanged since the last update */
        (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)    /* and no more than HZ/32 ticks (1/32 s) ago */
        return;    /* keep the previously computed cnt */

    ca->last_cwnd = cwnd;    /* remember the window this update was made for */
    ca->last_time = tcp_time_stamp;    /* and when it was made */

    if (ca->epoch_start == 0) {    /* first update after a loss: open a new epoch */
        ca->epoch_start = tcp_time_stamp;    /* record the beginning of an epoch */
        ca->ack_cnt = 1;            /* start counting */
        ca->tcp_cwnd = cwnd;            /* syn with cubic */

        /* the plateau (origin point) is max(last_max_cwnd, cwnd) */
        if (ca->last_max_cwnd <= cwnd) {
            ca->bic_K = 0;
            ca->bic_origin_point = cwnd;
        } else {
            /* Compute new K based on
             * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
             */
            ca->bic_K = cubic_root(cube_factor
                           * (ca->last_max_cwnd - cwnd));
            ca->bic_origin_point = ca->last_max_cwnd;
        }
    }

    /* cubic function - calc*/
    /* calculate c * time^3 / rtt,
     *  while considering overflow in calculation of time^3
     * (so time^3 is done by using 64 bit)
     * and without the support of division of 64bit numbers
     * (so all divisions are done by using 32 bit)
     *  also NOTE the unit of those veriables
     *      time  = (t - K) / 2^bictcp_HZ
     *      c = bic_scale >> 10 == 0.04
     * rtt  = (srtt >> 3) / HZ
     * !!! The following code does not have overflow problems,
     * if the cwnd < 1 million packets !!!
     */

    /* change the unit from HZ to bictcp_HZ */
    t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
         << BICTCP_HZ) / HZ;

    /* offs = |t - bic_K| */
    if (t < ca->bic_K)        /* plateau not reached yet */
        offs = ca->bic_K - t;
    else
        offs = t - ca->bic_K;    /* already past the plateau */

    /* c/rtt * (t-K)^3, i.e. delta = |W(t) - W(bic_K)| */
    delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);



    /* bic_target is the window the cubic curve predicts at time t;
     * bic_origin_point is the current plateau and bic_K the time at which
     * the curve reaches it.
     */
    if (t < ca->bic_K)                                    /* below origin*/
        bic_target = ca->bic_origin_point - delta;
    else                                                    /* above origin*/
        bic_target = ca->bic_origin_point + delta;

    /* cubic function - calc bictcp_cnt*/
    if (bic_target > cwnd) {    /* the further below the target, the smaller cnt and the faster the growth */
        ca->cnt = cwnd / (bic_target - cwnd);
    } else {    /* cwnd is already at or beyond the target: slow down */
        ca->cnt = 100 * cwnd;              /* very small increment*/
    }



    /* TCP Friendly: if the cubic curve is slower than a Reno-style flow
     * would be, speed up by lowering cnt.
     * tcp_cwnd estimates the Reno window since the start of the epoch: it
     * gains one segment for every (cwnd * beta_scale) >> 3 ACKs counted in
     * ack_cnt.
     * Original annotation: this corresponds to an increase of 3B / (2 - B)
     * per RTT, where B is the multiplicative decrease factor (0.3 here).
     */
    if (tcp_friendliness) {
        u32 scale = beta_scale;
        delta = (cwnd * scale) >> 3;    /* ACKs needed for one tcp_cwnd increment */
        while (ca->ack_cnt > delta) {        /* update tcp cwnd */
            ca->ack_cnt -= delta;
            ca->tcp_cwnd++;
        }

        if (ca->tcp_cwnd > cwnd){    /* if bic is slower than tcp */
            delta = ca->tcp_cwnd - cwnd;
            max_cnt = cwnd / delta;
            if (ca->cnt > max_cnt)
                ca->cnt = max_cnt;
        }
    }

    /* Compensate for delayed ACKs: delayed_ack holds packets-per-ACK << 4.
     * NOTE(review): this divides by ca->delayed_ack, which must never be 0.
     */
    ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
    if (ca->cnt == 0)            /* cannot be zero */
        ca->cnt = 1;    /* cwnd is far below the target: fastest allowed growth */
}
 
/*
 * .cong_avoid hook: grow the congestion window in response to an ACK.
 *
 * Nothing happens unless tcp_is_cwnd_limited() reports that the connection
 * is limited by its congestion window. Above ssthresh the CUBIC pace
 * (ca->cnt) is refreshed and applied; at or below ssthresh standard slow
 * start runs, and with HyStart enabled a new round starts once @ack passes
 * the recorded end of the previous round.
 */
static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);

    if (!tcp_is_cwnd_limited(sk, in_flight))
        return;

    if (tp->snd_cwnd > tp->snd_ssthresh) {
        /* Congestion avoidance: recompute cnt, then let it pace the growth. */
        bictcp_update(ca, tp->snd_cwnd);
        tcp_cong_avoid_ai(tp, ca->cnt);
        return;
    }

    /* Slow start. */
    if (hystart && after(ack, ca->end_seq))
        bictcp_hystart_reset(sk);
    tcp_slow_start(tp);
}
 
 
//每次发生拥塞状态切换时，就会重新计算慢启动阈值
//做了两件事：重赋值last_max_cwnd、返回新的慢启动阈值
static u32 bictcp_recalc_ssthresh(struct sock *sk)
{//论文说这个函数在Packet loss时调用
    const struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
 
    ca->epoch_start = 0;    /* 发生拥塞状态切换，标志一个epoch结束   end of epoch */
 
    /* Wmax and fast convergence */
    //当一个新的TCP流加入到网络，
    //网络中已有TCP流需要放弃自己带宽，
    //给新的TCP流提供一定的上升空间。
    //为提高已有TCP流所释放的带宽而引入快速收敛机制。
    if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
        //snd_cwnd<last_max_cwnd
        //表示已有TCP流所经历的饱和点因为可用带宽改变而正在降低。
        //然后，通过进一步降低Wmax让已有流释放更多带宽。
        //这种行为有效地延长已有流增大其窗口的时间，
        //因为降低后的Wmax强制已有流更早进入平稳状态。
        //这允许新流有更多的时间来赶上其窗口尺寸。
        ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
            / (2 * BICTCP_BETA_SCALE); //last_max_cwnd = 0.9 * snd_cwnd
    else
        ca->last_max_cwnd = tp->snd_cwnd;
 
    ca->loss_cwnd = tp->snd_cwnd;
 
    //修改snd_ssthresh，即max(0.7*snd_cwnd，2)
    return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 
}
 
/*
 * .undo_cwnd hook: window to restore when a reduction is undone.
 *
 * Returns the larger of the current snd_cwnd and the remembered
 * last_max_cwnd.
 * NOTE(review): after fast convergence last_max_cwnd is lower than the
 * pre-loss window kept in loss_cwnd, so the undo may not fully restore it.
 */
static u32 bictcp_undo_cwnd(struct sock *sk)
{
    struct bictcp *ca = inet_csk_ca(sk);
    u32 cwnd = tcp_sk(sk)->snd_cwnd;
    u32 wmax = ca->last_max_cwnd;

    return cwnd > wmax ? cwnd : wmax;
}
 
/*
 * .set_state hook: on entry to TCP_CA_Loss, reset both the CUBIC state and
 * the HyStart round. Every other state change is ignored.
 */
static void bictcp_state(struct sock *sk, u8 new_state)
{
    if (new_state != TCP_CA_Loss)
        return;

    bictcp_reset(inet_csk_ca(sk));
    bictcp_hystart_reset(sk);
}
 
/*
 * HyStart exit detection, fed with one delay sample per call.
 *
 * @sk:    socket in slow start
 * @delay: delay sample in the same scaled unit as ca->delay_min
 *
 * Two detectors run on every call and record their result in ca->found:
 *  - ACK train: ACKs arriving no more than 2ms apart extend the train; the
 *    exit point is found once the train has lasted at least delay_min >> 4.
 *  - Delay increase: the minimum of the first HYSTART_MIN_SAMPLES samples of
 *    the round is kept in curr_rtt; afterwards the exit point is found when
 *    curr_rtt exceeds delay_min by HYSTART_DELAY_THRESH(delay_min >> 4).
 * When a detector enabled in hystart_detect has fired, snd_ssthresh is set
 * to the current snd_cwnd, which ends slow start.
 */
static void hystart_update(struct sock *sk, u32 delay)
{
    struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
    u32 now;

    /* An enabled detector already found the exit point: nothing to do. */
    if (ca->found & hystart_detect)
        return;

    now = jiffies;

    /* first detection parameter - ack-train detection */
    if (now - ca->last_jiffies <= msecs_to_jiffies(2)) {
        ca->last_jiffies = now;
        if (now - ca->round_start >= ca->delay_min >> 4)
            ca->found |= HYSTART_ACK_TRAIN;
    }

    /* second detection parameter - delay increase */
    if (ca->sample_cnt >= HYSTART_MIN_SAMPLES) {
        if (ca->curr_rtt > ca->delay_min +
            HYSTART_DELAY_THRESH(ca->delay_min >> 4))
            ca->found |= HYSTART_DELAY;
    } else {
        /* still sampling: keep the minimum delay of this round */
        if (ca->curr_rtt == 0 || delay < ca->curr_rtt)
            ca->curr_rtt = delay;

        ca->sample_cnt++;
    }

    /*
     * Either one of two conditions are met,
     * we exit from slow start immediately.
     */
    if (ca->found & hystart_detect)
        tp->snd_ssthresh = tp->snd_cwnd;
}
 
/* Track delayed acknowledgment ratio using sliding window
 * ratio = (15*ratio + sample) / 16
 *
 * .pkts_acked hook. Besides the ratio it maintains ca->delay_min and, while
 * in slow start with a large enough window, feeds HyStart.
 *
 * @sk:     socket the ACK belongs to
 * @cnt:    number of packets covered by this ACK
 * @rtt_us: RTT sample in microseconds, negative when no sample is available
 */
static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
{
    const struct inet_connection_sock *icsk = inet_csk(sk);
    const struct tcp_sock *tp = tcp_sk(sk);
    struct bictcp *ca = inet_csk_ca(sk);
    u32 delay;

    if (icsk->icsk_ca_state == TCP_CA_Open) {
        /* Upper bound of 32 packets per ACK. delayed_ack is only 16 bits
         * wide, so without a bound a large cnt could wrap it, possibly to
         * 0, and bictcp_update() divides by it.
         */
        const u32 ratio_limit = 32U << ACK_RATIO_SHIFT;
        u32 ratio = ca->delayed_ack;

        ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
        ratio += cnt;
        if (ratio > ratio_limit)
            ratio = ratio_limit;

        ca->delayed_ack = ratio;
    }

    /* Some calls are for duplicates without timetamps */
    if (rtt_us < 0)
        return;

    /* Discard delay samples right after fast recovery.
     * epoch_start == 0 means no epoch is running (e.g. during slow start),
     * so there is no timestamp to compare against; without this guard all
     * samples could be dropped depending on the sign of the jiffies value.
     */
    if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
        return;

    /* Delays are kept in jiffies scaled by 8. */
    delay = usecs_to_jiffies(rtt_us) << 3;
    if (delay == 0)
        delay = 1;

    /* first time call or link delay decreases */
    if (ca->delay_min == 0 || ca->delay_min > delay)
        ca->delay_min = delay;

    /* hystart triggers when cwnd is larger than some threshold.
     * HyStart is only consulted while in slow start
     * (snd_cwnd <= snd_ssthresh) and once snd_cwnd has reached
     * hystart_low_window (16 by default); hystart_update() then decides
     * whether to leave slow start by lowering snd_ssthresh.
     */
    if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
        tp->snd_cwnd >= hystart_low_window)
        hystart_update(sk, delay);
}
 
static struct tcp_congestion_ops cubictcp = {
 
    .init        = bictcp_init,
 
 
    //调用ssthresh函数的地方有：tcp_fastretrans_alert(), tcp_enter_cwr(),tcp_enter_frto(), tcp_enter_loss()
    //看起来每次发生拥塞状态切换的时候，都会调整ssthresh。
　　//修改snd_ssthresh值的地方有bictcp_init,hystart_update以及上面列出的调用ssthresh函数处。
    .ssthresh    = bictcp_recalc_ssthresh,
 
    //发送方发出一个data包之后，接收方回复一个ack包，发送方收到这个ack包之后，
　　//调用tcp_ack()->tcp_cong_avoid()->bictcp_cong_avoid()来更改拥塞窗口snd_cwnd大小.
    .cong_avoid    = bictcp_cong_avoid,
 
    .set_state    = bictcp_state,
 
    //调用undo_cwnd函数的地方有:tcp_undo_cwr()用来撤销之前误判导致的"缩小拥塞窗口"
    .undo_cwnd    = bictcp_undo_cwnd,
 
    //调用ptts_acked函数的路径为：tcp_ack() -->tcp_clean_rtx_queue()
    .pkts_acked     = bictcp_acked,
 
    .owner        = THIS_MODULE,
    .name        = "cubic",
};
 
/*
 * Module init: precompute the scale factors used on the per-packet path and
 * register the "cubic" congestion control.
 *
 * Returns the result of tcp_register_congestion_control().
 */
static int __init cubictcp_register(void)
{
    const int beta_sum = BICTCP_BETA_SCALE + beta;
    const int beta_gap = BICTCP_BETA_SCALE - beta;
    const int c_per_rtt = bic_scale * 10;    /* 1024*c/rtt */

    /* The private state must fit in the space the socket reserves for it. */
    BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);

    /* Precompute a bunch of the scaling factors that are used per-packet
     * based on SRTT of 100ms
     */

    /* With the defaults: 8 * (1024 + 717) / 3 / (1024 - 717) == 15 */
    beta_scale = 8 * beta_sum / 3 / beta_gap;

    /* With the default bic_scale: 41 * 10 == 410 */
    cube_rtt_scale = c_per_rtt;

    /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
     *  so K = cubic_root( (wmax-cwnd)*rtt/c )
     * the unit of K is bictcp_HZ=2^10, not HZ
     *
     *  c = bic_scale >> 10
     *  rtt = 100ms
     *
     * the following code has been designed and tested for
     * cwnd < 1 million packets
     * RTT < 100 seconds
     * HZ < 1,000,00  (corresponding to 10 nano-second)
     */

    /* 1/c * 2^2*bictcp_HZ * srtt */
    cube_factor = 1ull << (10 + 3 * BICTCP_HZ);    /* 2^40 */

    /* divide by bic_scale and by constant Srtt (100ms): 2^40 / 410 */
    do_div(cube_factor, c_per_rtt);

    return tcp_register_congestion_control(&cubictcp);
}
 
/* Module exit: remove the "cubic" congestion control registered by
 * cubictcp_register().
 */
static void __exit cubictcp_unregister(void)
{
    tcp_unregister_congestion_control(&cubictcp);
}
 
/* Module entry and exit points. */
module_init(cubictcp_register);
module_exit(cubictcp_unregister);

/* Module metadata. */
MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
MODULE_VERSION("2.3");

posted @ 2022-01-27 19:21 张同光 | 阅读(190) | 评论(0) | 编辑 | 收藏 | 举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

· 基于吞吐率和丢包控制cwnd的优化方法和系统---本发明涉及TCP协议的防止网络拥塞技术

· 转载Cubic拥塞控制算法进行简单分析

· linux源码解读（二十二）：网络通信简介——网络拥塞控制之cubic算法

· 混合慢启动Hybrid Slow Start

阅读排行：
· 无需6万激活码！GitHub神秘组织3小时极速复刻Manus，手把手教你使用OpenManus搭建本
· Manus爆火，是硬核还是营销？
· 终于写完轮子一部分：tcp代理了，记录一下
· 别再用vector＜bool＞了！Google高级工程师：这可能是STL最大的设计失误
· 单元测试从入门到精通

历史上的今天：
2021-01-27 socket、sock、sk_buff、net_device 关系图

公告

昵称：张同光
园龄： 8年11个月
粉丝： 70
关注： 0

+加关注

2025年3月

日

一

二

三

四

五

六

张同光 (Tongguang Zhang)

张同光 (Tongguang Zhang)：Hello everyone !
Let us make progress together every day ! —— 微信号：ztguang

tcp cubic代码分析

公告

搜索

常用链接

最新随笔

我的标签

积分与排名

随笔分类 (929)

随笔档案 (3269)

阅读排行榜

评论排行榜

推荐排行榜

最新评论

张同光 (Tongguang Zhang)

张同光 (Tongguang Zhang)：Hello everyone ! Let us make progress together every day ! —— 微信号：ztguang

tcp cubic代码分析

公告

搜索

常用链接

最新随笔

我的标签

积分与排名

随笔分类 (929)

随笔档案 (3269)

阅读排行榜

评论排行榜

推荐排行榜

最新评论

张同光 (Tongguang Zhang)：Hello everyone !
Let us make progress together every day ! —— 微信号：ztguang