dctcp-2.6.26-rev1.1.0.patch

dctcp-2.6.26-rev1.1.0.patch

  1 diff -Naur linux-2.6.26/include/linux/sysctl.h linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h
  2 --- linux-2.6.26/include/linux/sysctl.h    2008-07-13 14:51:29.000000000 -0700
  3 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h    2011-10-07 14:41:50.000000000 -0700
  4 @@ -435,6 +435,9 @@
  5      NET_TCP_ALLOWED_CONG_CONTROL=123,
  6      NET_TCP_MAX_SSTHRESH=124,
  7      NET_TCP_FRTO_RESPONSE=125,
  8 +    NET_TCP_DELAYED_ACK=126,
  9 +    NET_TCP_DCTCP_ENABLE=127,
 10 +    NET_TCP_DCTCP_SHIFT_G=128,
 11  };
 12  
 13  enum {
 14 diff -Naur linux-2.6.26/include/linux/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h
 15 --- linux-2.6.26/include/linux/tcp.h    2008-07-13 14:51:29.000000000 -0700
 16 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h    2011-10-07 14:53:45.000000000 -0700
 17 @@ -405,6 +405,15 @@
 18  /* TCP MD5 Signagure Option information */
 19      struct tcp_md5sig_info    *md5sig_info;
 20  #endif
 21 +
 22 +/* DCTCP Specific Parameters */
 23 +     u32    acked_bytes_ecn;
 24 +     u32    acked_bytes_total;
 25 +     u32    prior_rcv_nxt;
 26 +     u32    dctcp_alpha;
 27 +     u32    next_seq;
 28 +     u32    ce_state;    /* 0: last pkt was non-ce , 1: last pkt was ce */
 29 +     u32    delayed_ack_reserved;
 30  };
 31  
 32  static inline struct tcp_sock *tcp_sk(const struct sock *sk)
 33 diff -Naur linux-2.6.26/include/net/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h
 34 --- linux-2.6.26/include/net/tcp.h    2008-07-13 14:51:29.000000000 -0700
 35 +++ linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h    2011-10-07 14:41:50.000000000 -0700
 36 @@ -214,6 +214,9 @@
 37  extern int sysctl_tcp_fack;
 38  extern int sysctl_tcp_reordering;
 39  extern int sysctl_tcp_ecn;
 40 +extern int sysctl_tcp_delayed_ack;
 41 +extern int sysctl_tcp_dctcp_enable;
 42 +extern int sysctl_tcp_dctcp_shift_g;
 43  extern int sysctl_tcp_dsack;
 44  extern int sysctl_tcp_mem[3];
 45  extern int sysctl_tcp_wmem[3];
 46 diff -Naur linux-2.6.26/kernel/sysctl_check.c linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c
 47 --- linux-2.6.26/kernel/sysctl_check.c    2008-07-13 14:51:29.000000000 -0700
 48 +++ linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c    2011-10-07 14:41:50.000000000 -0700
 49 @@ -353,6 +353,9 @@
 50      { NET_TCP_FACK,                "tcp_fack" },
 51      { NET_TCP_REORDERING,            "tcp_reordering" },
 52      { NET_TCP_ECN,                "tcp_ecn" },
 53 +    { NET_TCP_DELAYED_ACK,                  "tcp_delayed_ack" },
 54 +    { NET_TCP_DCTCP_ENABLE,                 "tcp_dctcp_enable" },
 55 +        { NET_TCP_DCTCP_SHIFT_G,                "tcp_dctcp_shift_g" },
 56      { NET_TCP_DSACK,            "tcp_dsack" },
 57      { NET_TCP_MEM,                "tcp_mem" },
 58      { NET_TCP_WMEM,                "tcp_wmem" },
 59 diff -Naur linux-2.6.26/net/ipv4/sysctl_net_ipv4.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c
 60 --- linux-2.6.26/net/ipv4/sysctl_net_ipv4.c    2008-07-13 14:51:29.000000000 -0700
 61 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c    2011-10-07 14:41:50.000000000 -0700
 62 @@ -506,6 +506,30 @@
 63          .proc_handler    = &proc_dointvec
 64      },
 65      {
 66 +        .ctl_name    = NET_TCP_DELAYED_ACK,
 67 +        .procname    = "tcp_delayed_ack",
 68 +        .data        = &sysctl_tcp_delayed_ack,
 69 +        .maxlen        = sizeof(int),
 70 +        .mode        = 0644,
 71 +        .proc_handler    = &proc_dointvec
 72 +    },
 73 +    {
 74 +        .ctl_name    = NET_TCP_DCTCP_ENABLE,
 75 +        .procname    = "tcp_dctcp_enable",
 76 +        .data        = &sysctl_tcp_dctcp_enable,
 77 +        .maxlen        = sizeof(int),
 78 +        .mode        = 0644,
 79 +        .proc_handler    = &proc_dointvec
 80 +    },
 81 +    {
 82 +        .ctl_name    = NET_TCP_DCTCP_SHIFT_G,
 83 +        .procname    = "tcp_dctcp_shift_g",
 84 +        .data        = &sysctl_tcp_dctcp_shift_g,
 85 +        .maxlen        = sizeof(int),
 86 +        .mode        = 0644,
 87 +        .proc_handler    = &proc_dointvec
 88 +    },
 89 +    {
 90          .ctl_name    = NET_TCP_DSACK,
 91          .procname    = "tcp_dsack",
 92          .data        = &sysctl_tcp_dsack,
 93 diff -Naur linux-2.6.26/net/ipv4/tcp_input.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c
 94 --- linux-2.6.26/net/ipv4/tcp_input.c    2008-07-13 14:51:29.000000000 -0700
 95 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c    2011-10-07 14:53:21.000000000 -0700
 96 @@ -79,6 +79,9 @@
 97  int sysctl_tcp_fack __read_mostly = 1;
 98  int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
 99  int sysctl_tcp_ecn __read_mostly;
100 +int sysctl_tcp_delayed_ack __read_mostly = 1;
101 +int sysctl_tcp_dctcp_enable __read_mostly;
102 +int sysctl_tcp_dctcp_shift_g  __read_mostly = 5; /* g=1/2^5 */
103  int sysctl_tcp_dsack __read_mostly = 1;
104  int sysctl_tcp_app_win __read_mostly = 31;
105  int sysctl_tcp_adv_win_scale __read_mostly = 2;
106 @@ -212,16 +215,68 @@
107      tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
108  }
109  
110 -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
111 +static inline void TCP_ECN_dctcp_check_ce(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
112  {
113      if (tp->ecn_flags & TCP_ECN_OK) {
114 -        if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
115 -            tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
116 -        /* Funny extension: if ECT is not set on a segment,
117 -         * it is surely retransmit. It is not in ECN RFC,
118 -         * but Linux follows this rule. */
119 -        else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
120 -            tcp_enter_quickack_mode((struct sock *)tp);
121 +      u32 temp_rcv_nxt;
122 +
123 +      if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) {
124 +
 125 +        /* rcv_nxt is already updated in previous process (tcp_rcv_established) */
126 +
127 +        if(sysctl_tcp_dctcp_enable) {
128 +
 129 +          /* state has changed from CE=0 to CE=1 && delayed ack has not been sent yet */
130 +          if(tp->ce_state == 0 && tp->delayed_ack_reserved) {
131 +
132 +        /* save current rcv_nxt */
133 +        temp_rcv_nxt = tp->rcv_nxt;
134 +        /* generate previous ack with CE=0 */
135 +        tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
136 +        tp->rcv_nxt = tp->prior_rcv_nxt;
137 +        tcp_send_ack(sk);
138 +        /* recover current rcv_nxt */
139 +        tp->rcv_nxt = temp_rcv_nxt;
140 +          }
141 +          
142 +          tp->ce_state = 1;
143 +        }
144 +
145 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
146 +
147 +
148 +        /* Funny extension: if ECT is not set on a segment,
149 +         * it is surely retransmit. It is not in ECN RFC,
150 +         * but Linux follows this rule. */
151 +      } else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) {
152 +        tcp_enter_quickack_mode((struct sock *)tp);
153 +      }else {
154 +        /* It has ECT but it doesn't have CE */
155 +        
156 +        if(sysctl_tcp_dctcp_enable) {
157 +          
158 +          if(tp->ce_state != 0 && tp->delayed_ack_reserved) {
159 +        
160 +        /* save current rcv_nxt */
161 +        temp_rcv_nxt = tp->rcv_nxt;
162 +        /* generate previous ack with CE=1 */
163 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
164 +        tp->rcv_nxt = tp->prior_rcv_nxt;
165 +        tcp_send_ack(sk);
166 +        /* recover current rcv_nxt */
167 +        tp->rcv_nxt = temp_rcv_nxt;
168 +          }
169 +
170 +          tp->ce_state = 0;
171 +
172 +          /* deassert only when DCTCP is enabled */
173 +          tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
174 +        }
175 +
176 +      }
177 +        
178 +      /* set current rcv_nxt to prior_rcv_nxt */
179 +      tp->prior_rcv_nxt = tp->rcv_nxt;
180      }
181  }
182  
183 @@ -572,6 +627,8 @@
184           */
185          tcp_incr_quickack(sk);
186          icsk->icsk_ack.ato = TCP_ATO_MIN;
187 +
188 +        tp->ce_state = 0;
189      } else {
190          int m = now - icsk->icsk_ack.lrcvtime;
191  
192 @@ -592,7 +649,7 @@
193      }
194      icsk->icsk_ack.lrcvtime = now;
195  
196 -    TCP_ECN_check_ce(tp, skb);
197 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);
198  
199      if (skb->len >= 128)
200          tcp_grow_window(sk, skb);
201 @@ -836,19 +893,54 @@
202      struct tcp_sock *tp = tcp_sk(sk);
203      const struct inet_connection_sock *icsk = inet_csk(sk);
204  
205 +    __u32 ssthresh_old; 
206 +    __u32 cwnd_old;
207 +    __u32 cwnd_new;
208 +
209      tp->prior_ssthresh = 0;
210      tp->bytes_acked = 0;
211      if (icsk->icsk_ca_state < TCP_CA_CWR) {
212          tp->undo_marker = 0;
213 -        if (set_ssthresh)
214 -            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
215 -        tp->snd_cwnd = min(tp->snd_cwnd,
216 -                   tcp_packets_in_flight(tp) + 1U);
217 +
218 +        if(!sysctl_tcp_dctcp_enable) {
219 +
220 +          if (set_ssthresh)
221 +            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
222 +
223 +          tp->snd_cwnd = min(tp->snd_cwnd,
224 +                     tcp_packets_in_flight(tp) + 1U);
225 +          
226 +        }else {
227 +
228 +          cwnd_new = max (tp->snd_cwnd - ((tp->snd_cwnd * tp->dctcp_alpha)>>11) , 2U);
229 +
230 +          if(set_ssthresh) {
231 +            
232 +            ssthresh_old = tp->snd_ssthresh;
233 +            tp->snd_ssthresh =  cwnd_new;
234 +            
235 +            /* printk("%llu alpha= %d ssth old= %d new= %d\n", */
236 +            /*                ktime_to_us(ktime_get_real()), */
237 +            /*                tp->dctcp_alpha, */
238 +            /*                ssthresh_old, */
239 +            /*                tp->snd_ssthresh); */
240 +          }
241 +          
242 +          cwnd_old = tp->snd_cwnd;
243 +          tp->snd_cwnd = cwnd_new;
244 +          
245 +          /* printk("%llu alpha= %d cwnd old= %d new= %d\n", */
246 +          /*              ktime_to_us(ktime_get_real()), */
247 +          /*              tp->dctcp_alpha, */
248 +          /*              cwnd_old, */
249 +          /*              tp->snd_cwnd); */
250 +        }
251 +        
252          tp->snd_cwnd_cnt = 0;
253          tp->high_seq = tp->snd_nxt;
254          tp->snd_cwnd_stamp = tcp_time_stamp;
255          TCP_ECN_queue_cwr(tp);
256 -
257 +        
258          tcp_set_ca_state(sk, TCP_CA_CWR);
259      }
260  }
261 @@ -2513,7 +2605,8 @@
262          tcp_try_keep_open(sk);
263          tcp_moderate_cwnd(tp);
264      } else {
265 -        tcp_cwnd_down(sk, flag);
266 +      if(!sysctl_tcp_dctcp_enable)
267 +        tcp_cwnd_down(sk, flag);
268      }
269  }
270  
271 @@ -3216,6 +3309,9 @@
272      int prior_packets;
273      int frto_cwnd = 0;
274  
275 +    __u32 alpha_old;
276 +    __u32 acked_bytes;
277 +
278      /* If the ack is newer than sent or older than previous acks
279       * then we can probably ignore it.
280       */
281 @@ -3269,6 +3365,45 @@
282          tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
283      }
284  
285 +
286 +    /* START: DCTCP Processing */
287 +
288 +    /* calc acked bytes */
289 +    if(after(ack,prior_snd_una)) {
290 +      acked_bytes = ack - prior_snd_una;
291 +    } else {
292 +      acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
293 +    }
294 +    
295 +    if(flag & FLAG_ECE) 
296 +      tp->acked_bytes_ecn += acked_bytes;
297 +
298 +    tp->acked_bytes_total += acked_bytes;
299 +
300 +    /* Expired RTT */
301 +        if (!before(tp->snd_una,tp->next_seq)) {
302 +
303 +      /* For avoiding denominator == 1 */
304 +      if(tp->acked_bytes_total == 0) tp->acked_bytes_total = 1;
305 +
306 +          alpha_old = tp->dctcp_alpha; 
307 +
308 +      /* alpha = (1-g) * alpha + g * F */
309 +      tp->dctcp_alpha = alpha_old - (alpha_old >> sysctl_tcp_dctcp_shift_g)
310 +        + (tp->acked_bytes_ecn << (10 - sysctl_tcp_dctcp_shift_g)) / tp->acked_bytes_total;  
311 +      
312 +      if(tp->dctcp_alpha > 1024) tp->dctcp_alpha = 1024; /* round to 0-1024 */
313 +
314 +          /* printk("bytes_ecn= %d total= %d alpha: old= %d new= %d\n", */
315 +      /*          tp->acked_bytes_ecn, tp->acked_bytes_total, alpha_old, tp->dctcp_alpha); */
316 +      
317 +      tp->acked_bytes_ecn = 0;
318 +      tp->acked_bytes_total = 0;
319 +      tp->next_seq = tp->snd_nxt;
320 +        }
321 +
322 +    /* END: DCTCP Processing */
323 +
324      /* We passed data and got it acked, remove any soft error
325       * log. Something worked...
326       */
327 @@ -4014,7 +4149,7 @@
328          goto queue_and_out;
329      }
330  
331 -    TCP_ECN_check_ce(tp, skb);
332 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);
333  
334      if (tcp_try_rmem_schedule(sk, skb->truesize))
335          goto drop;
336 @@ -4421,6 +4556,8 @@
337           && __tcp_select_window(sk) >= tp->rcv_wnd) ||
338          /* We ACK each frame or... */
339          tcp_in_quickack_mode(sk) ||
340 +        /* Delayed ACK is disabled or ... */
341 +        sysctl_tcp_delayed_ack == 0 ||
342          /* We have out of order data. */
343          (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
344          /* Then ack it now */
345 @@ -5419,6 +5556,9 @@
346  }
347  
348  EXPORT_SYMBOL(sysctl_tcp_ecn);
349 +EXPORT_SYMBOL(sysctl_tcp_delayed_ack);
350 +EXPORT_SYMBOL(sysctl_tcp_dctcp_enable);
351 +EXPORT_SYMBOL(sysctl_tcp_dctcp_shift_g);
352  EXPORT_SYMBOL(sysctl_tcp_reordering);
353  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
354  EXPORT_SYMBOL(tcp_parse_options);
355 diff -Naur linux-2.6.26/net/ipv4/tcp_minisocks.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c
356 --- linux-2.6.26/net/ipv4/tcp_minisocks.c    2008-07-13 14:51:29.000000000 -0700
357 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c    2011-10-07 15:03:45.000000000 -0700
358 @@ -398,6 +398,11 @@
359          newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
360          newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
361  
362 +        /* Initialize DCTCP internal parameters */
363 +        newtp->next_seq = newtp->snd_nxt; 
364 +        newtp->acked_bytes_ecn = 0;
365 +        newtp->acked_bytes_total = 0;
366 +
367          tcp_prequeue_init(newtp);
368  
369          tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
370 diff -Naur linux-2.6.26/net/ipv4/tcp_output.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c
371 --- linux-2.6.26/net/ipv4/tcp_output.c    2008-07-13 14:51:29.000000000 -0700
372 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c    2011-10-07 14:41:50.000000000 -0700
373 @@ -290,7 +290,7 @@
374      struct tcp_sock *tp = tcp_sk(sk);
375  
376      tp->ecn_flags = 0;
377 -    if (sysctl_tcp_ecn) {
378 +    if (sysctl_tcp_ecn || sysctl_tcp_dctcp_enable) {
379          TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
380          tp->ecn_flags = TCP_ECN_OK;
381      }
382 @@ -600,6 +600,10 @@
383          TCP_ECN_send(sk, skb, tcp_header_size);
384      }
385  
 386 +    /* In DCTCP, assert the ECT bit on all packets */
387 +    if(sysctl_tcp_dctcp_enable)
388 +      INET_ECN_xmit(sk);
389 +
390  #ifdef CONFIG_TCP_MD5SIG
391      /* Calculate the MD5 hash, as we have all we need now */
392      if (md5) {
393 @@ -2352,6 +2356,11 @@
394      tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
395      TCP_ECN_send_syn(sk, buff);
396  
397 +    /* Initialize DCTCP internal parameters */
398 +    tp->next_seq = tp->snd_nxt; 
399 +    tp->acked_bytes_ecn = 0;
400 +    tp->acked_bytes_total = 0;
401 +
402      /* Send it off. */
403      TCP_SKB_CB(buff)->when = tcp_time_stamp;
404      tp->retrans_stamp = TCP_SKB_CB(buff)->when;
405 @@ -2385,6 +2394,10 @@
406      int ato = icsk->icsk_ack.ato;
407      unsigned long timeout;
408  
409 +    /* Delayed ACK reserved flag for DCTCP */
410 +    struct tcp_sock *tp = tcp_sk(sk);
411 +    tp->delayed_ack_reserved = 1;
412 +
413      if (ato > TCP_DELACK_MIN) {
414          const struct tcp_sock *tp = tcp_sk(sk);
415          int max_ato = HZ / 2;
416 @@ -2436,6 +2449,10 @@
417  {
418      struct sk_buff *buff;
419  
420 +    /* Delayed ACK reserved flag for DCTCP */
421 +    struct tcp_sock *tp = tcp_sk(sk);
422 +    tp->delayed_ack_reserved = 0;
423 +
424      /* If we have been reset, we may not send again. */
425      if (sk->sk_state == TCP_CLOSE)
426          return;

https://github.com/myasuda/DCTCP-Linux/blob/master/dctcp-2.6.26-rev1.1.0.patch

posted @ 2014-12-12 10:28  Ryan in C++  阅读(615)  评论(1编辑  收藏  举报