路由4 【转载: ip_route_output_key函数分析(1) 】

  1 上面的文章读了net/ipv4/route.c 中的ip_route_input函数,它是协议栈对收到的报文进行路由查找的函数。下面继续阅读协议栈发包时进行路由查找所调用的函数ip_route_output_key。
  2 
  3 int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
  4 {
        /*
         * Output-route lookup entry point. It only forwards to
         * ip_route_output_flow() with a NULL socket and flags == 0 and
         * returns that function's result unchanged.
         */
  5     return ip_route_output_flow(net, rp, flp, NULL, 0);
  6 }
  7 只是一个函数封装,真正的处理函数是ip_route_output_flow.
  8 int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
  9              struct sock *sk, int flags)
 10 {
        /*
         * Look up an output route for the flow described by flp and store it
         * through rp; then, when flp->proto is non-zero, run the result
         * through __xfrm_lookup().
         *
         * Returns 0 when the route lookup succeeds and flp->proto is zero,
         * the error from __ip_route_output_key() if that lookup fails, and
         * otherwise the result of the xfrm step.
         * Any non-zero flags value is turned into XFRM_LOOKUP_WAIT.
         */
 11     int err;
 12 
 13     /* Route lookup; a failure is returned to the caller unchanged. */
 14     if ((err = __ip_route_output_key(net, rp, flp)) != 0)
 15         return err;
 16 
 17     /* IPsec (xfrm) handling; skipped entirely when flp->proto is zero. */
 18     if (flp->proto) {
            /* Fill in a missing source/destination from the route just found. */
 19         if (!flp->fl4_src)
 20             flp->fl4_src = (*rp)->rt_src;
 21         if (!flp->fl4_dst)
 22             flp->fl4_dst = (*rp)->rt_dst;
 23         err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
 24                     flags ? XFRM_LOOKUP_WAIT : 0);
            /*
             * -EREMOTE is not returned as-is; the result of
             * ipv4_dst_blackhole() replaces it.
             * NOTE(review): presumably this substitutes a blackhole route
             * in *rp - confirm against ipv4_dst_blackhole().
             */
 25         if (err == -EREMOTE)
 26             err = ipv4_dst_blackhole(net, rp, flp);
 27 
 28         return err;
 29     }
 30 
 31     return 0;
 32 }
 33 
 34 很眼熟的__ip_route_output_key函数。
 35 int __ip_route_output_key(struct net *net, struct rtable **rp,
 36               const struct flowi *flp)
 37 {
        /*
         * Output-route lookup: try the route cache first and fall back to
         * ip_route_output_slow() when caching is off or nothing matches.
         *
         * On a cache hit the entry is stored in *rp and 0 is returned;
         * otherwise the return value is that of ip_route_output_slow().
         */
 38     unsigned hash;
 39     struct rtable *rth;
 40 
        /* Caching disabled for this namespace: skip the cache lookup. */
 41     if (!rt_caching(net))
 42         goto slow_output;
 43 
 44     /* As in ip_route_input: search the cache first and return on a hit. */
 45     hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 46 
        /* The hash chain is walked inside an RCU (bh) read-side section. */
 47     rcu_read_lock_bh();
 48     for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 49         rth = rcu_dereference(rth->u.dst.rt_next)) {
            /*
             * An entry matches when destination, source, oif and mark are
             * equal, its recorded iif is 0, the TOS values agree on the bits
             * in (IPTOS_RT_MASK | RTO_ONLINK), its device belongs to this
             * namespace and it has not expired.
             * NOTE(review): iif == 0 presumably marks output-path entries -
             * confirm.
             */
 50         if (rth->fl.fl4_dst == flp->fl4_dst &&
 51             rth->fl.fl4_src == flp->fl4_src &&
 52             rth->fl.iif == 0 &&
 53             rth->fl.oif == flp->oif &&
 54             rth->fl.mark == flp->mark &&
 55             !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 56                 (IPTOS_RT_MASK | RTO_ONLINK)) &&
 57             net_eq(dev_net(rth->u.dst.dev), net) &&
 58             !rt_is_expired(rth)) {
                /* Hit: dst_use() is called before the read lock is dropped. */
 59             dst_use(&rth->u.dst, jiffies);
 60             RT_CACHE_STAT_INC(out_hit);
 61             rcu_read_unlock_bh();
 62             *rp = rth;
 63             return 0;
 64         }
            /* Statistics: one more chain entry inspected without a match. */
 65         RT_CACHE_STAT_INC(out_hlist_search);
 66     }
 67     rcu_read_unlock_bh();
 68 
 69 /* Caching is disabled or no matching cache entry was found: do the full lookup. */
 70 slow_output:
 71     return ip_route_output_slow(net, rp, flp);
 72 }
 73 
 74 继续探险
 75 /*
 76  * Major route resolver routine.
     *
     * Slow-path output route lookup, reached from __ip_route_output_key()
     * when route caching is disabled or the cache has no matching entry.
     * A working copy (fl) of the caller's flow key is built, the request is
     * classified by source address, output interface and destination
     * address, and every successful case ends at make_route, where
     * ip_mkroute_output() creates the route returned through rp.
     *
     * Returns the result of ip_mkroute_output(), or
     *   -EINVAL      the source address is multicast/broadcast/zeronet, or
     *                ip_dev_find() does not find a device for it;
     *   -ENODEV      oif names no device, or that device has no in_device;
     *   -ENETUNREACH fib_lookup() fails and no oif was given.
     *
     * dev_out holds a device reference while it is non-NULL; the reference
     * is dropped on every exit path that set it.
 77  */
 78 
 79 static int ip_route_output_slow(struct net *net, struct rtable **rp,
 80                 const struct flowi *oldflp)
 81 {
 82     u32 tos    = RT_FL_TOS(oldflp); /* TOS bits; RTO_ONLINK is tested in it below (NOTE(review): confirm what RT_FL_TOS packs in) */
 83     struct flowi fl = { .nl_u = { .ip4_u =
 84                       { .daddr = oldflp->fl4_dst,
 85                     .saddr = oldflp->fl4_src,
 86                     .tos = tos & IPTOS_RT_MASK,
 87                     .scope = ((tos & RTO_ONLINK) ? /* the scope is derived from the RTO_ONLINK flag */
 88                           RT_SCOPE_LINK :
 89                           RT_SCOPE_UNIVERSE),
 90                       } },
 91                 .mark = oldflp->mark,
 92                 .iif = net->loopback_dev->ifindex, /* iif is set to the loopback device's ifindex */
 93                 .oif = oldflp->oif };
 94     struct fib_result res;
 95     unsigned flags = 0;
 96     struct net_device *dev_out = NULL;
 97     int free_res = 0;
 98     int err;
 99 
100 
101     res.fi        = NULL;
102 #ifdef CONFIG_IP_MULTIPLE_TABLES
103 
104 
105     res.r        = NULL;
106 #endif
107 
108     /* First classify the request by source address, output interface and
           destination address. Each "goto make_route" below (highlighted in
           the original article) is one such case. */
109     if (oldflp->fl4_src) { /* a source address was supplied */
110         err = -EINVAL;
111         if (ipv4_is_multicast(oldflp->fl4_src) ||
112             ipv4_is_lbcast(oldflp->fl4_src) ||
113             ipv4_is_zeronet(oldflp->fl4_src))
114             goto out;
115         
116         /* Sanity check above: a multicast, broadcast or zeronet source address is rejected with -EINVAL. */
117 
118         /* I removed check for oif == dev_out->oif here.
119            It was wrong for two reasons:
120            (Article note: the two reasons follow.)
121            1. ip_dev_find(net, saddr) can return wrong iface, if saddr
122               is assigned to multiple interfaces 
123               (Article note: i.e. the same source address is configured on more than one interface.)
124            2. Moreover, we are allowed to send packets with saddr   
125               of another iface. --ANK
126               (Article note: a packet may be sent with the source address of a different interface.)
127          */
128 
129         if (oldflp->oif == 0
130             && (ipv4_is_multicast(oldflp->fl4_dst) ||
131             oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
                /* No output interface was given (oif == 0) and the destination
                   is multicast or 255.255.255.255: find the output device from
                   the source address with ip_dev_find(). Failure leaves
                   err == -EINVAL. */
132             /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
133             dev_out = ip_dev_find(net, oldflp->fl4_src);
134             if (dev_out == NULL)
135                 goto out;
136 
137             /* Special hack: user can direct multicasts
138                and limited broadcast via necessary interface
139                without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
140                This hack is not just for fun, it allows
141                vic,vat and friends to work.
142                They bind socket to loopback, set ttl to zero
143                and expect that it will work.
144                From the viewpoint of routing cache they are broken,
145                because we are not allowed to build multicast path
146                with loopback source addr (look, routing cache
147                cannot know, that ttl is zero, so that packet
148                will not leave this host and route is valid).
149                Luckily, this hack is good workaround.
150              */
151 
152             /* Use the device found for the source address as the output interface, then go build the route. */
153             fl.oif = dev_out->ifindex;
154             goto make_route;
155         }
156 
157         /* Unless FLOWI_FLAG_ANYSRC is set, the source address must be found
               by ip_dev_find(); otherwise fail with -EINVAL. The device is
               only needed for this check, so its reference is dropped at once. */
158         if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
159             /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
160             dev_out = ip_dev_find(net, oldflp->fl4_src);
161             if (dev_out == NULL)
162                 goto out;
163             dev_put(dev_out);
164             dev_out = NULL;
165         }
166     }
167 
168 
169     if (oldflp->oif) {/* an output interface was specified */
170         /* Check that the output interface exists. */
171         dev_out = dev_get_by_index(net, oldflp->oif);
172         err = -ENODEV;
173         if (dev_out == NULL)
174             goto out;
175 
176         /* RACE: Check return value of inet_select_addr instead. */
177         /* Bail out if the device has no in_device attached; err is still
               -ENODEV here. NOTE(review): presumably this means the device
               has no IPv4 configuration - confirm. */
178         if (__in_dev_get_rtnl(dev_out) == NULL) {
179             dev_put(dev_out);
180             goto out;    /* Wrong error code */
181         }
182 
183         /* Destination is local multicast or 255.255.255.255: if no source
               address is set, pick one from the output device with
               inet_select_addr(..., RT_SCOPE_LINK), then go build the route. */
184         if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
185             oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
186             if (!fl.fl4_src)
187                 fl.fl4_src = inet_select_addr(dev_out, 0,
188                                   RT_SCOPE_LINK);
189             goto make_route;
190         }
191         /* Source still unset: pick one from the output device when the
               destination is multicast (using the flow's scope) or zero
               (using RT_SCOPE_HOST). For any other destination the source is
               left unset here. */
192         if (!fl.fl4_src) {
193             if (ipv4_is_multicast(oldflp->fl4_dst))
194                 fl.fl4_src = inet_select_addr(dev_out, 0,
195                                   fl.fl4_scope);
196             else if (!oldflp->fl4_dst)
197                 fl.fl4_src = inet_select_addr(dev_out, 0,
198                                   RT_SCOPE_HOST);
199         }
200     }
201 
202     if (!fl.fl4_dst) {/* no destination address */
203         fl.fl4_dst = fl.fl4_src;
204         if (!fl.fl4_dst)
205             fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);/* source and destination both unset: use INADDR_LOOPBACK for both */
            /* Drop any device reference taken above before switching to the loopback device. */
206         if (dev_out)
207             dev_put(dev_out);
208         dev_out = net->loopback_dev;
209         dev_hold(dev_out);
210         fl.oif = net->loopback_dev->ifindex;
211         res.type = RTN_LOCAL;
212         flags |= RTCF_LOCAL;
213         /* Build a route for traffic addressed to this host. */
214         goto make_route;
215     }
216 
217     /* Remaining cases: the destination is set, so look it up in the FIB. */
218     /* A non-zero return from fib_lookup() is handled below as "no route found". */
219     if (fib_lookup(net, &fl, &res)) {
220         res.fi = NULL;
221         if (oldflp->oif) {
222             /* Apparently, routing tables are wrong. Assume,
223                that the destination is on link.
224 
225                WHY? DW.
226                Because we are allowed to send to iface
227                even if it has NO routes and NO assigned
228                addresses. When oif is specified, routing
229                tables are looked up with only one purpose:
230                to catch if destination is gatewayed, rather than
231                direct. Moreover, if MSG_DONTROUTE is set,
232                we send packet, ignoring both routing tables
233                and ifaddr state. --ANK
234 
235 
236                We could make it even if oif is unknown,
237                likely IPv6, but we do not.
238              */
239 
240             if (fl.fl4_src == 0)
241                 fl.fl4_src = inet_select_addr(dev_out, 0,
242                                   RT_SCOPE_LINK);
243             res.type = RTN_UNICAST;
244             /* No route was found, but an output interface was specified (oif != 0). */
245             goto make_route;
246         }
            /* No route and no oif: release the device, if any, and fail. */
247         if (dev_out)
248             dev_put(dev_out);
249         err = -ENETUNREACH;
250         goto out;
251     }
252     /* A route was found; free_res makes the exit path release res. */
253     free_res = 1;
254 
255     /* The route points at this host. */
256     if (res.type == RTN_LOCAL) {
257         if (!fl.fl4_src)
258             fl.fl4_src = fl.fl4_dst;
            /* Switch the output device to loopback, dropping any earlier reference. */
259         if (dev_out)
260             dev_put(dev_out);
261         dev_out = net->loopback_dev;
262         dev_hold(dev_out);
263         fl.oif = dev_out->ifindex;
264         if (res.fi)
265             fib_info_put(res.fi);
266         res.fi = NULL;
267         flags |= RTCF_LOCAL;
268         goto make_route;
269     }
270 
271 /* Multipath routing, only when compiled in. */
272 #ifdef CONFIG_IP_ROUTE_MULTIPATH
273     if (res.fi->fib_nhs > 1 && fl.oif == 0)
274         fib_select_multipath(&fl, &res);
275     else
276 #endif
277     if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
278         fib_select_default(net, &fl, &res);
279 
        /* Source still unset: take the preferred source of the FIB result. */
280     if (!fl.fl4_src)
281         fl.fl4_src = FIB_RES_PREFSRC(res);
282 
        /* The output device becomes the one named by the FIB result. */
283     if (dev_out)
284         dev_put(dev_out);
285     dev_out = FIB_RES_DEV(res);
286     dev_hold(dev_out);
287     fl.oif = dev_out->ifindex;
288 
289 /* Create the output route (per the original annotation, it is added to the route cache). */
290 make_route:
291     err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
292 
293 
        /* Release the FIB result (only set when fib_lookup() succeeded) and the device reference. */
294     if (free_res)
295         fib_res_put(&res);
296     if (dev_out)
297         dev_put(dev_out);
298 out:    return err;
299 }

 

posted on 2017-05-12 17:02  listenerln  阅读(3107)  评论(0编辑  收藏  举报