[SK_BUFF]: Introduce skb_network_header()
[linux-2.6.git] / net / ipv4 / ip_output.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              The Internet Protocol (IP) output module.
7  *
8  * Version:     $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9  *
10  * Authors:     Ross Biro
11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *              Donald Becker, <becker@super.org>
13  *              Alan Cox, <Alan.Cox@linux.org>
14  *              Richard Underwood
15  *              Stefan Becker, <stefanb@yello.ping.de>
16  *              Jorge Cwik, <jorge@laser.satlink.net>
17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18  *              Hirokazu Takahashi, <taka@valinux.co.jp>
19  *
20  *      See ip_input.c for original log
21  *
22  *      Fixes:
23  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
24  *              Mike Kilburn    :       htons() missing in ip_build_xmit.
25  *              Bradford Johnson:       Fix faulty handling of some frames when
26  *                                      no route is found.
27  *              Alexander Demenshin:    Missing sk/skb free in ip_queue_xmit
28  *                                      (in case if packet not accepted by
29  *                                      output firewall rules)
30  *              Mike McLagan    :       Routing by source
31  *              Alexey Kuznetsov:       use new route cache
32  *              Andi Kleen:             Fix broken PMTU recovery and remove
33  *                                      some redundant tests.
34  *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
35  *              Andi Kleen      :       Replace ip_reply with ip_send_reply.
36  *              Andi Kleen      :       Split fast and slow ip_build_xmit path
37  *                                      for decreased register pressure on x86
38  *                                      and more readability.
39  *              Marc Boucher    :       When call_out_firewall returns FW_QUEUE,
40  *                                      silently drop skb instead of failing with -EPERM.
41  *              Detlev Wengorz  :       Copy protocol for fragments.
42  *              Hirokazu Takahashi:     HW checksumming for outgoing UDP
43  *                                      datagrams.
44  *              Hirokazu Takahashi:     sendfile() on UDP works now.
45  */
46
47 #include <asm/uaccess.h>
48 #include <asm/system.h>
49 #include <linux/module.h>
50 #include <linux/types.h>
51 #include <linux/kernel.h>
52 #include <linux/mm.h>
53 #include <linux/string.h>
54 #include <linux/errno.h>
55 #include <linux/highmem.h>
56
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/in.h>
60 #include <linux/inet.h>
61 #include <linux/netdevice.h>
62 #include <linux/etherdevice.h>
63 #include <linux/proc_fs.h>
64 #include <linux/stat.h>
65 #include <linux/init.h>
66
67 #include <net/snmp.h>
68 #include <net/ip.h>
69 #include <net/protocol.h>
70 #include <net/route.h>
71 #include <net/xfrm.h>
72 #include <linux/skbuff.h>
73 #include <net/sock.h>
74 #include <net/arp.h>
75 #include <net/icmp.h>
76 #include <net/checksum.h>
77 #include <net/inetpeer.h>
78 #include <net/checksum.h>
79 #include <linux/igmp.h>
80 #include <linux/netfilter_ipv4.h>
81 #include <linux/netfilter_bridge.h>
82 #include <linux/mroute.h>
83 #include <linux/netlink.h>
84 #include <linux/tcp.h>
85
/*
 * Default TTL value, initialised to IPDEFTTL and placed in the read-mostly
 * data section.  NOTE(review): presumably tunable through sysctl and used as
 * the fallback TTL for outgoing packets; no reader is visible in this part
 * of the file, so confirm against the users elsewhere.
 */
86 int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
87
88 /* Generate a checksum for an outgoing IP datagram. */
89 __inline__ void ip_send_check(struct iphdr *iph)
90 {
91         iph->check = 0;
92         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
93 }
94
95 /* dev_loopback_xmit for use with netfilter. */
96 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
97 {
98         skb_reset_mac_header(newskb);
99         __skb_pull(newskb, skb_network_offset(newskb));
100         newskb->pkt_type = PACKET_LOOPBACK;
101         newskb->ip_summed = CHECKSUM_UNNECESSARY;
102         BUG_TRAP(newskb->dst);
103         netif_rx(newskb);
104         return 0;
105 }
106
107 static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
108 {
109         int ttl = inet->uc_ttl;
110
111         if (ttl < 0)
112                 ttl = dst_metric(dst, RTAX_HOPLIMIT);
113         return ttl;
114 }
115
116 /*
117  *              Add an ip header to a skbuff and send it out.
118  *
119  */
120 int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
121                           __be32 saddr, __be32 daddr, struct ip_options *opt)
122 {
123         struct inet_sock *inet = inet_sk(sk);
124         struct rtable *rt = (struct rtable *)skb->dst;
125         struct iphdr *iph;
126
127         /* Build the IP header. */
128         skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
129         skb_reset_network_header(skb);
130         iph = skb->nh.iph;
131         iph->version  = 4;
132         iph->ihl      = 5;
133         iph->tos      = inet->tos;
134         if (ip_dont_fragment(sk, &rt->u.dst))
135                 iph->frag_off = htons(IP_DF);
136         else
137                 iph->frag_off = 0;
138         iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
139         iph->daddr    = rt->rt_dst;
140         iph->saddr    = rt->rt_src;
141         iph->protocol = sk->sk_protocol;
142         iph->tot_len  = htons(skb->len);
143         ip_select_ident(iph, &rt->u.dst, sk);
144
145         if (opt && opt->optlen) {
146                 iph->ihl += opt->optlen>>2;
147                 ip_options_build(skb, opt, daddr, rt, 0);
148         }
149         ip_send_check(iph);
150
151         skb->priority = sk->sk_priority;
152
153         /* Send it out. */
154         return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
155                        dst_output);
156 }
157
158 EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
159
160 static inline int ip_finish_output2(struct sk_buff *skb)
161 {
162         struct dst_entry *dst = skb->dst;
163         struct net_device *dev = dst->dev;
164         int hh_len = LL_RESERVED_SPACE(dev);
165
166         /* Be paranoid, rather than too clever. */
167         if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
168                 struct sk_buff *skb2;
169
170                 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
171                 if (skb2 == NULL) {
172                         kfree_skb(skb);
173                         return -ENOMEM;
174                 }
175                 if (skb->sk)
176                         skb_set_owner_w(skb2, skb->sk);
177                 kfree_skb(skb);
178                 skb = skb2;
179         }
180
181         if (dst->hh)
182                 return neigh_hh_output(dst->hh, skb);
183         else if (dst->neighbour)
184                 return dst->neighbour->output(skb);
185
186         if (net_ratelimit())
187                 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
188         kfree_skb(skb);
189         return -EINVAL;
190 }
191
192 static inline int ip_finish_output(struct sk_buff *skb)
193 {
194 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
195         /* Policy lookup after SNAT yielded a new policy */
196         if (skb->dst->xfrm != NULL) {
197                 IPCB(skb)->flags |= IPSKB_REROUTED;
198                 return dst_output(skb);
199         }
200 #endif
201         if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
202                 return ip_fragment(skb, ip_finish_output2);
203         else
204                 return ip_finish_output2(skb);
205 }
206
207 int ip_mc_output(struct sk_buff *skb)
208 {
209         struct sock *sk = skb->sk;
210         struct rtable *rt = (struct rtable*)skb->dst;
211         struct net_device *dev = rt->u.dst.dev;
212
213         /*
214          *      If the indicated interface is up and running, send the packet.
215          */
216         IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
217
218         skb->dev = dev;
219         skb->protocol = htons(ETH_P_IP);
220
221         /*
222          *      Multicasts are looped back for other local users
223          */
224
225         if (rt->rt_flags&RTCF_MULTICAST) {
226                 if ((!sk || inet_sk(sk)->mc_loop)
227 #ifdef CONFIG_IP_MROUTE
228                 /* Small optimization: do not loopback not local frames,
229                    which returned after forwarding; they will be  dropped
230                    by ip_mr_input in any case.
231                    Note, that local frames are looped back to be delivered
232                    to local recipients.
233
234                    This check is duplicated in ip_mr_input at the moment.
235                  */
236                     && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
237 #endif
238                 ) {
239                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
240                         if (newskb)
241                                 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
242                                         newskb->dev,
243                                         ip_dev_loopback_xmit);
244                 }
245
246                 /* Multicasts with ttl 0 must not go beyond the host */
247
248                 if (skb->nh.iph->ttl == 0) {
249                         kfree_skb(skb);
250                         return 0;
251                 }
252         }
253
254         if (rt->rt_flags&RTCF_BROADCAST) {
255                 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
256                 if (newskb)
257                         NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
258                                 newskb->dev, ip_dev_loopback_xmit);
259         }
260
261         return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
262                             ip_finish_output,
263                             !(IPCB(skb)->flags & IPSKB_REROUTED));
264 }
265
266 int ip_output(struct sk_buff *skb)
267 {
268         struct net_device *dev = skb->dst->dev;
269
270         IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
271
272         skb->dev = dev;
273         skb->protocol = htons(ETH_P_IP);
274
275         return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
276                             ip_finish_output,
277                             !(IPCB(skb)->flags & IPSKB_REROUTED));
278 }
279
280 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
281 {
282         struct sock *sk = skb->sk;
283         struct inet_sock *inet = inet_sk(sk);
284         struct ip_options *opt = inet->opt;
285         struct rtable *rt;
286         struct iphdr *iph;
287
288         /* Skip all of this if the packet is already routed,
289          * f.e. by something like SCTP.
290          */
291         rt = (struct rtable *) skb->dst;
292         if (rt != NULL)
293                 goto packet_routed;
294
295         /* Make sure we can route this packet. */
296         rt = (struct rtable *)__sk_dst_check(sk, 0);
297         if (rt == NULL) {
298                 __be32 daddr;
299
300                 /* Use correct destination address if we have options. */
301                 daddr = inet->daddr;
302                 if(opt && opt->srr)
303                         daddr = opt->faddr;
304
305                 {
306                         struct flowi fl = { .oif = sk->sk_bound_dev_if,
307                                             .nl_u = { .ip4_u =
308                                                       { .daddr = daddr,
309                                                         .saddr = inet->saddr,
310                                                         .tos = RT_CONN_FLAGS(sk) } },
311                                             .proto = sk->sk_protocol,
312                                             .uli_u = { .ports =
313                                                        { .sport = inet->sport,
314                                                          .dport = inet->dport } } };
315
316                         /* If this fails, retransmit mechanism of transport layer will
317                          * keep trying until route appears or the connection times
318                          * itself out.
319                          */
320                         security_sk_classify_flow(sk, &fl);
321                         if (ip_route_output_flow(&rt, &fl, sk, 0))
322                                 goto no_route;
323                 }
324                 sk_setup_caps(sk, &rt->u.dst);
325         }
326         skb->dst = dst_clone(&rt->u.dst);
327
328 packet_routed:
329         if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
330                 goto no_route;
331
332         /* OK, we know where to send it, allocate and build IP header. */
333         skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
334         skb_reset_network_header(skb);
335         iph = skb->nh.iph;
336         *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
337         iph->tot_len = htons(skb->len);
338         if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
339                 iph->frag_off = htons(IP_DF);
340         else
341                 iph->frag_off = 0;
342         iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
343         iph->protocol = sk->sk_protocol;
344         iph->saddr    = rt->rt_src;
345         iph->daddr    = rt->rt_dst;
346         /* Transport layer set skb->h.foo itself. */
347
348         if (opt && opt->optlen) {
349                 iph->ihl += opt->optlen >> 2;
350                 ip_options_build(skb, opt, inet->daddr, rt, 0);
351         }
352
353         ip_select_ident_more(iph, &rt->u.dst, sk,
354                              (skb_shinfo(skb)->gso_segs ?: 1) - 1);
355
356         /* Add an IP checksum. */
357         ip_send_check(iph);
358
359         skb->priority = sk->sk_priority;
360
361         return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
362                        dst_output);
363
364 no_route:
365         IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
366         kfree_skb(skb);
367         return -EHOSTUNREACH;
368 }
369
370
371 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
372 {
373         to->pkt_type = from->pkt_type;
374         to->priority = from->priority;
375         to->protocol = from->protocol;
376         dst_release(to->dst);
377         to->dst = dst_clone(from->dst);
378         to->dev = from->dev;
379         to->mark = from->mark;
380
381         /* Copy the flags to each fragment. */
382         IPCB(to)->flags = IPCB(from)->flags;
383
384 #ifdef CONFIG_NET_SCHED
385         to->tc_index = from->tc_index;
386 #endif
387 #ifdef CONFIG_NETFILTER
388         /* Connection association is same as pre-frag packet */
389         nf_conntrack_put(to->nfct);
390         to->nfct = from->nfct;
391         nf_conntrack_get(to->nfct);
392         to->nfctinfo = from->nfctinfo;
393 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
394         to->ipvs_property = from->ipvs_property;
395 #endif
396 #ifdef CONFIG_BRIDGE_NETFILTER
397         nf_bridge_put(to->nf_bridge);
398         to->nf_bridge = from->nf_bridge;
399         nf_bridge_get(to->nf_bridge);
400 #endif
401 #endif
402         skb_copy_secmark(to, from);
403 }
404
405 /*
406  *      This IP datagram is too large to be sent in one piece.  Break it up into
407  *      smaller pieces (each of size equal to IP header plus
408  *      a block of the data of the original IP data part) that will yet fit in a
409  *      single device frame, and queue such a frame for sending.
410  */
411
    /*
     * skb:    datagram to fragment.  On every path below it is either freed
     *         here or handed to output().
     * output: called once per fragment; a non-zero return stops the
     *         fragmentation and is passed back to the caller.
     *
     * Returns 0 on success, -EMSGSIZE when the datagram has DF set and
     * skb->local_df is clear, -ENOMEM when a fragment skb cannot be
     * allocated, or the error returned by output().
     *
     * Two strategies are used: a fast path that reuses the skbs already
     * chained on frag_list when their geometry is suitable, and a slow path
     * that copies the data into newly allocated skbs.
     */
412 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
413 {
414         struct iphdr *iph;
415         int raw = 0;
416         int ptr;
417         struct net_device *dev;
418         struct sk_buff *skb2;
419         unsigned int mtu, hlen, left, len, ll_rs, pad;
420         int offset;
421         __be16 not_last_frag;
422         struct rtable *rt = (struct rtable*)skb->dst;
423         int err = 0;
424
425         dev = rt->u.dst.dev;
426
427         /*
428          *      Point into the IP datagram header.
429          */
430
431         iph = skb->nh.iph;
432
            /*
             * DF is set and local fragmentation is not permitted: count the
             * failure, report the route MTU via ICMP "fragmentation needed"
             * and drop the datagram.
             */
433         if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
434                 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
435                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
436                           htonl(dst_mtu(&rt->u.dst)));
437                 kfree_skb(skb);
438                 return -EMSGSIZE;
439         }
440
441         /*
442          *      Setup starting values.
443          */
444
445         hlen = iph->ihl * 4;
446         mtu = dst_mtu(&rt->u.dst) - hlen;       /* Size of data space */
447         IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
448
449         /* When frag_list is given, use it. First, check its validity:
450          * some transformers could create wrong frag_list or break existing
451          * one, it is not prohibited. In this case fall back to copying.
452          *
453          * LATER: this step can be merged to real generation of fragments,
454          * we can switch to copy when see the first bad fragment.
455          */
456         if (skb_shinfo(skb)->frag_list) {
457                 struct sk_buff *frag;
458                 int first_len = skb_pagelen(skb);
459
                    /*
                     * The head skb must fit the data space, carry a multiple
                     * of 8 payload bytes, not already be a fragment (no MF
                     * bit, zero offset) and not be cloned; otherwise copy.
                     */
460                 if (first_len - hlen > mtu ||
461                     ((first_len - hlen) & 7) ||
462                     (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
463                     skb_cloned(skb))
464                         goto slow_path;
465
466                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
467                         /* Correct geometry. */
468                         if (frag->len > mtu ||
469                             ((frag->len & 7) && frag->next) ||
470                             skb_headroom(frag) < hlen)
471                             goto slow_path;
472
473                         /* Partially cloned skb? */
474                         if (skb_shared(frag))
475                                 goto slow_path;
476
                            /*
                             * Attach the owning socket to the fragment (taking
                             * a socket reference) and move the fragment's
                             * truesize out of the head skb's accounting.
                             */
477                         BUG_ON(frag->sk);
478                         if (skb->sk) {
479                                 sock_hold(skb->sk);
480                                 frag->sk = skb->sk;
481                                 frag->destructor = sock_wfree;
482                                 skb->truesize -= frag->truesize;
483                         }
484                 }
485
486                 /* Everything is OK. Generate! */
487
                    /*
                     * Detach the frag_list so the head skb becomes the first
                     * fragment on its own, with MF set and lengths trimmed to
                     * first_len.
                     */
488                 err = 0;
489                 offset = 0;
490                 frag = skb_shinfo(skb)->frag_list;
491                 skb_shinfo(skb)->frag_list = NULL;
492                 skb->data_len = first_len - skb_headlen(skb);
493                 skb->len = first_len;
494                 iph->tot_len = htons(first_len);
495                 iph->frag_off = htons(IP_MF);
496                 ip_send_check(iph);
497
498                 for (;;) {
499                         /* Prepare header of the next frame,
500                          * before previous one went down. */
501                         if (frag) {
502                                 frag->ip_summed = CHECKSUM_NONE;
503                                 frag->h.raw = frag->data;
504                                 __skb_push(frag, hlen);
505                                 skb_reset_network_header(frag);
506                                 memcpy(skb_network_header(frag), iph, hlen);
507                                 iph = frag->nh.iph;
508                                 iph->tot_len = htons(frag->len);
509                                 ip_copy_metadata(frag, skb);
510                                 if (offset == 0)
511                                         ip_options_fragment(frag);
512                                 offset += skb->len - hlen;
513                                 iph->frag_off = htons(offset>>3);
514                                 if (frag->next != NULL)
515                                         iph->frag_off |= htons(IP_MF);
516                                 /* Ready, complete checksum */
517                                 ip_send_check(iph);
518                         }
519
520                         err = output(skb);
521
522                         if (!err)
523                                 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
524                         if (err || !frag)
525                                 break;
526
527                         skb = frag;
528                         frag = skb->next;
529                         skb->next = NULL;
530                 }
531
532                 if (err == 0) {
533                         IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
534                         return 0;
535                 }
536
                    /* output() failed: free the fragments not yet handed over. */
537                 while (frag) {
538                         skb = frag->next;
539                         kfree_skb(frag);
540                         frag = skb;
541                 }
542                 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
543                 return err;
544         }
545
    /*
     * Copying path: each fragment is built in a newly allocated skb.
     * 'raw' is always 0 here, so 'ptr' starts right after the IP header.
     */
546 slow_path:
547         left = skb->len - hlen;         /* Space per frame */
548         ptr = raw + hlen;               /* Where to start from */
549
550         /* for bridged IP traffic encapsulated inside f.e. a vlan header,
551          * we need to make room for the encapsulating header
552          */
553         pad = nf_bridge_pad(skb);
554         ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
555         mtu -= pad;
556
557         /*
558          *      Fragment the datagram.
559          */
560
            /*
             * Start from the offset and MF state of the incoming header, so
             * that fragmenting a packet that is itself a fragment keeps the
             * original positions.
             */
561         offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
562         not_last_frag = iph->frag_off & htons(IP_MF);
563
564         /*
565          *      Keep copying data until we run out.
566          */
567
568         while (left > 0) {
569                 len = left;
570                 /* IF: it doesn't fit, use 'mtu' - the data space left */
571                 if (len > mtu)
572                         len = mtu;
573                 /* IF: we are not sending upto and including the packet end
574                    then align the next start on an eight byte boundary */
575                 if (len < left) {
576                         len &= ~7;
577                 }
578                 /*
579                  *      Allocate buffer.
580                  */
581
582                 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
583                         NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
584                         err = -ENOMEM;
585                         goto fail;
586                 }
587
588                 /*
589                  *      Set up data on packet
590                  */
591
592                 ip_copy_metadata(skb2, skb);
593                 skb_reserve(skb2, ll_rs);
594                 skb_put(skb2, len + hlen);
595                 skb_reset_network_header(skb2);
596                 skb2->h.raw = skb2->data + hlen;
597
598                 /*
599                  *      Charge the memory for the fragment to any owner
600                  *      it might possess
601                  */
602
603                 if (skb->sk)
604                         skb_set_owner_w(skb2, skb->sk);
605
606                 /*
607                  *      Copy the packet header into the new buffer.
608                  */
609
610                 memcpy(skb_network_header(skb2), skb->data, hlen);
611
612                 /*
613                  *      Copy a block of the IP datagram.
614                  */
615                 if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
616                         BUG();
617                 left -= len;
618
619                 /*
620                  *      Fill in the new header fields.
621                  */
622                 iph = skb2->nh.iph;
623                 iph->frag_off = htons((offset >> 3));
624
625                 /* ANK: dirty, but effective trick. Upgrade options only if
626                  * the segment to be fragmented was THE FIRST (otherwise,
627                  * options are already fixed) and make it ONCE
628                  * on the initial skb, so that all the following fragments
629                  * will inherit fixed options.
630                  */
631                 if (offset == 0)
632                         ip_options_fragment(skb);
633
634                 /*
635                  *      Added AC : If we are fragmenting a fragment that's not the
636                  *                 last fragment then keep MF on each bit
637                  */
638                 if (left > 0 || not_last_frag)
639                         iph->frag_off |= htons(IP_MF);
640                 ptr += len;
641                 offset += len;
642
643                 /*
644                  *      Put this fragment into the sending queue.
645                  */
646                 iph->tot_len = htons(len + hlen);
647
648                 ip_send_check(iph);
649
650                 err = output(skb2);
651                 if (err)
652                         goto fail;
653
654                 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
655         }
            /* All data has been copied out; the original skb is released. */
656         kfree_skb(skb);
657         IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
658         return err;
659
660 fail:
661         kfree_skb(skb);
662         IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
663         return err;
664 }
665
666 EXPORT_SYMBOL(ip_fragment);
667
668 int
669 ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
670 {
671         struct iovec *iov = from;
672
673         if (skb->ip_summed == CHECKSUM_PARTIAL) {
674                 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
675                         return -EFAULT;
676         } else {
677                 __wsum csum = 0;
678                 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
679                         return -EFAULT;
680                 skb->csum = csum_block_add(skb->csum, csum, odd);
681         }
682         return 0;
683 }
684
685 static inline __wsum
686 csum_page(struct page *page, int offset, int copy)
687 {
688         char *kaddr;
689         __wsum csum;
690         kaddr = kmap(page);
691         csum = csum_partial(kaddr + offset, copy, 0);
692         kunmap(page);
693         return csum;
694 }
695
696 static inline int ip_ufo_append_data(struct sock *sk,
697                         int getfrag(void *from, char *to, int offset, int len,
698                                int odd, struct sk_buff *skb),
699                         void *from, int length, int hh_len, int fragheaderlen,
700                         int transhdrlen, int mtu,unsigned int flags)
701 {
702         struct sk_buff *skb;
703         int err;
704
705         /* There is support for UDP fragmentation offload by network
706          * device, so create one single skb packet containing complete
707          * udp datagram
708          */
709         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
710                 skb = sock_alloc_send_skb(sk,
711                         hh_len + fragheaderlen + transhdrlen + 20,
712                         (flags & MSG_DONTWAIT), &err);
713
714                 if (skb == NULL)
715                         return err;
716
717                 /* reserve space for Hardware header */
718                 skb_reserve(skb, hh_len);
719
720                 /* create space for UDP/IP header */
721                 skb_put(skb,fragheaderlen + transhdrlen);
722
723                 /* initialize network header pointer */
724                 skb_reset_network_header(skb);
725
726                 /* initialize protocol header pointer */
727                 skb->h.raw = skb->data + fragheaderlen;
728
729                 skb->ip_summed = CHECKSUM_PARTIAL;
730                 skb->csum = 0;
731                 sk->sk_sndmsg_off = 0;
732         }
733
734         err = skb_append_datato_frags(sk,skb, getfrag, from,
735                                (length - transhdrlen));
736         if (!err) {
737                 /* specify the length of each IP datagram fragment*/
738                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
739                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
740                 __skb_queue_tail(&sk->sk_write_queue, skb);
741
742                 return 0;
743         }
744         /* There is not enough support do UFO ,
745          * so follow normal path
746          */
747         kfree_skb(skb);
748         return err;
749 }
750
751 /*
752  *      ip_append_data() and ip_append_page() can make one large IP datagram
753  *      from many pieces of data. Each piece will be held on the socket
754  *      until ip_push_pending_frames() is called. Each piece can be a page
755  *      or non-page data.
756  *
757  *      Not only UDP, other transport protocols - e.g. raw sockets - can use
758  *      this interface potentially.
759  *
760  *      LATER: length must be adjusted by pad at tail, when it is required.
761  */
762 int ip_append_data(struct sock *sk,
763                    int getfrag(void *from, char *to, int offset, int len,
764                                int odd, struct sk_buff *skb),
765                    void *from, int length, int transhdrlen,
766                    struct ipcm_cookie *ipc, struct rtable *rt,
767                    unsigned int flags)
768 {
769         struct inet_sock *inet = inet_sk(sk);
770         struct sk_buff *skb;
771
772         struct ip_options *opt = NULL;
773         int hh_len;
774         int exthdrlen;
775         int mtu;
776         int copy;
777         int err;
778         int offset = 0;
779         unsigned int maxfraglen, fragheaderlen;
780         int csummode = CHECKSUM_NONE;
781
782         if (flags&MSG_PROBE)
783                 return 0;
784
785         if (skb_queue_empty(&sk->sk_write_queue)) {
786                 /*
787                  * setup for corking.
788                  */
789                 opt = ipc->opt;
790                 if (opt) {
791                         if (inet->cork.opt == NULL) {
792                                 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
793                                 if (unlikely(inet->cork.opt == NULL))
794                                         return -ENOBUFS;
795                         }
796                         memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
797                         inet->cork.flags |= IPCORK_OPT;
798                         inet->cork.addr = ipc->addr;
799                 }
800                 dst_hold(&rt->u.dst);
801                 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
802                 inet->cork.rt = rt;
803                 inet->cork.length = 0;
804                 sk->sk_sndmsg_page = NULL;
805                 sk->sk_sndmsg_off = 0;
806                 if ((exthdrlen = rt->u.dst.header_len) != 0) {
807                         length += exthdrlen;
808                         transhdrlen += exthdrlen;
809                 }
810         } else {
811                 rt = inet->cork.rt;
812                 if (inet->cork.flags & IPCORK_OPT)
813                         opt = inet->cork.opt;
814
815                 transhdrlen = 0;
816                 exthdrlen = 0;
817                 mtu = inet->cork.fragsize;
818         }
819         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
820
821         fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
822         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
823
824         if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
825                 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
826                 return -EMSGSIZE;
827         }
828
829         /*
830          * transhdrlen > 0 means that this is the first fragment and we wish
831          * it won't be fragmented in the future.
832          */
833         if (transhdrlen &&
834             length + fragheaderlen <= mtu &&
835             rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
836             !exthdrlen)
837                 csummode = CHECKSUM_PARTIAL;
838
839         inet->cork.length += length;
840         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
841                         (rt->u.dst.dev->features & NETIF_F_UFO)) {
842
843                 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
844                                          fragheaderlen, transhdrlen, mtu,
845                                          flags);
846                 if (err)
847                         goto error;
848                 return 0;
849         }
850
851         /* So, what's going on in the loop below?
852          *
853          * We use calculated fragment length to generate chained skb,
854          * each of segments is IP fragment ready for sending to network after
855          * adding appropriate IP header.
856          */
857
858         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
859                 goto alloc_new_skb;
860
861         while (length > 0) {
862                 /* Check if the remaining data fits into current packet. */
863                 copy = mtu - skb->len;
864                 if (copy < length)
865                         copy = maxfraglen - skb->len;
866                 if (copy <= 0) {
867                         char *data;
868                         unsigned int datalen;
869                         unsigned int fraglen;
870                         unsigned int fraggap;
871                         unsigned int alloclen;
872                         struct sk_buff *skb_prev;
873 alloc_new_skb:
874                         skb_prev = skb;
875                         if (skb_prev)
876                                 fraggap = skb_prev->len - maxfraglen;
877                         else
878                                 fraggap = 0;
879
880                         /*
881                          * If remaining data exceeds the mtu,
882                          * we know we need more fragment(s).
883                          */
884                         datalen = length + fraggap;
885                         if (datalen > mtu - fragheaderlen)
886                                 datalen = maxfraglen - fragheaderlen;
887                         fraglen = datalen + fragheaderlen;
888
889                         if ((flags & MSG_MORE) &&
890                             !(rt->u.dst.dev->features&NETIF_F_SG))
891                                 alloclen = mtu;
892                         else
893                                 alloclen = datalen + fragheaderlen;
894
895                         /* The last fragment gets additional space at tail.
896                          * Note, with MSG_MORE we overallocate on fragments,
897                          * because we have no idea what fragment will be
898                          * the last.
899                          */
900                         if (datalen == length + fraggap)
901                                 alloclen += rt->u.dst.trailer_len;
902
903                         if (transhdrlen) {
904                                 skb = sock_alloc_send_skb(sk,
905                                                 alloclen + hh_len + 15,
906                                                 (flags & MSG_DONTWAIT), &err);
907                         } else {
908                                 skb = NULL;
909                                 if (atomic_read(&sk->sk_wmem_alloc) <=
910                                     2 * sk->sk_sndbuf)
911                                         skb = sock_wmalloc(sk,
912                                                            alloclen + hh_len + 15, 1,
913                                                            sk->sk_allocation);
914                                 if (unlikely(skb == NULL))
915                                         err = -ENOBUFS;
916                         }
917                         if (skb == NULL)
918                                 goto error;
919
920                         /*
921                          *      Fill in the control structures
922                          */
923                         skb->ip_summed = csummode;
924                         skb->csum = 0;
925                         skb_reserve(skb, hh_len);
926
927                         /*
928                          *      Find where to start putting bytes.
929                          */
930                         data = skb_put(skb, fraglen);
931                         skb->nh.raw = data + exthdrlen;
932                         data += fragheaderlen;
933                         skb->h.raw = data + exthdrlen;
934
935                         if (fraggap) {
936                                 skb->csum = skb_copy_and_csum_bits(
937                                         skb_prev, maxfraglen,
938                                         data + transhdrlen, fraggap, 0);
939                                 skb_prev->csum = csum_sub(skb_prev->csum,
940                                                           skb->csum);
941                                 data += fraggap;
942                                 pskb_trim_unique(skb_prev, maxfraglen);
943                         }
944
945                         copy = datalen - transhdrlen - fraggap;
946                         if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
947                                 err = -EFAULT;
948                                 kfree_skb(skb);
949                                 goto error;
950                         }
951
952                         offset += copy;
953                         length -= datalen - fraggap;
954                         transhdrlen = 0;
955                         exthdrlen = 0;
956                         csummode = CHECKSUM_NONE;
957
958                         /*
959                          * Put the packet on the pending queue.
960                          */
961                         __skb_queue_tail(&sk->sk_write_queue, skb);
962                         continue;
963                 }
964
965                 if (copy > length)
966                         copy = length;
967
968                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
969                         unsigned int off;
970
971                         off = skb->len;
972                         if (getfrag(from, skb_put(skb, copy),
973                                         offset, copy, off, skb) < 0) {
974                                 __skb_trim(skb, off);
975                                 err = -EFAULT;
976                                 goto error;
977                         }
978                 } else {
979                         int i = skb_shinfo(skb)->nr_frags;
980                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
981                         struct page *page = sk->sk_sndmsg_page;
982                         int off = sk->sk_sndmsg_off;
983                         unsigned int left;
984
985                         if (page && (left = PAGE_SIZE - off) > 0) {
986                                 if (copy >= left)
987                                         copy = left;
988                                 if (page != frag->page) {
989                                         if (i == MAX_SKB_FRAGS) {
990                                                 err = -EMSGSIZE;
991                                                 goto error;
992                                         }
993                                         get_page(page);
994                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
995                                         frag = &skb_shinfo(skb)->frags[i];
996                                 }
997                         } else if (i < MAX_SKB_FRAGS) {
998                                 if (copy > PAGE_SIZE)
999                                         copy = PAGE_SIZE;
1000                                 page = alloc_pages(sk->sk_allocation, 0);
1001                                 if (page == NULL)  {
1002                                         err = -ENOMEM;
1003                                         goto error;
1004                                 }
1005                                 sk->sk_sndmsg_page = page;
1006                                 sk->sk_sndmsg_off = 0;
1007
1008                                 skb_fill_page_desc(skb, i, page, 0, 0);
1009                                 frag = &skb_shinfo(skb)->frags[i];
1010                                 skb->truesize += PAGE_SIZE;
1011                                 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1012                         } else {
1013                                 err = -EMSGSIZE;
1014                                 goto error;
1015                         }
1016                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1017                                 err = -EFAULT;
1018                                 goto error;
1019                         }
1020                         sk->sk_sndmsg_off += copy;
1021                         frag->size += copy;
1022                         skb->len += copy;
1023                         skb->data_len += copy;
1024                 }
1025                 offset += copy;
1026                 length -= copy;
1027         }
1028
1029         return 0;
1030
1031 error:
1032         inet->cork.length -= length;
1033         IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1034         return err;
1035 }
1036
1037 ssize_t ip_append_page(struct sock *sk, struct page *page,
1038                        int offset, size_t size, int flags)
1039 {
1040         struct inet_sock *inet = inet_sk(sk);
1041         struct sk_buff *skb;
1042         struct rtable *rt;
1043         struct ip_options *opt = NULL;
1044         int hh_len;
1045         int mtu;
1046         int len;
1047         int err;
1048         unsigned int maxfraglen, fragheaderlen, fraggap;
1049
1050         if (inet->hdrincl)
1051                 return -EPERM;
1052
1053         if (flags&MSG_PROBE)
1054                 return 0;
1055
1056         if (skb_queue_empty(&sk->sk_write_queue))
1057                 return -EINVAL;
1058
1059         rt = inet->cork.rt;
1060         if (inet->cork.flags & IPCORK_OPT)
1061                 opt = inet->cork.opt;
1062
1063         if (!(rt->u.dst.dev->features&NETIF_F_SG))
1064                 return -EOPNOTSUPP;
1065
1066         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1067         mtu = inet->cork.fragsize;
1068
1069         fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1070         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1071
1072         if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1073                 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1074                 return -EMSGSIZE;
1075         }
1076
1077         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1078                 return -EINVAL;
1079
1080         inet->cork.length += size;
1081         if ((sk->sk_protocol == IPPROTO_UDP) &&
1082             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1083                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1084                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1085         }
1086
1087
1088         while (size > 0) {
1089                 int i;
1090
1091                 if (skb_is_gso(skb))
1092                         len = size;
1093                 else {
1094
1095                         /* Check if the remaining data fits into current packet. */
1096                         len = mtu - skb->len;
1097                         if (len < size)
1098                                 len = maxfraglen - skb->len;
1099                 }
1100                 if (len <= 0) {
1101                         struct sk_buff *skb_prev;
1102                         char *data;
1103                         struct iphdr *iph;
1104                         int alloclen;
1105
1106                         skb_prev = skb;
1107                         fraggap = skb_prev->len - maxfraglen;
1108
1109                         alloclen = fragheaderlen + hh_len + fraggap + 15;
1110                         skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1111                         if (unlikely(!skb)) {
1112                                 err = -ENOBUFS;
1113                                 goto error;
1114                         }
1115
1116                         /*
1117                          *      Fill in the control structures
1118                          */
1119                         skb->ip_summed = CHECKSUM_NONE;
1120                         skb->csum = 0;
1121                         skb_reserve(skb, hh_len);
1122
1123                         /*
1124                          *      Find where to start putting bytes.
1125                          */
1126                         data = skb_put(skb, fragheaderlen + fraggap);
1127                         skb_reset_network_header(skb);
1128                         iph = skb->nh.iph;
1129                         data += fragheaderlen;
1130                         skb->h.raw = data;
1131
1132                         if (fraggap) {
1133                                 skb->csum = skb_copy_and_csum_bits(
1134                                         skb_prev, maxfraglen,
1135                                         data, fraggap, 0);
1136                                 skb_prev->csum = csum_sub(skb_prev->csum,
1137                                                           skb->csum);
1138                                 pskb_trim_unique(skb_prev, maxfraglen);
1139                         }
1140
1141                         /*
1142                          * Put the packet on the pending queue.
1143                          */
1144                         __skb_queue_tail(&sk->sk_write_queue, skb);
1145                         continue;
1146                 }
1147
1148                 i = skb_shinfo(skb)->nr_frags;
1149                 if (len > size)
1150                         len = size;
1151                 if (skb_can_coalesce(skb, i, page, offset)) {
1152                         skb_shinfo(skb)->frags[i-1].size += len;
1153                 } else if (i < MAX_SKB_FRAGS) {
1154                         get_page(page);
1155                         skb_fill_page_desc(skb, i, page, offset, len);
1156                 } else {
1157                         err = -EMSGSIZE;
1158                         goto error;
1159                 }
1160
1161                 if (skb->ip_summed == CHECKSUM_NONE) {
1162                         __wsum csum;
1163                         csum = csum_page(page, offset, len);
1164                         skb->csum = csum_block_add(skb->csum, csum, skb->len);
1165                 }
1166
1167                 skb->len += len;
1168                 skb->data_len += len;
1169                 offset += len;
1170                 size -= len;
1171         }
1172         return 0;
1173
1174 error:
1175         inet->cork.length -= size;
1176         IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1177         return err;
1178 }
1179
1180 /*
1181  *      Combined all pending IP fragments on the socket as one IP datagram
1182  *      and push them out.
1183  */
1184 int ip_push_pending_frames(struct sock *sk)
1185 {
1186         struct sk_buff *skb, *tmp_skb;
1187         struct sk_buff **tail_skb;
1188         struct inet_sock *inet = inet_sk(sk);
1189         struct ip_options *opt = NULL;
1190         struct rtable *rt = inet->cork.rt;
1191         struct iphdr *iph;
1192         __be16 df = 0;
1193         __u8 ttl;
1194         int err = 0;
1195
1196         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1197                 goto out;
1198         tail_skb = &(skb_shinfo(skb)->frag_list);
1199
1200         /* move skb->data to ip header from ext header */
1201         if (skb->data < skb_network_header(skb))
1202                 __skb_pull(skb, skb_network_offset(skb));
1203         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1204                 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1205                 *tail_skb = tmp_skb;
1206                 tail_skb = &(tmp_skb->next);
1207                 skb->len += tmp_skb->len;
1208                 skb->data_len += tmp_skb->len;
1209                 skb->truesize += tmp_skb->truesize;
1210                 __sock_put(tmp_skb->sk);
1211                 tmp_skb->destructor = NULL;
1212                 tmp_skb->sk = NULL;
1213         }
1214
1215         /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
1216          * to fragment the frame generated here. No matter, what transforms
1217          * how transforms change size of the packet, it will come out.
1218          */
1219         if (inet->pmtudisc != IP_PMTUDISC_DO)
1220                 skb->local_df = 1;
1221
1222         /* DF bit is set when we want to see DF on outgoing frames.
1223          * If local_df is set too, we still allow to fragment this frame
1224          * locally. */
1225         if (inet->pmtudisc == IP_PMTUDISC_DO ||
1226             (skb->len <= dst_mtu(&rt->u.dst) &&
1227              ip_dont_fragment(sk, &rt->u.dst)))
1228                 df = htons(IP_DF);
1229
1230         if (inet->cork.flags & IPCORK_OPT)
1231                 opt = inet->cork.opt;
1232
1233         if (rt->rt_type == RTN_MULTICAST)
1234                 ttl = inet->mc_ttl;
1235         else
1236                 ttl = ip_select_ttl(inet, &rt->u.dst);
1237
1238         iph = (struct iphdr *)skb->data;
1239         iph->version = 4;
1240         iph->ihl = 5;
1241         if (opt) {
1242                 iph->ihl += opt->optlen>>2;
1243                 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1244         }
1245         iph->tos = inet->tos;
1246         iph->tot_len = htons(skb->len);
1247         iph->frag_off = df;
1248         ip_select_ident(iph, &rt->u.dst, sk);
1249         iph->ttl = ttl;
1250         iph->protocol = sk->sk_protocol;
1251         iph->saddr = rt->rt_src;
1252         iph->daddr = rt->rt_dst;
1253         ip_send_check(iph);
1254
1255         skb->priority = sk->sk_priority;
1256         skb->dst = dst_clone(&rt->u.dst);
1257
1258         /* Netfilter gets whole the not fragmented skb. */
1259         err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1260                       skb->dst->dev, dst_output);
1261         if (err) {
1262                 if (err > 0)
1263                         err = inet->recverr ? net_xmit_errno(err) : 0;
1264                 if (err)
1265                         goto error;
1266         }
1267
1268 out:
1269         inet->cork.flags &= ~IPCORK_OPT;
1270         kfree(inet->cork.opt);
1271         inet->cork.opt = NULL;
1272         if (inet->cork.rt) {
1273                 ip_rt_put(inet->cork.rt);
1274                 inet->cork.rt = NULL;
1275         }
1276         return err;
1277
1278 error:
1279         IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1280         goto out;
1281 }
1282
1283 /*
1284  *      Throw away all pending data on the socket.
1285  */
1286 void ip_flush_pending_frames(struct sock *sk)
1287 {
1288         struct inet_sock *inet = inet_sk(sk);
1289         struct sk_buff *skb;
1290
1291         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1292                 kfree_skb(skb);
1293
1294         inet->cork.flags &= ~IPCORK_OPT;
1295         kfree(inet->cork.opt);
1296         inet->cork.opt = NULL;
1297         if (inet->cork.rt) {
1298                 ip_rt_put(inet->cork.rt);
1299                 inet->cork.rt = NULL;
1300         }
1301 }
1302
1303
1304 /*
1305  *      Fetch data from kernel space and fill in checksum if needed.
1306  */
1307 static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1308                               int len, int odd, struct sk_buff *skb)
1309 {
1310         __wsum csum;
1311
1312         csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1313         skb->csum = csum_block_add(skb->csum, csum, odd);
1314         return 0;
1315 }
1316
1317 /*
1318  *      Generic function to send a packet as reply to another packet.
1319  *      Used to send TCP resets so far. ICMP should use this function too.
1320  *
1321  *      Should run single threaded per socket because it uses the sock
1322  *      structure to pass arguments.
1323  *
1324  *      LATER: switch from ip_build_xmit to ip_append_*
1325  */
1326 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1327                    unsigned int len)
1328 {
1329         struct inet_sock *inet = inet_sk(sk);
1330         struct {
1331                 struct ip_options       opt;
1332                 char                    data[40];
1333         } replyopts;
1334         struct ipcm_cookie ipc;
1335         __be32 daddr;
1336         struct rtable *rt = (struct rtable*)skb->dst;
1337
1338         if (ip_options_echo(&replyopts.opt, skb))
1339                 return;
1340
1341         daddr = ipc.addr = rt->rt_src;
1342         ipc.opt = NULL;
1343
1344         if (replyopts.opt.optlen) {
1345                 ipc.opt = &replyopts.opt;
1346
1347                 if (ipc.opt->srr)
1348                         daddr = replyopts.opt.faddr;
1349         }
1350
1351         {
1352                 struct flowi fl = { .nl_u = { .ip4_u =
1353                                               { .daddr = daddr,
1354                                                 .saddr = rt->rt_spec_dst,
1355                                                 .tos = RT_TOS(skb->nh.iph->tos) } },
1356                                     /* Not quite clean, but right. */
1357                                     .uli_u = { .ports =
1358                                                { .sport = skb->h.th->dest,
1359                                                  .dport = skb->h.th->source } },
1360                                     .proto = sk->sk_protocol };
1361                 security_skb_classify_flow(skb, &fl);
1362                 if (ip_route_output_key(&rt, &fl))
1363                         return;
1364         }
1365
1366         /* And let IP do all the hard work.
1367
1368            This chunk is not reenterable, hence spinlock.
1369            Note that it uses the fact, that this function is called
1370            with locally disabled BH and that sk cannot be already spinlocked.
1371          */
1372         bh_lock_sock(sk);
1373         inet->tos = skb->nh.iph->tos;
1374         sk->sk_priority = skb->priority;
1375         sk->sk_protocol = skb->nh.iph->protocol;
1376         ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1377                        &ipc, rt, MSG_DONTWAIT);
1378         if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1379                 if (arg->csumoffset >= 0)
1380                         *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1381                 skb->ip_summed = CHECKSUM_NONE;
1382                 ip_push_pending_frames(sk);
1383         }
1384
1385         bh_unlock_sock(sk);
1386
1387         ip_rt_put(rt);
1388 }
1389
1390 void __init ip_init(void)
1391 {
1392         ip_rt_init();
1393         inet_initpeers();
1394
1395 #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1396         igmp_mc_proc_init();
1397 #endif
1398 }
1399
1400 EXPORT_SYMBOL(ip_generic_getfrag);
1401 EXPORT_SYMBOL(ip_queue_xmit);
1402 EXPORT_SYMBOL(ip_send_check);