Commit: "ipv6: local dev is actually unused in ip6_fragment"
File: net/ipv6/ip6_output.c (from linux-2.6.git)
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       airthmetics in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
43
44 #include <net/sock.h>
45 #include <net/snmp.h>
46
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
57
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
/*
 * ipv6_select_ident - assign the Identification value for a fragment header.
 *
 * Hands out values from a single global counter, never returning 0 so
 * that callers can use a frag_id of 0 to mean "not yet assigned" (see
 * the slow path of ip6_fragment).  The counter is protected by its own
 * spinlock; the _bh variant is used because fragmentation can run in
 * softirq context.
 */
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	/* Wrap around, but skip 0 (reserved as "unassigned"). */
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}
71
72 int __ip6_local_out(struct sk_buff *skb)
73 {
74         int len;
75
76         len = skb->len - sizeof(struct ipv6hdr);
77         if (len > IPV6_MAXPLEN)
78                 len = 0;
79         ipv6_hdr(skb)->payload_len = htons(len);
80
81         return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82                        dst_output);
83 }
84
/*
 * ip6_local_out - transmit a locally generated IPv6 packet.
 *
 * Runs __ip6_local_out(); a verdict of 1 means the netfilter hook let
 * the packet through without queueing it, so we invoke dst_output()
 * ourselves.  Any other value (error or stolen packet) is returned
 * unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int ret = __ip6_local_out(skb);

	if (likely(ret == 1))
		ret = dst_output(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
96
97 static int ip6_output_finish(struct sk_buff *skb)
98 {
99         struct dst_entry *dst = skb->dst;
100
101         if (dst->hh)
102                 return neigh_hh_output(dst->hh, skb);
103         else if (dst->neighbour)
104                 return dst->neighbour->output(skb);
105
106         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
107         kfree_skb(skb);
108         return -EINVAL;
109
110 }
111
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	/* Loop a locally generated copy back into the receive path:
	 * rewind the headers, mark the skb as a loopback packet and
	 * feed it to netif_rx() as if it had just arrived. */
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	/* Locally looped data needs no checksum verification. */
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!newskb->dst);

	netif_rx(newskb);
	return 0;
}
124
125
/*
 * ip6_output2 - second stage of IPv6 output.
 *
 * Stamps protocol and output device on the skb, loops a copy of
 * multicast packets back to the local stack when a local listener (or
 * the multicast router socket) wants one, and finally runs the
 * NF_INET_POST_ROUTING hook with ip6_output_finish as continuation.
 */
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		/* Loop back a copy unless the sending socket disabled
		 * multicast loopback (np->mc_loop) or we are already on
		 * the loopback device.  The mroute6_socket case keeps
		 * the multicast router informed; IP6SKB_FORWARDED
		 * prevents re-looping already-forwarded copies. */
		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* Hop limit 0: the looped-back copy above is the
			 * only delivery; never put it on the wire. */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
165
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167 {
168         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171                skb->dst->dev->mtu : dst_mtu(skb->dst);
172 }
173
174 int ip6_output(struct sk_buff *skb)
175 {
176         struct inet6_dev *idev = ip6_dst_idev(skb->dst);
177         if (unlikely(idev->cnf.disable_ipv6)) {
178                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
179                 kfree_skb(skb);
180                 return 0;
181         }
182
183         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
184                                 dst_allfrag(skb->dst))
185                 return ip6_fragment(skb, ip6_output2);
186         else
187                 return ip6_output2(skb);
188 }
189
190 /*
191  *      xmit an sk_buff (used by TCP)
192  */
193
/*
 * ip6_xmit - build the IPv6 header and transmit an skb (used by TCP).
 * @sk:       sending socket (provides hop limit, tclass, priority, mark)
 * @skb:      payload with transport header already in place
 * @fl:       flow describing src/dst addresses, protocol and flow label
 * @opt:      optional IPv6 extension headers to push (may be NULL)
 * @ipfragok: non-zero to permit local fragmentation of the result
 *
 * May reallocate the skb head when extension headers don't fit in the
 * existing headroom (the original skb is freed in that case).  Packets
 * that exceed the path MTU and may not be fragmented are answered with
 * a PKT_TOOBIG ICMP to ourselves and dropped with -EMSGSIZE.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			/* The reallocated copy must be charged to the
			 * sending socket again. */
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		/* May rewrite first_hop (e.g. routing header). */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	/* Socket hop limit if set (>= 0), otherwise the route default. */
	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	/* Version 6 | traffic class | caller-supplied flow label. */
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
				dst_output);
	}

	/* Too big and fragmentation not allowed: tell ourselves so the
	 * socket learns the path MTU, then drop. */
	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
289
290 /*
291  *      To avoid extra problems ND packets are send through this
292  *      routine. It's code duplication but I really want to avoid
293  *      extra checks since ipv6_build_header is used by TCP (which
294  *      is for us performance critical)
295  */
296
297 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
298                const struct in6_addr *saddr, const struct in6_addr *daddr,
299                int proto, int len)
300 {
301         struct ipv6_pinfo *np = inet6_sk(sk);
302         struct ipv6hdr *hdr;
303         int totlen;
304
305         skb->protocol = htons(ETH_P_IPV6);
306         skb->dev = dev;
307
308         totlen = len + sizeof(struct ipv6hdr);
309
310         skb_reset_network_header(skb);
311         skb_put(skb, sizeof(struct ipv6hdr));
312         hdr = ipv6_hdr(skb);
313
314         *(__be32*)hdr = htonl(0x60000000);
315
316         hdr->payload_len = htons(len);
317         hdr->nexthdr = proto;
318         hdr->hop_limit = np->hop_limit;
319
320         ipv6_addr_copy(&hdr->saddr, saddr);
321         ipv6_addr_copy(&hdr->daddr, daddr);
322
323         return 0;
324 }
325
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to interested sockets.
 * @sel: Router Alert value to match against each chain entry's ->sel.
 *
 * Walks the global ip6_ra_chain under ip6_ra_lock and delivers the
 * packet to every matching raw socket, honouring SO_BINDTODEVICE.
 * All matches but the last receive clones; the last match consumes the
 * original skb, saving one clone.
 *
 * Returns 1 if the skb was delivered (caller must not touch it again),
 * 0 if no socket matched and the caller still owns the skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				/* Clone failure is tolerated: delivery to
				 * earlier matches is best-effort. */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
354
/*
 * ip6_forward_proxy_check - classify a packet aimed at a proxied address.
 *
 * Called from ip6_forward() when the destination matches a proxy
 * neighbour entry.  Skips any extension headers to find the transport
 * protocol.
 *
 * Returns:
 *   1  - packet is a neighbour discovery message; deliver it to the
 *        local input path instead of forwarding,
 *   0  - forward as usual,
 *  -1  - link-local destination that cannot be proxied; the caller
 *        must drop it (dst_link_failure already signalled the sender).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		/* Malformed extension chain: treat as "just forward". */
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
405
/* Continuation for the NF_INET_FORWARD hook: hand the packet on to the
 * route's output function. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
410
/*
 * ip6_forward - forward a received IPv6 packet.
 *
 * Performs all forwarding-path checks: forwarding enabled, LRO, XFRM
 * policy, Router Alert delivery, hop limit, ND proxying, source
 * address sanity, redirects and MTU — then decrements the hop limit
 * and runs the NF_INET_FORWARD hook.  Consumes the skb on every path
 * (delivery, drop or transmit).
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* LRO-merged frames must not be forwarded (warns and drops). */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		/* opt->ra is the offset of the Router Alert option;
		 * bytes 2-3 of the option hold its value. */
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward may have switched the route. */
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	/* Routers never fragment on behalf of the sender in IPv6. */
	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow may have moved the header. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
553
/*
 * ip6_copy_metadata - copy per-packet bookkeeping from @from to @to.
 *
 * Used by ip6_fragment so each fragment carries the same route,
 * device, priority, mark, traffic-control, netfilter and security
 * state as the original packet.  Takes its own dst reference,
 * releasing whatever dst the destination skb previously held.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
574
/*
 * ip6_find_1stfragopt - find where the fragment header must be inserted.
 * @nexthdr: on return, points at the next-header byte that should be
 *           overwritten with NEXTHDR_FRAGMENT.
 *
 * Walks the extension header chain starting just after the basic IPv6
 * header.  Hop-by-hop, routing and — once a routing header has been
 * seen, or when it carries a Home Address option (MIPv6) — destination
 * options headers belong to the unfragmentable part; any other header
 * terminates the scan.
 *
 * Returns the offset (from the network header) of the end of the
 * unfragmentable part.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			/* Dest-opts carrying a Home Address option stay
			 * in the unfragmentable part: keep scanning. */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
613
/*
 * ip6_fragment - split an oversized packet into fragments and send them.
 * @output: transmit callback applied to every fragment.
 *
 * Two strategies:
 *  - fast path: the skb already arrives as a chain of correctly sized
 *    pieces in skb_shinfo(skb)->frag_list; each piece just gets a copy
 *    of the unfragmentable headers plus a fragment header pushed on;
 *  - slow path: allocate a fresh skb per fragment and copy the data.
 *
 * Packets without local_df set may not be fragmented at all: a
 * PKT_TOOBIG ICMP is sent back and -EMSGSIZE returned.  The skb is
 * consumed on every path.
 */
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen = length of the unfragmentable part (IPv6 header plus
	 * any leading extension headers). */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.  (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Honour a smaller per-socket fragment size (IPV6_MTU). */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* From here on, mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		/* Fast path only when every piece fits, all but the
		 * last are multiples of 8, and nothing is shared. */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
			    goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				/* Transfer socket accounting onto the
				 * individual pieces. */
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		/* Keep a copy of the unfragmentable part to replicate
		 * in front of every subsequent piece. */
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		/* Open a gap for the fragment header in the first piece. */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		/* Hold the route while fragments are in flight. */
		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				/* More-fragments flag on all but the last. */
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		/* Transmission failed: free the pieces not yet sent. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		/* First fragment picks the id; the rest reuse it. */
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
873
874 static inline int ip6_rt_check(struct rt6key *rt_key,
875                                struct in6_addr *fl_addr,
876                                struct in6_addr *addr_cache)
877 {
878         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
879                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
880 }
881
882 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
883                                           struct dst_entry *dst,
884                                           struct flowi *fl)
885 {
886         struct ipv6_pinfo *np = inet6_sk(sk);
887         struct rt6_info *rt = (struct rt6_info *)dst;
888
889         if (!dst)
890                 goto out;
891
892         /* Yes, checking route validity in not connected
893          * case is not very simple. Take into account,
894          * that we do not support routing by source, TOS,
895          * and MSG_DONTROUTE            --ANK (980726)
896          *
897          * 1. ip6_rt_check(): If route was host route,
898          *    check that cached destination is current.
899          *    If it is network route, we still may
900          *    check its validity using saved pointer
901          *    to the last used address: daddr_cache.
902          *    We do not want to save whole address now,
903          *    (because main consumer of this service
904          *    is tcp, which has not this problem),
905          *    so that the last trick works only on connected
906          *    sockets.
907          * 2. oif also should be the same.
908          */
909         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
910 #ifdef CONFIG_IPV6_SUBTREES
911             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
912 #endif
913             (fl->oif && fl->oif != dst->dev->ifindex)) {
914                 dst_release(dst);
915                 dst = NULL;
916         }
917
918 out:
919         return dst;
920 }
921
/*
 * Resolve the route for @fl into *@dst, filling in a source address
 * when the flow does not specify one.  On entry *@dst may already hold
 * a validated cached route; when it is NULL a fresh routing lookup is
 * performed.  Returns 0 on success or a negative errno, in which case
 * *@dst has been released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        /* Flow has no source address yet: pick one appropriate for the
         * destination, honouring the socket's source-address prefs. */
        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        /* Zero destination => lookup resolves to the
                         * default route (next-hop router). */
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}
988
989 /**
990  *      ip6_dst_lookup - perform route lookup on flow
991  *      @sk: socket which provides route info
992  *      @dst: pointer to dst_entry * for result
993  *      @fl: flow to lookup
994  *
995  *      This function performs a route lookup on the given flow.
996  *
997  *      It returns zero on success, or a standard errno code on error.
998  */
999 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1000 {
1001         *dst = NULL;
1002         return ip6_dst_lookup_tail(sk, dst, fl);
1003 }
1004 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1005
1006 /**
1007  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
1008  *      @sk: socket which provides the dst cache and route info
1009  *      @dst: pointer to dst_entry * for result
1010  *      @fl: flow to lookup
1011  *
1012  *      This function performs a route lookup on the given flow with the
1013  *      possibility of using the cached route in the socket if it is valid.
1014  *      It will take the socket dst lock when operating on the dst cache.
1015  *      As a result, this function can only be used in process context.
1016  *
1017  *      It returns zero on success, or a standard errno code on error.
1018  */
1019 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1020 {
1021         *dst = NULL;
1022         if (sk) {
1023                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1024                 *dst = ip6_sk_dst_check(sk, *dst, fl);
1025         }
1026
1027         return ip6_dst_lookup_tail(sk, dst, fl);
1028 }
1029 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1030
1031 static inline int ip6_ufo_append_data(struct sock *sk,
1032                         int getfrag(void *from, char *to, int offset, int len,
1033                         int odd, struct sk_buff *skb),
1034                         void *from, int length, int hh_len, int fragheaderlen,
1035                         int transhdrlen, int mtu,unsigned int flags)
1036
1037 {
1038         struct sk_buff *skb;
1039         int err;
1040
1041         /* There is support for UDP large send offload by network
1042          * device, so create one single skb packet containing complete
1043          * udp datagram
1044          */
1045         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1046                 skb = sock_alloc_send_skb(sk,
1047                         hh_len + fragheaderlen + transhdrlen + 20,
1048                         (flags & MSG_DONTWAIT), &err);
1049                 if (skb == NULL)
1050                         return -ENOMEM;
1051
1052                 /* reserve space for Hardware header */
1053                 skb_reserve(skb, hh_len);
1054
1055                 /* create space for UDP/IP header */
1056                 skb_put(skb,fragheaderlen + transhdrlen);
1057
1058                 /* initialize network header pointer */
1059                 skb_reset_network_header(skb);
1060
1061                 /* initialize protocol header pointer */
1062                 skb->transport_header = skb->network_header + fragheaderlen;
1063
1064                 skb->ip_summed = CHECKSUM_PARTIAL;
1065                 skb->csum = 0;
1066                 sk->sk_sndmsg_off = 0;
1067         }
1068
1069         err = skb_append_datato_frags(sk,skb, getfrag, from,
1070                                       (length - transhdrlen));
1071         if (!err) {
1072                 struct frag_hdr fhdr;
1073
1074                 /* specify the length of each IP datagram fragment*/
1075                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1076                                             sizeof(struct frag_hdr);
1077                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1078                 ipv6_select_ident(skb, &fhdr);
1079                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1080                 __skb_queue_tail(&sk->sk_write_queue, skb);
1081
1082                 return 0;
1083         }
1084         /* There is not enough support do UPD LSO,
1085          * so follow normal path
1086          */
1087         kfree_skb(skb);
1088
1089         return err;
1090 }
1091
/*
 * Append data to the pending (corked) send queue of @sk, splitting the
 * datagram into fragment-sized skbs as the data is copied in through
 * @getfrag.  The first call for a datagram (empty write queue) records
 * the cork state (options, route, hop limit, traffic class, fragment
 * size); subsequent calls reuse that state and ignore the
 * corresponding arguments.  The packet is finally assembled and sent
 * by ip6_push_pending_frames().
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		/* Pin the route for the lifetime of the cork. */
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* In PMTUDISC_PROBE mode use the device mtu directly,
		 * ignoring any (possibly smaller) learned path mtu. */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Write queue already has data: reuse the cork state set
		 * up by the first call; header lengths were accounted
		 * for back then. */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest fragment payload end, 8-byte aligned per RFC 2460. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		/* Device can do UDP fragmentation offload: hand the whole
		 * datagram over as a single large skb. */
		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MODE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: only a soft limit on the
				 * socket send buffer applies. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the tail overlap of the previous skb
				 * into this one and trim the previous skb
				 * back to a fragment boundary, keeping its
				 * checksum consistent. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header space is consumed by the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter/gather: copy into the skb's linear
			 * area, rolling back on a failed copy. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter/gather device: copy into page fragments,
			 * reusing the last partially-filled page when
			 * possible (tracked via sk_sndmsg_page/off). */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			/* Account the copied bytes to the frag, the skb and
			 * the socket's write-memory charge. */
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1391
1392 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1393 {
1394         inet->cork.flags &= ~IPCORK_OPT;
1395         kfree(np->cork.opt);
1396         np->cork.opt = NULL;
1397         if (inet->cork.dst) {
1398                 dst_release(inet->cork.dst);
1399                 inet->cork.dst = NULL;
1400                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1401         }
1402         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1403 }
1404
/*
 * Take all skbs queued by ip6_append_data(), coalesce them into one
 * packet (first skb as head, the rest chained on its frag_list),
 * prepend the extension headers and the IPv6 header, and hand the
 * result to ip6_local_out().  Cork state is released in all cases.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the head skb's frag_list,
	 * transferring their length/memory accounting and dropping their
	 * per-skb socket references (the head skb keeps its own). */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* final_dst may be rewritten by a routing header in
	 * ipv6_push_nfrag_opts() below. */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Version (6), traffic class and flow label share the first
	 * 32-bit word of the IPv6 header. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive values are congestion notifications; report
		 * them only if the socket asked for errors (recverr). */
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}
1487
1488 void ip6_flush_pending_frames(struct sock *sk)
1489 {
1490         struct sk_buff *skb;
1491
1492         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1493                 if (skb->dst)
1494                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1495                                       IPSTATS_MIB_OUTDISCARDS);
1496                 kfree_skb(skb);
1497         }
1498
1499         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1500 }