[IPV6]: Only set nfheader_len for top xfrm dst
[linux-2.6.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on linux/net/ipv4/ip_output.c
11  *
12  *      This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  *
17  *      Changes:
18  *      A.N.Kuznetsov   :       airthmetics in fragmentation.
19  *                              extension headers are implemented.
20  *                              route changes now work.
21  *                              ip6_forward does not confuse sniffers.
22  *                              etc.
23  *
24  *      H. von Brand    :       Added missing #include <linux/string.h>
25  *      Imran Patel     :       frag id should be in NBO
26  *      Kazunori MIYAZAWA @USAGI
27  *                      :       add ip6_append_data and related functions
28  *                              for datagram xmit
29  */
30
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
42
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45
46 #include <net/sock.h>
47 #include <net/snmp.h>
48
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 {
63         static u32 ipv6_fragmentation_id = 1;
64         static DEFINE_SPINLOCK(ip6_id_lock);
65
66         spin_lock_bh(&ip6_id_lock);
67         fhdr->identification = htonl(ipv6_fragmentation_id);
68         if (++ipv6_fragmentation_id == 0)
69                 ipv6_fragmentation_id = 1;
70         spin_unlock_bh(&ip6_id_lock);
71 }
72
73 static int ip6_output_finish(struct sk_buff *skb)
74 {
75         struct dst_entry *dst = skb->dst;
76
77         if (dst->hh)
78                 return neigh_hh_output(dst->hh, skb);
79         else if (dst->neighbour)
80                 return dst->neighbour->output(skb);
81
82         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
83         kfree_skb(skb);
84         return -EINVAL;
85
86 }
87
88 /* dev_loopback_xmit for use with netfilter. */
89 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
90 {
91         skb_reset_mac_header(newskb);
92         __skb_pull(newskb, skb_network_offset(newskb));
93         newskb->pkt_type = PACKET_LOOPBACK;
94         newskb->ip_summed = CHECKSUM_UNNECESSARY;
95         BUG_TRAP(newskb->dst);
96
97         netif_rx(newskb);
98         return 0;
99 }
100
101
102 static int ip6_output2(struct sk_buff *skb)
103 {
104         struct dst_entry *dst = skb->dst;
105         struct net_device *dev = dst->dev;
106
107         skb->protocol = htons(ETH_P_IPV6);
108         skb->dev = dev;
109
110         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
111                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
112                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
113
114                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
115                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
116                                         &ipv6_hdr(skb)->saddr)) {
117                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
118
119                         /* Do not check for IFF_ALLMULTI; multicast routing
120                            is not supported in any case.
121                          */
122                         if (newskb)
123                                 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
124                                         newskb->dev,
125                                         ip6_dev_loopback_xmit);
126
127                         if (ipv6_hdr(skb)->hop_limit == 0) {
128                                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
129                                 kfree_skb(skb);
130                                 return 0;
131                         }
132                 }
133
134                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
135         }
136
137         return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138 }
139
140 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
141 {
142         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
143
144         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
145                skb->dst->dev->mtu : dst_mtu(skb->dst);
146 }
147
148 int ip6_output(struct sk_buff *skb)
149 {
150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151                                 dst_allfrag(skb->dst))
152                 return ip6_fragment(skb, ip6_output2);
153         else
154                 return ip6_output2(skb);
155 }
156
157 /*
158  *      xmit an sk_buff (used by TCP)
159  */
160
161 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
162              struct ipv6_txoptions *opt, int ipfragok)
163 {
164         struct ipv6_pinfo *np = inet6_sk(sk);
165         struct in6_addr *first_hop = &fl->fl6_dst;
166         struct dst_entry *dst = skb->dst;
167         struct ipv6hdr *hdr;
168         u8  proto = fl->proto;
169         int seg_len = skb->len;
170         int hlimit, tclass;
171         u32 mtu;
172
173         if (opt) {
174                 unsigned int head_room;
175
176                 /* First: exthdrs may take lots of space (~8K for now)
177                    MAX_HEADER is not enough.
178                  */
179                 head_room = opt->opt_nflen + opt->opt_flen;
180                 seg_len += head_room;
181                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
182
183                 if (skb_headroom(skb) < head_room) {
184                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
185                         if (skb2 == NULL) {
186                                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
187                                               IPSTATS_MIB_OUTDISCARDS);
188                                 kfree_skb(skb);
189                                 return -ENOBUFS;
190                         }
191                         kfree_skb(skb);
192                         skb = skb2;
193                         if (sk)
194                                 skb_set_owner_w(skb, sk);
195                 }
196                 if (opt->opt_flen)
197                         ipv6_push_frag_opts(skb, opt, &proto);
198                 if (opt->opt_nflen)
199                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
200         }
201
202         skb_push(skb, sizeof(struct ipv6hdr));
203         skb_reset_network_header(skb);
204         hdr = ipv6_hdr(skb);
205
206         /*
207          *      Fill in the IPv6 header
208          */
209
210         hlimit = -1;
211         if (np)
212                 hlimit = np->hop_limit;
213         if (hlimit < 0)
214                 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
215         if (hlimit < 0)
216                 hlimit = ipv6_get_hoplimit(dst->dev);
217
218         tclass = -1;
219         if (np)
220                 tclass = np->tclass;
221         if (tclass < 0)
222                 tclass = 0;
223
224         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
225
226         hdr->payload_len = htons(seg_len);
227         hdr->nexthdr = proto;
228         hdr->hop_limit = hlimit;
229
230         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
231         ipv6_addr_copy(&hdr->daddr, first_hop);
232
233         skb->priority = sk->sk_priority;
234
235         mtu = dst_mtu(dst);
236         if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
237                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
238                               IPSTATS_MIB_OUTREQUESTS);
239                 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
240                                 dst_output);
241         }
242
243         if (net_ratelimit())
244                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
245         skb->dev = dst->dev;
246         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
247         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
248         kfree_skb(skb);
249         return -EMSGSIZE;
250 }
251
252 EXPORT_SYMBOL(ip6_xmit);
253
254 /*
255  *      To avoid extra problems ND packets are send through this
256  *      routine. It's code duplication but I really want to avoid
257  *      extra checks since ipv6_build_header is used by TCP (which
258  *      is for us performance critical)
259  */
260
261 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
262                struct in6_addr *saddr, struct in6_addr *daddr,
263                int proto, int len)
264 {
265         struct ipv6_pinfo *np = inet6_sk(sk);
266         struct ipv6hdr *hdr;
267         int totlen;
268
269         skb->protocol = htons(ETH_P_IPV6);
270         skb->dev = dev;
271
272         totlen = len + sizeof(struct ipv6hdr);
273
274         skb_reset_network_header(skb);
275         skb_put(skb, sizeof(struct ipv6hdr));
276         hdr = ipv6_hdr(skb);
277
278         *(__be32*)hdr = htonl(0x60000000);
279
280         hdr->payload_len = htons(len);
281         hdr->nexthdr = proto;
282         hdr->hop_limit = np->hop_limit;
283
284         ipv6_addr_copy(&hdr->saddr, saddr);
285         ipv6_addr_copy(&hdr->daddr, daddr);
286
287         return 0;
288 }
289
290 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
291 {
292         struct ip6_ra_chain *ra;
293         struct sock *last = NULL;
294
295         read_lock(&ip6_ra_lock);
296         for (ra = ip6_ra_chain; ra; ra = ra->next) {
297                 struct sock *sk = ra->sk;
298                 if (sk && ra->sel == sel &&
299                     (!sk->sk_bound_dev_if ||
300                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
301                         if (last) {
302                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
303                                 if (skb2)
304                                         rawv6_rcv(last, skb2);
305                         }
306                         last = sk;
307                 }
308         }
309
310         if (last) {
311                 rawv6_rcv(last, skb);
312                 read_unlock(&ip6_ra_lock);
313                 return 1;
314         }
315         read_unlock(&ip6_ra_lock);
316         return 0;
317 }
318
319 static int ip6_forward_proxy_check(struct sk_buff *skb)
320 {
321         struct ipv6hdr *hdr = ipv6_hdr(skb);
322         u8 nexthdr = hdr->nexthdr;
323         int offset;
324
325         if (ipv6_ext_hdr(nexthdr)) {
326                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
327                 if (offset < 0)
328                         return 0;
329         } else
330                 offset = sizeof(struct ipv6hdr);
331
332         if (nexthdr == IPPROTO_ICMPV6) {
333                 struct icmp6hdr *icmp6;
334
335                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
336                                          offset + 1 - skb->data)))
337                         return 0;
338
339                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
340
341                 switch (icmp6->icmp6_type) {
342                 case NDISC_ROUTER_SOLICITATION:
343                 case NDISC_ROUTER_ADVERTISEMENT:
344                 case NDISC_NEIGHBOUR_SOLICITATION:
345                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
346                 case NDISC_REDIRECT:
347                         /* For reaction involving unicast neighbor discovery
348                          * message destined to the proxied address, pass it to
349                          * input function.
350                          */
351                         return 1;
352                 default:
353                         break;
354                 }
355         }
356
357         /*
358          * The proxying router can't forward traffic sent to a link-local
359          * address, so signal the sender and discard the packet. This
360          * behavior is clarified by the MIPv6 specification.
361          */
362         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
363                 dst_link_failure(skb);
364                 return -1;
365         }
366
367         return 0;
368 }
369
/*
 * Continuation run after the FORWARD hook accepts a packet: pass it on
 * to dst_output() and return its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
        int rc = dst_output(skb);

        return rc;
}
374
375 int ip6_forward(struct sk_buff *skb)
376 {
377         struct dst_entry *dst = skb->dst;
378         struct ipv6hdr *hdr = ipv6_hdr(skb);
379         struct inet6_skb_parm *opt = IP6CB(skb);
380
381         if (ipv6_devconf.forwarding == 0)
382                 goto error;
383
384         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
385                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
386                 goto drop;
387         }
388
389         skb_forward_csum(skb);
390
391         /*
392          *      We DO NOT make any processing on
393          *      RA packets, pushing them to user level AS IS
394          *      without ane WARRANTY that application will be able
395          *      to interpret them. The reason is that we
396          *      cannot make anything clever here.
397          *
398          *      We are not end-node, so that if packet contains
399          *      AH/ESP, we cannot make anything.
400          *      Defragmentation also would be mistake, RA packets
401          *      cannot be fragmented, because there is no warranty
402          *      that different fragments will go along one path. --ANK
403          */
404         if (opt->ra) {
405                 u8 *ptr = skb_network_header(skb) + opt->ra;
406                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
407                         return 0;
408         }
409
410         /*
411          *      check and decrement ttl
412          */
413         if (hdr->hop_limit <= 1) {
414                 /* Force OUTPUT device used as source address */
415                 skb->dev = dst->dev;
416                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
417                             0, skb->dev);
418                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
419
420                 kfree_skb(skb);
421                 return -ETIMEDOUT;
422         }
423
424         /* XXX: idev->cnf.proxy_ndp? */
425         if (ipv6_devconf.proxy_ndp &&
426             pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
427                 int proxied = ip6_forward_proxy_check(skb);
428                 if (proxied > 0)
429                         return ip6_input(skb);
430                 else if (proxied < 0) {
431                         IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
432                         goto drop;
433                 }
434         }
435
436         if (!xfrm6_route_forward(skb)) {
437                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
438                 goto drop;
439         }
440         dst = skb->dst;
441
442         /* IPv6 specs say nothing about it, but it is clear that we cannot
443            send redirects to source routed frames.
444            We don't send redirects to frames decapsulated from IPsec.
445          */
446         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
447             !skb->sp) {
448                 struct in6_addr *target = NULL;
449                 struct rt6_info *rt;
450                 struct neighbour *n = dst->neighbour;
451
452                 /*
453                  *      incoming and outgoing devices are the same
454                  *      send a redirect.
455                  */
456
457                 rt = (struct rt6_info *) dst;
458                 if ((rt->rt6i_flags & RTF_GATEWAY))
459                         target = (struct in6_addr*)&n->primary_key;
460                 else
461                         target = &hdr->daddr;
462
463                 /* Limit redirects both by destination (here)
464                    and by source (inside ndisc_send_redirect)
465                  */
466                 if (xrlim_allow(dst, 1*HZ))
467                         ndisc_send_redirect(skb, n, target);
468         } else {
469                 int addrtype = ipv6_addr_type(&hdr->saddr);
470
471                 /* This check is security critical. */
472                 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
473                         goto error;
474                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
475                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
476                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
477                         goto error;
478                 }
479         }
480
481         if (skb->len > dst_mtu(dst)) {
482                 /* Again, force OUTPUT device used as source address */
483                 skb->dev = dst->dev;
484                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
485                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
486                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
487                 kfree_skb(skb);
488                 return -EMSGSIZE;
489         }
490
491         if (skb_cow(skb, dst->dev->hard_header_len)) {
492                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
493                 goto drop;
494         }
495
496         hdr = ipv6_hdr(skb);
497
498         /* Mangling hops number delayed to point after skb COW */
499
500         hdr->hop_limit--;
501
502         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
503         return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
504
505 error:
506         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
507 drop:
508         kfree_skb(skb);
509         return -EINVAL;
510 }
511
512 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
513 {
514         to->pkt_type = from->pkt_type;
515         to->priority = from->priority;
516         to->protocol = from->protocol;
517         dst_release(to->dst);
518         to->dst = dst_clone(from->dst);
519         to->dev = from->dev;
520         to->mark = from->mark;
521
522 #ifdef CONFIG_NET_SCHED
523         to->tc_index = from->tc_index;
524 #endif
525         nf_copy(to, from);
526 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
527     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
528         to->nf_trace = from->nf_trace;
529 #endif
530         skb_copy_secmark(to, from);
531 }
532
533 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
534 {
535         u16 offset = sizeof(struct ipv6hdr);
536         struct ipv6_opt_hdr *exthdr =
537                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
538         unsigned int packet_len = skb->tail - skb->network_header;
539         int found_rhdr = 0;
540         *nexthdr = &ipv6_hdr(skb)->nexthdr;
541
542         while (offset + 1 <= packet_len) {
543
544                 switch (**nexthdr) {
545
546                 case NEXTHDR_HOP:
547                         break;
548                 case NEXTHDR_ROUTING:
549                         found_rhdr = 1;
550                         break;
551                 case NEXTHDR_DEST:
552 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
553                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
554                                 break;
555 #endif
556                         if (found_rhdr)
557                                 return offset;
558                         break;
559                 default :
560                         return offset;
561                 }
562
563                 offset += ipv6_optlen(exthdr);
564                 *nexthdr = &exthdr->nexthdr;
565                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
566                                                  offset);
567         }
568
569         return offset;
570 }
571 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
572
573 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
574 {
575         struct net_device *dev;
576         struct sk_buff *frag;
577         struct rt6_info *rt = (struct rt6_info*)skb->dst;
578         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
579         struct ipv6hdr *tmp_hdr;
580         struct frag_hdr *fh;
581         unsigned int mtu, hlen, left, len;
582         __be32 frag_id = 0;
583         int ptr, offset = 0, err=0;
584         u8 *prevhdr, nexthdr = 0;
585
586         dev = rt->u.dst.dev;
587         hlen = ip6_find_1stfragopt(skb, &prevhdr);
588         nexthdr = *prevhdr;
589
590         mtu = ip6_skb_dst_mtu(skb);
591
592         /* We must not fragment if the socket is set to force MTU discovery
593          * or if the skb it not generated by a local socket.  (This last
594          * check should be redundant, but it's free.)
595          */
596         if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
597                 skb->dev = skb->dst->dev;
598                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
599                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
600                 kfree_skb(skb);
601                 return -EMSGSIZE;
602         }
603
604         if (np && np->frag_size < mtu) {
605                 if (np->frag_size)
606                         mtu = np->frag_size;
607         }
608         mtu -= hlen + sizeof(struct frag_hdr);
609
610         if (skb_shinfo(skb)->frag_list) {
611                 int first_len = skb_pagelen(skb);
612
613                 if (first_len - hlen > mtu ||
614                     ((first_len - hlen) & 7) ||
615                     skb_cloned(skb))
616                         goto slow_path;
617
618                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
619                         /* Correct geometry. */
620                         if (frag->len > mtu ||
621                             ((frag->len & 7) && frag->next) ||
622                             skb_headroom(frag) < hlen)
623                             goto slow_path;
624
625                         /* Partially cloned skb? */
626                         if (skb_shared(frag))
627                                 goto slow_path;
628
629                         BUG_ON(frag->sk);
630                         if (skb->sk) {
631                                 sock_hold(skb->sk);
632                                 frag->sk = skb->sk;
633                                 frag->destructor = sock_wfree;
634                                 skb->truesize -= frag->truesize;
635                         }
636                 }
637
638                 err = 0;
639                 offset = 0;
640                 frag = skb_shinfo(skb)->frag_list;
641                 skb_shinfo(skb)->frag_list = NULL;
642                 /* BUILD HEADER */
643
644                 *prevhdr = NEXTHDR_FRAGMENT;
645                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
646                 if (!tmp_hdr) {
647                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
648                         return -ENOMEM;
649                 }
650
651                 __skb_pull(skb, hlen);
652                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
653                 __skb_push(skb, hlen);
654                 skb_reset_network_header(skb);
655                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
656
657                 ipv6_select_ident(skb, fh);
658                 fh->nexthdr = nexthdr;
659                 fh->reserved = 0;
660                 fh->frag_off = htons(IP6_MF);
661                 frag_id = fh->identification;
662
663                 first_len = skb_pagelen(skb);
664                 skb->data_len = first_len - skb_headlen(skb);
665                 skb->len = first_len;
666                 ipv6_hdr(skb)->payload_len = htons(first_len -
667                                                    sizeof(struct ipv6hdr));
668
669                 dst_hold(&rt->u.dst);
670
671                 for (;;) {
672                         /* Prepare header of the next frame,
673                          * before previous one went down. */
674                         if (frag) {
675                                 frag->ip_summed = CHECKSUM_NONE;
676                                 skb_reset_transport_header(frag);
677                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
678                                 __skb_push(frag, hlen);
679                                 skb_reset_network_header(frag);
680                                 memcpy(skb_network_header(frag), tmp_hdr,
681                                        hlen);
682                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
683                                 fh->nexthdr = nexthdr;
684                                 fh->reserved = 0;
685                                 fh->frag_off = htons(offset);
686                                 if (frag->next != NULL)
687                                         fh->frag_off |= htons(IP6_MF);
688                                 fh->identification = frag_id;
689                                 ipv6_hdr(frag)->payload_len =
690                                                 htons(frag->len -
691                                                       sizeof(struct ipv6hdr));
692                                 ip6_copy_metadata(frag, skb);
693                         }
694
695                         err = output(skb);
696                         if(!err)
697                                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
698
699                         if (err || !frag)
700                                 break;
701
702                         skb = frag;
703                         frag = skb->next;
704                         skb->next = NULL;
705                 }
706
707                 kfree(tmp_hdr);
708
709                 if (err == 0) {
710                         IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
711                         dst_release(&rt->u.dst);
712                         return 0;
713                 }
714
715                 while (frag) {
716                         skb = frag->next;
717                         kfree_skb(frag);
718                         frag = skb;
719                 }
720
721                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
722                 dst_release(&rt->u.dst);
723                 return err;
724         }
725
726 slow_path:
727         left = skb->len - hlen;         /* Space per frame */
728         ptr = hlen;                     /* Where to start from */
729
730         /*
731          *      Fragment the datagram.
732          */
733
734         *prevhdr = NEXTHDR_FRAGMENT;
735
736         /*
737          *      Keep copying data until we run out.
738          */
739         while(left > 0) {
740                 len = left;
741                 /* IF: it doesn't fit, use 'mtu' - the data space left */
742                 if (len > mtu)
743                         len = mtu;
744                 /* IF: we are not sending upto and including the packet end
745                    then align the next start on an eight byte boundary */
746                 if (len < left) {
747                         len &= ~7;
748                 }
749                 /*
750                  *      Allocate buffer.
751                  */
752
753                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
754                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
755                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
756                                       IPSTATS_MIB_FRAGFAILS);
757                         err = -ENOMEM;
758                         goto fail;
759                 }
760
761                 /*
762                  *      Set up data on packet
763                  */
764
765                 ip6_copy_metadata(frag, skb);
766                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
767                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
768                 skb_reset_network_header(frag);
769                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
770                 frag->transport_header = (frag->network_header + hlen +
771                                           sizeof(struct frag_hdr));
772
773                 /*
774                  *      Charge the memory for the fragment to any owner
775                  *      it might possess
776                  */
777                 if (skb->sk)
778                         skb_set_owner_w(frag, skb->sk);
779
780                 /*
781                  *      Copy the packet header into the new buffer.
782                  */
783                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
784
785                 /*
786                  *      Build fragment header.
787                  */
788                 fh->nexthdr = nexthdr;
789                 fh->reserved = 0;
790                 if (!frag_id) {
791                         ipv6_select_ident(skb, fh);
792                         frag_id = fh->identification;
793                 } else
794                         fh->identification = frag_id;
795
796                 /*
797                  *      Copy a block of the IP datagram.
798                  */
799                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
800                         BUG();
801                 left -= len;
802
803                 fh->frag_off = htons(offset);
804                 if (left > 0)
805                         fh->frag_off |= htons(IP6_MF);
806                 ipv6_hdr(frag)->payload_len = htons(frag->len -
807                                                     sizeof(struct ipv6hdr));
808
809                 ptr += len;
810                 offset += len;
811
812                 /*
813                  *      Put this fragment into the sending queue.
814                  */
815                 err = output(frag);
816                 if (err)
817                         goto fail;
818
819                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
820         }
821         IP6_INC_STATS(ip6_dst_idev(skb->dst),
822                       IPSTATS_MIB_FRAGOKS);
823         kfree_skb(skb);
824         return err;
825
826 fail:
827         IP6_INC_STATS(ip6_dst_idev(skb->dst),
828                       IPSTATS_MIB_FRAGFAILS);
829         kfree_skb(skb);
830         return err;
831 }
832
833 static inline int ip6_rt_check(struct rt6key *rt_key,
834                                struct in6_addr *fl_addr,
835                                struct in6_addr *addr_cache)
836 {
837         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
838                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
839 }
840
841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
842                                           struct dst_entry *dst,
843                                           struct flowi *fl)
844 {
845         struct ipv6_pinfo *np = inet6_sk(sk);
846         struct rt6_info *rt = (struct rt6_info *)dst;
847
848         if (!dst)
849                 goto out;
850
851         /* Yes, checking route validity in not connected
852          * case is not very simple. Take into account,
853          * that we do not support routing by source, TOS,
854          * and MSG_DONTROUTE            --ANK (980726)
855          *
856          * 1. ip6_rt_check(): If route was host route,
857          *    check that cached destination is current.
858          *    If it is network route, we still may
859          *    check its validity using saved pointer
860          *    to the last used address: daddr_cache.
861          *    We do not want to save whole address now,
862          *    (because main consumer of this service
863          *    is tcp, which has not this problem),
864          *    so that the last trick works only on connected
865          *    sockets.
866          * 2. oif also should be the same.
867          */
868         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
869 #ifdef CONFIG_IPV6_SUBTREES
870             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
871 #endif
872             (fl->oif && fl->oif != dst->dev->ifindex)) {
873                 dst_release(dst);
874                 dst = NULL;
875         }
876
877 out:
878         return dst;
879 }
880
881 static int ip6_dst_lookup_tail(struct sock *sk,
882                                struct dst_entry **dst, struct flowi *fl)
883 {
884         int err;
885
886         if (*dst == NULL)
887                 *dst = ip6_route_output(sk, fl);
888
889         if ((err = (*dst)->error))
890                 goto out_err_release;
891
892         if (ipv6_addr_any(&fl->fl6_src)) {
893                 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
894                 if (err)
895                         goto out_err_release;
896         }
897
898 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
899                 /*
900                  * Here if the dst entry we've looked up
901                  * has a neighbour entry that is in the INCOMPLETE
902                  * state and the src address from the flow is
903                  * marked as OPTIMISTIC, we release the found
904                  * dst entry and replace it instead with the
905                  * dst entry of the nexthop router
906                  */
907                 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
908                         struct inet6_ifaddr *ifp;
909                         struct flowi fl_gw;
910                         int redirect;
911
912                         ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
913
914                         redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915                         if (ifp)
916                                 in6_ifa_put(ifp);
917
918                         if (redirect) {
919                                 /*
920                                  * We need to get the dst entry for the
921                                  * default router instead
922                                  */
923                                 dst_release(*dst);
924                                 memcpy(&fl_gw, fl, sizeof(struct flowi));
925                                 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
926                                 *dst = ip6_route_output(sk, &fl_gw);
927                                 if ((err = (*dst)->error))
928                                         goto out_err_release;
929                         }
930                 }
931 #endif
932
933         return 0;
934
935 out_err_release:
936         if (err == -ENETUNREACH)
937                 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
938         dst_release(*dst);
939         *dst = NULL;
940         return err;
941 }
942
943 /**
944  *      ip6_dst_lookup - perform route lookup on flow
945  *      @sk: socket which provides route info
946  *      @dst: pointer to dst_entry * for result
947  *      @fl: flow to lookup
948  *
949  *      This function performs a route lookup on the given flow.
950  *
951  *      It returns zero on success, or a standard errno code on error.
952  */
953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
954 {
955         *dst = NULL;
956         return ip6_dst_lookup_tail(sk, dst, fl);
957 }
958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
959
960 /**
961  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
962  *      @sk: socket which provides the dst cache and route info
963  *      @dst: pointer to dst_entry * for result
964  *      @fl: flow to lookup
965  *
966  *      This function performs a route lookup on the given flow with the
967  *      possibility of using the cached route in the socket if it is valid.
968  *      It will take the socket dst lock when operating on the dst cache.
969  *      As a result, this function can only be used in process context.
970  *
971  *      It returns zero on success, or a standard errno code on error.
972  */
973 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
974 {
975         *dst = NULL;
976         if (sk) {
977                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
978                 *dst = ip6_sk_dst_check(sk, *dst, fl);
979         }
980
981         return ip6_dst_lookup_tail(sk, dst, fl);
982 }
983 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
984
985 static inline int ip6_ufo_append_data(struct sock *sk,
986                         int getfrag(void *from, char *to, int offset, int len,
987                         int odd, struct sk_buff *skb),
988                         void *from, int length, int hh_len, int fragheaderlen,
989                         int transhdrlen, int mtu,unsigned int flags)
990
991 {
992         struct sk_buff *skb;
993         int err;
994
995         /* There is support for UDP large send offload by network
996          * device, so create one single skb packet containing complete
997          * udp datagram
998          */
999         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1000                 skb = sock_alloc_send_skb(sk,
1001                         hh_len + fragheaderlen + transhdrlen + 20,
1002                         (flags & MSG_DONTWAIT), &err);
1003                 if (skb == NULL)
1004                         return -ENOMEM;
1005
1006                 /* reserve space for Hardware header */
1007                 skb_reserve(skb, hh_len);
1008
1009                 /* create space for UDP/IP header */
1010                 skb_put(skb,fragheaderlen + transhdrlen);
1011
1012                 /* initialize network header pointer */
1013                 skb_reset_network_header(skb);
1014
1015                 /* initialize protocol header pointer */
1016                 skb->transport_header = skb->network_header + fragheaderlen;
1017
1018                 skb->ip_summed = CHECKSUM_PARTIAL;
1019                 skb->csum = 0;
1020                 sk->sk_sndmsg_off = 0;
1021         }
1022
1023         err = skb_append_datato_frags(sk,skb, getfrag, from,
1024                                       (length - transhdrlen));
1025         if (!err) {
1026                 struct frag_hdr fhdr;
1027
1028                 /* specify the length of each IP datagram fragment*/
1029                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1030                                             sizeof(struct frag_hdr);
1031                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1032                 ipv6_select_ident(skb, &fhdr);
1033                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1034                 __skb_queue_tail(&sk->sk_write_queue, skb);
1035
1036                 return 0;
1037         }
1038         /* There is not enough support do UPD LSO,
1039          * so follow normal path
1040          */
1041         kfree_skb(skb);
1042
1043         return err;
1044 }
1045
/*
 *      ip6_append_data - append data to the skbs pending on a socket
 *
 *      @sk:          socket whose sk_write_queue collects the data
 *      @getfrag:     callback copying @len bytes at @offset of @from to
 *                    @to; a negative return aborts with -EFAULT
 *      @from:        opaque cookie passed to @getfrag
 *      @length:      bytes to append; on the first call exthdrlen is
 *                    added to it
 *      @transhdrlen: transport header length; forced to 0 when the queue
 *                    already holds data
 *      @hlimit:      hop limit, saved in np->cork on the first call
 *      @tclass:      traffic class, saved in np->cork on the first call
 *      @opt:         optional tx options, copied to np->cork.opt on the
 *                    first call
 *      @fl:          flow, saved in inet->cork.fl on the first call
 *      @rt:          route; a reference is taken and it is saved in
 *                    np->cork.rt on the first call
 *      @flags:       MSG_* flags (MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
 *                    examined here)
 *
 *      On the first call (empty write queue) the cork state is set up from
 *      the arguments.  On later calls @rt, @fl and @opt are replaced by the
 *      values stored in the cork.
 *
 *      Returns 0 on success or a negative errno.  Through the error label
 *      the unconsumed part of @length is taken back out of
 *      inet->cork.length and IPSTATS_MIB_OUTDISCARDS is incremented; the
 *      early -ENOBUFS/-EINVAL/-EMSGSIZE returns do neither.
 */
1046 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1047         int offset, int len, int odd, struct sk_buff *skb),
1048         void *from, int length, int transhdrlen,
1049         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1050         struct rt6_info *rt, unsigned int flags)
1051 {
1052         struct inet_sock *inet = inet_sk(sk);
1053         struct ipv6_pinfo *np = inet6_sk(sk);
1054         struct sk_buff *skb;
1055         unsigned int maxfraglen, fragheaderlen;
1056         int exthdrlen;
1057         int hh_len;
1058         int mtu;
1059         int copy;
1060         int err;
1061         int offset = 0;
1062         int csummode = CHECKSUM_NONE;
1063
1064         if (flags&MSG_PROBE)
1065                 return 0;
1066         if (skb_queue_empty(&sk->sk_write_queue)) {
1067                 /*
1068                  * setup for corking
1069                  */
1070                 if (opt) {
1071                         if (np->cork.opt == NULL) {
1072                                 np->cork.opt = kmalloc(opt->tot_len,
1073                                                        sk->sk_allocation);
1074                                 if (unlikely(np->cork.opt == NULL))
1075                                         return -ENOBUFS;
1076                         } else if (np->cork.opt->tot_len < opt->tot_len) {
1077                                 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1078                                 return -EINVAL;
1079                         }
1080                         memcpy(np->cork.opt, opt, opt->tot_len);
1081                         inet->cork.flags |= IPCORK_OPT;
1082                         /* need source address above miyazawa*/
1083                 }
1084                 dst_hold(&rt->u.dst);
1085                 np->cork.rt = rt;
1086                 inet->cork.fl = *fl;
1087                 np->cork.hop_limit = hlimit;
1088                 np->cork.tclass = tclass;
                     /*
                      * With IPV6_PMTUDISC_PROBE use the device MTU, otherwise
                      * the MTU of the dst path.  A non-zero np->frag_size
                      * smaller than that caps the value.
                      */
1089                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1090                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1091                 if (np->frag_size < mtu) {
1092                         if (np->frag_size)
1093                                 mtu = np->frag_size;
1094                 }
1095                 inet->cork.fragsize = mtu;
1096                 if (dst_allfrag(rt->u.dst.path))
1097                         inet->cork.flags |= IPCORK_ALLFRAG;
1098                 inet->cork.length = 0;
1099                 sk->sk_sndmsg_page = NULL;
1100                 sk->sk_sndmsg_off = 0;
                     /*
                      * exthdrlen: dst header_len plus the fragmentable
                      * option length, minus nfheader_len.  It is charged to
                      * both length and transhdrlen of this first call.
                      */
1101                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1102                             rt->u.dst.nfheader_len;
1103                 length += exthdrlen;
1104                 transhdrlen += exthdrlen;
1105         } else {
                     /* Queue not empty: reuse what the first call stored. */
1106                 rt = np->cork.rt;
1107                 fl = &inet->cork.fl;
1108                 if (inet->cork.flags & IPCORK_OPT)
1109                         opt = np->cork.opt;
1110                 transhdrlen = 0;
1111                 exthdrlen = 0;
1112                 mtu = inet->cork.fragsize;
1113         }
1114
1115         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1116
             /*
              * fragheaderlen: IPv6 header + nfheader_len + non-fragmentable
              * options.  maxfraglen: skb length limit when fragmenting; the
              * part beyond fragheaderlen is rounded down to a multiple of 8
              * and sizeof(struct frag_hdr) is subtracted.
              */
1117         fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
1118         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1119
             /*
              * Unless the MTU itself exceeds sizeof(ipv6hdr) + IPV6_MAXPLEN,
              * the corked total may not grow beyond
              * sizeof(ipv6hdr) + IPV6_MAXPLEN - fragheaderlen.
              */
1120         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1121                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1122                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1123                         return -EMSGSIZE;
1124                 }
1125         }
1126
1127         /*
1128          * Let's try using as much space as possible.
1129          * Use MTU if total length of the message fits into the MTU.
1130          * Otherwise, we need to reserve fragment header and
1131          * fragment alignment (= 8-15 octets, in total).
1132          *
1133          * Note that we may need to "move" the data from the tail of
1134          * the buffer to the new fragment when we split
1135          * the message.
1136          *
1137          * FIXME: It may be fragmented into multiple chunks
1138          *        at once if non-fragmentable extension headers
1139          *        are too large.
1140          * --yoshfuji
1141          */
1142
1143         inet->cork.length += length;
             /*
              * UDP data longer than the MTU on a device advertising
              * NETIF_F_UFO is handed to ip6_ufo_append_data() as a whole.
              */
1144         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1145             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1146
1147                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1148                                           fragheaderlen, transhdrlen, mtu,
1149                                           flags);
1150                 if (err)
1151                         goto error;
1152                 return 0;
1153         }
1154
             /* Empty queue: jump straight into the allocation path below. */
1155         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1156                 goto alloc_new_skb;
1157
1158         while (length > 0) {
1159                 /* Check if the remaining data fits into current packet. */
1160                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1161                 if (copy < length)
1162                         copy = maxfraglen - skb->len;
1163
1164                 if (copy <= 0) {
1165                         char *data;
1166                         unsigned int datalen;
1167                         unsigned int fraglen;
1168                         unsigned int fraggap;
1169                         unsigned int alloclen;
1170                         struct sk_buff *skb_prev;
1171 alloc_new_skb:
1172                         skb_prev = skb;
1173
1174                         /* There's no room in the current skb */
                             /*
                              * fraggap: bytes by which the previous skb exceeds
                              * maxfraglen; they are moved into the new skb below.
                              */
1175                         if (skb_prev)
1176                                 fraggap = skb_prev->len - maxfraglen;
1177                         else
1178                                 fraggap = 0;
1179
1180                         /*
1181                          * If remaining data exceeds the mtu,
1182                          * we know we need more fragment(s).
1183                          */
1184                         datalen = length + fraggap;
1185                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1186                                 datalen = maxfraglen - fragheaderlen;
1187
1188                         fraglen = datalen + fragheaderlen;
                             /*
                              * More data is announced and the device cannot do
                              * scatter-gather: allocate a full MTU so later
                              * calls can append into the linear area.
                              */
1189                         if ((flags & MSG_MORE) &&
1190                             !(rt->u.dst.dev->features&NETIF_F_SG))
1191                                 alloclen = mtu;
1192                         else
1193                                 alloclen = datalen + fragheaderlen;
1194
1195                         /*
1196                          * The last fragment gets additional space at tail.
1197                          * Note: we overallocate on fragments with MSG_MORE
1198                          * because we have no idea if we're the last one.
1199                          */
1200                         if (datalen == length + fraggap)
1201                                 alloclen += rt->u.dst.trailer_len;
1202
1203                         /*
1204                          * We just reserve space for fragment header.
1205                          * Note: this may be overallocation if the message
1206                          * (without MSG_MORE) fits into the MTU.
1207                          */
1208                         alloclen += sizeof(struct frag_hdr);
1209
                             /*
                              * The skb carrying the transport header is obtained
                              * with sock_alloc_send_skb(); later ones use
                              * sock_wmalloc(), and only while sk_wmem_alloc does
                              * not exceed twice sk_sndbuf.
                              */
1210                         if (transhdrlen) {
1211                                 skb = sock_alloc_send_skb(sk,
1212                                                 alloclen + hh_len,
1213                                                 (flags & MSG_DONTWAIT), &err);
1214                         } else {
1215                                 skb = NULL;
1216                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1217                                     2 * sk->sk_sndbuf)
1218                                         skb = sock_wmalloc(sk,
1219                                                            alloclen + hh_len, 1,
1220                                                            sk->sk_allocation);
1221                                 if (unlikely(skb == NULL))
1222                                         err = -ENOBUFS;
1223                         }
1224                         if (skb == NULL)
1225                                 goto error;
1226                         /*
1227                          *      Fill in the control structures
1228                          */
1229                         skb->ip_summed = csummode;
1230                         skb->csum = 0;
1231                         /* reserve for fragmentation */
1232                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1233
1234                         /*
1235                          *      Find where to start putting bytes
1236                          */
1237                         data = skb_put(skb, fraglen);
1238                         skb_set_network_header(skb, exthdrlen);
1239                         data += fragheaderlen;
1240                         skb->transport_header = (skb->network_header +
1241                                                  fragheaderlen);
                             /*
                              * Move the overhanging tail of the previous skb into
                              * this one, transfer its checksum contribution and
                              * trim the previous skb back to maxfraglen.
                              */
1242                         if (fraggap) {
1243                                 skb->csum = skb_copy_and_csum_bits(
1244                                         skb_prev, maxfraglen,
1245                                         data + transhdrlen, fraggap, 0);
1246                                 skb_prev->csum = csum_sub(skb_prev->csum,
1247                                                           skb->csum);
1248                                 data += fraggap;
1249                                 pskb_trim_unique(skb_prev, maxfraglen);
1250                         }
1251                         copy = datalen - transhdrlen - fraggap;
1252                         if (copy < 0) {
1253                                 err = -EINVAL;
1254                                 kfree_skb(skb);
1255                                 goto error;
1256                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1257                                 err = -EFAULT;
1258                                 kfree_skb(skb);
1259                                 goto error;
1260                         }
1261
1262                         offset += copy;
1263                         length -= datalen - fraggap;
                             /* Header lengths only apply to the first skb. */
1264                         transhdrlen = 0;
1265                         exthdrlen = 0;
1266                         csummode = CHECKSUM_NONE;
1267
1268                         /*
1269                          * Put the packet on the pending queue
1270                          */
1271                         __skb_queue_tail(&sk->sk_write_queue, skb);
1272                         continue;
1273                 }
1274
1275                 if (copy > length)
1276                         copy = length;
1277
                     /*
                      * Without scatter-gather the data is appended to the
                      * skb's linear area; with it, the data goes into page
                      * fragments, reusing sk->sk_sndmsg_page while it has room.
                      */
1278                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1279                         unsigned int off;
1280
1281                         off = skb->len;
1282                         if (getfrag(from, skb_put(skb, copy),
1283                                                 offset, copy, off, skb) < 0) {
1284                                 __skb_trim(skb, off);
1285                                 err = -EFAULT;
1286                                 goto error;
1287                         }
1288                 } else {
1289                         int i = skb_shinfo(skb)->nr_frags;
1290                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1291                         struct page *page = sk->sk_sndmsg_page;
1292                         int off = sk->sk_sndmsg_off;
1293                         unsigned int left;
1294
1295                         if (page && (left = PAGE_SIZE - off) > 0) {
1296                                 if (copy >= left)
1297                                         copy = left;
1298                                 if (page != frag->page) {
1299                                         if (i == MAX_SKB_FRAGS) {
1300                                                 err = -EMSGSIZE;
1301                                                 goto error;
1302                                         }
1303                                         get_page(page);
1304                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1305                                         frag = &skb_shinfo(skb)->frags[i];
1306                                 }
1307                         } else if(i < MAX_SKB_FRAGS) {
1308                                 if (copy > PAGE_SIZE)
1309                                         copy = PAGE_SIZE;
1310                                 page = alloc_pages(sk->sk_allocation, 0);
1311                                 if (page == NULL) {
1312                                         err = -ENOMEM;
1313                                         goto error;
1314                                 }
1315                                 sk->sk_sndmsg_page = page;
1316                                 sk->sk_sndmsg_off = 0;
1317
1318                                 skb_fill_page_desc(skb, i, page, 0, 0);
1319                                 frag = &skb_shinfo(skb)->frags[i];
1320                         } else {
1321                                 err = -EMSGSIZE;
1322                                 goto error;
1323                         }
1324                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1325                                 err = -EFAULT;
1326                                 goto error;
1327                         }
1328                         sk->sk_sndmsg_off += copy;
1329                         frag->size += copy;
1330                         skb->len += copy;
1331                         skb->data_len += copy;
1332                         skb->truesize += copy;
1333                         atomic_add(copy, &sk->sk_wmem_alloc);
1334                 }
1335                 offset += copy;
1336                 length -= copy;
1337         }
1338         return 0;
1339 error:
             /* Take the part of length that was not queued back out. */
1340         inet->cork.length -= length;
1341         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1342         return err;
1343 }
1344
1345 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1346 {
1347         inet->cork.flags &= ~IPCORK_OPT;
1348         kfree(np->cork.opt);
1349         np->cork.opt = NULL;
1350         if (np->cork.rt) {
1351                 dst_release(&np->cork.rt->u.dst);
1352                 np->cork.rt = NULL;
1353                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1354         }
1355         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1356 }
1357
1358 int ip6_push_pending_frames(struct sock *sk)
1359 {
1360         struct sk_buff *skb, *tmp_skb;
1361         struct sk_buff **tail_skb;
1362         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1363         struct inet_sock *inet = inet_sk(sk);
1364         struct ipv6_pinfo *np = inet6_sk(sk);
1365         struct ipv6hdr *hdr;
1366         struct ipv6_txoptions *opt = np->cork.opt;
1367         struct rt6_info *rt = np->cork.rt;
1368         struct flowi *fl = &inet->cork.fl;
1369         unsigned char proto = fl->proto;
1370         int err = 0;
1371
1372         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1373                 goto out;
1374         tail_skb = &(skb_shinfo(skb)->frag_list);
1375
1376         /* move skb->data to ip header from ext header */
1377         if (skb->data < skb_network_header(skb))
1378                 __skb_pull(skb, skb_network_offset(skb));
1379         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1380                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1381                 *tail_skb = tmp_skb;
1382                 tail_skb = &(tmp_skb->next);
1383                 skb->len += tmp_skb->len;
1384                 skb->data_len += tmp_skb->len;
1385                 skb->truesize += tmp_skb->truesize;
1386                 __sock_put(tmp_skb->sk);
1387                 tmp_skb->destructor = NULL;
1388                 tmp_skb->sk = NULL;
1389         }
1390
1391         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1392         __skb_pull(skb, skb_network_header_len(skb));
1393         if (opt && opt->opt_flen)
1394                 ipv6_push_frag_opts(skb, opt, &proto);
1395         if (opt && opt->opt_nflen)
1396                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1397
1398         skb_push(skb, sizeof(struct ipv6hdr));
1399         skb_reset_network_header(skb);
1400         hdr = ipv6_hdr(skb);
1401
1402         *(__be32*)hdr = fl->fl6_flowlabel |
1403                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1404
1405         if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1406                 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1407         else
1408                 hdr->payload_len = 0;
1409         hdr->hop_limit = np->cork.hop_limit;
1410         hdr->nexthdr = proto;
1411         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1412         ipv6_addr_copy(&hdr->daddr, final_dst);
1413
1414         skb->priority = sk->sk_priority;
1415
1416         skb->dst = dst_clone(&rt->u.dst);
1417         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1418         if (proto == IPPROTO_ICMPV6) {
1419                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1420
1421                 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1422                 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1423         }
1424
1425         err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1426         if (err) {
1427                 if (err > 0)
1428                         err = np->recverr ? net_xmit_errno(err) : 0;
1429                 if (err)
1430                         goto error;
1431         }
1432
1433 out:
1434         ip6_cork_release(inet, np);
1435         return err;
1436 error:
1437         goto out;
1438 }
1439
1440 void ip6_flush_pending_frames(struct sock *sk)
1441 {
1442         struct sk_buff *skb;
1443
1444         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1445                 if (skb->dst)
1446                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1447                                       IPSTATS_MIB_OUTDISCARDS);
1448                 kfree_skb(skb);
1449         }
1450
1451         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1452 }