[INET]: Move bind_hash from tcp_sk to inet_sk
[linux-2.6.git] / net / ipv6 / tcp_ipv6.c
1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
19  *                                      a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
36 #include <linux/in.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
43
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
47
48 #include <net/tcp.h>
49 #include <net/ndisc.h>
50 #include <net/ipv6.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
57 #include <net/xfrm.h>
58 #include <net/addrconf.h>
59 #include <net/snmp.h>
60 #include <net/dsfield.h>
61
62 #include <asm/uaccess.h>
63
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
66
/*
 * Prototypes for file-local (static) routines that are referenced in
 * this file before their definitions appear.
 */
67 static void     tcp_v6_send_reset(struct sk_buff *skb);
68 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
70                                   struct sk_buff *skb);
71
72 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74
/*
 * Address-family operation tables.  tcp_v6_connect() points
 * tp->af_specific at ipv6_mapped when the peer is a v4-mapped address
 * and back at ipv6_specific if that connect attempt fails;
 * tcp_v6_hash() compares against &ipv6_mapped to decide whether to
 * delegate to tcp_prot.hash().
 */
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
77
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80                                     struct in6_addr *faddr, u16 fport)
81 {
82         int hashent = (lport ^ fport);
83
84         hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85         hashent ^= hashent>>16;
86         hashent ^= hashent>>8;
87         return (hashent & (tcp_ehash_size - 1));
88 }
89
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91 {
92         struct inet_sock *inet = inet_sk(sk);
93         struct ipv6_pinfo *np = inet6_sk(sk);
94         struct in6_addr *laddr = &np->rcv_saddr;
95         struct in6_addr *faddr = &np->daddr;
96         __u16 lport = inet->num;
97         __u16 fport = inet->dport;
98         return tcp_v6_hashfn(laddr, lport, faddr, fport);
99 }
100
101 static inline int tcp_v6_bind_conflict(const struct sock *sk,
102                                        const struct inet_bind_bucket *tb)
103 {
104         const struct sock *sk2;
105         const struct hlist_node *node;
106
107         /* We must walk the whole port owner list in this case. -DaveM */
108         sk_for_each_bound(sk2, node, &tb->owners) {
109                 if (sk != sk2 &&
110                     (!sk->sk_bound_dev_if ||
111                      !sk2->sk_bound_dev_if ||
112                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113                     (!sk->sk_reuse || !sk2->sk_reuse ||
114                      sk2->sk_state == TCP_LISTEN) &&
115                      ipv6_rcv_saddr_equal(sk, sk2))
116                         break;
117         }
118
119         return node != NULL;
120 }
121
122 /* Grrr, addr_type already calculated by caller, but I don't want
123  * to add some silly "cookie" argument to this method just for that.
124  * But it doesn't matter, the recalculation is in the rarest path
125  * this function ever takes.
126  */
/*
 * tcp_v6_get_port - attach @sk to a local port in the bind hash.
 *
 * @snum is the requested port; 0 asks for any port from
 * sysctl_local_port_range that currently has no bind bucket.
 *
 * Returns 0 on success and 1 on failure (range exhausted, bind
 * conflict, or bucket allocation failure).  Bottom halves are disabled
 * for the whole body and re-enabled on every exit path.
 *
 * NOTE(review): nothing in this body computes an addr_type, so the
 * comment above looks inherited; presumably the "recalculation" happens
 * inside ipv6_rcv_saddr_equal() via tcp_v6_bind_conflict() - confirm.
 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128 {
129         struct inet_bind_hashbucket *head;
130         struct inet_bind_bucket *tb;
131         struct hlist_node *node;
132         int ret;
133
134         local_bh_disable();
135         if (snum == 0) {
136                 int low = sysctl_local_port_range[0];
137                 int high = sysctl_local_port_range[1];
138                 int remaining = (high - low) + 1;
139                 int rover;
140
            /* tcp_port_rover is shared state; tcp_portalloc_lock
             * guards it for the duration of the search.
             */
141                 spin_lock(&tcp_portalloc_lock);
142                 if (tcp_port_rover < low)
143                         rover = low;
144                 else
145                         rover = tcp_port_rover;
            /* Advance through the range (wrapping at 'high') until a
             * port with no bind bucket at all is found.  Leaving via
             * 'break' keeps head->lock held; leaving because
             * 'remaining' hit zero does not.
             */
146                 do {    rover++;
147                         if (rover > high)
148                                 rover = low;
149                         head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)];
150                         spin_lock(&head->lock);
151                         inet_bind_bucket_for_each(tb, node, &head->chain)
152                                 if (tb->port == rover)
153                                         goto next;
154                         break;
155                 next:
156                         spin_unlock(&head->lock);
157                 } while (--remaining > 0);
158                 tcp_port_rover = rover;
159                 spin_unlock(&tcp_portalloc_lock);
160
161                 /* Exhausted local port range during search?  It is not
162                  * possible for us to be holding one of the bind hash
163                  * locks if this test triggers, because if 'remaining'
164                  * drops to zero, we broke out of the do/while loop at
165                  * the top level, not from the 'break;' statement.
166                  */
167                 ret = 1;
168                 if (unlikely(remaining <= 0))
169                         goto fail;
170
171                 /* OK, here is the one we will use. */
172                 snum = rover;
173         } else {
            /* Explicit port: look for an existing bucket for it. */
174                 head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)];
175                 spin_lock(&head->lock);
176                 inet_bind_bucket_for_each(tb, node, &head->chain)
177                         if (tb->port == snum)
178                                 goto tb_found;
179         }
        /* No bucket exists for snum yet (both branches arrive here with
         * head->lock held).
         */
180         tb = NULL;
181         goto tb_not_found;
182 tb_found:
183         if (tb && !hlist_empty(&tb->owners)) {
            /* Fast path: the bucket is marked reusable and this socket
             * wants reuse and is not listening - skip the owner walk.
             */
184                 if (tb->fastreuse > 0 && sk->sk_reuse &&
185                     sk->sk_state != TCP_LISTEN) {
186                         goto success;
187                 } else {
188                         ret = 1;
189                         if (tcp_v6_bind_conflict(sk, tb))
190                                 goto fail_unlock;
191                 }
192         }
193 tb_not_found:
194         ret = 1;
195         if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL)
196                 goto fail_unlock;
        /* Maintain tb->fastreuse: set from this socket when the bucket
         * has no owners yet, cleared as soon as a non-reuse or
         * listening socket joins.
         */
197         if (hlist_empty(&tb->owners)) {
198                 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
199                         tb->fastreuse = 1;
200                 else
201                         tb->fastreuse = 0;
202         } else if (tb->fastreuse &&
203                    (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
204                 tb->fastreuse = 0;
205
206 success:
        /* Link the socket to the bucket unless it is already bound. */
207         if (!inet_sk(sk)->bind_hash)
208                 tcp_bind_hash(sk, tb, snum);
209         BUG_TRAP(inet_sk(sk)->bind_hash == tb);
210         ret = 0;
211
212 fail_unlock:
213         spin_unlock(&head->lock);
214 fail:
215         local_bh_enable();
216         return ret;
217 }
218
219 static __inline__ void __tcp_v6_hash(struct sock *sk)
220 {
221         struct hlist_head *list;
222         rwlock_t *lock;
223
224         BUG_TRAP(sk_unhashed(sk));
225
226         if (sk->sk_state == TCP_LISTEN) {
227                 list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)];
228                 lock = &tcp_lhash_lock;
229                 tcp_listen_wlock();
230         } else {
231                 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
232                 list = &tcp_ehash[sk->sk_hashent].chain;
233                 lock = &tcp_ehash[sk->sk_hashent].lock;
234                 write_lock(lock);
235         }
236
237         __sk_add_node(sk, list);
238         sock_prot_inc_use(sk->sk_prot);
239         write_unlock(lock);
240 }
241
242
243 static void tcp_v6_hash(struct sock *sk)
244 {
245         if (sk->sk_state != TCP_CLOSE) {
246                 struct tcp_sock *tp = tcp_sk(sk);
247
248                 if (tp->af_specific == &ipv6_mapped) {
249                         tcp_prot.hash(sk);
250                         return;
251                 }
252                 local_bh_disable();
253                 __tcp_v6_hash(sk);
254                 local_bh_enable();
255         }
256 }
257
258 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
259 {
260         struct sock *sk;
261         struct hlist_node *node;
262         struct sock *result = NULL;
263         int score, hiscore;
264
265         hiscore=0;
266         read_lock(&tcp_lhash_lock);
267         sk_for_each(sk, node, &tcp_listening_hash[inet_lhashfn(hnum)]) {
268                 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
269                         struct ipv6_pinfo *np = inet6_sk(sk);
270                         
271                         score = 1;
272                         if (!ipv6_addr_any(&np->rcv_saddr)) {
273                                 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
274                                         continue;
275                                 score++;
276                         }
277                         if (sk->sk_bound_dev_if) {
278                                 if (sk->sk_bound_dev_if != dif)
279                                         continue;
280                                 score++;
281                         }
282                         if (score == 3) {
283                                 result = sk;
284                                 break;
285                         }
286                         if (score > hiscore) {
287                                 hiscore = score;
288                                 result = sk;
289                         }
290                 }
291         }
292         if (result)
293                 sock_hold(result);
294         read_unlock(&tcp_lhash_lock);
295         return result;
296 }
297
298 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
299  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
300  *
301  * The sockhash lock must be held as a reader here.
302  */
303
304 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
305                                                        struct in6_addr *daddr, u16 hnum,
306                                                        int dif)
307 {
308         struct inet_ehash_bucket *head;
309         struct sock *sk;
310         struct hlist_node *node;
311         __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
312         int hash;
313
314         /* Optimize here for direct hit, only listening connections can
315          * have wildcards anyways.
316          */
317         hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
318         head = &tcp_ehash[hash];
319         read_lock(&head->lock);
320         sk_for_each(sk, node, &head->chain) {
321                 /* For IPV6 do the cheaper port and family tests first. */
322                 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
323                         goto hit; /* You sunk my battleship! */
324         }
325         /* Must check for a TIME_WAIT'er before going to listener hash. */
326         sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
327                 /* FIXME: acme: check this... */
328                 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
329
330                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
331                    sk->sk_family                == PF_INET6) {
332                         if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
333                            ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
334                            (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
335                                 goto hit;
336                 }
337         }
338         read_unlock(&head->lock);
339         return NULL;
340
341 hit:
342         sock_hold(sk);
343         read_unlock(&head->lock);
344         return sk;
345 }
346
347
348 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
349                                            struct in6_addr *daddr, u16 hnum,
350                                            int dif)
351 {
352         struct sock *sk;
353
354         sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
355
356         if (sk)
357                 return sk;
358
359         return tcp_v6_lookup_listener(daddr, hnum, dif);
360 }
361
362 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
363                                   struct in6_addr *daddr, u16 dport,
364                                   int dif)
365 {
366         struct sock *sk;
367
368         local_bh_disable();
369         sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
370         local_bh_enable();
371
372         return sk;
373 }
374
375 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
376
377
378 /*
379  * Open request hash tables.
380  */
381
382 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
383 {
384         u32 a, b, c;
385
386         a = raddr->s6_addr32[0];
387         b = raddr->s6_addr32[1];
388         c = raddr->s6_addr32[2];
389
390         a += JHASH_GOLDEN_RATIO;
391         b += JHASH_GOLDEN_RATIO;
392         c += rnd;
393         __jhash_mix(a, b, c);
394
395         a += raddr->s6_addr32[3];
396         b += (u32) rport;
397         __jhash_mix(a, b, c);
398
399         return c & (TCP_SYNQ_HSIZE - 1);
400 }
401
402 static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
403                                               struct request_sock ***prevp,
404                                               __u16 rport,
405                                               struct in6_addr *raddr,
406                                               struct in6_addr *laddr,
407                                               int iif)
408 {
409         struct listen_sock *lopt = tp->accept_queue.listen_opt;
410         struct request_sock *req, **prev;  
411
412         for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
413              (req = *prev) != NULL;
414              prev = &req->dl_next) {
415                 const struct tcp6_request_sock *treq = tcp6_rsk(req);
416
417                 if (inet_rsk(req)->rmt_port == rport &&
418                     req->rsk_ops->family == AF_INET6 &&
419                     ipv6_addr_equal(&treq->rmt_addr, raddr) &&
420                     ipv6_addr_equal(&treq->loc_addr, laddr) &&
421                     (!treq->iif || treq->iif == iif)) {
422                         BUG_TRAP(req->sk == NULL);
423                         *prevp = prev;
424                         return req;
425                 }
426         }
427
428         return NULL;
429 }
430
431 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
432                                    struct in6_addr *saddr, 
433                                    struct in6_addr *daddr, 
434                                    unsigned long base)
435 {
436         return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
437 }
438
439 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
440 {
441         if (skb->protocol == htons(ETH_P_IPV6)) {
442                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
443                                                     skb->nh.ipv6h->saddr.s6_addr32,
444                                                     skb->h.th->dest,
445                                                     skb->h.th->source);
446         } else {
447                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
448                                                   skb->nh.iph->saddr,
449                                                   skb->h.th->dest,
450                                                   skb->h.th->source);
451         }
452 }
453
454 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
455                                       struct tcp_tw_bucket **twp)
456 {
457         struct inet_sock *inet = inet_sk(sk);
458         struct ipv6_pinfo *np = inet6_sk(sk);
459         struct in6_addr *daddr = &np->rcv_saddr;
460         struct in6_addr *saddr = &np->daddr;
461         int dif = sk->sk_bound_dev_if;
462         u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
463         int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
464         struct inet_ehash_bucket *head = &tcp_ehash[hash];
465         struct sock *sk2;
466         struct hlist_node *node;
467         struct tcp_tw_bucket *tw;
468
469         write_lock(&head->lock);
470
471         /* Check TIME-WAIT sockets first. */
472         sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
473                 tw = (struct tcp_tw_bucket*)sk2;
474
475                 if(*((__u32 *)&(tw->tw_dport))  == ports        &&
476                    sk2->sk_family               == PF_INET6     &&
477                    ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
478                    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
479                    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
480                         struct tcp_sock *tp = tcp_sk(sk);
481
482                         if (tw->tw_ts_recent_stamp &&
483                             (!twp || (sysctl_tcp_tw_reuse &&
484                                       xtime.tv_sec - 
485                                       tw->tw_ts_recent_stamp > 1))) {
486                                 /* See comment in tcp_ipv4.c */
487                                 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
488                                 if (!tp->write_seq)
489                                         tp->write_seq = 1;
490                                 tp->rx_opt.ts_recent = tw->tw_ts_recent;
491                                 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
492                                 sock_hold(sk2);
493                                 goto unique;
494                         } else
495                                 goto not_unique;
496                 }
497         }
498         tw = NULL;
499
500         /* And established part... */
501         sk_for_each(sk2, node, &head->chain) {
502                 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
503                         goto not_unique;
504         }
505
506 unique:
507         BUG_TRAP(sk_unhashed(sk));
508         __sk_add_node(sk, &head->chain);
509         sk->sk_hashent = hash;
510         sock_prot_inc_use(sk->sk_prot);
511         write_unlock(&head->lock);
512
513         if (twp) {
514                 *twp = tw;
515                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
516         } else if (tw) {
517                 /* Silly. Should hash-dance instead... */
518                 tcp_tw_deschedule(tw);
519                 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
520
521                 tcp_tw_put(tw);
522         }
523         return 0;
524
525 not_unique:
526         write_unlock(&head->lock);
527         return -EADDRNOTAVAIL;
528 }
529
530 static inline u32 tcpv6_port_offset(const struct sock *sk)
531 {
532         const struct inet_sock *inet = inet_sk(sk);
533         const struct ipv6_pinfo *np = inet6_sk(sk);
534
535         return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
536                                            np->daddr.s6_addr32,
537                                            inet->dport);
538 }
539
540 static int tcp_v6_hash_connect(struct sock *sk)
541 {
542         unsigned short snum = inet_sk(sk)->num;
543         struct inet_bind_hashbucket *head;
544         struct inet_bind_bucket *tb;
545         int ret;
546
547         if (!snum) {
548                 int low = sysctl_local_port_range[0];
549                 int high = sysctl_local_port_range[1];
550                 int range = high - low;
551                 int i;
552                 int port;
553                 static u32 hint;
554                 u32 offset = hint + tcpv6_port_offset(sk);
555                 struct hlist_node *node;
556                 struct tcp_tw_bucket *tw = NULL;
557
558                 local_bh_disable();
559                 for (i = 1; i <= range; i++) {
560                         port = low + (i + offset) % range;
561                         head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)];
562                         spin_lock(&head->lock);
563
564                         /* Does not bother with rcv_saddr checks,
565                          * because the established check is already
566                          * unique enough.
567                          */
568                         inet_bind_bucket_for_each(tb, node, &head->chain) {
569                                 if (tb->port == port) {
570                                         BUG_TRAP(!hlist_empty(&tb->owners));
571                                         if (tb->fastreuse >= 0)
572                                                 goto next_port;
573                                         if (!__tcp_v6_check_established(sk,
574                                                                         port,
575                                                                         &tw))
576                                                 goto ok;
577                                         goto next_port;
578                                 }
579                         }
580
581                         tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port);
582                         if (!tb) {
583                                 spin_unlock(&head->lock);
584                                 break;
585                         }
586                         tb->fastreuse = -1;
587                         goto ok;
588
589                 next_port:
590                         spin_unlock(&head->lock);
591                 }
592                 local_bh_enable();
593
594                 return -EADDRNOTAVAIL;
595
596 ok:
597                 hint += i;
598
599                 /* Head lock still held and bh's disabled */
600                 tcp_bind_hash(sk, tb, port);
601                 if (sk_unhashed(sk)) {
602                         inet_sk(sk)->sport = htons(port);
603                         __tcp_v6_hash(sk);
604                 }
605                 spin_unlock(&head->lock);
606
607                 if (tw) {
608                         tcp_tw_deschedule(tw);
609                         tcp_tw_put(tw);
610                 }
611
612                 ret = 0;
613                 goto out;
614         }
615
616         head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)];
617         tb   = inet_sk(sk)->bind_hash;
618         spin_lock_bh(&head->lock);
619
620         if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
621                 __tcp_v6_hash(sk);
622                 spin_unlock_bh(&head->lock);
623                 return 0;
624         } else {
625                 spin_unlock(&head->lock);
626                 /* No definite answer... Walk to established hash table */
627                 ret = __tcp_v6_check_established(sk, snum, NULL);
628 out:
629                 local_bh_enable();
630                 return ret;
631         }
632 }
633
634 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
635 {
636         return IP6CB(skb)->iif;
637 }
638
/*
 * tcp_v6_connect - start an active open to the address in @uaddr.
 *
 * @sk:       the connecting socket
 * @uaddr:    destination, interpreted as a struct sockaddr_in6; note
 *            that sin6_addr may be rewritten in place (flow label
 *            destination, or the unspecified address turned into
 *            loopback)
 * @addr_len: length of @uaddr as supplied by the caller
 *
 * Returns 0 once the socket has been hashed and tcp_connect()
 * succeeded, otherwise a negative errno.  Destinations that are
 * IPv4-mapped are handed to tcp_v4_connect().
 */
639 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
640                           int addr_len)
641 {
642         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
643         struct inet_sock *inet = inet_sk(sk);
644         struct ipv6_pinfo *np = inet6_sk(sk);
645         struct tcp_sock *tp = tcp_sk(sk);
646         struct in6_addr *saddr = NULL, *final_p = NULL, final;
647         struct flowi fl;
648         struct dst_entry *dst;
649         int addr_type;
650         int err;
651
652         if (addr_len < SIN6_LEN_RFC2133) 
653                 return -EINVAL;
654
655         if (usin->sin6_family != AF_INET6) 
656                 return(-EAFNOSUPPORT);
657
658         memset(&fl, 0, sizeof(fl));
659
        /* With flow labels enabled on the socket, take the label from
         * the caller's sin6_flowinfo; a non-zero label must be known to
         * this socket and its registered destination replaces the one
         * the caller passed.
         */
660         if (np->sndflow) {
661                 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
662                 IP6_ECN_flow_init(fl.fl6_flowlabel);
663                 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
664                         struct ip6_flowlabel *flowlabel;
665                         flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
666                         if (flowlabel == NULL)
667                                 return -EINVAL;
668                         ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
669                         fl6_sock_release(flowlabel);
670                 }
671         }
672
673         /*
674          *      connect() to INADDR_ANY means loopback (BSD'ism).
675          */
676         
        /* Setting the last byte of the all-zero address yields ::1. */
677         if(ipv6_addr_any(&usin->sin6_addr))
678                 usin->sin6_addr.s6_addr[15] = 0x1; 
679
680         addr_type = ipv6_addr_type(&usin->sin6_addr);
681
682         if(addr_type & IPV6_ADDR_MULTICAST)
683                 return -ENETUNREACH;
684
685         if (addr_type&IPV6_ADDR_LINKLOCAL) {
                /* sin6_scope_id is only read when the caller passed a
                 * full-size sockaddr_in6.
                 */
686                 if (addr_len >= sizeof(struct sockaddr_in6) &&
687                     usin->sin6_scope_id) {
688                         /* If interface is set while binding, indices
689                          * must coincide.
690                          */
691                         if (sk->sk_bound_dev_if &&
692                             sk->sk_bound_dev_if != usin->sin6_scope_id)
693                                 return -EINVAL;
694
695                         sk->sk_bound_dev_if = usin->sin6_scope_id;
696                 }
697
698                 /* Connect to link-local address requires an interface */
699                 if (!sk->sk_bound_dev_if)
700                         return -EINVAL;
701         }
702
        /* Timestamp state and the saved write_seq left over from an
         * earlier peer are discarded when the destination changes.
         */
703         if (tp->rx_opt.ts_recent_stamp &&
704             !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
705                 tp->rx_opt.ts_recent = 0;
706                 tp->rx_opt.ts_recent_stamp = 0;
707                 tp->write_seq = 0;
708         }
709
710         ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
711         np->flow_label = fl.fl6_flowlabel;
712
713         /*
714          *      TCP over IPv4
715          */
716
717         if (addr_type == IPV6_ADDR_MAPPED) {
                /* Saved so it can be restored if the v4 connect fails. */
718                 u32 exthdrlen = tp->ext_header_len;
719                 struct sockaddr_in sin;
720
721                 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
722
723                 if (__ipv6_only_sock(sk))
724                         return -ENETUNREACH;
725
                /* The embedded IPv4 address is the last 32-bit word. */
726                 sin.sin_family = AF_INET;
727                 sin.sin_port = usin->sin6_port;
728                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
729
                /* Switch the socket to the v4-mapped operations before
                 * handing it to the IPv4 connect path.
                 */
730                 tp->af_specific = &ipv6_mapped;
731                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
732
733                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
734
735                 if (err) {
                        /* Undo the switch to v4-mapped mode. */
736                         tp->ext_header_len = exthdrlen;
737                         tp->af_specific = &ipv6_specific;
738                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
739                         goto failure;
740                 } else {
                        /* Mirror the IPv4 source addresses chosen by
                         * tcp_v4_connect() as ::ffff:a.b.c.d.
                         */
741                         ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
742                                       inet->saddr);
743                         ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
744                                       inet->rcv_saddr);
745                 }
746
747                 return err;
748         }
749
        /* A socket already bound to a specific address keeps it. */
750         if (!ipv6_addr_any(&np->rcv_saddr))
751                 saddr = &np->rcv_saddr;
752
        /* Describe the flow for the route (and xfrm) lookup. */
753         fl.proto = IPPROTO_TCP;
754         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
755         ipv6_addr_copy(&fl.fl6_src,
756                        (saddr ? saddr : &np->saddr));
757         fl.oif = sk->sk_bound_dev_if;
758         fl.fl_ip_dport = usin->sin6_port;
759         fl.fl_ip_sport = inet->sport;
760
        /* With a routing header option, route towards its first address
         * and remember the real destination; it is put back into the
         * flow after the route lookup.
         */
761         if (np->opt && np->opt->srcrt) {
762                 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
763                 ipv6_addr_copy(&final, &fl.fl6_dst);
764                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
765                 final_p = &final;
766         }
767
768         err = ip6_dst_lookup(sk, &dst, &fl);
769         if (err)
770                 goto failure;
771         if (final_p)
772                 ipv6_addr_copy(&fl.fl6_dst, final_p);
773
774         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
775                 dst_release(dst);
776                 goto failure;
777         }
778
        /* Unbound socket: adopt the source address left in the flow
         * after the lookups.
         */
779         if (saddr == NULL) {
780                 saddr = &fl.fl6_src;
781                 ipv6_addr_copy(&np->rcv_saddr, saddr);
782         }
783
784         /* set the source address */
785         ipv6_addr_copy(&np->saddr, saddr);
786         inet->rcv_saddr = LOOPBACK4_IPV6;
787
        /* Cache the route on the socket; IP checksum offload and TSO
         * are masked out of the device features.
         */
788         ip6_dst_store(sk, dst, NULL);
789         sk->sk_route_caps = dst->dev->features &
790                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
791
792         tp->ext_header_len = 0;
793         if (np->opt)
794                 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
795
796         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
797
798         inet->dport = usin->sin6_port;
799
        /* The state is switched to SYN_SENT before hashing;
         * late_failure puts it back to CLOSE if anything below fails.
         */
800         tcp_set_state(sk, TCP_SYN_SENT);
801         err = tcp_v6_hash_connect(sk);
802         if (err)
803                 goto late_failure;
804
        /* Keep a write_seq that is already non-zero; otherwise generate
         * a fresh initial sequence number.
         */
805         if (!tp->write_seq)
806                 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
807                                                              np->daddr.s6_addr32,
808                                                              inet->sport,
809                                                              inet->dport);
810
811         err = tcp_connect(sk);
812         if (err)
813                 goto late_failure;
814
815         return 0;
816
817 late_failure:
818         tcp_set_state(sk, TCP_CLOSE);
819         __sk_dst_reset(sk);
820 failure:
821         inet->dport = 0;
822         sk->sk_route_caps = 0;
823         return err;
824 }
825
/*
 * tcp_v6_err - handle an ICMPv6 error that refers to a TCP/IPv6 segment.
 *
 * skb->data is read as an IPv6 header and the TCP header is taken @offset
 * bytes after it; presumably this is the offending packet quoted by the
 * ICMPv6 error (confirm against the caller).  @type and @code select the
 * action; @opt and @info are not referenced in this function.
 *
 * The socket is looked up from the addresses and ports found in those
 * headers.  ICMPV6_PKT_TOOBIG re-syncs the MSS against the socket's route
 * (looking one up if none is cached); every other type is converted to an
 * errno value with icmpv6_err_convert() and reported according to the
 * socket state.  Nothing is returned to the caller.
 */
826 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
827                 int type, int code, int offset, __u32 info)
828 {
829         struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
830         struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
831         struct ipv6_pinfo *np;
832         struct sock *sk;
833         int err;
834         struct tcp_sock *tp; 
835         __u32 seq;
836
            /* Note the argument order: the header's destination address/port
             * is passed first, its source address/port second.
             */
837         sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
838
            /* No socket matches: account the error and stop. */
839         if (sk == NULL) {
840                 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
841                 return;
842         }
843
            /* Time-wait entries get no error processing; only the reference
             * is dropped (presumably the one taken by the lookup).
             */
844         if (sk->sk_state == TCP_TIME_WAIT) {
845                 tcp_tw_put((struct tcp_tw_bucket*)sk);
846                 return;
847         }
848
849         bh_lock_sock(sk);
            /* Only counted here; the branches below that modify the socket
             * re-check sock_owned_by_user() themselves.
             */
850         if (sock_owned_by_user(sk))
851                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
852
853         if (sk->sk_state == TCP_CLOSE)
854                 goto out;
855
            /* Except for listeners, the sequence number in the TCP header
             * must fall between snd_una and snd_nxt, otherwise the error is
             * counted as out-of-window and ignored.
             */
856         tp = tcp_sk(sk);
857         seq = ntohl(th->seq); 
858         if (sk->sk_state != TCP_LISTEN &&
859             !between(seq, tp->snd_una, tp->snd_nxt)) {
860                 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
861                 goto out;
862         }
863
864         np = inet6_sk(sk);
865
            /* Packet Too Big: skipped while the user owns the socket and for
             * LISTEN/CLOSE states; otherwise compare the route MTU with the
             * cached pmtu_cookie.
             */
866         if (type == ICMPV6_PKT_TOOBIG) {
867                 struct dst_entry *dst = NULL;
868
869                 if (sock_owned_by_user(sk))
870                         goto out;
871                 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
872                         goto out;
873
874                 /* icmp should have updated the destination cache entry */
875                 dst = __sk_dst_check(sk, np->dst_cookie);
876
                    /* No usable cached route: build a flow from the socket's
                     * addresses and ports and look a route up again.  Lookup
                     * failures are recorded as a soft error only.
                     */
877                 if (dst == NULL) {
878                         struct inet_sock *inet = inet_sk(sk);
879                         struct flowi fl;
880
881                         /* BUGGG_FUTURE: Again, it is not clear how
882                            to handle rthdr case. Ignore this complexity
883                            for now.
884                          */
885                         memset(&fl, 0, sizeof(fl));
886                         fl.proto = IPPROTO_TCP;
887                         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
888                         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
889                         fl.oif = sk->sk_bound_dev_if;
890                         fl.fl_ip_dport = inet->dport;
891                         fl.fl_ip_sport = inet->sport;
892
893                         if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
894                                 sk->sk_err_soft = -err;
895                                 goto out;
896                         }
897
898                         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
899                                 sk->sk_err_soft = -err;
900                                 goto out;
901                         }
902
903                 } else
904                         dst_hold(dst);
905
                    /* Shrink the MSS and retransmit only when the cached
                     * path MTU is larger than what the route now reports.
                     */
906                 if (tp->pmtu_cookie > dst_mtu(dst)) {
907                         tcp_sync_mss(sk, dst_mtu(dst));
908                         tcp_simple_retransmit(sk);
909                 } /* else let the usual retransmit timer handle it */
                    /* Balances the dst_hold() above or the lookup's reference. */
910                 dst_release(dst);
911                 goto out;
912         }
913
            /* Every other ICMPv6 type: translate type/code into err. */
914         icmpv6_err_convert(type, code, &err);
915
916         /* Might be for a request_sock */
917         switch (sk->sk_state) {
918                 struct request_sock *req, **prev;
919         case TCP_LISTEN:
920                 if (sock_owned_by_user(sk))
921                         goto out;
922
923                 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
924                                         &hdr->saddr, tcp_v6_iif(skb));
925                 if (!req)
926                         goto out;
927
928                 /* ICMPs are not backlogged, hence we cannot get
929                  * an established socket here.
930                  */
931                 BUG_TRAP(req->sk == NULL);
932
                    /* The error must quote exactly the ISN that was sent for
                     * this request, otherwise it is ignored.
                     */
933                 if (seq != tcp_rsk(req)->snt_isn) {
934                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
935                         goto out;
936                 }
937
938                 tcp_synq_drop(sk, req, prev);
939                 goto out;
940
941         case TCP_SYN_SENT:
942         case TCP_SYN_RECV:  /* Cannot happen.
943                                It can, if SYNs are crossed. --ANK */ 
                    /* Hard failure of the connection attempt when the socket
                     * is not owned by the user; otherwise only a soft error.
                     */
944                 if (!sock_owned_by_user(sk)) {
945                         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
946                         sk->sk_err = err;
947                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
948
949                         tcp_done(sk);
950                 } else
951                         sk->sk_err_soft = err;
952                 goto out;
953         }
954
            /* Remaining states: report a hard error only when the socket is
             * not owned by the user and np->recverr is set; otherwise keep
             * it as a soft error.
             */
955         if (!sock_owned_by_user(sk) && np->recverr) {
956                 sk->sk_err = err;
957                 sk->sk_error_report(sk);
958         } else
959                 sk->sk_err_soft = err;
960
961 out:
962         bh_unlock_sock(sk);
963         sock_put(sk);
964 }
965
966
/*
 * tcp_v6_send_synack - build and transmit a SYN-ACK for a connection request.
 *
 * @sk:  listening socket.
 * @req: the request being answered; addresses, ports and interface for the
 *       flow are taken from it.
 * @dst: route to use, or NULL to look one up here.
 *
 * Returns the result of ip6_xmit() (with NET_XMIT_CN mapped to 0), or the
 * error of a failed route/xfrm lookup.  err starts out as -1.
 * NOTE(review): when tcp_make_synack() returns NULL, err keeps whatever
 * value it had at that point (-1 if @dst was supplied, otherwise the
 * xfrm_lookup() result) -- confirm callers expect that.
 *
 * @dst is released at "done" on every path.
 */
967 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
968                               struct dst_entry *dst)
969 {
970         struct tcp6_request_sock *treq = tcp6_rsk(req);
971         struct ipv6_pinfo *np = inet6_sk(sk);
972         struct sk_buff * skb;
973         struct ipv6_txoptions *opt = NULL;
974         struct in6_addr * final_p = NULL, final;
975         struct flowi fl;
976         int err = -1;
977
            /* Describe the reply flow: local -> remote address of the request. */
978         memset(&fl, 0, sizeof(fl));
979         fl.proto = IPPROTO_TCP;
980         ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
981         ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
982         fl.fl6_flowlabel = 0;
983         fl.oif = treq->iif;
984         fl.fl_ip_dport = inet_rsk(req)->rmt_port;
985         fl.fl_ip_sport = inet_sk(sk)->sport;
986
987         if (dst == NULL) {
                    /* Use the socket's tx options; if there are none and
                     * rxopt.bits.srcrt == 2, derive options by inverting the
                     * routing header saved with the request's pktopts.
                     */
988                 opt = np->opt;
989                 if (opt == NULL &&
990                     np->rxopt.bits.srcrt == 2 &&
991                     treq->pktopts) {
992                         struct sk_buff *pktopts = treq->pktopts;
993                         struct inet6_skb_parm *rxopt = IP6CB(pktopts);
994                         if (rxopt->srcrt)
995                                 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
996                 }
997
                    /* With a source route, route towards the first address in
                     * the routing header and remember the real destination.
                     */
998                 if (opt && opt->srcrt) {
999                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1000                         ipv6_addr_copy(&final, &fl.fl6_dst);
1001                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1002                         final_p = &final;
1003                 }
1004
1005                 err = ip6_dst_lookup(sk, &dst, &fl);
1006                 if (err)
1007                         goto done;
                     /* Restore the final destination before the xfrm lookup. */
1008                 if (final_p)
1009                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1010                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1011                         goto done;
1012         }
1013
1014         skb = tcp_make_synack(sk, dst, req);
1015         if (skb) {
1016                 struct tcphdr *th = skb->h.th;
1017
                     /* Checksum over the whole segment plus the pseudo-header
                      * built from the request's local/remote addresses.
                      */
1018                 th->check = tcp_v6_check(th, skb->len,
1019                                          &treq->loc_addr, &treq->rmt_addr,
1020                                          csum_partial((char *)th, skb->len, skb->csum));
1021
1022                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1023                 err = ip6_xmit(sk, skb, &fl, opt, 0);
                     /* NET_XMIT_CN is not treated as a failure. */
1024                 if (err == NET_XMIT_CN)
1025                         err = 0;
1026         }
1027
1028 done:
1029         dst_release(dst);
             /* Free opt only when it is not the socket's own np->opt, i.e.
              * when it came from ipv6_invert_rthdr() above.
              */
1030         if (opt && opt != np->opt)
1031                 sock_kfree_s(sk, opt, opt->tot_len);
1032         return err;
1033 }
1034
1035 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1036 {
1037         if (tcp6_rsk(req)->pktopts)
1038                 kfree_skb(tcp6_rsk(req)->pktopts);
1039 }
1040
1041 static struct request_sock_ops tcp6_request_sock_ops = {
1042         .family         =       AF_INET6,
1043         .obj_size       =       sizeof(struct tcp6_request_sock),
1044         .rtx_syn_ack    =       tcp_v6_send_synack,
1045         .send_ack       =       tcp_v6_reqsk_send_ack,
1046         .destructor     =       tcp_v6_reqsk_destructor,
1047         .send_reset     =       tcp_v6_send_reset
1048 };
1049
1050 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1051 {
1052         struct ipv6_pinfo *np = inet6_sk(sk);
1053         struct inet6_skb_parm *opt = IP6CB(skb);
1054
1055         if (np->rxopt.all) {
1056                 if ((opt->hop && np->rxopt.bits.hopopts) ||
1057                     ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1058                      np->rxopt.bits.rxflow) ||
1059                     (opt->srcrt && np->rxopt.bits.srcrt) ||
1060                     ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1061                         return 1;
1062         }
1063         return 0;
1064 }
1065
1066
1067 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
1068                               struct sk_buff *skb)
1069 {
1070         struct ipv6_pinfo *np = inet6_sk(sk);
1071
1072         if (skb->ip_summed == CHECKSUM_HW) {
1073                 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
1074                 skb->csum = offsetof(struct tcphdr, check);
1075         } else {
1076                 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
1077                                             csum_partial((char *)th, th->doff<<2, 
1078                                                          skb->csum));
1079         }
1080 }
1081
1082
1083 static void tcp_v6_send_reset(struct sk_buff *skb)
1084 {
1085         struct tcphdr *th = skb->h.th, *t1; 
1086         struct sk_buff *buff;
1087         struct flowi fl;
1088
1089         if (th->rst)
1090                 return;
1091
1092         if (!ipv6_unicast_destination(skb))
1093                 return; 
1094
1095         /*
1096          * We need to grab some memory, and put together an RST,
1097          * and then put it into the queue to be sent.
1098          */
1099
1100         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1101                          GFP_ATOMIC);
1102         if (buff == NULL) 
1103                 return;
1104
1105         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1106
1107         t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1108
1109         /* Swap the send and the receive. */
1110         memset(t1, 0, sizeof(*t1));
1111         t1->dest = th->source;
1112         t1->source = th->dest;
1113         t1->doff = sizeof(*t1)/4;
1114         t1->rst = 1;
1115   
1116         if(th->ack) {
1117                 t1->seq = th->ack_seq;
1118         } else {
1119                 t1->ack = 1;
1120                 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1121                                     + skb->len - (th->doff<<2));
1122         }
1123
1124         buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1125
1126         memset(&fl, 0, sizeof(fl));
1127         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1128         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1129
1130         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1131                                     sizeof(*t1), IPPROTO_TCP,
1132                                     buff->csum);
1133
1134         fl.proto = IPPROTO_TCP;
1135         fl.oif = tcp_v6_iif(skb);
1136         fl.fl_ip_dport = t1->dest;
1137         fl.fl_ip_sport = t1->source;
1138
1139         /* sk = NULL, but it is safe for now. RST socket required. */
1140         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1141
1142                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1143                         dst_release(buff->dst);
1144                         return;
1145                 }
1146
1147                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1148                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1149                 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1150                 return;
1151         }
1152
1153         kfree_skb(buff);
1154 }
1155
1156 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1157 {
1158         struct tcphdr *th = skb->h.th, *t1;
1159         struct sk_buff *buff;
1160         struct flowi fl;
1161         int tot_len = sizeof(struct tcphdr);
1162
1163         if (ts)
1164                 tot_len += 3*4;
1165
1166         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1167                          GFP_ATOMIC);
1168         if (buff == NULL)
1169                 return;
1170
1171         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1172
1173         t1 = (struct tcphdr *) skb_push(buff,tot_len);
1174
1175         /* Swap the send and the receive. */
1176         memset(t1, 0, sizeof(*t1));
1177         t1->dest = th->source;
1178         t1->source = th->dest;
1179         t1->doff = tot_len/4;
1180         t1->seq = htonl(seq);
1181         t1->ack_seq = htonl(ack);
1182         t1->ack = 1;
1183         t1->window = htons(win);
1184         
1185         if (ts) {
1186                 u32 *ptr = (u32*)(t1 + 1);
1187                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1188                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1189                 *ptr++ = htonl(tcp_time_stamp);
1190                 *ptr = htonl(ts);
1191         }
1192
1193         buff->csum = csum_partial((char *)t1, tot_len, 0);
1194
1195         memset(&fl, 0, sizeof(fl));
1196         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1197         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1198
1199         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1200                                     tot_len, IPPROTO_TCP,
1201                                     buff->csum);
1202
1203         fl.proto = IPPROTO_TCP;
1204         fl.oif = tcp_v6_iif(skb);
1205         fl.fl_ip_dport = t1->dest;
1206         fl.fl_ip_sport = t1->source;
1207
1208         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1209                 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1210                         dst_release(buff->dst);
1211                         return;
1212                 }
1213                 ip6_xmit(NULL, buff, &fl, NULL, 0);
1214                 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1215                 return;
1216         }
1217
1218         kfree_skb(buff);
1219 }
1220
1221 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1222 {
1223         struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1224
1225         tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1226                         tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1227
1228         tcp_tw_put(tw);
1229 }
1230
1231 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1232 {
1233         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1234 }
1235
1236
1237 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1238 {
1239         struct request_sock *req, **prev;
1240         struct tcphdr *th = skb->h.th;
1241         struct tcp_sock *tp = tcp_sk(sk);
1242         struct sock *nsk;
1243
1244         /* Find possible connection requests. */
1245         req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1246                                 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1247         if (req)
1248                 return tcp_check_req(sk, skb, req, prev);
1249
1250         nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1251                                           th->source,
1252                                           &skb->nh.ipv6h->daddr,
1253                                           ntohs(th->dest),
1254                                           tcp_v6_iif(skb));
1255
1256         if (nsk) {
1257                 if (nsk->sk_state != TCP_TIME_WAIT) {
1258                         bh_lock_sock(nsk);
1259                         return nsk;
1260                 }
1261                 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1262                 return NULL;
1263         }
1264
1265 #if 0 /*def CONFIG_SYN_COOKIES*/
1266         if (!th->rst && !th->syn && th->ack)
1267                 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1268 #endif
1269         return sk;
1270 }
1271
1272 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1273 {
1274         struct tcp_sock *tp = tcp_sk(sk);
1275         struct listen_sock *lopt = tp->accept_queue.listen_opt;
1276         u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1277
1278         reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1279         tcp_synq_added(sk);
1280 }
1281
1282
1283 /* FIXME: this is substantially similar to the ipv4 code.
1284  * Can some kind of merge be done? -- erics
1285  */
/*
 * tcp_v6_conn_request - handle an incoming connection request on a listener.
 *
 * @sk:  listening socket.
 * @skb: the received segment.
 *
 * IPv4 packets (skb->protocol == ETH_P_IP) are passed to
 * tcp_v4_conn_request().  Otherwise a request_sock is allocated, filled in
 * from the segment, answered with a SYN-ACK and added to the SYN queue.
 *
 * On the IPv6 path this always returns 0, whether the request was queued or
 * dropped (see "don't send reset" below).  Drops are counted in
 * TCP_MIB_ATTEMPTFAILS.
 */
1286 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1287 {
1288         struct tcp6_request_sock *treq;
1289         struct ipv6_pinfo *np = inet6_sk(sk);
1290         struct tcp_options_received tmp_opt;
1291         struct tcp_sock *tp = tcp_sk(sk);
1292         struct request_sock *req = NULL;
             /* A non-zero value here is used as the initial sequence number
              * instead of generating one (see the isn == 0 test below).
              */
1293         __u32 isn = TCP_SKB_CB(skb)->when;
1294
1295         if (skb->protocol == htons(ETH_P_IP))
1296                 return tcp_v4_conn_request(sk, skb);
1297
1298         if (!ipv6_unicast_destination(skb))
1299                 goto drop; 
1300
1301         /*
1302          *      There are no SYN attacks on IPv6, yet...        
1303          */
1304         if (tcp_synq_is_full(sk) && !isn) {
1305                 if (net_ratelimit())
1306                         printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1307                 goto drop;              
1308         }
1309
             /* Accept queue full and more than one young request pending. */
1310         if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1311                 goto drop;
1312
1313         req = reqsk_alloc(&tcp6_request_sock_ops);
1314         if (req == NULL)
1315                 goto drop;
1316
             /* Parse the TCP options of the segment; the MSS clamp starts at
              * the IPv6 minimum MTU minus the TCP and IPv6 header sizes.
              */
1317         tcp_clear_options(&tmp_opt);
1318         tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1319         tmp_opt.user_mss = tp->rx_opt.user_mss;
1320
1321         tcp_parse_options(skb, &tmp_opt, 0);
1322
1323         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1324         tcp_openreq_init(req, &tmp_opt, skb);
1325
1326         treq = tcp6_rsk(req);
1327         ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1328         ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1329         TCP_ECN_create_request(req, skb->h.th);
             /* Keep the segment (with an extra reference) in the request when
              * the socket wants any of its packet options, rxinfo or rxhlim.
              */
1330         treq->pktopts = NULL;
1331         if (ipv6_opt_accepted(sk, skb) ||
1332             np->rxopt.bits.rxinfo ||
1333             np->rxopt.bits.rxhlim) {
1334                 atomic_inc(&skb->users);
1335                 treq->pktopts = skb;
1336         }
1337         treq->iif = sk->sk_bound_dev_if;
1338
1339         /* So that link locals have meaning */
1340         if (!sk->sk_bound_dev_if &&
1341             ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1342                 treq->iif = tcp_v6_iif(skb);
1343
1344         if (isn == 0) 
1345                 isn = tcp_v6_init_sequence(sk,skb);
1346
1347         tcp_rsk(req)->snt_isn = isn;
1348
             /* The request is only queued once the SYN-ACK went out. */
1349         if (tcp_v6_send_synack(sk, req, NULL))
1350                 goto drop;
1351
1352         tcp_v6_synq_add(sk, req);
1353
1354         return 0;
1355
1356 drop:
             /* req is still NULL for drops taken before the allocation. */
1357         if (req)
1358                 reqsk_free(req);
1359
1360         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1361         return 0; /* don't send reset */
1362 }
1363
/*
 * tcp_v6_syn_recv_sock - create the child socket for a connection request.
 *
 * @sk:  listening socket.
 * @skb: segment being processed; its protocol selects the v4-mapped or the
 *       native IPv6 path, and its incoming interface and hop limit are
 *       stored in the child's mcast_oif / mcast_hops.
 * @req: the connection request.
 * @dst: route to use, or NULL to look one up here.
 *
 * Returns the new socket, or NULL.  On the native IPv6 path NULL means the
 * accept queue is full, the route or xfrm lookup failed, or
 * tcp_create_openreq_child() failed; those cases are counted in
 * LINUX_MIB_LISTENDROPS (plus LINUX_MIB_LISTENOVERFLOWS for a full queue),
 * and @dst is released.
 */
1364 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1365                                           struct request_sock *req,
1366                                           struct dst_entry *dst)
1367 {
1368         struct tcp6_request_sock *treq = tcp6_rsk(req);
1369         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1370         struct tcp6_sock *newtcp6sk;
1371         struct inet_sock *newinet;
1372         struct tcp_sock *newtp;
1373         struct sock *newsk;
1374         struct ipv6_txoptions *opt;
1375
1376         if (skb->protocol == htons(ETH_P_IP)) {
1377                 /*
1378                  *      v6 mapped
1379                  */
1380
                     /* Let IPv4 create the child, then dress it up as an
                      * IPv6 socket with v4-mapped addresses.
                      */
1381                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1382
1383                 if (newsk == NULL) 
1384                         return NULL;
1385
1386                 newtcp6sk = (struct tcp6_sock *)newsk;
1387                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1388
1389                 newinet = inet_sk(newsk);
1390                 newnp = inet6_sk(newsk);
1391                 newtp = tcp_sk(newsk);
1392
                     /* Start from a copy of the listener's IPv6 state; the
                      * pointer fields are reset individually below.
                      */
1393                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1394
                     /* ::ffff:a.b.c.d forms of the child's IPv4 addresses. */
1395                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1396                               newinet->daddr);
1397
1398                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1399                               newinet->saddr);
1400
1401                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1402
1403                 newtp->af_specific = &ipv6_mapped;
1404                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1405                 newnp->pktoptions  = NULL;
1406                 newnp->opt         = NULL;
1407                 newnp->mcast_oif   = tcp_v6_iif(skb);
1408                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1409
1410                 /*
1411                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1412                  * here, tcp_create_openreq_child now does this for us, see the comment in
1413                  * that function for the gory details. -acme
1414                  */
1415
1416                 /* It is tricky place. Until this moment IPv4 tcp
1417                    worked with IPv6 af_tcp.af_specific.
1418                    Sync it now.
1419                  */
1420                 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1421
1422                 return newsk;
1423         }
1424
             /* Native IPv6 from here on. */
1425         opt = np->opt;
1426
1427         if (sk_acceptq_is_full(sk))
1428                 goto out_overflow;
1429
             /* No tx options on the listener and rxopt.bits.srcrt == 2:
              * derive options by inverting the routing header saved with
              * the request.  Such an opt differs from np->opt and is freed
              * on the way out.
              */
1430         if (np->rxopt.bits.srcrt == 2 &&
1431             opt == NULL && treq->pktopts) {
1432                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1433                 if (rxopt->srcrt)
1434                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1435         }
1436
1437         if (dst == NULL) {
1438                 struct in6_addr *final_p = NULL, final;
1439                 struct flowi fl;
1440
1441                 memset(&fl, 0, sizeof(fl));
1442                 fl.proto = IPPROTO_TCP;
1443                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                     /* With a source route, look up the route towards the
                      * first routing-header address and restore the final
                      * destination before the xfrm lookup.
                      */
1444                 if (opt && opt->srcrt) {
1445                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1446                         ipv6_addr_copy(&final, &fl.fl6_dst);
1447                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1448                         final_p = &final;
1449                 }
1450                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1451                 fl.oif = sk->sk_bound_dev_if;
1452                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1453                 fl.fl_ip_sport = inet_sk(sk)->sport;
1454
1455                 if (ip6_dst_lookup(sk, &dst, &fl))
1456                         goto out;
1457
1458                 if (final_p)
1459                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1460
1461                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1462                         goto out;
1463         } 
1464
1465         newsk = tcp_create_openreq_child(sk, req, skb);
1466         if (newsk == NULL)
1467                 goto out;
1468
1469         /*
1470          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1471          * count here, tcp_create_openreq_child now does this for us, see the
1472          * comment in that function for the gory details. -acme
1473          */
1474
             /* Attach the route to the child; IP checksum offload and TSO
              * capabilities are masked out of the device features.
              */
1475         ip6_dst_store(newsk, dst, NULL);
1476         newsk->sk_route_caps = dst->dev->features &
1477                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1478
1479         newtcp6sk = (struct tcp6_sock *)newsk;
1480         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1481
1482         newtp = tcp_sk(newsk);
1483         newinet = inet_sk(newsk);
1484         newnp = inet6_sk(newsk);
1485
             /* Start from a copy of the listener's IPv6 state, then replace
              * the addresses with the ones recorded in the request.
              */
1486         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1487
1488         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1489         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1490         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1491         newsk->sk_bound_dev_if = treq->iif;
1492
1493         /* Now IPv6 options... 
1494
1495            First: no IPv4 options.
1496          */
1497         newinet->opt = NULL;
1498
1499         /* Clone RX bits */
1500         newnp->rxopt.all = np->rxopt.all;
1501
1502         /* Clone pktoptions received with SYN */
1503         newnp->pktoptions = NULL;
1504         if (treq->pktopts != NULL) {
                     /* The request's reference is dropped whether or not the
                      * clone succeeded; a failed clone just leaves the child
                      * without pktoptions.
                      */
1505                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1506                 kfree_skb(treq->pktopts);
1507                 treq->pktopts = NULL;
1508                 if (newnp->pktoptions)
1509                         skb_set_owner_r(newnp->pktoptions, newsk);
1510         }
1511         newnp->opt        = NULL;
1512         newnp->mcast_oif  = tcp_v6_iif(skb);
1513         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1514
1515         /* Clone native IPv6 options from listening socket (if any)
1516
1517            Yes, keeping reference count would be much more clever,
1518            but we make one more one thing there: reattach optmem
1519            to newsk.
1520          */
1521         if (opt) {
1522                 newnp->opt = ipv6_dup_options(newsk, opt);
                     /* opt built by ipv6_invert_rthdr() above is no longer
                      * needed once duplicated.
                      */
1523                 if (opt != np->opt)
1524                         sock_kfree_s(sk, opt, opt->tot_len);
1525         }
1526
             /* Extension header length is the sum of both option lengths. */
1527         newtp->ext_header_len = 0;
1528         if (newnp->opt)
1529                 newtp->ext_header_len = newnp->opt->opt_nflen +
1530                                         newnp->opt->opt_flen;
1531
1532         tcp_sync_mss(newsk, dst_mtu(dst));
1533         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1534         tcp_initialize_rcv_mss(newsk);
1535
             /* The IPv4 address fields of the child are all set to
              * LOOPBACK4_IPV6.
              */
1536         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1537
1538         __tcp_v6_hash(newsk);
1539         tcp_inherit_port(sk, newsk);
1540
1541         return newsk;
1542
1543 out_overflow:
1544         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1545 out:
1546         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
             /* Free opt only if it is not the listener's own np->opt. */
1547         if (opt && opt != np->opt)
1548                 sock_kfree_s(sk, opt, opt->tot_len);
1549         dst_release(dst);
1550         return NULL;
1551 }
1552
1553 static int tcp_v6_checksum_init(struct sk_buff *skb)
1554 {
1555         if (skb->ip_summed == CHECKSUM_HW) {
1556                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1557                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1558                                   &skb->nh.ipv6h->daddr,skb->csum))
1559                         return 0;
1560                 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1561         }
1562         if (skb->len <= 76) {
1563                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1564                                  &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1565                         return -1;
1566                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1567         } else {
1568                 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1569                                           &skb->nh.ipv6h->daddr,0);
1570         }
1571         return 0;
1572 }
1573
1574 /* The socket must have its spinlock held when we get
1575  * here.
1576  *
1577  * We have a potential double-lock case here, so even when
1578  * doing backlog processing we use the BH locking scheme.
1579  * This is because we cannot sleep with the original spinlock
1580  * held.
1581  */
/*
 * tcp_v6_do_rcv - process one received segment for socket @sk.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv().  For IPv6 the segment goes
 * through the socket filter and is then processed according to the socket
 * state (established fast path, listener, or generic state machine).  When
 * the socket has any rxopt bit set, a clone of the segment is made up front
 * so that its IPv6 options can be latched into np->pktoptions afterwards.
 *
 * Always returns 0 on the IPv6 path; failures result in the segment being
 * freed, optionally after sending a reset.
 */
1582 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1583 {
1584         struct ipv6_pinfo *np = inet6_sk(sk);
1585         struct tcp_sock *tp;
1586         struct sk_buff *opt_skb = NULL;
1587
1588         /* Imagine: socket is IPv6. IPv4 packet arrives,
1589            goes to IPv4 receive handler and backlogged.
1590            From backlog it always goes here. Kerboom...
1591            Fortunately, tcp_rcv_established and rcv_established
1592            handle them correctly, but it is not case with
1593            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1594          */
1595
1596         if (skb->protocol == htons(ETH_P_IP))
1597                 return tcp_v4_do_rcv(sk, skb);
1598
1599         if (sk_filter(sk, skb, 0))
1600                 goto discard;
1601
1602         /*
1603          *      socket locking is here for SMP purposes as backlog rcv
1604          *      is currently called with bh processing disabled.
1605          */
1606
1607         /* Do Stevens' IPV6_PKTOPTIONS.
1608
1609            Yes, guys, it is the only place in our code, where we
1610            may make it not affecting IPv4.
1611            The rest of code is protocol independent,
1612            and I do not like idea to uglify IPv4.
1613
1614            Actually, all the idea behind IPV6_PKTOPTIONS
1615            looks not very well thought. For now we latch
1616            options, received in the last packet, enqueued
1617            by tcp. Feel free to propose better solution.
1618                                                --ANK (980728)
1619          */
             /* A failed clone leaves opt_skb NULL, which simply skips the
              * option latching below.
              */
1620         if (np->rxopt.all)
1621                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1622
1623         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1624                 TCP_CHECK_TIMER(sk);
1625                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1626                         goto reset;
1627                 TCP_CHECK_TIMER(sk);
1628                 if (opt_skb)
1629                         goto ipv6_pktoptions;
1630                 return 0;
1631         }
1632
             /* Outside the fast path: reject segments shorter than their
              * own header length or with a bad checksum.
              */
1633         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1634                 goto csum_err;
1635
1636         if (sk->sk_state == TCP_LISTEN) { 
1637                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1638                 if (!nsk)
1639                         goto discard;
1640
1641                 /*
1642                  * Queue it on the new socket if the new socket is active,
1643                  * otherwise we just shortcircuit this and continue with
1644                  * the new socket..
1645                  */
1646                 if(nsk != sk) {
1647                         if (tcp_child_process(sk, nsk, skb))
1648                                 goto reset;
                             /* Options are not latched on this path; drop
                              * the clone.
                              */
1649                         if (opt_skb)
1650                                 __kfree_skb(opt_skb);
1651                         return 0;
1652                 }
1653         }
1654
1655         TCP_CHECK_TIMER(sk);
1656         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1657                 goto reset;
1658         TCP_CHECK_TIMER(sk);
1659         if (opt_skb)
1660                 goto ipv6_pktoptions;
1661         return 0;
1662
1663 reset:
1664         tcp_v6_send_reset(skb);
1665 discard:
             /* Free both the option clone (if any) and the segment itself. */
1666         if (opt_skb)
1667                 __kfree_skb(opt_skb);
1668         kfree_skb(skb);
1669         return 0;
1670 csum_err:
1671         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1672         goto discard;
1673
1674
1675 ipv6_pktoptions:
1676         /* Do you ask, what is it?
1677
1678            1. skb was enqueued by tcp.
1679            2. skb is added to tail of read queue, rather than out of order.
1680            3. socket is not in passive state.
1681            4. Finally, it really contains options, which user wants to receive.
1682          */
1683         tp = tcp_sk(sk);
1684         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1685             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1686                 if (np->rxopt.bits.rxinfo)
1687                         np->mcast_oif = tcp_v6_iif(opt_skb);
1688                 if (np->rxopt.bits.rxhlim)
1689                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
                     /* Either install the clone as the new pktoptions, or
                      * free it and clear pktoptions.  In both cases opt_skb
                      * ends up holding the previous pktoptions value, which
                      * is freed below.
                      */
1690                 if (ipv6_opt_accepted(sk, opt_skb)) {
1691                         skb_set_owner_r(opt_skb, sk);
1692                         opt_skb = xchg(&np->pktoptions, opt_skb);
1693                 } else {
1694                         __kfree_skb(opt_skb);
1695                         opt_skb = xchg(&np->pktoptions, NULL);
1696                 }
1697         }
1698
1699         if (opt_skb)
1700                 kfree_skb(opt_skb);
1701         return 0;
1702 }
1703
1704 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1705 {
1706         struct sk_buff *skb = *pskb;
1707         struct tcphdr *th;      
1708         struct sock *sk;
1709         int ret;
1710
1711         if (skb->pkt_type != PACKET_HOST)
1712                 goto discard_it;
1713
1714         /*
1715          *      Count it even if it's bad.
1716          */
1717         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1718
1719         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1720                 goto discard_it;
1721
1722         th = skb->h.th;
1723
1724         if (th->doff < sizeof(struct tcphdr)/4)
1725                 goto bad_packet;
1726         if (!pskb_may_pull(skb, th->doff*4))
1727                 goto discard_it;
1728
1729         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1730              tcp_v6_checksum_init(skb) < 0))
1731                 goto bad_packet;
1732
1733         th = skb->h.th;
1734         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1735         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1736                                     skb->len - th->doff*4);
1737         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1738         TCP_SKB_CB(skb)->when = 0;
1739         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1740         TCP_SKB_CB(skb)->sacked = 0;
1741
1742         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1743                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1744
1745         if (!sk)
1746                 goto no_tcp_socket;
1747
1748 process:
1749         if (sk->sk_state == TCP_TIME_WAIT)
1750                 goto do_time_wait;
1751
1752         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1753                 goto discard_and_relse;
1754
1755         if (sk_filter(sk, skb, 0))
1756                 goto discard_and_relse;
1757
1758         skb->dev = NULL;
1759
1760         bh_lock_sock(sk);
1761         ret = 0;
1762         if (!sock_owned_by_user(sk)) {
1763                 if (!tcp_prequeue(sk, skb))
1764                         ret = tcp_v6_do_rcv(sk, skb);
1765         } else
1766                 sk_add_backlog(sk, skb);
1767         bh_unlock_sock(sk);
1768
1769         sock_put(sk);
1770         return ret ? -1 : 0;
1771
1772 no_tcp_socket:
1773         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1774                 goto discard_it;
1775
1776         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1777 bad_packet:
1778                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1779         } else {
1780                 tcp_v6_send_reset(skb);
1781         }
1782
1783 discard_it:
1784
1785         /*
1786          *      Discard frame
1787          */
1788
1789         kfree_skb(skb);
1790         return 0;
1791
1792 discard_and_relse:
1793         sock_put(sk);
1794         goto discard_it;
1795
1796 do_time_wait:
1797         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1798                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1799                 goto discard_it;
1800         }
1801
1802         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1803                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1804                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1805                 goto discard_it;
1806         }
1807
1808         switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1809                                           skb, th, skb->len)) {
1810         case TCP_TW_SYN:
1811         {
1812                 struct sock *sk2;
1813
1814                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1815                 if (sk2 != NULL) {
1816                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1817                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1818                         sk = sk2;
1819                         goto process;
1820                 }
1821                 /* Fall through to ACK */
1822         }
1823         case TCP_TW_ACK:
1824                 tcp_v6_timewait_ack(sk, skb);
1825                 break;
1826         case TCP_TW_RST:
1827                 goto no_tcp_socket;
1828         case TCP_TW_SUCCESS:;
1829         }
1830         goto discard_it;
1831 }
1832
1833 static int tcp_v6_rebuild_header(struct sock *sk)
1834 {
1835         int err;
1836         struct dst_entry *dst;
1837         struct ipv6_pinfo *np = inet6_sk(sk);
1838
1839         dst = __sk_dst_check(sk, np->dst_cookie);
1840
1841         if (dst == NULL) {
1842                 struct inet_sock *inet = inet_sk(sk);
1843                 struct in6_addr *final_p = NULL, final;
1844                 struct flowi fl;
1845
1846                 memset(&fl, 0, sizeof(fl));
1847                 fl.proto = IPPROTO_TCP;
1848                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1849                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1850                 fl.fl6_flowlabel = np->flow_label;
1851                 fl.oif = sk->sk_bound_dev_if;
1852                 fl.fl_ip_dport = inet->dport;
1853                 fl.fl_ip_sport = inet->sport;
1854
1855                 if (np->opt && np->opt->srcrt) {
1856                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1857                         ipv6_addr_copy(&final, &fl.fl6_dst);
1858                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1859                         final_p = &final;
1860                 }
1861
1862                 err = ip6_dst_lookup(sk, &dst, &fl);
1863                 if (err) {
1864                         sk->sk_route_caps = 0;
1865                         return err;
1866                 }
1867                 if (final_p)
1868                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1869
1870                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1871                         sk->sk_err_soft = -err;
1872                         dst_release(dst);
1873                         return err;
1874                 }
1875
1876                 ip6_dst_store(sk, dst, NULL);
1877                 sk->sk_route_caps = dst->dev->features &
1878                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1879         }
1880
1881         return 0;
1882 }
1883
1884 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1885 {
1886         struct sock *sk = skb->sk;
1887         struct inet_sock *inet = inet_sk(sk);
1888         struct ipv6_pinfo *np = inet6_sk(sk);
1889         struct flowi fl;
1890         struct dst_entry *dst;
1891         struct in6_addr *final_p = NULL, final;
1892
1893         memset(&fl, 0, sizeof(fl));
1894         fl.proto = IPPROTO_TCP;
1895         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1896         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1897         fl.fl6_flowlabel = np->flow_label;
1898         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1899         fl.oif = sk->sk_bound_dev_if;
1900         fl.fl_ip_sport = inet->sport;
1901         fl.fl_ip_dport = inet->dport;
1902
1903         if (np->opt && np->opt->srcrt) {
1904                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1905                 ipv6_addr_copy(&final, &fl.fl6_dst);
1906                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1907                 final_p = &final;
1908         }
1909
1910         dst = __sk_dst_check(sk, np->dst_cookie);
1911
1912         if (dst == NULL) {
1913                 int err = ip6_dst_lookup(sk, &dst, &fl);
1914
1915                 if (err) {
1916                         sk->sk_err_soft = -err;
1917                         return err;
1918                 }
1919
1920                 if (final_p)
1921                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1922
1923                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1924                         sk->sk_route_caps = 0;
1925                         dst_release(dst);
1926                         return err;
1927                 }
1928
1929                 ip6_dst_store(sk, dst, NULL);
1930                 sk->sk_route_caps = dst->dev->features &
1931                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1932         }
1933
1934         skb->dst = dst_clone(dst);
1935
1936         /* Restore final destination back after routing done */
1937         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1938
1939         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1940 }
1941
1942 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1943 {
1944         struct ipv6_pinfo *np = inet6_sk(sk);
1945         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1946
1947         sin6->sin6_family = AF_INET6;
1948         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1949         sin6->sin6_port = inet_sk(sk)->dport;
1950         /* We do not store received flowlabel for TCP */
1951         sin6->sin6_flowinfo = 0;
1952         sin6->sin6_scope_id = 0;
1953         if (sk->sk_bound_dev_if &&
1954             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1955                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1956 }
1957
/*
 *	Placeholder for ipv6_specific.remember_stamp: nothing is recorded for
 *	IPv6 yet, so the socket is ignored and 0 is always returned.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	return 0;	/* Alas, not yet... */
}
1963
1964 static struct tcp_func ipv6_specific = {
1965         .queue_xmit     =       tcp_v6_xmit,
1966         .send_check     =       tcp_v6_send_check,
1967         .rebuild_header =       tcp_v6_rebuild_header,
1968         .conn_request   =       tcp_v6_conn_request,
1969         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1970         .remember_stamp =       tcp_v6_remember_stamp,
1971         .net_header_len =       sizeof(struct ipv6hdr),
1972
1973         .setsockopt     =       ipv6_setsockopt,
1974         .getsockopt     =       ipv6_getsockopt,
1975         .addr2sockaddr  =       v6_addr2sockaddr,
1976         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1977 };
1978
1979 /*
1980  *      TCP over IPv4 via INET6 API
1981  */
1982
1983 static struct tcp_func ipv6_mapped = {
1984         .queue_xmit     =       ip_queue_xmit,
1985         .send_check     =       tcp_v4_send_check,
1986         .rebuild_header =       inet_sk_rebuild_header,
1987         .conn_request   =       tcp_v6_conn_request,
1988         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1989         .remember_stamp =       tcp_v4_remember_stamp,
1990         .net_header_len =       sizeof(struct iphdr),
1991
1992         .setsockopt     =       ipv6_setsockopt,
1993         .getsockopt     =       ipv6_getsockopt,
1994         .addr2sockaddr  =       v6_addr2sockaddr,
1995         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1996 };
1997
1998
1999
2000 /* NOTE: A lot of things set to zero explicitly by call to
2001  *       sk_alloc() so need not be done here.
2002  */
2003 static int tcp_v6_init_sock(struct sock *sk)
2004 {
2005         struct tcp_sock *tp = tcp_sk(sk);
2006
2007         skb_queue_head_init(&tp->out_of_order_queue);
2008         tcp_init_xmit_timers(sk);
2009         tcp_prequeue_init(tp);
2010
2011         tp->rto  = TCP_TIMEOUT_INIT;
2012         tp->mdev = TCP_TIMEOUT_INIT;
2013
2014         /* So many TCP implementations out there (incorrectly) count the
2015          * initial SYN frame in their delayed-ACK and congestion control
2016          * algorithms that we must have the following bandaid to talk
2017          * efficiently to them.  -DaveM
2018          */
2019         tp->snd_cwnd = 2;
2020
2021         /* See draft-stevens-tcpca-spec-01 for discussion of the
2022          * initialization of these values.
2023          */
2024         tp->snd_ssthresh = 0x7fffffff;
2025         tp->snd_cwnd_clamp = ~0;
2026         tp->mss_cache = 536;
2027
2028         tp->reordering = sysctl_tcp_reordering;
2029
2030         sk->sk_state = TCP_CLOSE;
2031
2032         tp->af_specific = &ipv6_specific;
2033         tp->ca_ops = &tcp_init_congestion_ops;
2034         sk->sk_write_space = sk_stream_write_space;
2035         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2036
2037         sk->sk_sndbuf = sysctl_tcp_wmem[1];
2038         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2039
2040         atomic_inc(&tcp_sockets_allocated);
2041
2042         return 0;
2043 }
2044
/*
 *	Tear down a TCPv6 socket (tcpv6_prot.destroy): run the IPv4 TCP
 *	destructor first, then the inet6 one.  The result of the IPv4
 *	destructor is ignored; the inet6 result is returned.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);
	int rc;

	tcp_v4_destroy_sock(sk);
	rc = inet6_destroy_sock(sk);

	return rc;
}
2052
2053 /* Proc filesystem TCPv6 sock list dumping. */
2054 static void get_openreq6(struct seq_file *seq, 
2055                          struct sock *sk, struct request_sock *req, int i, int uid)
2056 {
2057         struct in6_addr *dest, *src;
2058         int ttd = req->expires - jiffies;
2059
2060         if (ttd < 0)
2061                 ttd = 0;
2062
2063         src = &tcp6_rsk(req)->loc_addr;
2064         dest = &tcp6_rsk(req)->rmt_addr;
2065         seq_printf(seq,
2066                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2067                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2068                    i,
2069                    src->s6_addr32[0], src->s6_addr32[1],
2070                    src->s6_addr32[2], src->s6_addr32[3],
2071                    ntohs(inet_sk(sk)->sport),
2072                    dest->s6_addr32[0], dest->s6_addr32[1],
2073                    dest->s6_addr32[2], dest->s6_addr32[3],
2074                    ntohs(inet_rsk(req)->rmt_port),
2075                    TCP_SYN_RECV,
2076                    0,0, /* could print option size, but that is af dependent. */
2077                    1,   /* timers active (only the expire timer) */  
2078                    jiffies_to_clock_t(ttd), 
2079                    req->retrans,
2080                    uid,
2081                    0,  /* non standard timer */  
2082                    0, /* open_requests have no inode */
2083                    0, req);
2084 }
2085
2086 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2087 {
2088         struct in6_addr *dest, *src;
2089         __u16 destp, srcp;
2090         int timer_active;
2091         unsigned long timer_expires;
2092         struct inet_sock *inet = inet_sk(sp);
2093         struct tcp_sock *tp = tcp_sk(sp);
2094         struct ipv6_pinfo *np = inet6_sk(sp);
2095
2096         dest  = &np->daddr;
2097         src   = &np->rcv_saddr;
2098         destp = ntohs(inet->dport);
2099         srcp  = ntohs(inet->sport);
2100         if (tp->pending == TCP_TIME_RETRANS) {
2101                 timer_active    = 1;
2102                 timer_expires   = tp->timeout;
2103         } else if (tp->pending == TCP_TIME_PROBE0) {
2104                 timer_active    = 4;
2105                 timer_expires   = tp->timeout;
2106         } else if (timer_pending(&sp->sk_timer)) {
2107                 timer_active    = 2;
2108                 timer_expires   = sp->sk_timer.expires;
2109         } else {
2110                 timer_active    = 0;
2111                 timer_expires = jiffies;
2112         }
2113
2114         seq_printf(seq,
2115                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2116                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2117                    i,
2118                    src->s6_addr32[0], src->s6_addr32[1],
2119                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2120                    dest->s6_addr32[0], dest->s6_addr32[1],
2121                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2122                    sp->sk_state, 
2123                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2124                    timer_active,
2125                    jiffies_to_clock_t(timer_expires - jiffies),
2126                    tp->retransmits,
2127                    sock_i_uid(sp),
2128                    tp->probes_out,
2129                    sock_i_ino(sp),
2130                    atomic_read(&sp->sk_refcnt), sp,
2131                    tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2132                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2133                    );
2134 }
2135
2136 static void get_timewait6_sock(struct seq_file *seq, 
2137                                struct tcp_tw_bucket *tw, int i)
2138 {
2139         struct in6_addr *dest, *src;
2140         __u16 destp, srcp;
2141         int ttd = tw->tw_ttd - jiffies;
2142
2143         if (ttd < 0)
2144                 ttd = 0;
2145
2146         dest  = &tw->tw_v6_daddr;
2147         src   = &tw->tw_v6_rcv_saddr;
2148         destp = ntohs(tw->tw_dport);
2149         srcp  = ntohs(tw->tw_sport);
2150
2151         seq_printf(seq,
2152                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2153                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2154                    i,
2155                    src->s6_addr32[0], src->s6_addr32[1],
2156                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2157                    dest->s6_addr32[0], dest->s6_addr32[1],
2158                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2159                    tw->tw_substate, 0, 0,
2160                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2161                    atomic_read(&tw->tw_refcnt), tw);
2162 }
2163
2164 #ifdef CONFIG_PROC_FS
2165 static int tcp6_seq_show(struct seq_file *seq, void *v)
2166 {
2167         struct tcp_iter_state *st;
2168
2169         if (v == SEQ_START_TOKEN) {
2170                 seq_puts(seq,
2171                          "  sl  "
2172                          "local_address                         "
2173                          "remote_address                        "
2174                          "st tx_queue rx_queue tr tm->when retrnsmt"
2175                          "   uid  timeout inode\n");
2176                 goto out;
2177         }
2178         st = seq->private;
2179
2180         switch (st->state) {
2181         case TCP_SEQ_STATE_LISTENING:
2182         case TCP_SEQ_STATE_ESTABLISHED:
2183                 get_tcp6_sock(seq, v, st->num);
2184                 break;
2185         case TCP_SEQ_STATE_OPENREQ:
2186                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2187                 break;
2188         case TCP_SEQ_STATE_TIME_WAIT:
2189                 get_timewait6_sock(seq, v, st->num);
2190                 break;
2191         }
2192 out:
2193         return 0;
2194 }
2195
2196 static struct file_operations tcp6_seq_fops;
2197 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2198         .owner          = THIS_MODULE,
2199         .name           = "tcp6",
2200         .family         = AF_INET6,
2201         .seq_show       = tcp6_seq_show,
2202         .seq_fops       = &tcp6_seq_fops,
2203 };
2204
2205 int __init tcp6_proc_init(void)
2206 {
2207         return tcp_proc_register(&tcp6_seq_afinfo);
2208 }
2209
2210 void tcp6_proc_exit(void)
2211 {
2212         tcp_proc_unregister(&tcp6_seq_afinfo);
2213 }
2214 #endif
2215
2216 struct proto tcpv6_prot = {
2217         .name                   = "TCPv6",
2218         .owner                  = THIS_MODULE,
2219         .close                  = tcp_close,
2220         .connect                = tcp_v6_connect,
2221         .disconnect             = tcp_disconnect,
2222         .accept                 = tcp_accept,
2223         .ioctl                  = tcp_ioctl,
2224         .init                   = tcp_v6_init_sock,
2225         .destroy                = tcp_v6_destroy_sock,
2226         .shutdown               = tcp_shutdown,
2227         .setsockopt             = tcp_setsockopt,
2228         .getsockopt             = tcp_getsockopt,
2229         .sendmsg                = tcp_sendmsg,
2230         .recvmsg                = tcp_recvmsg,
2231         .backlog_rcv            = tcp_v6_do_rcv,
2232         .hash                   = tcp_v6_hash,
2233         .unhash                 = tcp_unhash,
2234         .get_port               = tcp_v6_get_port,
2235         .enter_memory_pressure  = tcp_enter_memory_pressure,
2236         .sockets_allocated      = &tcp_sockets_allocated,
2237         .memory_allocated       = &tcp_memory_allocated,
2238         .memory_pressure        = &tcp_memory_pressure,
2239         .sysctl_mem             = sysctl_tcp_mem,
2240         .sysctl_wmem            = sysctl_tcp_wmem,
2241         .sysctl_rmem            = sysctl_tcp_rmem,
2242         .max_header             = MAX_TCP_HEADER,
2243         .obj_size               = sizeof(struct tcp6_sock),
2244         .rsk_prot               = &tcp6_request_sock_ops,
2245 };
2246
2247 static struct inet6_protocol tcpv6_protocol = {
2248         .handler        =       tcp_v6_rcv,
2249         .err_handler    =       tcp_v6_err,
2250         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2251 };
2252
2253 extern struct proto_ops inet6_stream_ops;
2254
2255 static struct inet_protosw tcpv6_protosw = {
2256         .type           =       SOCK_STREAM,
2257         .protocol       =       IPPROTO_TCP,
2258         .prot           =       &tcpv6_prot,
2259         .ops            =       &inet6_stream_ops,
2260         .capability     =       -1,
2261         .no_check       =       0,
2262         .flags          =       INET_PROTOSW_PERMANENT,
2263 };
2264
2265 void __init tcpv6_init(void)
2266 {
2267         /* register inet6 protocol */
2268         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2269                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2270         inet6_register_protosw(&tcpv6_protosw);
2271 }