/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr,
				const struct in6_addr *daddr);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	const struct rt6_info *rt = (const struct rt6_info *)dst;

	dst_hold(dst);
	sk->sk_rx_dst = dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	if (rt->rt6i_node)
		inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
}
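
/*
 * Note: the dst and fn_sernum cookie cached above let tcp_v6_do_rcv() and
 * tcp_v6_early_demux() reuse this route on the receive path for as long as
 * dst->ops->check()/dst_check() still accepts the cookie.
 */
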
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(sk, NULL);
		local_bh_enable();
	}
}

static __inline__ __sum16 tcp_v6_check(int len,
				       const struct in6_addr *saddr,
				       const struct in6_addr *daddr,
				       __wsum base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
{
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->dest,
					    tcp_hdr(skb)->source);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct rt6_info *rt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			usin->sin6_addr = flowlabel->dst;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface is set while binding, the indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	np->daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

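	/*
	 * TCP over IPv4: for a v4-mapped destination the connection is handed
	 * to tcp_v4_connect() below, with icsk_af_ops/backlog_rcv (and the MD5
	 * ops when enabled) switched to the mapped variants first and switched
	 * back if that connect fails.
	 */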
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		} else {
			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
					       &np->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = np->daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;

	final_p = fl6_update_dst(&fl6, np->opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl6.saddr;
		np->rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
		tcp_fetch_timewait_stamp(sk, dst);

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->inet_sport,
							     inet->inet_dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
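
/*
 * tcp_v6_err() is the ICMPv6 error handler for TCP: it maps the incoming
 * type/code to a socket error, handles NDISC redirects and PKT_TOOBIG
 * (PMTU) messages, and reports other errors to the owning socket.
 */
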
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			  th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
				   ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst)
			dst->ops->redirect(dst, sk, skb);
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &tp->tsq_flags))
			sock_hold(sk);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi6 *fl6,
			      struct request_sock *req,
			      struct request_values *rvp,
			      u16 queue_mapping)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, rvp, NULL);

	if (skb) {
		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

		fl6->daddr = treq->rmt_addr;
		skb_set_queue_mapping(skb, queue_mapping);
		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	struct flowi6 fl6;

	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree_skb(inet6_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
}

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct request_sock *req,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) {
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
	} else if (req) {
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
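
/*
 * Verify the TCP-MD5 option on an incoming segment against the key (if any)
 * configured for the peer address; segments with a missing, unexpected or
 * mismatching signature are counted and then dropped by the caller.
 */
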
static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return 1;
	}
	return 0;
}
#endif

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		= AF_INET6,
	.obj_size	= sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	= tcp_v6_rtx_synack,
	.send_ack	= tcp_v6_reqsk_send_ack,
	.destructor	= tcp_v6_reqsk_destructor,
	.send_reset	= tcp_v6_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	= tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
};
#endif
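
/*
 * __tcp_v6_send_check() fills in the TCP checksum of an outgoing segment:
 * with CHECKSUM_PARTIAL it only seeds the pseudo-header sum and records the
 * checksum offsets so the device (or checksum helpers) can finish the job,
 * otherwise it computes the full checksum in software.
 */
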
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr, const struct in6_addr *daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v6_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2,
						      skb->csum));
	}
}

static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
}

static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	const struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
	return 0;
}

static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

static int tcp6_gro_complete(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

	return tcp_gro_complete(skb);
}
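
/*
 * tcp_v6_send_response() builds and transmits a bare TCP segment (an RST or
 * a pure ACK) on the per-netns control socket; timestamp and MD5 options are
 * added on request, which is why tot_len is adjusted below.
 */
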
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (ts)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (ts) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
		*topt++ = htonl(ts);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.flowi6_oif = inet6_iif(skb);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for an RST;
	 * the underlying function uses it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * via the source port, and then find the md5 key through that
		 * listening socket. We do not lose any security here:
		 * the incoming packet is checked against the found key, and
		 * no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, &ipv6h->daddr,
					    ntohs(th->source), inet6_iif(skb));
		if (!sk1)
			return;

		rcu_read_lock();
		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto release_sk1;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key, u8 tclass)
{
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
}
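
/*
 * tcp_v6_hnd_req(): for a packet aimed at a listening socket, look for a
 * matching half-open request (SYN_RECV), an already established socket, or,
 * as a last resort, a valid SYN cookie, and return the socket on which the
 * segment should be processed.
 */
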
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
					 &ipv6_hdr(skb)->saddr, th->source,
					 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
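/*
 * tcp_v6_conn_request() handles an incoming SYN on a listening socket: it
 * drops non-unicast SYNs, falls back to SYN cookies when the request queue
 * overflows, allocates and fills a request_sock (address pair, options,
 * inbound interface), picks an initial sequence number and sends the
 * SYN-ACK before queueing the request.
 */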
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
	struct flowi6 fl6;
	bool want_cookie = false;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
		if (!want_cookie)
			goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *d;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_free;

		/* Secret recipe starts with IP addresses */
		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = false;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_free;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	treq->rmt_addr = ipv6_hdr(skb)->saddr;
	treq->loc_addr = ipv6_hdr(skb)->daddr;
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);

	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (!isn) {
		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		}

		if (want_cookie) {
			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
			req->cookie_ts = tmp_opt.tstamp_ok;
			goto have_isn;
		}

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive.
			 * It means that we continue to communicate with
			 * destinations already remembered from the
			 * moment of the synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v6_init_sequence(skb);
	}
have_isn:
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_release;

	if (tcp_v6_send_synack(sk, dst, &fl6, req,
			       (struct request_values *)&tmp_ext,
			       skb_get_queue_mapping(skb)) ||
	    want_cookie)
		goto drop_and_free;

	tcp_rsk(req)->listener = NULL;
	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0; /* don't send reset */
}
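
/*
 * tcp_v6_syn_recv_sock() creates the child socket once the three-way
 * handshake completes. The v4-mapped case delegates to tcp_v4_syn_recv_sock()
 * and then points the child at the mapped (IPv4-in-IPv6) operations.
 */
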
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		newnp->rcv_saddr = newnp->saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		 * worked with IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	treq = inet6_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newnp->daddr = treq->rmt_addr;
	newnp->saddr = treq->loc_addr;
	newnp->rcv_saddr = treq->loc_addr;
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts,
					      sk_gfp_atomic(sk, GFP_ATOMIC));
		consume_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever,
	 * but we do one more thing here: reattach optmem to newsk.
	 */
	if (np->opt)
		newnp->opt = ipv6_dup_options(newsk, np->opt);

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	tcp_synack_rtt_meas(newsk, req);
	newtp->total_retrans = req->retrans;

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
			       AF_INET6, key->key, key->keylen,
			       sk_gfp_atomic(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);
		goto out;
	}
	__inet6_hash(newsk, NULL);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}

static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76)
		return __skb_checksum_complete(skb);
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: the socket is IPv6, an IPv4 packet arrives,
	 * goes to the IPv4 receive handler and gets backlogged.
	 * From the backlog it always ends up here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code where we
	 * may make it not affect IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea of uglifying IPv4.
	 *
	 * Actually, the whole idea behind IPV6_PKTOPTIONS
	 * looks not very well thought out. For now we latch
	 * options received in the last packet enqueued
	 * by tcp. Feel free to propose a better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto reset;
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to the tail of the read queue, not out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxtclass)
			np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
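
/*
 * tcp_v6_rcv() is the protocol entry point for incoming segments: it
 * validates the header and checksum, fills in TCP_SKB_CB(), looks up the
 * owning socket and then processes the segment directly, via the prequeue,
 * or via the socket backlog; TIME_WAIT sockets are handled separately below.
 */
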
static int tcp_v6_rcv(struct sk_buff *skb)
{
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	/*
	 *	Discard frame
	 */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
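
/*
 * tcp_v6_early_demux() runs from the IPv6 input path before routing: if the
 * segment belongs to an established socket it attaches that socket (and its
 * cached rx dst, when still valid) to the skb, so the normal receive path can
 * skip the socket and route lookups.
 */
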
static void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;
			struct inet_sock *icsk = inet_sk(sk);

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    icsk->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
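
/*
 * ipv6_mapped is installed on an AF_INET6 socket that is connected to (or
 * accepted from) a v4-mapped IPv6 address, so that transmit and header
 * handling go through the IPv4 code while the socket API stays IPv6.
 */
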
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
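
/*
 * /proc/net/tcp6: the three record types below (open requests, active
 * sockets and TIME_WAIT sockets) share one column layout, printed by
 * tcp6_seq_show() via the helpers that follow.
 */
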
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   from_kuid_munged(seq_user_ns(seq), uid),
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	long delta = tw->tw_ttd - jiffies;

	dest  = &tw6->tw_v6_daddr;
	src   = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= tcp_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.mtu_reduced		= tcp_v6_mtu_reduced,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};

static const struct inet6_protocol tcpv6_protocol = {
	.early_demux	= tcp_v6_early_demux,
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.gso_send_check	= tcp_v6_gso_send_check,
	.gso_segment	= tcp_tso_segment,
	.gro_receive	= tcp6_gro_receive,
	.gro_complete	= tcp6_gro_complete,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
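
/*
 * Registration order in tcpv6_init(): the inet6 protocol handler is added
 * first, then the protosw entry that exposes SOCK_STREAM/IPPROTO_TCP, then
 * the per-netns control socket; errors unwind the earlier steps.
 */
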
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}