ipvs: convert lblc scheduler to rcu
[linux-3.10.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/module.h>
12 #include <linux/in.h>
13 #include <linux/tcp.h>
14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h>
18 #include <asm/unaligned.h>
19
20 #include <net/tcp.h>
21
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28 #include <net/netfilter/nf_log.h>
29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31
32 /* "Be conservative in what you do,
33     be liberal in what you accept from others."
34     If it's non-zero, we mark only out of window RST segments as INVALID.
    Initial value: 0. */
35 static int nf_ct_tcp_be_liberal __read_mostly = 0;
36
37 /* If it is set to zero, we disable picking up already established
38    connections. Initial value: 1 (pick-up enabled). */
39 static int nf_ct_tcp_loose __read_mostly = 1;
40
41 /* Max number of the retransmitted packets without receiving an (acceptable)
42    ACK from the destination. If this number is reached, a shorter timer
43    will be started. Initial value: 3. */
44 static int nf_ct_tcp_max_retrans __read_mostly = 3;
45
46   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47      closely.  They're more complex. --RR */
48
/*
 * Printable name of each TCP conntrack state.  tcp_print_conntrack()
 * indexes this array directly with ct->proto.tcp.state, so the order of
 * the strings must follow the numeric order of the state values.
 */
49 static const char *const tcp_conntrack_names[] = {
50         "NONE",
51         "SYN_SENT",
52         "SYN_RECV",
53         "ESTABLISHED",
54         "FIN_WAIT",
55         "CLOSE_WAIT",
56         "LAST_ACK",
57         "TIME_WAIT",
58         "CLOSE",
59         "SYN_SENT2",
60 };
61
/*
 * Postfix "unit" macros: each expands to a trailing multiplication, so
 * "2 MINS" becomes "2 * 60 * HZ".  They are only meaningful when written
 * directly after a number, as in the tcp_timeouts[] initializer.
 */
62 #define SECS * HZ
63 #define MINS * 60 SECS
64 #define HOURS * 60 MINS
65 #define DAYS * 24 HOURS
66
/*
 * Timeout associated with each TCP conntrack state, expressed in HZ ticks
 * (the SECS/MINS/DAYS suffixes multiply by HZ).  Slots that are not
 * listed here are zero-initialized.  The last two entries (RETRANS,
 * UNACK) have no counterpart in tcp_conntrack_names[].
 */
67 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
68         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
69         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
70         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
71         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
72         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
73         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
74         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
75         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
76         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
77 /* RFC1122 says the R2 limit should be at least 100 seconds.
78    Linux uses 15 packets as limit, which corresponds
79    to ~13-30min depending on RTO. */
80         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
81         [TCP_CONNTRACK_UNACK]           = 5 MINS,
82 };
83
/*
 * Three-letter aliases for the conntrack states, used to keep the
 * tcp_conntracks[] transition table readable.  sIV (invalid) maps to
 * TCP_CONNTRACK_MAX and sIG (ignore) to TCP_CONNTRACK_IGNORE; neither is
 * a column of that table.
 */
84 #define sNO TCP_CONNTRACK_NONE
85 #define sSS TCP_CONNTRACK_SYN_SENT
86 #define sSR TCP_CONNTRACK_SYN_RECV
87 #define sES TCP_CONNTRACK_ESTABLISHED
88 #define sFW TCP_CONNTRACK_FIN_WAIT
89 #define sCW TCP_CONNTRACK_CLOSE_WAIT
90 #define sLA TCP_CONNTRACK_LAST_ACK
91 #define sTW TCP_CONNTRACK_TIME_WAIT
92 #define sCL TCP_CONNTRACK_CLOSE
93 #define sS2 TCP_CONNTRACK_SYN_SENT2
94 #define sIV TCP_CONNTRACK_MAX
95 #define sIG TCP_CONNTRACK_IGNORE
96
97 /* What TCP flags are set from RST/SYN/FIN/ACK.
    These are the values returned by get_conntrack_index(); the six
    enumerators are listed in the same order as the six rows
    (syn, synack, fin, ack, rst, none) of each tcp_conntracks[] block. */
98 enum tcp_bit_set {
99         TCP_SYN_SET,
100         TCP_SYNACK_SET,
101         TCP_FIN_SET,
102         TCP_ACK_SET,
103         TCP_RST_SET,
104         TCP_NONE_SET,
105 };
106
107 /*
108  * The TCP state transition table needs a few words...
109  *
110  * We are the man in the middle. All the packets go through us
111  * but might get lost in transit to the destination.
112  * It is assumed that the destinations can't receive segments
113  * we haven't seen.
114  *
115  * The checked segment is in window, but our windows are *not*
116  * equivalent with the ones of the sender/receiver. We always
117  * try to guess the state of the current sender.
118  *
119  * The meaning of the states are:
120  *
121  * NONE:        initial state
122  * SYN_SENT:    SYN-only packet seen
123  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
124  * SYN_RECV:    SYN-ACK packet seen
125  * ESTABLISHED: ACK packet seen
126  * FIN_WAIT:    FIN packet seen
127  * CLOSE_WAIT:  ACK seen (after FIN)
128  * LAST_ACK:    FIN seen (after FIN)
129  * TIME_WAIT:   last ACK seen
130  * CLOSE:       closed connection (RST)
131  *
132  * Packets marked as IGNORED (sIG):
133  *      if they may be either invalid or valid
134  *      and the receiver may send back a connection
135  *      closing RST or a SYN/ACK.
136  *
137  * Packets marked as INVALID (sIV):
138  *      if we regard them as truly invalid packets
139  */
/*
 * tcp_conntracks[direction][flag class][current state] -> next state
 *
 *   direction:     first initializer group is ORIGINAL, second is REPLY
 *   flag class:    rows in the order syn, synack, fin, ack, rst, none
 *                  (the order of enum tcp_bit_set)
 *   current state: columns in the order shown by the
 *                  "sNO, sSS, sSR, ..." header line above each row
 *
 * An entry of sIV or sIG is a verdict rather than a state to move to.
 */
140 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
141         {
142 /* ORIGINAL */
143 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
144 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
145 /*
146  *      sNO -> sSS      Initialize a new connection
147  *      sSS -> sSS      Retransmitted SYN
148  *      sS2 -> sS2      Late retransmitted SYN
149  *      sSR -> sIG
150  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
151  *                      are errors. Receiver will reply with RST
152  *                      and close the connection.
153  *                      Or we are not in sync and hold a dead connection.
154  *      sFW -> sIG
155  *      sCW -> sIG
156  *      sLA -> sIG
157  *      sTW -> sSS      Reopened connection (RFC 1122).
158  *      sCL -> sSS
159  */
160 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
161 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
162 /*
163  *      sNO -> sIV      Too late and no reason to do anything
164  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
165  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
166  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
167  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
168  *      sFW -> sIV
169  *      sCW -> sIV
170  *      sLA -> sIV
171  *      sTW -> sIV
172  *      sCL -> sIV
173  */
174 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
175 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
176 /*
177  *      sNO -> sIV      Too late and no reason to do anything...
178  *      sSS -> sIV      Client might not send FIN in this state:
179  *                      we enforce waiting for a SYN/ACK reply first.
180  *      sS2 -> sIV
181  *      sSR -> sFW      Close started.
182  *      sES -> sFW
183  *      sFW -> sLA      FIN seen in both directions, waiting for
184  *                      the last ACK.
185  *                      Might be a retransmitted FIN as well...
186  *      sCW -> sLA
187  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
188  *      sTW -> sTW
189  *      sCL -> sCL
190  */
191 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
192 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
193 /*
194  *      sNO -> sES      Assumed.
195  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
196  *      sS2 -> sIV
197  *      sSR -> sES      Established state is reached.
198  *      sES -> sES      :-)
199  *      sFW -> sCW      Normal close request answered by ACK.
200  *      sCW -> sCW
201  *      sLA -> sTW      Last ACK detected.
202  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
203  *      sCL -> sCL
204  */
205 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
206 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
207 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
208         },
209         {
210 /* REPLY */
211 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
212 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
213 /*
214  *      sNO -> sIV      Never reached.
215  *      sSS -> sS2      Simultaneous open
216  *      sS2 -> sS2      Retransmitted simultaneous SYN
217  *      sSR -> sIV      Invalid SYN packets sent by the server
218  *      sES -> sIV
219  *      sFW -> sIV
220  *      sCW -> sIV
221  *      sLA -> sIV
222  *      sTW -> sIV      Reopened connection, but server may not do it.
223  *      sCL -> sIV
224  */
225 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
226 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
227 /*
228  *      sSS -> sSR      Standard open.
229  *      sS2 -> sSR      Simultaneous open
230  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
231  *      sES -> sIG      Late retransmitted SYN/ACK?
232  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
233  *      sCW -> sIG
234  *      sLA -> sIG
235  *      sTW -> sIG
236  *      sCL -> sIG
237  */
238 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
239 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
240 /*
241  *      sSS -> sIV      Server might not send FIN in this state.
242  *      sS2 -> sIV
243  *      sSR -> sFW      Close started.
244  *      sES -> sFW
245  *      sFW -> sLA      FIN seen in both directions.
246  *      sCW -> sLA
247  *      sLA -> sLA      Retransmitted FIN.
248  *      sTW -> sTW
249  *      sCL -> sCL
250  */
251 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
252 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
253 /*
254  *      sSS -> sIG      Might be a half-open connection.
255  *      sS2 -> sIG
256  *      sSR -> sSR      Might answer late resent SYN.
257  *      sES -> sES      :-)
258  *      sFW -> sCW      Normal close request answered by ACK.
259  *      sCW -> sCW
260  *      sLA -> sTW      Last ACK detected.
261  *      sTW -> sTW      Retransmitted last ACK.
262  *      sCL -> sCL
263  */
264 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
265 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
266 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
267         }
268 };
269
270 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
271 {
272         return &net->ct.nf_ct_proto.tcp;
273 }
274
275 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
276                              struct nf_conntrack_tuple *tuple)
277 {
278         const struct tcphdr *hp;
279         struct tcphdr _hdr;
280
281         /* Actually only need first 8 bytes. */
282         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
283         if (hp == NULL)
284                 return false;
285
286         tuple->src.u.tcp.port = hp->source;
287         tuple->dst.u.tcp.port = hp->dest;
288
289         return true;
290 }
291
/*
 * Build the reverse-direction port pair: @tuple's source port becomes
 * @orig's destination port and vice versa.  Always returns true.
 *
 * The two stores are done one after the other without a temporary, so
 * the result is only a true swap when @tuple and @orig are different
 * objects.
 */
292 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
293                              const struct nf_conntrack_tuple *orig)
294 {
295         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
296         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
297         return true;
298 }
299
300 /* Print out the per-protocol part of the tuple. */
301 static int tcp_print_tuple(struct seq_file *s,
302                            const struct nf_conntrack_tuple *tuple)
303 {
304         return seq_printf(s, "sport=%hu dport=%hu ",
305                           ntohs(tuple->src.u.tcp.port),
306                           ntohs(tuple->dst.u.tcp.port));
307 }
308
309 /* Print out the private part of the conntrack. */
310 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
311 {
312         enum tcp_conntrack state;
313
314         spin_lock_bh(&ct->lock);
315         state = ct->proto.tcp.state;
316         spin_unlock_bh(&ct->lock);
317
318         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
319 }
320
321 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
322 {
323         if (tcph->rst) return TCP_RST_SET;
324         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
325         else if (tcph->fin) return TCP_FIN_SET;
326         else if (tcph->ack) return TCP_ACK_SET;
327         else return TCP_NONE_SET;
328 }
329
330 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
331    in IP Filter' by Guido van Rooij.
332
333    http://www.sane.nl/events/sane2000/papers.html
334    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
335
336    The boundaries and the conditions are changed according to RFC793:
337    the packet must intersect the window (i.e. segments may be
338    after the right or before the left edge) and thus receivers may ACK
339    segments after the right edge of the window.
340
341         td_maxend = max(sack + max(win,1)) seen in reply packets
342         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
343         td_maxwin += seq + len - sender.td_maxend
344                         if seq + len > sender.td_maxend
345         td_end    = max(seq + len) seen in sent packets
346
347    I.   Upper bound for valid data:     seq <= sender.td_maxend
348    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
349    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
350    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
351
352    where sack is the highest right edge of sack block found in the packet
353    or ack in the case of packet without SACK option.
354
355    The upper bound limit for a valid (s)ack is not ignored -
356    we don't have to deal with fragments.
357 */
358
359 static inline __u32 segment_seq_plus_len(__u32 seq,
360                                          size_t len,
361                                          unsigned int dataoff,
362                                          const struct tcphdr *tcph)
363 {
364         /* XXX Should I use payload length field in IP/IPv6 header ?
365          * - YK */
366         return (seq + len - dataoff - tcph->doff*4
367                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
368 }
369
370 /* Fixme: what about big packets? */
371 #define MAXACKWINCONST                  66000
/*
 * MAXACKWINDOW(sender) yields the larger of sender->td_maxwin and
 * MAXACKWINCONST.  The argument is evaluated more than once, so it must
 * be free of side effects.
 */
372 #define MAXACKWINDOW(sender)                                            \
373         ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
374                                               : MAXACKWINCONST)
375
376 /*
377  * Simplified tcp_parse_options routine from tcp_input.c
378  */
379 static void tcp_options(const struct sk_buff *skb,
380                         unsigned int dataoff,
381                         const struct tcphdr *tcph,
382                         struct ip_ct_tcp_state *state)
383 {
384         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
385         const unsigned char *ptr;
386         int length = (tcph->doff*4) - sizeof(struct tcphdr);
387
388         if (!length)
389                 return;
390
391         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
392                                  length, buff);
393         BUG_ON(ptr == NULL);
394
395         state->td_scale =
396         state->flags = 0;
397
398         while (length > 0) {
399                 int opcode=*ptr++;
400                 int opsize;
401
402                 switch (opcode) {
403                 case TCPOPT_EOL:
404                         return;
405                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
406                         length--;
407                         continue;
408                 default:
409                         opsize=*ptr++;
410                         if (opsize < 2) /* "silly options" */
411                                 return;
412                         if (opsize > length)
413                                 return; /* don't parse partial options */
414
415                         if (opcode == TCPOPT_SACK_PERM
416                             && opsize == TCPOLEN_SACK_PERM)
417                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
418                         else if (opcode == TCPOPT_WINDOW
419                                  && opsize == TCPOLEN_WINDOW) {
420                                 state->td_scale = *(u_int8_t *)ptr;
421
422                                 if (state->td_scale > 14) {
423                                         /* See RFC1323 */
424                                         state->td_scale = 14;
425                                 }
426                                 state->flags |=
427                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
428                         }
429                         ptr += opsize - 2;
430                         length -= opsize;
431                 }
432         }
433 }
434
435 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
436                      const struct tcphdr *tcph, __u32 *sack)
437 {
438         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
439         const unsigned char *ptr;
440         int length = (tcph->doff*4) - sizeof(struct tcphdr);
441         __u32 tmp;
442
443         if (!length)
444                 return;
445
446         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
447                                  length, buff);
448         BUG_ON(ptr == NULL);
449
450         /* Fast path for timestamp-only option */
451         if (length == TCPOLEN_TSTAMP_ALIGNED
452             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
453                                        | (TCPOPT_NOP << 16)
454                                        | (TCPOPT_TIMESTAMP << 8)
455                                        | TCPOLEN_TIMESTAMP))
456                 return;
457
458         while (length > 0) {
459                 int opcode = *ptr++;
460                 int opsize, i;
461
462                 switch (opcode) {
463                 case TCPOPT_EOL:
464                         return;
465                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
466                         length--;
467                         continue;
468                 default:
469                         opsize = *ptr++;
470                         if (opsize < 2) /* "silly options" */
471                                 return;
472                         if (opsize > length)
473                                 return; /* don't parse partial options */
474
475                         if (opcode == TCPOPT_SACK
476                             && opsize >= (TCPOLEN_SACK_BASE
477                                           + TCPOLEN_SACK_PERBLOCK)
478                             && !((opsize - TCPOLEN_SACK_BASE)
479                                  % TCPOLEN_SACK_PERBLOCK)) {
480                                 for (i = 0;
481                                      i < (opsize - TCPOLEN_SACK_BASE);
482                                      i += TCPOLEN_SACK_PERBLOCK) {
483                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
484
485                                         if (after(tmp, *sack))
486                                                 *sack = tmp;
487                                 }
488                                 return;
489                         }
490                         ptr += opsize - 2;
491                         length -= opsize;
492                 }
493         }
494 }
495
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Ask the nf_ct_nat_offset hook, if one is currently installed, for the
 * sequence number offset that applies to @seq in direction @dir of @ct.
 * Yields 0 when no hook is set.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
                             enum ip_conntrack_dir dir,
                             u32 seq)
{
        typeof(nf_ct_nat_offset) hook = rcu_dereference(nf_ct_nat_offset);

        if (hook == NULL)
                return 0;

        return hook(ct, dir, seq);
}
#define NAT_OFFSET(ct, dir, seq) \
        (nat_offset(ct, dir, seq))
#else
/* Without NAT support there is never an offset to apply. */
#define NAT_OFFSET(ct, dir, seq)        0
#endif
510
/*
 * tcp_in_window - check a segment against the tracked TCP windows.
 *
 * @ct:      conntrack entry; supplies the netns, the tuple for debug
 *           output and the argument for NAT_OFFSET()
 * @state:   TCP tracking state.  state->seen[dir] is treated as the
 *           sender, state->seen[!dir] as the receiver; both, and the
 *           last_xxx/retrans fields, may be modified
 * @dir:     direction the packet travels in
 * @index:   enum tcp_bit_set class of the packet; only TCP_ACK_SET is
 *           looked at here (retransmission accounting)
 * @skb:     the packet
 * @dataoff: offset of the TCP header inside @skb
 * @tcph:    the TCP header
 * @pf:      protocol family handed to nf_log_packet()
 *
 * Returns true when the segment satisfies bounds I-IV described in the
 * comment above segment_seq_plus_len(), in which case sender/receiver
 * tracking data is updated.  Also returns true right away for a SYN
 * without ACK seen while the sender's window is still uninitialised
 * (simultaneous open), and when the bounds fail but liberal mode is on
 * (sender flag IP_CT_TCP_FLAG_BE_LIBERAL or tn->tcp_be_liberal).
 * Otherwise returns false, logging the violated bound if LOG_INVALID()
 * allows it.
 */
511 static bool tcp_in_window(const struct nf_conn *ct,
512                           struct ip_ct_tcp *state,
513                           enum ip_conntrack_dir dir,
514                           unsigned int index,
515                           const struct sk_buff *skb,
516                           unsigned int dataoff,
517                           const struct tcphdr *tcph,
518                           u_int8_t pf)
519 {
520         struct net *net = nf_ct_net(ct);
521         struct nf_tcp_net *tn = tcp_pernet(net);
522         struct ip_ct_tcp_state *sender = &state->seen[dir];
523         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
524         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
525         __u32 seq, ack, sack, end, win, swin;
526         s16 receiver_offset;
527         bool res;
528
529         /*
530          * Get the required data from the packet.
531          */
532         seq = ntohl(tcph->seq);
533         ack = sack = ntohl(tcph->ack_seq);
534         win = ntohs(tcph->window);
535         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
536
        /* sack starts out equal to ack; tcp_sack() can only move it forward. */
537         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
538                 tcp_sack(skb, dataoff, tcph, &sack);
539
540         /* Take into account NAT sequence number mangling */
541         receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
542         ack -= receiver_offset;
543         sack -= receiver_offset;
544
545         pr_debug("tcp_in_window: START\n");
546         pr_debug("tcp_in_window: ");
547         nf_ct_dump_tuple(tuple);
548         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
549                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
550         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
551                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
552                  sender->td_end, sender->td_maxend, sender->td_maxwin,
553                  sender->td_scale,
554                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
555                  receiver->td_scale);
556
        /* td_maxwin == 0 is used as the "nothing recorded for this side yet" marker. */
557         if (sender->td_maxwin == 0) {
558                 /*
559                  * Initialize sender data.
560                  */
561                 if (tcph->syn) {
562                         /*
563                          * SYN-ACK in reply to a SYN
564                          * or SYN from reply direction in simultaneous open.
565                          */
566                         sender->td_end =
567                         sender->td_maxend = end;
568                         sender->td_maxwin = (win == 0 ? 1 : win);
569
570                         tcp_options(skb, dataoff, tcph, sender);
571                         /*
572                          * RFC 1323:
573                          * Both sides must send the Window Scale option
574                          * to enable window scaling in either direction.
575                          */
576                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
577                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
578                                 sender->td_scale =
579                                 receiver->td_scale = 0;
580                         if (!tcph->ack)
581                                 /* Simultaneous open */
582                                 return true;
583                 } else {
584                         /*
585                          * We are in the middle of a connection,
586                          * its history is lost for us.
587                          * Let's try to use the data from the packet.
588                          */
589                         sender->td_end = end;
590                         swin = win << sender->td_scale;
591                         sender->td_maxwin = (swin == 0 ? 1 : swin);
592                         sender->td_maxend = end + sender->td_maxwin;
593                         /*
594                          * We haven't seen traffic in the other direction yet
595                          * but we have to tweak window tracking to pass III
596                          * and IV until that happens.
597                          */
598                         if (receiver->td_maxwin == 0)
599                                 receiver->td_end = receiver->td_maxend = sack;
600                 }
601         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
602                      && dir == IP_CT_DIR_ORIGINAL)
603                    || (state->state == TCP_CONNTRACK_SYN_RECV
604                      && dir == IP_CT_DIR_REPLY))
605                    && after(end, sender->td_end)) {
606                 /*
607                  * RFC 793: "if a TCP is reinitialized ... then it need
608                  * not wait at all; it must only be sure to use sequence
609                  * numbers larger than those recently used."
610                  */
611                 sender->td_end =
612                 sender->td_maxend = end;
613                 sender->td_maxwin = (win == 0 ? 1 : win);
614
615                 tcp_options(skb, dataoff, tcph, sender);
616         }
617
618         if (!(tcph->ack)) {
619                 /*
620                  * If there is no ACK, just pretend it was set and OK.
621                  */
622                 ack = sack = receiver->td_end;
623         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
624                     (TCP_FLAG_ACK|TCP_FLAG_RST))
625                    && (ack == 0)) {
626                 /*
627                  * Broken TCP stacks, that set ACK in RST packets as well
628                  * with zero ack value.
629                  */
630                 ack = sack = receiver->td_end;
631         }
632
633         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
634                 /*
635                  * RST sent answering SYN.
636                  */
637                 seq = end = sender->td_end;
638
639         pr_debug("tcp_in_window: ");
640         nf_ct_dump_tuple(tuple);
641         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
642                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
643         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
644                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
645                  sender->td_end, sender->td_maxend, sender->td_maxwin,
646                  sender->td_scale,
647                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
648                  receiver->td_scale);
649
650         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
651                  before(seq, sender->td_maxend + 1),
652                  after(end, sender->td_end - receiver->td_maxwin - 1),
653                  before(sack, receiver->td_end + 1),
654                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
655
        /*
         * The four conditions are bounds I, II, III and IV, in that order.
         * "x <= y" is spelled before(x, y + 1) and "x >= y" is spelled
         * after(x, y - 1).
         */
656         if (before(seq, sender->td_maxend + 1) &&
657             after(end, sender->td_end - receiver->td_maxwin - 1) &&
658             before(sack, receiver->td_end + 1) &&
659             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
660                 /*
661                  * Take into account window scaling (RFC 1323).
662                  */
663                 if (!tcph->syn)
664                         win <<= sender->td_scale;
665
666                 /*
667                  * Update sender data.
668                  */
669                 swin = win + (sack - ack);
670                 if (sender->td_maxwin < swin)
671                         sender->td_maxwin = swin;
672                 if (after(end, sender->td_end)) {
673                         sender->td_end = end;
674                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
675                 }
676                 if (tcph->ack) {
677                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
678                                 sender->td_maxack = ack;
679                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
680                         } else if (after(ack, sender->td_maxack))
681                                 sender->td_maxack = ack;
682                 }
683
684                 /*
685                  * Update receiver data.
686                  */
687                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
688                         receiver->td_maxwin += end - sender->td_maxend;
689                 if (after(sack + win, receiver->td_maxend - 1)) {
690                         receiver->td_maxend = sack + win;
691                         if (win == 0)
692                                 receiver->td_maxend++;
693                 }
694                 if (ack == receiver->td_end)
695                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
696
697                 /*
698                  * Check retransmissions.
699                  */
                /*
                 * A pure ACK identical to the previous one (same dir, seq,
                 * ack, end, win) bumps state->retrans; anything else
                 * restarts the count.
                 */
700                 if (index == TCP_ACK_SET) {
701                         if (state->last_dir == dir
702                             && state->last_seq == seq
703                             && state->last_ack == ack
704                             && state->last_end == end
705                             && state->last_win == win)
706                                 state->retrans++;
707                         else {
708                                 state->last_dir = dir;
709                                 state->last_seq = seq;
710                                 state->last_ack = ack;
711                                 state->last_end = end;
712                                 state->last_win = win;
713                                 state->retrans = 0;
714                         }
715                 }
716                 res = true;
717         } else {
718                 res = false;
719                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
720                     tn->tcp_be_liberal)
721                         res = true;
                /*
                 * The nested ?: below re-tests bounds I..IV in order and
                 * selects the message for the first one that fails; "BUG"
                 * would mean all four hold, which cannot happen on this
                 * branch.
                 */
722                 if (!res && LOG_INVALID(net, IPPROTO_TCP))
723                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
724                         "nf_ct_tcp: %s ",
725                         before(seq, sender->td_maxend + 1) ?
726                         after(end, sender->td_end - receiver->td_maxwin - 1) ?
727                         before(sack, receiver->td_end + 1) ?
728                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
729                         : "ACK is under the lower bound (possible overly delayed ACK)"
730                         : "ACK is over the upper bound (ACKed data not seen yet)"
731                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
732                         : "SEQ is over the upper bound (over the window of the receiver)");
733         }
734
735         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
736                  "receiver end=%u maxend=%u maxwin=%u\n",
737                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
738                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
739
740         return res;
741 }
742
743 /* table of valid flag combinations - PUSH, ECE and CWR are always valid.
     The array is large enough to be indexed by any combination of the
     FIN, SYN, RST, ACK and URG bits; an entry of 1 marks the combination
     as valid, and every combination not listed is zero-initialized. */
744 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
745                                  TCPHDR_URG) + 1] =
746 {
747         [TCPHDR_SYN]                            = 1,
748         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
749         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
750         [TCPHDR_RST]                            = 1,
751         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
752         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
753         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
754         [TCPHDR_ACK]                            = 1,
755         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
756 };
757
758 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
759 static int tcp_error(struct net *net, struct nf_conn *tmpl,
760                      struct sk_buff *skb,
761                      unsigned int dataoff,
762                      enum ip_conntrack_info *ctinfo,
763                      u_int8_t pf,
764                      unsigned int hooknum)
765 {
766         const struct tcphdr *th;
767         struct tcphdr _tcph;
768         unsigned int tcplen = skb->len - dataoff;
769         u_int8_t tcpflags;
770
771         /* Smaller that minimal TCP header? */
772         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
773         if (th == NULL) {
774                 if (LOG_INVALID(net, IPPROTO_TCP))
775                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
776                                 "nf_ct_tcp: short packet ");
777                 return -NF_ACCEPT;
778         }
779
780         /* Not whole TCP header or malformed packet */
781         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
782                 if (LOG_INVALID(net, IPPROTO_TCP))
783                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
784                                 "nf_ct_tcp: truncated/malformed packet ");
785                 return -NF_ACCEPT;
786         }
787
788         /* Checksum invalid? Ignore.
789          * We skip checking packets on the outgoing path
790          * because the checksum is assumed to be correct.
791          */
792         /* FIXME: Source route IP option packets --RR */
793         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
794             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
795                 if (LOG_INVALID(net, IPPROTO_TCP))
796                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
797                                   "nf_ct_tcp: bad TCP checksum ");
798                 return -NF_ACCEPT;
799         }
800
801         /* Check TCP flags. */
802         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
803         if (!tcp_valid_flags[tcpflags]) {
804                 if (LOG_INVALID(net, IPPROTO_TCP))
805                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
806                                   "nf_ct_tcp: invalid TCP flag combination ");
807                 return -NF_ACCEPT;
808         }
809
810         return NF_ACCEPT;
811 }
812
813 static unsigned int *tcp_get_timeouts(struct net *net)
814 {
815         return tcp_pernet(net)->timeouts;
816 }
817
/*
 * tcp_packet - run one packet of an existing conntrack entry through the
 * TCP state machine.
 *
 * @ct:       conntrack entry; ct->lock is held while its TCP state is
 *            examined and updated
 * @skb:      the packet; its TCP header is read at @dataoff
 * @dataoff:  offset of the TCP header within @skb
 * @ctinfo:   used to derive the packet direction and handed on to the
 *            kill/refresh accounting helpers
 * @pf:       protocol family, passed to logging and to tcp_in_window()
 * @hooknum:  not referenced in this function
 * @timeouts: timeout table indexed by TCP conntrack state
 *
 * Values returned below:
 *   NF_ACCEPT   - packet accepted (state updated), or deliberately ignored
 *   -NF_ACCEPT  - packet judged invalid (bad transition, bad RST, or
 *                 rejected by tcp_in_window())
 *   -NF_REPEAT  - a closed/aborted connection is being reopened and the old
 *                 entry was killed here; the packet is to be looked up again
 *   NF_DROP     - same reopen case, but nf_ct_kill() did not succeed
 */
818 /* Returns verdict for packet, or -1 for invalid. */
819 static int tcp_packet(struct nf_conn *ct,
820                       const struct sk_buff *skb,
821                       unsigned int dataoff,
822                       enum ip_conntrack_info ctinfo,
823                       u_int8_t pf,
824                       unsigned int hooknum,
825                       unsigned int *timeouts)
826 {
827         struct net *net = nf_ct_net(ct);
828         struct nf_tcp_net *tn = tcp_pernet(net);
829         struct nf_conntrack_tuple *tuple;
830         enum tcp_conntrack new_state, old_state;
831         enum ip_conntrack_dir dir;
832         const struct tcphdr *th;
833         struct tcphdr _tcph;
834         unsigned long timeout;
835         unsigned int index;
836
            /* A missing header is treated as a bug here rather than an error.
             * NOTE(review): presumably tcp_error() has already rejected short
             * packets before this is called - confirm against the caller. */
837         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
838         BUG_ON(th == NULL);
839
            /* Under ct->lock: look up the candidate next state in the
             * transition table, keyed by direction, packet type index and
             * current state. */
840         spin_lock_bh(&ct->lock);
841         old_state = ct->proto.tcp.state;
842         dir = CTINFO2DIR(ctinfo);
843         index = get_conntrack_index(th);
844         new_state = tcp_conntracks[dir][index][old_state];
845         tuple = &ct->tuplehash[dir].tuple;
846
            /* Special cases that either return early (lock dropped first),
             * rewrite old_state/new_state, or jump past the window check. */
847         switch (new_state) {
848         case TCP_CONNTRACK_SYN_SENT:
849                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
850                         break;
851                 /* RFC 1122: "When a connection is closed actively,
852                  * it MUST linger in TIME-WAIT state for a time 2xMSL
853                  * (Maximum Segment Lifetime). However, it MAY accept
854                  * a new SYN from the remote TCP to reopen the connection
855                  * directly from TIME-WAIT state, if..."
856                  * We ignore the conditions because we are in the
857                  * TIME-WAIT state anyway.
858                  *
859                  * Handle aborted connections: we and the server
860                  * think there is an existing connection but the client
861                  * aborts it and starts a new one.
862                  */
863                 if (((ct->proto.tcp.seen[dir].flags
864                       | ct->proto.tcp.seen[!dir].flags)
865                      & IP_CT_TCP_FLAG_CLOSE_INIT)
866                     || (ct->proto.tcp.last_dir == dir
867                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
868                         /* Attempt to reopen a closed/aborted connection.
869                          * Delete this connection and look up again. */
870                         spin_unlock_bh(&ct->lock);
871
872                         /* Only repeat if we can actually remove the timer.
873                          * Destruction may already be in progress in process
874                          * context and we must give it a chance to terminate.
875                          */
876                         if (nf_ct_kill(ct))
877                                 return -NF_REPEAT;
878                         return NF_DROP;
879                 }
880                 /* Fall through */
881         case TCP_CONNTRACK_IGNORE:
882                 /* Ignored packets:
883                  *
884                  * Our connection entry may be out of sync, so ignore
885                  * packets which may signal the real connection between
886                  * the client and the server.
887                  *
888                  * a) SYN in ORIGINAL
889                  * b) SYN/ACK in REPLY
890                  * c) ACK in reply direction after initial SYN in original.
891                  *
892                  * If the ignored packet is invalid, the receiver will send
893                  * a RST we'll catch below.
894                  */
895                 if (index == TCP_SYNACK_SET
896                     && ct->proto.tcp.last_index == TCP_SYN_SET
897                     && ct->proto.tcp.last_dir != dir
898                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
899                         /* b) This SYN/ACK acknowledges a SYN that we earlier
900                          * ignored as invalid. This means that the client and
901                          * the server are both in sync, while the firewall is
902                          * not. We get in sync from the previously annotated
903                          * values.
904                          */
905                         old_state = TCP_CONNTRACK_SYN_SENT;
906                         new_state = TCP_CONNTRACK_SYN_RECV;
907                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
908                                 ct->proto.tcp.last_end;
909                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
910                                 ct->proto.tcp.last_end;
911                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
912                                 ct->proto.tcp.last_win == 0 ?
913                                         1 : ct->proto.tcp.last_win;
914                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
915                                 ct->proto.tcp.last_wscale;
916                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
917                                 ct->proto.tcp.last_flags;
918                         memset(&ct->proto.tcp.seen[dir], 0,
919                                sizeof(struct ip_ct_tcp_state));
920                         break;
921                 }
                    /* Remember what this ignored packet looked like so a
                     * later SYN/ACK or RST can be matched against it. */
922                 ct->proto.tcp.last_index = index;
923                 ct->proto.tcp.last_dir = dir;
924                 ct->proto.tcp.last_seq = ntohl(th->seq);
925                 ct->proto.tcp.last_end =
926                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
927                 ct->proto.tcp.last_win = ntohs(th->window);
928
929                 /* a) This is a SYN in ORIGINAL. The client and the server
930                  * may be in sync but we are not. In that case, we annotate
931                  * the TCP options and let the packet go through. If it is a
932                  * valid SYN packet, the server will reply with a SYN/ACK, and
933                  * then we'll get in sync. Otherwise, the server ignores it. */
934                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
935                         struct ip_ct_tcp_state seen = {};
936
937                         ct->proto.tcp.last_flags =
938                         ct->proto.tcp.last_wscale = 0;
939                         tcp_options(skb, dataoff, th, &seen);
940                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
941                                 ct->proto.tcp.last_flags |=
942                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
943                                 ct->proto.tcp.last_wscale = seen.td_scale;
944                         }
945                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
946                                 ct->proto.tcp.last_flags |=
947                                         IP_CT_TCP_FLAG_SACK_PERM;
948                         }
949                 }
950                 spin_unlock_bh(&ct->lock);
951                 if (LOG_INVALID(net, IPPROTO_TCP))
952                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
953                                   "nf_ct_tcp: invalid packet ignored in "
954                                   "state %s ", tcp_conntrack_names[old_state]);
955                 return NF_ACCEPT;
956         case TCP_CONNTRACK_MAX:
957                 /* Invalid packet */
958                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
959                          dir, get_conntrack_index(th), old_state);
960                 spin_unlock_bh(&ct->lock);
961                 if (LOG_INVALID(net, IPPROTO_TCP))
962                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
963                                   "nf_ct_tcp: invalid state ");
964                 return -NF_ACCEPT;
965         case TCP_CONNTRACK_CLOSE:
                    /* A RST whose sequence number lies before the highest ACK
                     * recorded for the other direction is rejected. */
966                 if (index == TCP_RST_SET
967                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
968                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
969                         /* Invalid RST  */
970                         spin_unlock_bh(&ct->lock);
971                         if (LOG_INVALID(net, IPPROTO_TCP))
972                                 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
973                                           "nf_ct_tcp: invalid RST ");
974                         return -NF_ACCEPT;
975                 }
976                 if (index == TCP_RST_SET
977                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
978                          && ct->proto.tcp.last_index == TCP_SYN_SET)
979                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
980                             && ct->proto.tcp.last_index == TCP_ACK_SET))
981                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
982                         /* RST sent to invalid SYN or ACK we had let through
983                          * at a) and c) above:
984                          *
985                          * a) SYN was in window then
986                          * c) we hold a half-open connection.
987                          *
988                          * Delete our connection entry.
989                          * We skip window checking, because packet might ACK
990                          * segments we ignored. */
991                         goto in_window;
992                 }
993                 /* Just fall through */
994         default:
995                 /* Keep compilers happy. */
996                 break;
997         }
998
            /* Everything that did not jump to in_window must pass the window
             * check; a failure makes the packet invalid. */
999         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1000                            skb, dataoff, th, pf)) {
1001                 spin_unlock_bh(&ct->lock);
1002                 return -NF_ACCEPT;
1003         }
1004      in_window:
1005         /* From now on we have got in-window packets */
1006         ct->proto.tcp.last_index = index;
1007         ct->proto.tcp.last_dir = dir;
1008
1009         pr_debug("tcp_conntracks: ");
1010         nf_ct_dump_tuple(tuple);
1011         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1012                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1013                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1014                  old_state, new_state);
1015
             /* Commit the new state; on entering FIN_WAIT, mark this
              * direction as the one that initiated the close. */
1016         ct->proto.tcp.state = new_state;
1017         if (old_state != new_state
1018             && new_state == TCP_CONNTRACK_FIN_WAIT)
1019                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1020
             /* Timeout selection: the RETRANS timeout once the retransmit
              * counter has reached tcp_max_retrans, else the UNACK timeout
              * while either direction is flagged as having unacknowledged
              * data, else the state's own timeout.  The two special timeouts
              * are only used when shorter than the state's own. */
1021         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1022             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1023                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1024         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1025                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1026                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1027                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1028         else
1029                 timeout = timeouts[new_state];
1030         spin_unlock_bh(&ct->lock);
1031
             /* Event reporting and timer refresh happen after the lock has
              * been released. */
1032         if (new_state != old_state)
1033                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1034
1035         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036                 /* If only reply is a RST, we can consider ourselves not to
1037                    have an established connection: this is a fairly common
1038                    problem case, so we can delete the conntrack
1039                    immediately.  --RR */
1040                 if (th->rst) {
1041                         nf_ct_kill_acct(ct, ctinfo, skb);
1042                         return NF_ACCEPT;
1043                 }
1044         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1045                    && (old_state == TCP_CONNTRACK_SYN_RECV
1046                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1047                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1048                 /* Set ASSURED if we see a valid ack in ESTABLISHED
1049                    after SYN_RECV or a valid answer for a picked up
1050                    connection. */
1051                 set_bit(IPS_ASSURED_BIT, &ct->status);
1052                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1053         }
1054         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1055
1056         return NF_ACCEPT;
1057 }
1058
1059 /* Called when a new connection for this protocol found. */
1060 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1061                     unsigned int dataoff, unsigned int *timeouts)
1062 {
1063         enum tcp_conntrack new_state;
1064         const struct tcphdr *th;
1065         struct tcphdr _tcph;
1066         struct net *net = nf_ct_net(ct);
1067         struct nf_tcp_net *tn = tcp_pernet(net);
1068         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1069         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1070
1071         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1072         BUG_ON(th == NULL);
1073
1074         /* Don't need lock here: this conntrack not in circulation yet */
1075         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1076
1077         /* Invalid: delete conntrack */
1078         if (new_state >= TCP_CONNTRACK_MAX) {
1079                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1080                 return false;
1081         }
1082
1083         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1084                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1085                 /* SYN packet */
1086                 ct->proto.tcp.seen[0].td_end =
1087                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1088                                              dataoff, th);
1089                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1090                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1091                         ct->proto.tcp.seen[0].td_maxwin = 1;
1092                 ct->proto.tcp.seen[0].td_maxend =
1093                         ct->proto.tcp.seen[0].td_end;
1094
1095                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1096         } else if (tn->tcp_loose == 0) {
1097                 /* Don't try to pick up connections. */
1098                 return false;
1099         } else {
1100                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1101                 /*
1102                  * We are in the middle of a connection,
1103                  * its history is lost for us.
1104                  * Let's try to use the data from the packet.
1105                  */
1106                 ct->proto.tcp.seen[0].td_end =
1107                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1108                                              dataoff, th);
1109                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1110                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1111                         ct->proto.tcp.seen[0].td_maxwin = 1;
1112                 ct->proto.tcp.seen[0].td_maxend =
1113                         ct->proto.tcp.seen[0].td_end +
1114                         ct->proto.tcp.seen[0].td_maxwin;
1115
1116                 /* We assume SACK and liberal window checking to handle
1117                  * window scaling */
1118                 ct->proto.tcp.seen[0].flags =
1119                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1120                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1121         }
1122
1123         /* tcp_packet will set them */
1124         ct->proto.tcp.last_index = TCP_NONE_SET;
1125
1126         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1127                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1128                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1129                  sender->td_scale,
1130                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1131                  receiver->td_scale);
1132         return true;
1133 }
1134
1135 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1136
1137 #include <linux/netfilter/nfnetlink.h>
1138 #include <linux/netfilter/nfnetlink_conntrack.h>
1139
1140 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1141                          struct nf_conn *ct)
1142 {
1143         struct nlattr *nest_parms;
1144         struct nf_ct_tcp_flags tmp = {};
1145
1146         spin_lock_bh(&ct->lock);
1147         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1148         if (!nest_parms)
1149                 goto nla_put_failure;
1150
1151         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1152             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1153                        ct->proto.tcp.seen[0].td_scale) ||
1154             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1155                        ct->proto.tcp.seen[1].td_scale))
1156                 goto nla_put_failure;
1157
1158         tmp.flags = ct->proto.tcp.seen[0].flags;
1159         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1160                     sizeof(struct nf_ct_tcp_flags), &tmp))
1161                 goto nla_put_failure;
1162
1163         tmp.flags = ct->proto.tcp.seen[1].flags;
1164         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1165                     sizeof(struct nf_ct_tcp_flags), &tmp))
1166                 goto nla_put_failure;
1167         spin_unlock_bh(&ct->lock);
1168
1169         nla_nest_end(skb, nest_parms);
1170
1171         return 0;
1172
1173 nla_put_failure:
1174         spin_unlock_bh(&ct->lock);
1175         return -1;
1176 }
1177
1178 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1179         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1180         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1181         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1182         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1183         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1184 };
1185
1186 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1187 {
1188         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1189         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1190         int err;
1191
1192         /* updates could not contain anything about the private
1193          * protocol info, in that case skip the parsing */
1194         if (!pattr)
1195                 return 0;
1196
1197         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1198         if (err < 0)
1199                 return err;
1200
1201         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1202             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1203                 return -EINVAL;
1204
1205         spin_lock_bh(&ct->lock);
1206         if (tb[CTA_PROTOINFO_TCP_STATE])
1207                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1208
1209         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1210                 struct nf_ct_tcp_flags *attr =
1211                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1212                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1213                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1214         }
1215
1216         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1217                 struct nf_ct_tcp_flags *attr =
1218                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1219                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1220                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1221         }
1222
1223         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1224             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1225             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1226             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1227                 ct->proto.tcp.seen[0].td_scale =
1228                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1229                 ct->proto.tcp.seen[1].td_scale =
1230                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1231         }
1232         spin_unlock_bh(&ct->lock);
1233
1234         return 0;
1235 }
1236
1237 static int tcp_nlattr_size(void)
1238 {
1239         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1240                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1241 }
1242
1243 static int tcp_nlattr_tuple_size(void)
1244 {
1245         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1246 }
1247 #endif
1248
1249 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1250
1251 #include <linux/netfilter/nfnetlink.h>
1252 #include <linux/netfilter/nfnetlink_cttimeout.h>
1253
1254 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1255                                      struct net *net, void *data)
1256 {
1257         unsigned int *timeouts = data;
1258         struct nf_tcp_net *tn = tcp_pernet(net);
1259         int i;
1260
1261         /* set default TCP timeouts. */
1262         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1263                 timeouts[i] = tn->timeouts[i];
1264
1265         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1266                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1267                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1268         }
1269         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1270                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1271                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1272         }
1273         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1274                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1275                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1276         }
1277         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1278                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1279                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1280         }
1281         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1282                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1283                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1284         }
1285         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1286                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1287                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1288         }
1289         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1290                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1291                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1292         }
1293         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1294                 timeouts[TCP_CONNTRACK_CLOSE] =
1295                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1296         }
1297         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1298                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1299                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1300         }
1301         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1302                 timeouts[TCP_CONNTRACK_RETRANS] =
1303                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1304         }
1305         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1306                 timeouts[TCP_CONNTRACK_UNACK] =
1307                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1308         }
1309         return 0;
1310 }
1311
1312 static int
1313 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1314 {
1315         const unsigned int *timeouts = data;
1316
1317         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1318                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1319             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1320                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1321             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1322                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1323             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1324                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1325             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1326                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1327             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1328                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1329             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1330                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1331             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1332                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1333             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1334                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1335             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1336                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1337             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1338                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1339                 goto nla_put_failure;
1340         return 0;
1341
1342 nla_put_failure:
1343         return -ENOSPC;
1344 }
1345
1346 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1347         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1348         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1349         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1350         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1351         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1352         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1353         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1354         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1355         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1356         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1357         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1358 };
1359 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1360
1361 #ifdef CONFIG_SYSCTL
1362 static struct ctl_table tcp_sysctl_table[] = {
1363         {
1364                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1365                 .maxlen         = sizeof(unsigned int),
1366                 .mode           = 0644,
1367                 .proc_handler   = proc_dointvec_jiffies,
1368         },
1369         {
1370                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1371                 .maxlen         = sizeof(unsigned int),
1372                 .mode           = 0644,
1373                 .proc_handler   = proc_dointvec_jiffies,
1374         },
1375         {
1376                 .procname       = "nf_conntrack_tcp_timeout_established",
1377                 .maxlen         = sizeof(unsigned int),
1378                 .mode           = 0644,
1379                 .proc_handler   = proc_dointvec_jiffies,
1380         },
1381         {
1382                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1383                 .maxlen         = sizeof(unsigned int),
1384                 .mode           = 0644,
1385                 .proc_handler   = proc_dointvec_jiffies,
1386         },
1387         {
1388                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1389                 .maxlen         = sizeof(unsigned int),
1390                 .mode           = 0644,
1391                 .proc_handler   = proc_dointvec_jiffies,
1392         },
1393         {
1394                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1395                 .maxlen         = sizeof(unsigned int),
1396                 .mode           = 0644,
1397                 .proc_handler   = proc_dointvec_jiffies,
1398         },
1399         {
1400                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1401                 .maxlen         = sizeof(unsigned int),
1402                 .mode           = 0644,
1403                 .proc_handler   = proc_dointvec_jiffies,
1404         },
1405         {
1406                 .procname       = "nf_conntrack_tcp_timeout_close",
1407                 .maxlen         = sizeof(unsigned int),
1408                 .mode           = 0644,
1409                 .proc_handler   = proc_dointvec_jiffies,
1410         },
1411         {
1412                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1413                 .maxlen         = sizeof(unsigned int),
1414                 .mode           = 0644,
1415                 .proc_handler   = proc_dointvec_jiffies,
1416         },
1417         {
1418                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1419                 .maxlen         = sizeof(unsigned int),
1420                 .mode           = 0644,
1421                 .proc_handler   = proc_dointvec_jiffies,
1422         },
1423         {
1424                 .procname       = "nf_conntrack_tcp_loose",
1425                 .maxlen         = sizeof(unsigned int),
1426                 .mode           = 0644,
1427                 .proc_handler   = proc_dointvec,
1428         },
1429         {
1430                 .procname       = "nf_conntrack_tcp_be_liberal",
1431                 .maxlen         = sizeof(unsigned int),
1432                 .mode           = 0644,
1433                 .proc_handler   = proc_dointvec,
1434         },
1435         {
1436                 .procname       = "nf_conntrack_tcp_max_retrans",
1437                 .maxlen         = sizeof(unsigned int),
1438                 .mode           = 0644,
1439                 .proc_handler   = proc_dointvec,
1440         },
1441         { }
1442 };
1443
1444 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1445 static struct ctl_table tcp_compat_sysctl_table[] = {
1446         {
1447                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1448                 .maxlen         = sizeof(unsigned int),
1449                 .mode           = 0644,
1450                 .proc_handler   = proc_dointvec_jiffies,
1451         },
1452         {
1453                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1454                 .maxlen         = sizeof(unsigned int),
1455                 .mode           = 0644,
1456                 .proc_handler   = proc_dointvec_jiffies,
1457         },
1458         {
1459                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1460                 .maxlen         = sizeof(unsigned int),
1461                 .mode           = 0644,
1462                 .proc_handler   = proc_dointvec_jiffies,
1463         },
1464         {
1465                 .procname       = "ip_conntrack_tcp_timeout_established",
1466                 .maxlen         = sizeof(unsigned int),
1467                 .mode           = 0644,
1468                 .proc_handler   = proc_dointvec_jiffies,
1469         },
1470         {
1471                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1472                 .maxlen         = sizeof(unsigned int),
1473                 .mode           = 0644,
1474                 .proc_handler   = proc_dointvec_jiffies,
1475         },
1476         {
1477                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1478                 .maxlen         = sizeof(unsigned int),
1479                 .mode           = 0644,
1480                 .proc_handler   = proc_dointvec_jiffies,
1481         },
1482         {
1483                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1484                 .maxlen         = sizeof(unsigned int),
1485                 .mode           = 0644,
1486                 .proc_handler   = proc_dointvec_jiffies,
1487         },
1488         {
1489                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1490                 .maxlen         = sizeof(unsigned int),
1491                 .mode           = 0644,
1492                 .proc_handler   = proc_dointvec_jiffies,
1493         },
1494         {
1495                 .procname       = "ip_conntrack_tcp_timeout_close",
1496                 .maxlen         = sizeof(unsigned int),
1497                 .mode           = 0644,
1498                 .proc_handler   = proc_dointvec_jiffies,
1499         },
1500         {
1501                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1502                 .maxlen         = sizeof(unsigned int),
1503                 .mode           = 0644,
1504                 .proc_handler   = proc_dointvec_jiffies,
1505         },
1506         {
1507                 .procname       = "ip_conntrack_tcp_loose",
1508                 .maxlen         = sizeof(unsigned int),
1509                 .mode           = 0644,
1510                 .proc_handler   = proc_dointvec,
1511         },
1512         {
1513                 .procname       = "ip_conntrack_tcp_be_liberal",
1514                 .maxlen         = sizeof(unsigned int),
1515                 .mode           = 0644,
1516                 .proc_handler   = proc_dointvec,
1517         },
1518         {
1519                 .procname       = "ip_conntrack_tcp_max_retrans",
1520                 .maxlen         = sizeof(unsigned int),
1521                 .mode           = 0644,
1522                 .proc_handler   = proc_dointvec,
1523         },
1524         { }
1525 };
1526 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1527 #endif /* CONFIG_SYSCTL */
1528
/*
 * Give @pn its own copy of tcp_sysctl_table and point every entry's .data
 * at the matching field of @tn.
 *
 * If @pn already has a table the function does nothing and reports success.
 * Without CONFIG_SYSCTL the whole body compiles away.
 *
 * Entries are addressed by position, so the indices below must follow the
 * order of the entries in tcp_sysctl_table.
 *
 * Returns 0 on success (or when there was nothing to do) and -ENOMEM when
 * the copy could not be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	/* A table is already attached: keep it. */
	if (pn->ctl_table != NULL)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (table == NULL)
		return -ENOMEM;

	/* Slots 0-9: per-state timeouts. */
	table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];

	/* Slots 10-12: the three integer tunables. */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1558
/*
 * Give @pn its own copy of tcp_compat_sysctl_table (the "ip_conntrack_*"
 * names) and point every entry's .data at the matching field of @tn.
 *
 * The body only exists when both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are set.  Note that, unlike
 * tcp_kmemdup_sysctl_table(), there is no "already allocated" check here:
 * pn->ctl_compat_table is overwritten unconditionally.
 *
 * Entries are addressed by position, so the indices below must follow the
 * order of the entries in tcp_compat_sysctl_table.
 *
 * Returns 0 on success and -ENOMEM when the copy could not be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	struct ctl_table *table;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = table;
	if (table == NULL)
		return -ENOMEM;

	/* Slots 0-9: per-state timeouts. */
	table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];

	/* Slots 10-12: the three integer tunables. */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
#endif
	return 0;
}
1587
1588 static int tcp_init_net(struct net *net, u_int16_t proto)
1589 {
1590         int ret;
1591         struct nf_tcp_net *tn = tcp_pernet(net);
1592         struct nf_proto_net *pn = &tn->pn;
1593
1594         if (!pn->users) {
1595                 int i;
1596
1597                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1598                         tn->timeouts[i] = tcp_timeouts[i];
1599
1600                 tn->tcp_loose = nf_ct_tcp_loose;
1601                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1602                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1603         }
1604
1605         if (proto == AF_INET) {
1606                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1607                 if (ret < 0)
1608                         return ret;
1609
1610                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1611                 if (ret < 0)
1612                         nf_ct_kfree_compat_sysctl_table(pn);
1613         } else
1614                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1615
1616         return ret;
1617 }
1618
1619 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1620 {
1621         return &net->ct.nf_ct_proto.tcp.pn;
1622 }
1623
1624 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1625 {
1626         .l3proto                = PF_INET,
1627         .l4proto                = IPPROTO_TCP,
1628         .name                   = "tcp",
1629         .pkt_to_tuple           = tcp_pkt_to_tuple,
1630         .invert_tuple           = tcp_invert_tuple,
1631         .print_tuple            = tcp_print_tuple,
1632         .print_conntrack        = tcp_print_conntrack,
1633         .packet                 = tcp_packet,
1634         .get_timeouts           = tcp_get_timeouts,
1635         .new                    = tcp_new,
1636         .error                  = tcp_error,
1637 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1638         .to_nlattr              = tcp_to_nlattr,
1639         .nlattr_size            = tcp_nlattr_size,
1640         .from_nlattr            = nlattr_to_tcp,
1641         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1642         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1643         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1644         .nla_policy             = nf_ct_port_nla_policy,
1645 #endif
1646 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1647         .ctnl_timeout           = {
1648                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1649                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1650                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1651                 .obj_size       = sizeof(unsigned int) *
1652                                         TCP_CONNTRACK_TIMEOUT_MAX,
1653                 .nla_policy     = tcp_timeout_nla_policy,
1654         },
1655 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1656         .init_net               = tcp_init_net,
1657         .get_net_proto          = tcp_get_net_proto,
1658 };
1659 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1660
1661 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1662 {
1663         .l3proto                = PF_INET6,
1664         .l4proto                = IPPROTO_TCP,
1665         .name                   = "tcp",
1666         .pkt_to_tuple           = tcp_pkt_to_tuple,
1667         .invert_tuple           = tcp_invert_tuple,
1668         .print_tuple            = tcp_print_tuple,
1669         .print_conntrack        = tcp_print_conntrack,
1670         .packet                 = tcp_packet,
1671         .get_timeouts           = tcp_get_timeouts,
1672         .new                    = tcp_new,
1673         .error                  = tcp_error,
1674 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1675         .to_nlattr              = tcp_to_nlattr,
1676         .nlattr_size            = tcp_nlattr_size,
1677         .from_nlattr            = nlattr_to_tcp,
1678         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1679         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1680         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1681         .nla_policy             = nf_ct_port_nla_policy,
1682 #endif
1683 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1684         .ctnl_timeout           = {
1685                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1686                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1687                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1688                 .obj_size       = sizeof(unsigned int) *
1689                                         TCP_CONNTRACK_TIMEOUT_MAX,
1690                 .nla_policy     = tcp_timeout_nla_policy,
1691         },
1692 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1693         .init_net               = tcp_init_net,
1694         .get_net_proto          = tcp_get_net_proto,
1695 };
1696 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);