/* [NETFILTER]: nf_conntrack_expect: support inactive expectations
 * net/netfilter/nf_conntrack_expect.c
 */
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <net/net_namespace.h>
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_expect.h>
28 #include <net/netfilter/nf_conntrack_helper.h>
29 #include <net/netfilter/nf_conntrack_tuple.h>
30
/* Global hash table of pending expectations, keyed by the expected
 * connection's destination tuple (see nf_ct_expect_dst_hash()). */
struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

/* Number of buckets in nf_ct_expect_hash; settable via the
 * expect_hashsize module parameter, otherwise derived from
 * nf_conntrack_htable_size in nf_conntrack_expect_init(). */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

/* Random seed for the expectation hash, seeded lazily on first use. */
static unsigned int nf_ct_expect_hash_rnd __read_mostly;
/* Number of expectations currently linked into the global hash. */
static unsigned int nf_ct_expect_count;
/* Upper bound on simultaneously registered expectations. */
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;
/* Non-zero if the hash table was vmalloc()ed rather than page-allocated. */
static int nf_ct_expect_vmalloc;

/* Slab cache for struct nf_conntrack_expect objects. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
44
/* nf_conntrack_expect helper functions */

/* Unlink @exp from the global hash and its master's expectation list
 * and drop the list's reference.  Caller must hold nf_conntrack_lock
 * (all callers in this file take it first) and must already have
 * stopped the timeout timer. */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	/* Remove from the global hash; RCU-safe for lockless readers. */
	hlist_del_rcu(&exp->hnode);
	nf_ct_expect_count--;

	/* Remove from the master conntrack's private list. */
	hlist_del(&exp->lnode);
	master_help->expecting--;
	nf_ct_expect_put(exp);	/* drop the list's reference */

	NF_CT_STAT_INC(expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
63
64 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
65 {
66         struct nf_conntrack_expect *exp = (void *)ul_expect;
67
68         spin_lock_bh(&nf_conntrack_lock);
69         nf_ct_unlink_expect(exp);
70         spin_unlock_bh(&nf_conntrack_lock);
71         nf_ct_expect_put(exp);
72 }
73
74 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75 {
76         unsigned int hash;
77
78         if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
79                 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
80                 nf_ct_expect_hash_rnd_initted = 1;
81         }
82
83         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
84                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
85                        (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
86         return ((u64)hash * nf_ct_expect_hsize) >> 32;
87 }
88
89 struct nf_conntrack_expect *
90 __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
91 {
92         struct nf_conntrack_expect *i;
93         struct hlist_node *n;
94         unsigned int h;
95
96         if (!nf_ct_expect_count)
97                 return NULL;
98
99         h = nf_ct_expect_dst_hash(tuple);
100         hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
101                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
102                         return i;
103         }
104         return NULL;
105 }
106 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
107
108 /* Just find a expectation corresponding to a tuple. */
109 struct nf_conntrack_expect *
110 nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
111 {
112         struct nf_conntrack_expect *i;
113
114         rcu_read_lock();
115         i = __nf_ct_expect_find(tuple);
116         if (i && !atomic_inc_not_zero(&i->use))
117                 i = NULL;
118         rcu_read_unlock();
119
120         return i;
121 }
122 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
123
/* If an expectation for this connection is found, it gets delete from
 * global list then returned.  The caller receives a reference on the
 * returned expectation. */
struct nf_conntrack_expect *
nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!nf_ct_expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	/* Skip expectations flagged NF_CT_EXPECT_INACTIVE: they are
	 * registered (e.g. to reserve a tuple) but must not match
	 * incoming connections. */
	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		/* Permanent expectations survive a match: take an extra
		 * reference for the caller and leave the entry in place. */
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		/* One-shot expectation: we beat the timer, so unlink it.
		 * The reference previously held by the timer is handed
		 * over to the caller. */
		nf_ct_unlink_expect(exp);
		return exp;
	}
	/* del_timer() failed: the timeout handler is already tearing
	 * this entry down, so pretend we never saw it. */

	return NULL;
}
165
166 /* delete all expectations for this conntrack */
167 void nf_ct_remove_expectations(struct nf_conn *ct)
168 {
169         struct nf_conn_help *help = nfct_help(ct);
170         struct nf_conntrack_expect *exp;
171         struct hlist_node *n, *next;
172
173         /* Optimization: most connection never expect any others. */
174         if (!help || help->expecting == 0)
175                 return;
176
177         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
178                 if (del_timer(&exp->timeout)) {
179                         nf_ct_unlink_expect(exp);
180                         nf_ct_expect_put(exp);
181                 }
182         }
183 }
184 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
185
186 /* Would two expected things clash? */
187 static inline int expect_clash(const struct nf_conntrack_expect *a,
188                                const struct nf_conntrack_expect *b)
189 {
190         /* Part covered by intersection of masks must be unequal,
191            otherwise they clash */
192         struct nf_conntrack_tuple_mask intersect_mask;
193         int count;
194
195         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
196
197         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
198                 intersect_mask.src.u3.all[count] =
199                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
200         }
201
202         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
203 }
204
205 static inline int expect_matches(const struct nf_conntrack_expect *a,
206                                  const struct nf_conntrack_expect *b)
207 {
208         return a->master == b->master
209                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
210                 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
211 }
212
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	/* Only unlink if we beat the timer to it; if del_timer() fails
	 * the timeout handler is already cleaning this entry up. */
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);	/* drop the timer's reference */
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
224
225 /* We don't increase the master conntrack refcount for non-fulfilled
226  * conntracks. During the conntrack destruction, the expectations are
227  * always killed before the conntrack itself */
228 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
229 {
230         struct nf_conntrack_expect *new;
231
232         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
233         if (!new)
234                 return NULL;
235
236         new->master = me;
237         atomic_set(&new->use, 1);
238         INIT_RCU_HEAD(&new->rcu);
239         return new;
240 }
241 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
242
/* Fill in @exp's tuple and mask for @family (AF_INET or AF_INET6).
 * A NULL @saddr or @src acts as a wildcard: the field is zeroed in
 * both tuple and mask.  @daddr and @dst are mandatory.  Flags,
 * expectfn and helper are reset to their defaults. */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	/* Address length in bytes: 4 for IPv4, 16 for IPv6. */
	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		/* Match the full source address: all-ones mask. */
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		/* Wildcard source address: zero tuple and mask. */
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		/* Wildcard source port/id. */
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
293
294 static void nf_ct_expect_free_rcu(struct rcu_head *head)
295 {
296         struct nf_conntrack_expect *exp;
297
298         exp = container_of(head, struct nf_conntrack_expect, rcu);
299         kmem_cache_free(nf_ct_expect_cachep, exp);
300 }
301
/* Drop a reference on @exp; the final put defers freeing to an RCU
 * callback so concurrent hash readers can finish safely. */
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
308
/* Link @exp into its master's list and the global hash, and start its
 * timeout timer.  Takes two references: one for the master's list and
 * one for the pending timer.  Caller holds nf_conntrack_lock. */
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* Reference held by the master conntrack's expectation list. */
	atomic_inc(&exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting++;

	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
	nf_ct_expect_count++;

	/* Lifetime comes from the registering helper's configuration. */
	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	/* Reference held by the running timer. */
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}
330
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp = NULL;
	struct hlist_node *n;

	/* Entries are prepended in nf_ct_expect_insert(), so walking the
	 * whole list leaves exp pointing at the oldest entry (or NULL if
	 * the list is empty). */
	hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
		; /* nothing */

	/* Only evict if we beat the timer; otherwise the timeout handler
	 * is already removing it. */
	if (exp && del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
}
346
347 static inline int refresh_timer(struct nf_conntrack_expect *i)
348 {
349         struct nf_conn_help *master_help = nfct_help(i->master);
350
351         if (!del_timer(&i->timeout))
352                 return 0;
353
354         i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
355         add_timer(&i->timeout);
356         return 1;
357 }
358
/* Register @expect on behalf of its master conntrack.  An identical
 * pending expectation just has its timer refreshed; a clashing one
 * makes registration fail.  Returns 0 on success, -ESHUTDOWN if the
 * master has lost its helper, -EBUSY on a clash, -EMFILE if the
 * global table is full. */
int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct hlist_node *n;
	unsigned int h;
	int ret;

	NF_CT_ASSERT(master_help);

	spin_lock_bh(&nf_conntrack_lock);
	if (!master_help->helper) {
		/* Helper was detached; nothing to expect for. */
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	if (master_help->helper->max_expected &&
	    master_help->expecting >= master_help->helper->max_expected)
		evict_oldest_expect(master);

	if (nf_ct_expect_count >= nf_ct_expect_max) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "nf_conntrack: expectation table full\n");
		ret = -EMFILE;
		goto out;
	}

	nf_ct_expect_insert(expect);
	nf_ct_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);
409
#ifdef CONFIG_PROC_FS
/* Per-reader seq_file cursor: index of the hash bucket being walked. */
struct ct_expect_iter_state {
	unsigned int bucket;
};
414
415 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
416 {
417         struct ct_expect_iter_state *st = seq->private;
418         struct hlist_node *n;
419
420         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
421                 n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
422                 if (n)
423                         return n;
424         }
425         return NULL;
426 }
427
428 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
429                                              struct hlist_node *head)
430 {
431         struct ct_expect_iter_state *st = seq->private;
432
433         head = rcu_dereference(head->next);
434         while (head == NULL) {
435                 if (++st->bucket >= nf_ct_expect_hsize)
436                         return NULL;
437                 head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
438         }
439         return head;
440 }
441
442 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
443 {
444         struct hlist_node *head = ct_expect_get_first(seq);
445
446         if (head)
447                 while (pos && (head = ct_expect_get_next(seq, head)))
448                         pos--;
449         return pos ? NULL : head;
450 }
451
/* seq_file start: enter an RCU read-side section and seek to *pos. */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}
458
/* seq_file next: advance the cursor by one expectation entry. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}
464
/* seq_file stop: leave the RCU section entered in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
470
/* Format one expectation for /proc/net/nf_conntrack_expect:
 * remaining timeout, protocols, expected tuple and flag names. */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	/* Remaining lifetime in seconds, or "-" if no timer was set up. */
	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	/* Comma-separated flag list, e.g. "PERMANENT,INACTIVE". */
	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE)
		seq_printf(s, "%sINACTIVE", delim);

	return seq_putc(s, '\n');
}
501
/* seq_file iterator operations for the expectation table. */
static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

/* open() handler: attach a private iterator state to the seq_file. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

/* File operations for /proc/net/nf_conntrack_expect. */
static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
522 #endif /* CONFIG_PROC_FS */
523
/* Register /proc/net/nf_conntrack_expect (a no-op returning success
 * when CONFIG_PROC_FS is disabled). */
static int __init exp_proc_init(void)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}
535
/* Unregister the /proc entry created by exp_proc_init(). */
static void exp_proc_remove(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(&init_net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
542
/* Allow the expectation hash size to be set at module load time. */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
544
545 int __init nf_conntrack_expect_init(void)
546 {
547         int err = -ENOMEM;
548
549         if (!nf_ct_expect_hsize) {
550                 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
551                 if (!nf_ct_expect_hsize)
552                         nf_ct_expect_hsize = 1;
553         }
554         nf_ct_expect_max = nf_ct_expect_hsize * 4;
555
556         nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
557                                                   &nf_ct_expect_vmalloc);
558         if (nf_ct_expect_hash == NULL)
559                 goto err1;
560
561         nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
562                                         sizeof(struct nf_conntrack_expect),
563                                         0, 0, NULL);
564         if (!nf_ct_expect_cachep)
565                 goto err2;
566
567         err = exp_proc_init();
568         if (err < 0)
569                 goto err3;
570
571         return 0;
572
573 err3:
574         nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
575                              nf_ct_expect_hsize);
576 err2:
577         kmem_cache_destroy(nf_ct_expect_cachep);
578 err1:
579         return err;
580 }
581
/* Module exit: tear down in reverse order of nf_conntrack_expect_init(). */
void nf_conntrack_expect_fini(void)
{
	exp_proc_remove();
	kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
			     nf_ct_expect_hsize);
}