netfilter: netns nf_conntrack: final netns tweaks
[linux-3.10.git] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <net/net_namespace.h>
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_expect.h>
28 #include <net/netfilter/nf_conntrack_helper.h>
29 #include <net/netfilter/nf_conntrack_tuple.h>
30
31 unsigned int nf_ct_expect_hsize __read_mostly;
32 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
33
34 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
35 unsigned int nf_ct_expect_max __read_mostly;
36 static int nf_ct_expect_hash_rnd_initted __read_mostly;
37
38 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
39
40 /* nf_conntrack_expect helper functions */
41 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
42 {
43         struct nf_conn_help *master_help = nfct_help(exp->master);
44         struct net *net = nf_ct_exp_net(exp);
45
46         NF_CT_ASSERT(master_help);
47         NF_CT_ASSERT(!timer_pending(&exp->timeout));
48
49         hlist_del_rcu(&exp->hnode);
50         net->ct.expect_count--;
51
52         hlist_del(&exp->lnode);
53         master_help->expecting[exp->class]--;
54         nf_ct_expect_put(exp);
55
56         NF_CT_STAT_INC(net, expect_delete);
57 }
58 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
59
60 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
61 {
62         struct nf_conntrack_expect *exp = (void *)ul_expect;
63
64         spin_lock_bh(&nf_conntrack_lock);
65         nf_ct_unlink_expect(exp);
66         spin_unlock_bh(&nf_conntrack_lock);
67         nf_ct_expect_put(exp);
68 }
69
70 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
71 {
72         unsigned int hash;
73
74         if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
75                 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
76                 nf_ct_expect_hash_rnd_initted = 1;
77         }
78
79         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
80                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
81                        (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
82         return ((u64)hash * nf_ct_expect_hsize) >> 32;
83 }
84
85 struct nf_conntrack_expect *
86 __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
87 {
88         struct nf_conntrack_expect *i;
89         struct hlist_node *n;
90         unsigned int h;
91
92         if (!net->ct.expect_count)
93                 return NULL;
94
95         h = nf_ct_expect_dst_hash(tuple);
96         hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
97                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
98                         return i;
99         }
100         return NULL;
101 }
102 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
103
104 /* Just find a expectation corresponding to a tuple. */
105 struct nf_conntrack_expect *
106 nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
107 {
108         struct nf_conntrack_expect *i;
109
110         rcu_read_lock();
111         i = __nf_ct_expect_find(net, tuple);
112         if (i && !atomic_inc_not_zero(&i->use))
113                 i = NULL;
114         rcu_read_unlock();
115
116         return i;
117 }
118 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
119
120 /* If an expectation for this connection is found, it gets delete from
121  * global list then returned. */
122 struct nf_conntrack_expect *
123 nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
124 {
125         struct nf_conntrack_expect *i, *exp = NULL;
126         struct hlist_node *n;
127         unsigned int h;
128
129         if (!net->ct.expect_count)
130                 return NULL;
131
132         h = nf_ct_expect_dst_hash(tuple);
133         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
134                 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
135                     nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
136                         exp = i;
137                         break;
138                 }
139         }
140         if (!exp)
141                 return NULL;
142
143         /* If master is not in hash table yet (ie. packet hasn't left
144            this machine yet), how can other end know about expected?
145            Hence these are not the droids you are looking for (if
146            master ct never got confirmed, we'd hold a reference to it
147            and weird things would happen to future packets). */
148         if (!nf_ct_is_confirmed(exp->master))
149                 return NULL;
150
151         if (exp->flags & NF_CT_EXPECT_PERMANENT) {
152                 atomic_inc(&exp->use);
153                 return exp;
154         } else if (del_timer(&exp->timeout)) {
155                 nf_ct_unlink_expect(exp);
156                 return exp;
157         }
158
159         return NULL;
160 }
161
162 /* delete all expectations for this conntrack */
163 void nf_ct_remove_expectations(struct nf_conn *ct)
164 {
165         struct nf_conn_help *help = nfct_help(ct);
166         struct nf_conntrack_expect *exp;
167         struct hlist_node *n, *next;
168
169         /* Optimization: most connection never expect any others. */
170         if (!help)
171                 return;
172
173         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
174                 if (del_timer(&exp->timeout)) {
175                         nf_ct_unlink_expect(exp);
176                         nf_ct_expect_put(exp);
177                 }
178         }
179 }
180 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
181
182 /* Would two expected things clash? */
183 static inline int expect_clash(const struct nf_conntrack_expect *a,
184                                const struct nf_conntrack_expect *b)
185 {
186         /* Part covered by intersection of masks must be unequal,
187            otherwise they clash */
188         struct nf_conntrack_tuple_mask intersect_mask;
189         int count;
190
191         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
192
193         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
194                 intersect_mask.src.u3.all[count] =
195                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
196         }
197
198         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
199 }
200
201 static inline int expect_matches(const struct nf_conntrack_expect *a,
202                                  const struct nf_conntrack_expect *b)
203 {
204         return a->master == b->master && a->class == b->class
205                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
206                 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
207 }
208
209 /* Generally a bad idea to call this: could have matched already. */
210 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
211 {
212         spin_lock_bh(&nf_conntrack_lock);
213         if (del_timer(&exp->timeout)) {
214                 nf_ct_unlink_expect(exp);
215                 nf_ct_expect_put(exp);
216         }
217         spin_unlock_bh(&nf_conntrack_lock);
218 }
219 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
220
221 /* We don't increase the master conntrack refcount for non-fulfilled
222  * conntracks. During the conntrack destruction, the expectations are
223  * always killed before the conntrack itself */
224 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
225 {
226         struct nf_conntrack_expect *new;
227
228         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
229         if (!new)
230                 return NULL;
231
232         new->master = me;
233         atomic_set(&new->use, 1);
234         INIT_RCU_HEAD(&new->rcu);
235         return new;
236 }
237 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
238
239 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
240                        u_int8_t family,
241                        const union nf_inet_addr *saddr,
242                        const union nf_inet_addr *daddr,
243                        u_int8_t proto, const __be16 *src, const __be16 *dst)
244 {
245         int len;
246
247         if (family == AF_INET)
248                 len = 4;
249         else
250                 len = 16;
251
252         exp->flags = 0;
253         exp->class = class;
254         exp->expectfn = NULL;
255         exp->helper = NULL;
256         exp->tuple.src.l3num = family;
257         exp->tuple.dst.protonum = proto;
258
259         if (saddr) {
260                 memcpy(&exp->tuple.src.u3, saddr, len);
261                 if (sizeof(exp->tuple.src.u3) > len)
262                         /* address needs to be cleared for nf_ct_tuple_equal */
263                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
264                                sizeof(exp->tuple.src.u3) - len);
265                 memset(&exp->mask.src.u3, 0xFF, len);
266                 if (sizeof(exp->mask.src.u3) > len)
267                         memset((void *)&exp->mask.src.u3 + len, 0x00,
268                                sizeof(exp->mask.src.u3) - len);
269         } else {
270                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
271                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
272         }
273
274         if (src) {
275                 exp->tuple.src.u.all = *src;
276                 exp->mask.src.u.all = htons(0xFFFF);
277         } else {
278                 exp->tuple.src.u.all = 0;
279                 exp->mask.src.u.all = 0;
280         }
281
282         memcpy(&exp->tuple.dst.u3, daddr, len);
283         if (sizeof(exp->tuple.dst.u3) > len)
284                 /* address needs to be cleared for nf_ct_tuple_equal */
285                 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
286                        sizeof(exp->tuple.dst.u3) - len);
287
288         exp->tuple.dst.u.all = *dst;
289 }
290 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
291
292 static void nf_ct_expect_free_rcu(struct rcu_head *head)
293 {
294         struct nf_conntrack_expect *exp;
295
296         exp = container_of(head, struct nf_conntrack_expect, rcu);
297         kmem_cache_free(nf_ct_expect_cachep, exp);
298 }
299
300 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
301 {
302         if (atomic_dec_and_test(&exp->use))
303                 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
304 }
305 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
306
307 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
308 {
309         struct nf_conn_help *master_help = nfct_help(exp->master);
310         struct net *net = nf_ct_exp_net(exp);
311         const struct nf_conntrack_expect_policy *p;
312         unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
313
314         atomic_inc(&exp->use);
315
316         hlist_add_head(&exp->lnode, &master_help->expectations);
317         master_help->expecting[exp->class]++;
318
319         hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
320         net->ct.expect_count++;
321
322         setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
323                     (unsigned long)exp);
324         p = &master_help->helper->expect_policy[exp->class];
325         exp->timeout.expires = jiffies + p->timeout * HZ;
326         add_timer(&exp->timeout);
327
328         atomic_inc(&exp->use);
329         NF_CT_STAT_INC(net, expect_create);
330 }
331
332 /* Race with expectations being used means we could have none to find; OK. */
333 static void evict_oldest_expect(struct nf_conn *master,
334                                 struct nf_conntrack_expect *new)
335 {
336         struct nf_conn_help *master_help = nfct_help(master);
337         struct nf_conntrack_expect *exp, *last = NULL;
338         struct hlist_node *n;
339
340         hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
341                 if (exp->class == new->class)
342                         last = exp;
343         }
344
345         if (last && del_timer(&last->timeout)) {
346                 nf_ct_unlink_expect(last);
347                 nf_ct_expect_put(last);
348         }
349 }
350
351 static inline int refresh_timer(struct nf_conntrack_expect *i)
352 {
353         struct nf_conn_help *master_help = nfct_help(i->master);
354         const struct nf_conntrack_expect_policy *p;
355
356         if (!del_timer(&i->timeout))
357                 return 0;
358
359         p = &master_help->helper->expect_policy[i->class];
360         i->timeout.expires = jiffies + p->timeout * HZ;
361         add_timer(&i->timeout);
362         return 1;
363 }
364
365 int nf_ct_expect_related(struct nf_conntrack_expect *expect)
366 {
367         const struct nf_conntrack_expect_policy *p;
368         struct nf_conntrack_expect *i;
369         struct nf_conn *master = expect->master;
370         struct nf_conn_help *master_help = nfct_help(master);
371         struct net *net = nf_ct_exp_net(expect);
372         struct hlist_node *n;
373         unsigned int h;
374         int ret;
375
376         NF_CT_ASSERT(master_help);
377
378         spin_lock_bh(&nf_conntrack_lock);
379         if (!master_help->helper) {
380                 ret = -ESHUTDOWN;
381                 goto out;
382         }
383         h = nf_ct_expect_dst_hash(&expect->tuple);
384         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
385                 if (expect_matches(i, expect)) {
386                         /* Refresh timer: if it's dying, ignore.. */
387                         if (refresh_timer(i)) {
388                                 ret = 0;
389                                 goto out;
390                         }
391                 } else if (expect_clash(i, expect)) {
392                         ret = -EBUSY;
393                         goto out;
394                 }
395         }
396         /* Will be over limit? */
397         p = &master_help->helper->expect_policy[expect->class];
398         if (p->max_expected &&
399             master_help->expecting[expect->class] >= p->max_expected) {
400                 evict_oldest_expect(master, expect);
401                 if (master_help->expecting[expect->class] >= p->max_expected) {
402                         ret = -EMFILE;
403                         goto out;
404                 }
405         }
406
407         if (net->ct.expect_count >= nf_ct_expect_max) {
408                 if (net_ratelimit())
409                         printk(KERN_WARNING
410                                "nf_conntrack: expectation table full\n");
411                 ret = -EMFILE;
412                 goto out;
413         }
414
415         nf_ct_expect_insert(expect);
416         nf_ct_expect_event(IPEXP_NEW, expect);
417         ret = 0;
418 out:
419         spin_unlock_bh(&nf_conntrack_lock);
420         return ret;
421 }
422 EXPORT_SYMBOL_GPL(nf_ct_expect_related);
423
424 #ifdef CONFIG_PROC_FS
425 struct ct_expect_iter_state {
426         struct seq_net_private p;
427         unsigned int bucket;
428 };
429
430 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
431 {
432         struct net *net = seq_file_net(seq);
433         struct ct_expect_iter_state *st = seq->private;
434         struct hlist_node *n;
435
436         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
437                 n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
438                 if (n)
439                         return n;
440         }
441         return NULL;
442 }
443
444 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
445                                              struct hlist_node *head)
446 {
447         struct net *net = seq_file_net(seq);
448         struct ct_expect_iter_state *st = seq->private;
449
450         head = rcu_dereference(head->next);
451         while (head == NULL) {
452                 if (++st->bucket >= nf_ct_expect_hsize)
453                         return NULL;
454                 head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
455         }
456         return head;
457 }
458
459 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
460 {
461         struct hlist_node *head = ct_expect_get_first(seq);
462
463         if (head)
464                 while (pos && (head = ct_expect_get_next(seq, head)))
465                         pos--;
466         return pos ? NULL : head;
467 }
468
469 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
470         __acquires(RCU)
471 {
472         rcu_read_lock();
473         return ct_expect_get_idx(seq, *pos);
474 }
475
476 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
477 {
478         (*pos)++;
479         return ct_expect_get_next(seq, v);
480 }
481
482 static void exp_seq_stop(struct seq_file *seq, void *v)
483         __releases(RCU)
484 {
485         rcu_read_unlock();
486 }
487
488 static int exp_seq_show(struct seq_file *s, void *v)
489 {
490         struct nf_conntrack_expect *expect;
491         struct hlist_node *n = v;
492         char *delim = "";
493
494         expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
495
496         if (expect->timeout.function)
497                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
498                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
499         else
500                 seq_printf(s, "- ");
501         seq_printf(s, "l3proto = %u proto=%u ",
502                    expect->tuple.src.l3num,
503                    expect->tuple.dst.protonum);
504         print_tuple(s, &expect->tuple,
505                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
506                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
507                                        expect->tuple.dst.protonum));
508
509         if (expect->flags & NF_CT_EXPECT_PERMANENT) {
510                 seq_printf(s, "PERMANENT");
511                 delim = ",";
512         }
513         if (expect->flags & NF_CT_EXPECT_INACTIVE)
514                 seq_printf(s, "%sINACTIVE", delim);
515
516         return seq_putc(s, '\n');
517 }
518
519 static const struct seq_operations exp_seq_ops = {
520         .start = exp_seq_start,
521         .next = exp_seq_next,
522         .stop = exp_seq_stop,
523         .show = exp_seq_show
524 };
525
526 static int exp_open(struct inode *inode, struct file *file)
527 {
528         return seq_open_net(inode, file, &exp_seq_ops,
529                         sizeof(struct ct_expect_iter_state));
530 }
531
532 static const struct file_operations exp_file_ops = {
533         .owner   = THIS_MODULE,
534         .open    = exp_open,
535         .read    = seq_read,
536         .llseek  = seq_lseek,
537         .release = seq_release_net,
538 };
539 #endif /* CONFIG_PROC_FS */
540
/*
 * Create the "nf_conntrack_expect" proc entry for @net.
 * Returns 0 on success (or when CONFIG_PROC_FS is off), -ENOMEM on failure.
 */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "nf_conntrack_expect", 0440,
				  &exp_file_ops))
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}
552
/* Remove the proc entry created by exp_proc_init(); no-op without procfs. */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
559
560 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
561
562 int nf_conntrack_expect_init(struct net *net)
563 {
564         int err = -ENOMEM;
565
566         if (net_eq(net, &init_net)) {
567                 if (!nf_ct_expect_hsize) {
568                         nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
569                         if (!nf_ct_expect_hsize)
570                                 nf_ct_expect_hsize = 1;
571                 }
572                 nf_ct_expect_max = nf_ct_expect_hsize * 4;
573         }
574
575         net->ct.expect_count = 0;
576         net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
577                                                   &net->ct.expect_vmalloc);
578         if (net->ct.expect_hash == NULL)
579                 goto err1;
580
581         if (net_eq(net, &init_net)) {
582                 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
583                                         sizeof(struct nf_conntrack_expect),
584                                         0, 0, NULL);
585                 if (!nf_ct_expect_cachep)
586                         goto err2;
587         }
588
589         err = exp_proc_init(net);
590         if (err < 0)
591                 goto err3;
592
593         return 0;
594
595 err3:
596         if (net_eq(net, &init_net))
597                 kmem_cache_destroy(nf_ct_expect_cachep);
598 err2:
599         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
600                              nf_ct_expect_hsize);
601 err1:
602         return err;
603 }
604
605 void nf_conntrack_expect_fini(struct net *net)
606 {
607         exp_proc_remove(net);
608         if (net_eq(net, &init_net))
609                 kmem_cache_destroy(nf_ct_expect_cachep);
610         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
611                              nf_ct_expect_hsize);
612 }