netfilter: netns nf_conntrack: per-netns expectations
net/netfilter/nf_conntrack_expect.c
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

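/* Timer callback: the expectation expired without being matched, so take it
 * off the lists and drop the reference held by the timer. */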
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

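/* Hash an expectation's destination tuple into the per-netns expectation
 * hash table (nf_ct_expect_hsize buckets). */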
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd, 4);
                nf_ct_expect_hash_rnd_initted = 1;
        }

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

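/* Look up an expectation matching @tuple in @net's expectation table.
 * Caller must hold rcu_read_lock() or nf_conntrack_lock; the reference
 * count of the returned expectation is not bumped. */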
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is removed from the
 * global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If the master is not in the hash table yet (ie. the packet hasn't
           left this machine yet), how can the other end know about the
           expectation?  Hence these are not the droids you are looking for
           (if the master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* Two expectations clash if their tuples are equal on the part
           covered by the intersection of their masks. */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        INIT_RCU_HEAD(&new->rcu);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

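/* Fill in the expectation's tuple and mask from the given class, family,
 * addresses and ports.  A NULL saddr or src acts as a wildcard (the
 * corresponding mask bits are cleared); daddr and dst must be given. */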
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

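/* Drop a reference; the final put frees the expectation after an RCU
 * grace period via nf_ct_expect_free_rcu(). */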
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

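/* Link the expectation into the master's helper list and the per-netns
 * hash table and arm its timeout.  Caller must hold nf_conntrack_lock. */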
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        atomic_inc(&exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        p = &master_help->helper->expect_policy[exp->class];
        exp->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&exp->timeout);

        atomic_inc(&exp->use);
        NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

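/* Restart the per-class timeout on an existing, identical expectation.
 * Returns 0 if the timer had already fired (the expectation is dying). */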
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

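/* Register a new expectation: an identical existing one just has its timer
 * refreshed, a clashing one makes us fail with -EBUSY, and the per-helper
 * and global expectation limits are enforced before inserting. */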
int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret;

        NF_CT_ASSERT(master_help);

        spin_lock_bh(&nf_conntrack_lock);
        if (!master_help->helper) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore it. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Would we go over the per-helper limit? */
        p = &master_help->helper->expect_policy[expect->class];
        if (p->max_expected &&
            master_help->expecting[expect->class] >= p->max_expected) {
                evict_oldest_expect(master, expect);
                if (master_help->expecting[expect->class] >= p->max_expected) {
                        ret = -EMFILE;
                        goto out;
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
                goto out;
        }

        nf_ct_expect_insert(expect);
        nf_ct_expect_event(IPEXP_NEW, expect);
        ret = 0;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        unsigned int bucket;
};

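/* seq_file iterator for /proc/net/nf_conntrack_expect; note that it still
 * walks only init_net's expectation table. */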
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = &init_net;
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = &init_net;
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE)
                seq_printf(s, "%sINACTIVE", delim);

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &exp_seq_ops,
                                sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(void)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(void)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(&init_net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

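/* Per-netns initialization: size the hash from nf_conntrack_htable_size on
 * first use, allocate this netns's expectation hash table, and set up the
 * (global) expectation slab cache and proc entry. */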
int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

        if (!nf_ct_expect_hsize) {
                nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
                if (!nf_ct_expect_hsize)
                        nf_ct_expect_hsize = 1;
        }
        nf_ct_expect_max = nf_ct_expect_hsize * 4;

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                    &net->ct.expect_vmalloc);
        if (net->ct.expect_hash == NULL)
                goto err1;

        nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
        if (!nf_ct_expect_cachep)
                goto err2;

        err = exp_proc_init();
        if (err < 0)
                goto err3;

        return 0;

err3:
        kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

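/* Per-netns counterpart of nf_conntrack_expect_init(): remove the proc
 * entry, destroy the expectation cache and free this netns's hash table. */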
void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove();
        kmem_cache_destroy(nf_ct_expect_cachep);
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
}