8202c1c0afadaee88dc1a3d29e9583ba443570d8
[linux-2.6.git] / net / ipv4 / netfilter / nf_conntrack_l3proto_ipv4.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9  *      - move L3 protocol dependent part to this file.
10  * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
11  *      - add get_features() to support various size of conntrack
12  *        structures.
13  *
14  * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
15  */
16
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/ip.h>
20 #include <linux/netfilter.h>
21 #include <linux/module.h>
22 #include <linux/skbuff.h>
23 #include <linux/icmp.h>
24 #include <linux/sysctl.h>
25 #include <net/ip.h>
26
27 #include <linux/netfilter_ipv4.h>
28 #include <net/netfilter/nf_conntrack.h>
29 #include <net/netfilter/nf_conntrack_helper.h>
30 #include <net/netfilter/nf_conntrack_protocol.h>
31 #include <net/netfilter/nf_conntrack_l3proto.h>
32 #include <net/netfilter/nf_conntrack_core.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34
35 #if 0
36 #define DEBUGP printk
37 #else
38 #define DEBUGP(format, args...)
39 #endif
40
41 DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
42
43 static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
44                              struct nf_conntrack_tuple *tuple)
45 {
46         u_int32_t _addrs[2], *ap;
47         ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
48                                 sizeof(u_int32_t) * 2, _addrs);
49         if (ap == NULL)
50                 return 0;
51
52         tuple->src.u3.ip = ap[0];
53         tuple->dst.u3.ip = ap[1];
54
55         return 1;
56 }
57
58 static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
59                            const struct nf_conntrack_tuple *orig)
60 {
61         tuple->src.u3.ip = orig->dst.u3.ip;
62         tuple->dst.u3.ip = orig->src.u3.ip;
63
64         return 1;
65 }
66
67 static int ipv4_print_tuple(struct seq_file *s,
68                             const struct nf_conntrack_tuple *tuple)
69 {
70         return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
71                           NIPQUAD(tuple->src.u3.ip),
72                           NIPQUAD(tuple->dst.u3.ip));
73 }
74
/* print_conntrack callback for IPv4: nothing is written to @s and
   @conntrack is not examined.  Always returns 0. */
static int ipv4_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	return 0;
}
80
81 /* Returns new sk_buff, or NULL */
82 static struct sk_buff *
83 nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
84 {
85         skb_orphan(skb);
86
87         local_bh_disable();
88         skb = ip_defrag(skb, user);
89         local_bh_enable();
90
91         if (skb)
92                 ip_send_check(skb->nh.iph);
93
94         return skb;
95 }
96
97 static int
98 ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
99              u_int8_t *protonum)
100 {
101         /* Never happen */
102         if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
103                 if (net_ratelimit()) {
104                         printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
105                         (*pskb)->nh.iph->protocol, hooknum);
106                 }
107                 return -NF_DROP;
108         }
109
110         *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
111         *protonum = (*pskb)->nh.iph->protocol;
112
113         return NF_ACCEPT;
114 }
115
116 int nat_module_is_loaded = 0;
117 static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
118 {
119         if (nat_module_is_loaded)
120                 return NF_CT_F_NAT;
121
122         return NF_CT_F_BASIC;
123 }
124
/* Netfilter hook: confirm the connection attached to *@pskb.
   Only @pskb is used; the verdict is whatever nf_conntrack_confirm()
   returns. */
static unsigned int ipv4_confirm(unsigned int hooknum,
				 struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	unsigned int verdict;

	/* We've seen it coming out the other side: confirm it */
	verdict = nf_conntrack_confirm(pskb);

	return verdict;
}
134
135 static unsigned int ipv4_conntrack_help(unsigned int hooknum,
136                                       struct sk_buff **pskb,
137                                       const struct net_device *in,
138                                       const struct net_device *out,
139                                       int (*okfn)(struct sk_buff *))
140 {
141         struct nf_conn *ct;
142         enum ip_conntrack_info ctinfo;
143
144         /* This is where we call the helper: as the packet goes out. */
145         ct = nf_ct_get(*pskb, &ctinfo);
146         if (ct && ct->helper) {
147                 unsigned int ret;
148                 ret = ct->helper->help(pskb,
149                                        (*pskb)->nh.raw - (*pskb)->data
150                                                        + (*pskb)->nh.iph->ihl*4,
151                                        ct, ctinfo);
152                 if (ret != NF_ACCEPT)
153                         return ret;
154         }
155         return NF_ACCEPT;
156 }
157
158 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
159                                           struct sk_buff **pskb,
160                                           const struct net_device *in,
161                                           const struct net_device *out,
162                                           int (*okfn)(struct sk_buff *))
163 {
164 #if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
165         /* Previously seen (loopback)?  Ignore.  Do this before
166            fragment check. */
167         if ((*pskb)->nfct)
168                 return NF_ACCEPT;
169 #endif
170
171         /* Gather fragments. */
172         if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
173                 *pskb = nf_ct_ipv4_gather_frags(*pskb,
174                                                 hooknum == NF_IP_PRE_ROUTING ?
175                                                 IP_DEFRAG_CONNTRACK_IN :
176                                                 IP_DEFRAG_CONNTRACK_OUT);
177                 if (!*pskb)
178                         return NF_STOLEN;
179         }
180         return NF_ACCEPT;
181 }
182
183 static unsigned int ipv4_refrag(unsigned int hooknum,
184                                 struct sk_buff **pskb,
185                                 const struct net_device *in,
186                                 const struct net_device *out,
187                                 int (*okfn)(struct sk_buff *))
188 {
189         struct rtable *rt = (struct rtable *)(*pskb)->dst;
190
191         /* We've seen it coming out the other side: confirm */
192         if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
193                 return NF_DROP;
194
195         /* Local packets are never produced too large for their
196            interface.  We degfragment them at LOCAL_OUT, however,
197            so we have to refragment them here. */
198         if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
199             !skb_shinfo(*pskb)->tso_size) {
200                 /* No hook can be after us, so this should be OK. */
201                 ip_fragment(*pskb, okfn);
202                 return NF_STOLEN;
203         }
204         return NF_ACCEPT;
205 }
206
207 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
208                                       struct sk_buff **pskb,
209                                       const struct net_device *in,
210                                       const struct net_device *out,
211                                       int (*okfn)(struct sk_buff *))
212 {
213         return nf_conntrack_in(PF_INET, hooknum, pskb);
214 }
215
216 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
217                                          struct sk_buff **pskb,
218                                          const struct net_device *in,
219                                          const struct net_device *out,
220                                          int (*okfn)(struct sk_buff *))
221 {
222         /* root is playing with raw sockets. */
223         if ((*pskb)->len < sizeof(struct iphdr)
224             || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
225                 if (net_ratelimit())
226                         printk("ipt_hook: happy cracking.\n");
227                 return NF_ACCEPT;
228         }
229         return nf_conntrack_in(PF_INET, hooknum, pskb);
230 }
231
232 /* Connection tracking may drop packets, but never alters them, so
233    make it the first hook. */
234 static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
235         .hook           = ipv4_conntrack_defrag,
236         .owner          = THIS_MODULE,
237         .pf             = PF_INET,
238         .hooknum        = NF_IP_PRE_ROUTING,
239         .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
240 };
241
242 static struct nf_hook_ops ipv4_conntrack_in_ops = {
243         .hook           = ipv4_conntrack_in,
244         .owner          = THIS_MODULE,
245         .pf             = PF_INET,
246         .hooknum        = NF_IP_PRE_ROUTING,
247         .priority       = NF_IP_PRI_CONNTRACK,
248 };
249
250 static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
251         .hook           = ipv4_conntrack_defrag,
252         .owner          = THIS_MODULE,
253         .pf             = PF_INET,
254         .hooknum        = NF_IP_LOCAL_OUT,
255         .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
256 };
257
258 static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
259         .hook           = ipv4_conntrack_local,
260         .owner          = THIS_MODULE,
261         .pf             = PF_INET,
262         .hooknum        = NF_IP_LOCAL_OUT,
263         .priority       = NF_IP_PRI_CONNTRACK,
264 };
265
266 /* helpers */
267 static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
268         .hook           = ipv4_conntrack_help,
269         .owner          = THIS_MODULE,
270         .pf             = PF_INET,
271         .hooknum        = NF_IP_POST_ROUTING,
272         .priority       = NF_IP_PRI_CONNTRACK_HELPER,
273 };
274
275 static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
276         .hook           = ipv4_conntrack_help,
277         .owner          = THIS_MODULE,
278         .pf             = PF_INET,
279         .hooknum        = NF_IP_LOCAL_IN,
280         .priority       = NF_IP_PRI_CONNTRACK_HELPER,
281 };
282
283
284 /* Refragmenter; last chance. */
285 static struct nf_hook_ops ipv4_conntrack_out_ops = {
286         .hook           = ipv4_refrag,
287         .owner          = THIS_MODULE,
288         .pf             = PF_INET,
289         .hooknum        = NF_IP_POST_ROUTING,
290         .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
291 };
292
293 static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
294         .hook           = ipv4_confirm,
295         .owner          = THIS_MODULE,
296         .pf             = PF_INET,
297         .hooknum        = NF_IP_LOCAL_IN,
298         .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
299 };
300
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmp.c */
extern unsigned long nf_ct_icmp_timeout;

/* Handle returned by register_sysctl_table(); needed to unregister. */
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;

/* Leaf level: the "nf_conntrack_icmp_timeout" entry.
   NOTE(review): .data points at an unsigned long while .maxlen is
   sizeof(unsigned int) and the handler is proc_dointvec_jiffies; the
   sizes differ on 64-bit builds -- confirm against the definition of
   nf_ct_icmp_timeout. */
static ctl_table nf_ct_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_icmp_timeout",
		.ctl_name	= NET_NF_CONNTRACK_ICMP_TIMEOUT,
		.mode		= 0644,
		.data		= &nf_ct_icmp_timeout,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{ .ctl_name = 0 }
};

/* Middle level: the "netfilter" directory holding the table above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.procname	= "netfilter",
		.ctl_name	= NET_NETFILTER,
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Top level: the "net" directory; this is the table that gets
   registered. */
static ctl_table nf_ct_net_table[] = {
	{
		.procname	= "net",
		.ctl_name	= CTL_NET,
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
338
339 /* Fast function for those who don't want to parse /proc (and I don't
340    blame them). */
341 /* Reversing the socket's dst/src point of view gives us the reply
342    mapping. */
343 static int
344 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
345 {
346         struct inet_sock *inet = inet_sk(sk);
347         struct nf_conntrack_tuple_hash *h;
348         struct nf_conntrack_tuple tuple;
349         
350         NF_CT_TUPLE_U_BLANK(&tuple);
351         tuple.src.u3.ip = inet->rcv_saddr;
352         tuple.src.u.tcp.port = inet->sport;
353         tuple.dst.u3.ip = inet->daddr;
354         tuple.dst.u.tcp.port = inet->dport;
355         tuple.src.l3num = PF_INET;
356         tuple.dst.protonum = IPPROTO_TCP;
357
358         /* We only do TCP at the moment: is there a better way? */
359         if (strcmp(sk->sk_prot->name, "TCP")) {
360                 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
361                 return -ENOPROTOOPT;
362         }
363
364         if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
365                 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
366                        *len, sizeof(struct sockaddr_in));
367                 return -EINVAL;
368         }
369
370         h = nf_conntrack_find_get(&tuple, NULL);
371         if (h) {
372                 struct sockaddr_in sin;
373                 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
374
375                 sin.sin_family = AF_INET;
376                 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
377                         .tuple.dst.u.tcp.port;
378                 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
379                         .tuple.dst.u3.ip;
380
381                 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
382                        NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
383                 nf_ct_put(ct);
384                 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
385                         return -EFAULT;
386                 else
387                         return 0;
388         }
389         DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
390                NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
391                NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
392         return -ENOENT;
393 }
394
395 static struct nf_sockopt_ops so_getorigdst = {
396         .pf             = PF_INET,
397         .get_optmin     = SO_ORIGINAL_DST,
398         .get_optmax     = SO_ORIGINAL_DST+1,
399         .get            = &getorigdst,
400 };
401
402 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
403         .l3proto         = PF_INET,
404         .name            = "ipv4",
405         .pkt_to_tuple    = ipv4_pkt_to_tuple,
406         .invert_tuple    = ipv4_invert_tuple,
407         .print_tuple     = ipv4_print_tuple,
408         .print_conntrack = ipv4_print_conntrack,
409         .prepare         = ipv4_prepare,
410         .get_features    = ipv4_get_features,
411         .me              = THIS_MODULE,
412 };
413
414 extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
415 extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
416 extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
417 static int init_or_cleanup(int init)
418 {
419         int ret = 0;
420
421         if (!init) goto cleanup;
422
423         ret = nf_register_sockopt(&so_getorigdst);
424         if (ret < 0) {
425                 printk(KERN_ERR "Unable to register netfilter socket option\n");
426                 goto cleanup_nothing;
427         }
428
429         ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
430         if (ret < 0) {
431                 printk("nf_conntrack_ipv4: can't register tcp.\n");
432                 goto cleanup_sockopt;
433         }
434
435         ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
436         if (ret < 0) {
437                 printk("nf_conntrack_ipv4: can't register udp.\n");
438                 goto cleanup_tcp;
439         }
440
441         ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
442         if (ret < 0) {
443                 printk("nf_conntrack_ipv4: can't register icmp.\n");
444                 goto cleanup_udp;
445         }
446
447         ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
448         if (ret < 0) {
449                 printk("nf_conntrack_ipv4: can't register ipv4\n");
450                 goto cleanup_icmp;
451         }
452
453         ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
454         if (ret < 0) {
455                 printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
456                 goto cleanup_ipv4;
457         }
458         ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
459         if (ret < 0) {
460                 printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
461                 goto cleanup_defragops;
462         }
463
464         ret = nf_register_hook(&ipv4_conntrack_in_ops);
465         if (ret < 0) {
466                 printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
467                 goto cleanup_defraglocalops;
468         }
469
470         ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
471         if (ret < 0) {
472                 printk("nf_conntrack_ipv4: can't register local out hook.\n");
473                 goto cleanup_inops;
474         }
475
476         ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
477         if (ret < 0) {
478                 printk("nf_conntrack_ipv4: can't register local helper hook.\n");
479                 goto cleanup_inandlocalops;
480         }
481
482         ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
483         if (ret < 0) {
484                 printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
485                 goto cleanup_helperinops;
486         }
487
488         ret = nf_register_hook(&ipv4_conntrack_out_ops);
489         if (ret < 0) {
490                 printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
491                 goto cleanup_helperoutops;
492         }
493
494         ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
495         if (ret < 0) {
496                 printk("nf_conntrack_ipv4: can't register local in hook.\n");
497                 goto cleanup_inoutandlocalops;
498         }
499
500 #ifdef CONFIG_SYSCTL
501         nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
502         if (nf_ct_ipv4_sysctl_header == NULL) {
503                 printk("nf_conntrack: can't register to sysctl.\n");
504                 ret = -ENOMEM;
505                 goto cleanup_localinops;
506         }
507 #endif
508
509         /* For use by REJECT target */
510         ip_ct_attach = __nf_conntrack_attach;
511
512         return ret;
513
514  cleanup:
515         synchronize_net();
516         ip_ct_attach = NULL;
517 #ifdef CONFIG_SYSCTL
518         unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
519  cleanup_localinops:
520 #endif
521         nf_unregister_hook(&ipv4_conntrack_local_in_ops);
522  cleanup_inoutandlocalops:
523         nf_unregister_hook(&ipv4_conntrack_out_ops);
524  cleanup_helperoutops:
525         nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
526  cleanup_helperinops:
527         nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
528  cleanup_inandlocalops:
529         nf_unregister_hook(&ipv4_conntrack_local_out_ops);
530  cleanup_inops:
531         nf_unregister_hook(&ipv4_conntrack_in_ops);
532  cleanup_defraglocalops:
533         nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
534  cleanup_defragops:
535         nf_unregister_hook(&ipv4_conntrack_defrag_ops);
536  cleanup_ipv4:
537         nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
538  cleanup_icmp:
539         nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
540  cleanup_udp:
541         nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
542  cleanup_tcp:
543         nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
544  cleanup_sockopt:
545         nf_unregister_sockopt(&so_getorigdst);
546  cleanup_nothing:
547         return ret;
548 }
549
550 MODULE_LICENSE("GPL");
551
552 static int __init init(void)
553 {
554         need_nf_conntrack();
555         return init_or_cleanup(1);
556 }
557
558 static void __exit fini(void)
559 {
560         init_or_cleanup(0);
561 }
562
563 module_init(init);
564 module_exit(fini);
565
/* Deliberately empty function that is exported below.
   NOTE(review): presumably other modules call it only to create a symbol
   dependency on this module -- confirm against its callers. */
void need_ip_conntrack(void)
{
}

EXPORT_SYMBOL(need_ip_conntrack);
/* nf_ct_ipv4_gather_frags() is declared static in this file, so it has
   internal linkage and must not be passed to EXPORT_SYMBOL(). */