ipvs: convert lblc scheduler to rcu
[linux-3.10.git] / net / netfilter / nf_queue.c
1 #include <linux/kernel.h>
2 #include <linux/slab.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/proc_fs.h>
6 #include <linux/skbuff.h>
7 #include <linux/netfilter.h>
8 #include <linux/seq_file.h>
9 #include <linux/rcupdate.h>
10 #include <net/protocol.h>
11 #include <net/netfilter/nf_queue.h>
12 #include <net/dst.h>
13
14 #include "nf_internals.h"
15
/*
 * Hook for nfnetlink_queue to register its queue handler.
 * We do this so that most of the NFQUEUE code can be modular.
 *
 * Once the queue is registered it must reinject all packets it
 * receives, no matter what.
 *
 * The pointer is RCU-managed: it is published with rcu_assign_pointer(),
 * cleared with RCU_INIT_POINTER() followed by synchronize_rcu(), and read
 * with rcu_dereference() inside an rcu_read_lock() section.
 */
static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
24
25 /* return EBUSY when somebody else is registered, return EEXIST if the
26  * same handler is registered, return 0 in case of success. */
27 void nf_register_queue_handler(const struct nf_queue_handler *qh)
28 {
29         /* should never happen, we only have one queueing backend in kernel */
30         WARN_ON(rcu_access_pointer(queue_handler));
31         rcu_assign_pointer(queue_handler, qh);
32 }
33 EXPORT_SYMBOL(nf_register_queue_handler);
34
/*
 * Remove the currently installed queue handler.
 *
 * The caller must flush their queue before this.
 */
void nf_unregister_queue_handler(void)
{
        /* Clear the slot first so new readers see no handler ... */
        RCU_INIT_POINTER(queue_handler, NULL);
        /* ... then wait for readers that already fetched the old pointer. */
        synchronize_rcu();
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
42
43 static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
44 {
45         /* Release those devices we held, or Alexey will kill me. */
46         if (entry->indev)
47                 dev_put(entry->indev);
48         if (entry->outdev)
49                 dev_put(entry->outdev);
50 #ifdef CONFIG_BRIDGE_NETFILTER
51         if (entry->skb->nf_bridge) {
52                 struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
53
54                 if (nf_bridge->physindev)
55                         dev_put(nf_bridge->physindev);
56                 if (nf_bridge->physoutdev)
57                         dev_put(nf_bridge->physoutdev);
58         }
59 #endif
60         /* Drop reference to owner of hook which queued us. */
61         module_put(entry->elem->owner);
62 }
63
64 /*
65  * Any packet that leaves via this function must come back
66  * through nf_reinject().
67  */
68 static int __nf_queue(struct sk_buff *skb,
69                       struct nf_hook_ops *elem,
70                       u_int8_t pf, unsigned int hook,
71                       struct net_device *indev,
72                       struct net_device *outdev,
73                       int (*okfn)(struct sk_buff *),
74                       unsigned int queuenum)
75 {
76         int status = -ENOENT;
77         struct nf_queue_entry *entry = NULL;
78 #ifdef CONFIG_BRIDGE_NETFILTER
79         struct net_device *physindev;
80         struct net_device *physoutdev;
81 #endif
82         const struct nf_afinfo *afinfo;
83         const struct nf_queue_handler *qh;
84
85         /* QUEUE == DROP if no one is waiting, to be safe. */
86         rcu_read_lock();
87
88         qh = rcu_dereference(queue_handler);
89         if (!qh) {
90                 status = -ESRCH;
91                 goto err_unlock;
92         }
93
94         afinfo = nf_get_afinfo(pf);
95         if (!afinfo)
96                 goto err_unlock;
97
98         entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
99         if (!entry) {
100                 status = -ENOMEM;
101                 goto err_unlock;
102         }
103
104         *entry = (struct nf_queue_entry) {
105                 .skb    = skb,
106                 .elem   = elem,
107                 .pf     = pf,
108                 .hook   = hook,
109                 .indev  = indev,
110                 .outdev = outdev,
111                 .okfn   = okfn,
112         };
113
114         /* If it's going away, ignore hook. */
115         if (!try_module_get(entry->elem->owner)) {
116                 status = -ECANCELED;
117                 goto err_unlock;
118         }
119         /* Bump dev refs so they don't vanish while packet is out */
120         if (indev)
121                 dev_hold(indev);
122         if (outdev)
123                 dev_hold(outdev);
124 #ifdef CONFIG_BRIDGE_NETFILTER
125         if (skb->nf_bridge) {
126                 physindev = skb->nf_bridge->physindev;
127                 if (physindev)
128                         dev_hold(physindev);
129                 physoutdev = skb->nf_bridge->physoutdev;
130                 if (physoutdev)
131                         dev_hold(physoutdev);
132         }
133 #endif
134         skb_dst_force(skb);
135         afinfo->saveroute(skb, entry);
136         status = qh->outfn(entry, queuenum);
137
138         rcu_read_unlock();
139
140         if (status < 0) {
141                 nf_queue_entry_release_refs(entry);
142                 goto err;
143         }
144
145         return 0;
146
147 err_unlock:
148         rcu_read_unlock();
149 err:
150         kfree(entry);
151         return status;
152 }
153
#ifdef CONFIG_BRIDGE_NETFILTER
/*
 * For packets coming from bridge netfilter, skb->data has to point at
 * the MAC header before skb_gso_segment() is called.  Otherwise the
 * original MAC header is lost and the segmented skbs are sent to the
 * wrong destination.
 */
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
        if (!skb->nf_bridge)
                return;

        __skb_push(skb, skb->network_header - skb->mac_header);
}

/* Inverse of nf_bridge_adjust_skb_data(): pull by the same distance. */
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
        if (!skb->nf_bridge)
                return;

        __skb_pull(skb, skb->network_header - skb->mac_header);
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
#define nf_bridge_adjust_segmented_data(s) do {} while (0)
#endif
174
175 int nf_queue(struct sk_buff *skb,
176              struct nf_hook_ops *elem,
177              u_int8_t pf, unsigned int hook,
178              struct net_device *indev,
179              struct net_device *outdev,
180              int (*okfn)(struct sk_buff *),
181              unsigned int queuenum)
182 {
183         struct sk_buff *segs;
184         int err = -EINVAL;
185         unsigned int queued;
186
187         if (!skb_is_gso(skb))
188                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
189                                   queuenum);
190
191         switch (pf) {
192         case NFPROTO_IPV4:
193                 skb->protocol = htons(ETH_P_IP);
194                 break;
195         case NFPROTO_IPV6:
196                 skb->protocol = htons(ETH_P_IPV6);
197                 break;
198         }
199
200         nf_bridge_adjust_skb_data(skb);
201         segs = skb_gso_segment(skb, 0);
202         /* Does not use PTR_ERR to limit the number of error codes that can be
203          * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
204          * 'ignore this hook'.
205          */
206         if (IS_ERR(segs))
207                 goto out_err;
208         queued = 0;
209         err = 0;
210         do {
211                 struct sk_buff *nskb = segs->next;
212
213                 segs->next = NULL;
214                 if (err == 0) {
215                         nf_bridge_adjust_segmented_data(segs);
216                         err = __nf_queue(segs, elem, pf, hook, indev,
217                                            outdev, okfn, queuenum);
218                 }
219                 if (err == 0)
220                         queued++;
221                 else
222                         kfree_skb(segs);
223                 segs = nskb;
224         } while (segs);
225
226         if (queued) {
227                 kfree_skb(skb);
228                 return 0;
229         }
230   out_err:
231         nf_bridge_adjust_segmented_data(skb);
232         return err;
233 }
234
235 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
236 {
237         struct sk_buff *skb = entry->skb;
238         struct nf_hook_ops *elem = entry->elem;
239         const struct nf_afinfo *afinfo;
240         int err;
241
242         rcu_read_lock();
243
244         nf_queue_entry_release_refs(entry);
245
246         /* Continue traversal iff userspace said ok... */
247         if (verdict == NF_REPEAT) {
248                 elem = list_entry(elem->list.prev, struct nf_hook_ops, list);
249                 verdict = NF_ACCEPT;
250         }
251
252         if (verdict == NF_ACCEPT) {
253                 afinfo = nf_get_afinfo(entry->pf);
254                 if (!afinfo || afinfo->reroute(skb, entry) < 0)
255                         verdict = NF_DROP;
256         }
257
258         if (verdict == NF_ACCEPT) {
259         next_hook:
260                 verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
261                                      skb, entry->hook,
262                                      entry->indev, entry->outdev, &elem,
263                                      entry->okfn, INT_MIN);
264         }
265
266         switch (verdict & NF_VERDICT_MASK) {
267         case NF_ACCEPT:
268         case NF_STOP:
269                 local_bh_disable();
270                 entry->okfn(skb);
271                 local_bh_enable();
272                 break;
273         case NF_QUEUE:
274                 err = __nf_queue(skb, elem, entry->pf, entry->hook,
275                                  entry->indev, entry->outdev, entry->okfn,
276                                  verdict >> NF_VERDICT_QBITS);
277                 if (err < 0) {
278                         if (err == -ECANCELED)
279                                 goto next_hook;
280                         if (err == -ESRCH &&
281                            (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
282                                 goto next_hook;
283                         kfree_skb(skb);
284                 }
285                 break;
286         case NF_STOLEN:
287                 break;
288         default:
289                 kfree_skb(skb);
290         }
291         rcu_read_unlock();
292         kfree(entry);
293 }
294 EXPORT_SYMBOL(nf_reinject);