1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
37 DEFINE_MUTEX(xfrm_cfg_mutex);
38 EXPORT_SYMBOL(xfrm_cfg_mutex);
39
40 static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
41 static struct dst_entry *xfrm_policy_sk_bundles;
42 static DEFINE_RWLOCK(xfrm_policy_lock);
43
44 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
45 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
46
47 static struct kmem_cache *xfrm_dst_cache __read_mostly;
48
49 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
50 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
51 static void xfrm_init_pmtu(struct dst_entry *dst);
52 static int stale_bundle(struct dst_entry *dst);
53 static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
54                           const struct flowi *fl, int family);
55
56
57 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
58                                                 int dir);
59
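/*
 * Selector matching: addresses are compared under the selector's prefix
 * lengths, ports are compared through the selector's port masks (a mask of
 * 0 acts as a wildcard), and a zero proto or ifindex in the selector
 * likewise matches anything.  For example, a selector with prefixlen_d = 24
 * and dport_mask = 0 matches every destination in that /24 on any port.
 */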
60 static inline int
61 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
62 {
63         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
64                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
65                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
66                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
67                 (fl->proto == sel->proto || !sel->proto) &&
68                 (fl->oif == sel->ifindex || !sel->ifindex);
69 }
70
71 static inline int
72 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
73 {
74         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
75                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
76                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
77                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
78                 (fl->proto == sel->proto || !sel->proto) &&
79                 (fl->oif == sel->ifindex || !sel->ifindex);
80 }
81
82 int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
83                         unsigned short family)
84 {
85         switch (family) {
86         case AF_INET:
87                 return __xfrm4_selector_match(sel, fl);
88         case AF_INET6:
89                 return __xfrm6_selector_match(sel, fl);
90         }
91         return 0;
92 }
93
94 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
95                                                   const xfrm_address_t *saddr,
96                                                   const xfrm_address_t *daddr,
97                                                   int family)
98 {
99         struct xfrm_policy_afinfo *afinfo;
100         struct dst_entry *dst;
101
102         afinfo = xfrm_policy_get_afinfo(family);
103         if (unlikely(afinfo == NULL))
104                 return ERR_PTR(-EAFNOSUPPORT);
105
106         dst = afinfo->dst_lookup(net, tos, saddr, daddr);
107
108         xfrm_policy_put_afinfo(afinfo);
109
110         return dst;
111 }
112
113 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
114                                                 xfrm_address_t *prev_saddr,
115                                                 xfrm_address_t *prev_daddr,
116                                                 int family)
117 {
118         struct net *net = xs_net(x);
119         xfrm_address_t *saddr = &x->props.saddr;
120         xfrm_address_t *daddr = &x->id.daddr;
121         struct dst_entry *dst;
122
123         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
124                 saddr = x->coaddr;
125                 daddr = prev_daddr;
126         }
127         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
128                 saddr = prev_saddr;
129                 daddr = x->coaddr;
130         }
131
132         dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
133
134         if (!IS_ERR(dst)) {
135                 if (prev_saddr != saddr)
136                         memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
137                 if (prev_daddr != daddr)
138                         memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
139         }
140
141         return dst;
142 }
143
144 static inline unsigned long make_jiffies(long secs)
145 {
146         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
147                 return MAX_SCHEDULE_TIMEOUT-1;
148         else
149                 return secs*HZ;
150 }
151
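/*
 * Policy lifetime timer: a hard add/use expiry deletes the policy and
 * signals key managers with hard == 1, while a soft expiry only warns
 * (hard == 0) and re-arms the timer, after at most XFRM_KM_TIMEOUT seconds
 * in that case, so a key manager can react before the hard limit hits.
 */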
152 static void xfrm_policy_timer(unsigned long data)
153 {
154         struct xfrm_policy *xp = (struct xfrm_policy*)data;
155         unsigned long now = get_seconds();
156         long next = LONG_MAX;
157         int warn = 0;
158         int dir;
159
160         read_lock(&xp->lock);
161
162         if (unlikely(xp->walk.dead))
163                 goto out;
164
165         dir = xfrm_policy_id2dir(xp->index);
166
167         if (xp->lft.hard_add_expires_seconds) {
168                 long tmo = xp->lft.hard_add_expires_seconds +
169                         xp->curlft.add_time - now;
170                 if (tmo <= 0)
171                         goto expired;
172                 if (tmo < next)
173                         next = tmo;
174         }
175         if (xp->lft.hard_use_expires_seconds) {
176                 long tmo = xp->lft.hard_use_expires_seconds +
177                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
178                 if (tmo <= 0)
179                         goto expired;
180                 if (tmo < next)
181                         next = tmo;
182         }
183         if (xp->lft.soft_add_expires_seconds) {
184                 long tmo = xp->lft.soft_add_expires_seconds +
185                         xp->curlft.add_time - now;
186                 if (tmo <= 0) {
187                         warn = 1;
188                         tmo = XFRM_KM_TIMEOUT;
189                 }
190                 if (tmo < next)
191                         next = tmo;
192         }
193         if (xp->lft.soft_use_expires_seconds) {
194                 long tmo = xp->lft.soft_use_expires_seconds +
195                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
196                 if (tmo <= 0) {
197                         warn = 1;
198                         tmo = XFRM_KM_TIMEOUT;
199                 }
200                 if (tmo < next)
201                         next = tmo;
202         }
203
204         if (warn)
205                 km_policy_expired(xp, dir, 0, 0);
206         if (next != LONG_MAX &&
207             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
208                 xfrm_pol_hold(xp);
209
210 out:
211         read_unlock(&xp->lock);
212         xfrm_pol_put(xp);
213         return;
214
215 expired:
216         read_unlock(&xp->lock);
217         if (!xfrm_policy_delete(xp, dir))
218                 km_policy_expired(xp, dir, 1, 0);
219         xfrm_pol_put(xp);
220 }
221
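/*
 * Flow cache glue: the flow cache holds references to policies through
 * these callbacks.  ->get takes an extra reference unless the policy has
 * already been killed (walk.dead), ->check invalidates cached entries once
 * a policy dies, and ->delete drops the cache's reference.
 */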
222 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
223 {
224         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
225
226         if (unlikely(pol->walk.dead))
227                 flo = NULL;
228         else
229                 xfrm_pol_hold(pol);
230
231         return flo;
232 }
233
234 static int xfrm_policy_flo_check(struct flow_cache_object *flo)
235 {
236         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
237
238         return !pol->walk.dead;
239 }
240
241 static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
242 {
243         xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
244 }
245
246 static const struct flow_cache_ops xfrm_policy_fc_ops = {
247         .get = xfrm_policy_flo_get,
248         .check = xfrm_policy_flo_check,
249         .delete = xfrm_policy_flo_delete,
250 };
251
252 /* Allocate xfrm_policy. Not used directly here; it is meant to be used by
253  * pfkeyv2 SPD calls.
254  */
255
256 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
257 {
258         struct xfrm_policy *policy;
259
260         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
261
262         if (policy) {
263                 write_pnet(&policy->xp_net, net);
264                 INIT_LIST_HEAD(&policy->walk.all);
265                 INIT_HLIST_NODE(&policy->bydst);
266                 INIT_HLIST_NODE(&policy->byidx);
267                 rwlock_init(&policy->lock);
268                 atomic_set(&policy->refcnt, 1);
269                 setup_timer(&policy->timer, xfrm_policy_timer,
270                                 (unsigned long)policy);
271                 policy->flo.ops = &xfrm_policy_fc_ops;
272         }
273         return policy;
274 }
275 EXPORT_SYMBOL(xfrm_policy_alloc);
276
277 /* Destroy xfrm_policy: descendant resources must have been released by this point. */
278
279 void xfrm_policy_destroy(struct xfrm_policy *policy)
280 {
281         BUG_ON(!policy->walk.dead);
282
283         if (del_timer(&policy->timer))
284                 BUG();
285
286         security_xfrm_policy_free(policy->security);
287         kfree(policy);
288 }
289 EXPORT_SYMBOL(xfrm_policy_destroy);
290
291 /* Rule must be locked. Release descendant resources, announce
292  * entry dead. The rule must already be unlinked from all lists.
293  */
294
295 static void xfrm_policy_kill(struct xfrm_policy *policy)
296 {
297         policy->walk.dead = 1;
298
299         atomic_inc(&policy->genid);
300
301         if (del_timer(&policy->timer))
302                 xfrm_pol_put(policy);
303
304         xfrm_pol_put(policy);
305 }
306
307 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
308
309 static inline unsigned int idx_hash(struct net *net, u32 index)
310 {
311         return __idx_hash(index, net->xfrm.policy_idx_hmask);
312 }
313
314 static struct hlist_head *policy_hash_bysel(struct net *net,
315                                             const struct xfrm_selector *sel,
316                                             unsigned short family, int dir)
317 {
318         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
319         unsigned int hash = __sel_hash(sel, family, hmask);
320
321         return (hash == hmask + 1 ?
322                 &net->xfrm.policy_inexact[dir] :
323                 net->xfrm.policy_bydst[dir].table + hash);
324 }
325
326 static struct hlist_head *policy_hash_direct(struct net *net,
327                                              const xfrm_address_t *daddr,
328                                              const xfrm_address_t *saddr,
329                                              unsigned short family, int dir)
330 {
331         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
332         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
333
334         return net->xfrm.policy_bydst[dir].table + hash;
335 }
336
337 static void xfrm_dst_hash_transfer(struct hlist_head *list,
338                                    struct hlist_head *ndsttable,
339                                    unsigned int nhashmask)
340 {
341         struct hlist_node *entry, *tmp, *entry0 = NULL;
342         struct xfrm_policy *pol;
343         unsigned int h0 = 0;
344
345 redo:
346         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
347                 unsigned int h;
348
349                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
350                                 pol->family, nhashmask);
351                 if (!entry0) {
352                         hlist_del(entry);
353                         hlist_add_head(&pol->bydst, ndsttable+h);
354                         h0 = h;
355                 } else {
356                         if (h != h0)
357                                 continue;
358                         hlist_del(entry);
359                         hlist_add_after(entry0, &pol->bydst);
360                 }
361                 entry0 = entry;
362         }
363         if (!hlist_empty(list)) {
364                 entry0 = NULL;
365                 goto redo;
366         }
367 }
368
369 static void xfrm_idx_hash_transfer(struct hlist_head *list,
370                                    struct hlist_head *nidxtable,
371                                    unsigned int nhashmask)
372 {
373         struct hlist_node *entry, *tmp;
374         struct xfrm_policy *pol;
375
376         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
377                 unsigned int h;
378
379                 h = __idx_hash(pol->index, nhashmask);
380                 hlist_add_head(&pol->byidx, nidxtable+h);
381         }
382 }
383
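/*
 * Hash tables grow by doubling: the new mask is ((old_hmask + 1) << 1) - 1,
 * e.g. a mask of 15 (16 buckets) becomes 31 (32 buckets).  Resizing stops
 * once a table would exceed xfrm_policy_hashmax buckets.
 */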
384 static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
385 {
386         return ((old_hmask + 1) << 1) - 1;
387 }
388
389 static void xfrm_bydst_resize(struct net *net, int dir)
390 {
391         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
392         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
393         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
394         struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
395         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
396         int i;
397
398         if (!ndst)
399                 return;
400
401         write_lock_bh(&xfrm_policy_lock);
402
403         for (i = hmask; i >= 0; i--)
404                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
405
406         net->xfrm.policy_bydst[dir].table = ndst;
407         net->xfrm.policy_bydst[dir].hmask = nhashmask;
408
409         write_unlock_bh(&xfrm_policy_lock);
410
411         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
412 }
413
414 static void xfrm_byidx_resize(struct net *net, int total)
415 {
416         unsigned int hmask = net->xfrm.policy_idx_hmask;
417         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
418         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
419         struct hlist_head *oidx = net->xfrm.policy_byidx;
420         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
421         int i;
422
423         if (!nidx)
424                 return;
425
426         write_lock_bh(&xfrm_policy_lock);
427
428         for (i = hmask; i >= 0; i--)
429                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
430
431         net->xfrm.policy_byidx = nidx;
432         net->xfrm.policy_idx_hmask = nhashmask;
433
434         write_unlock_bh(&xfrm_policy_lock);
435
436         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
437 }
438
439 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
440 {
441         unsigned int cnt = net->xfrm.policy_count[dir];
442         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
443
444         if (total)
445                 *total += cnt;
446
447         if ((hmask + 1) < xfrm_policy_hashmax &&
448             cnt > hmask)
449                 return 1;
450
451         return 0;
452 }
453
454 static inline int xfrm_byidx_should_resize(struct net *net, int total)
455 {
456         unsigned int hmask = net->xfrm.policy_idx_hmask;
457
458         if ((hmask + 1) < xfrm_policy_hashmax &&
459             total > hmask)
460                 return 1;
461
462         return 0;
463 }
464
465 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
466 {
467         read_lock_bh(&xfrm_policy_lock);
468         si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
469         si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
470         si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
471         si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
472         si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
473         si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
474         si->spdhcnt = net->xfrm.policy_idx_hmask;
475         si->spdhmcnt = xfrm_policy_hashmax;
476         read_unlock_bh(&xfrm_policy_lock);
477 }
478 EXPORT_SYMBOL(xfrm_spd_getinfo);
479
480 static DEFINE_MUTEX(hash_resize_mutex);
481 static void xfrm_hash_resize(struct work_struct *work)
482 {
483         struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
484         int dir, total;
485
486         mutex_lock(&hash_resize_mutex);
487
488         total = 0;
489         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
490                 if (xfrm_bydst_should_resize(net, dir, &total))
491                         xfrm_bydst_resize(net, dir);
492         }
493         if (xfrm_byidx_should_resize(net, total))
494                 xfrm_byidx_resize(net, total);
495
496         mutex_unlock(&hash_resize_mutex);
497 }
498
499 /* Generate new index... KAME seems to generate them ordered by cost
500  * at the price of absolutely unpredictable rule ordering. This will not pass. */
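/* The generated index encodes the direction in its low bits: indices are
 * handed out in steps of 8 with the direction OR-ed in, so
 * xfrm_policy_id2dir() can recover the direction from the index alone.
 * Uniqueness is enforced by probing the by-index hash chain.
 */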
501 static u32 xfrm_gen_index(struct net *net, int dir)
502 {
503         static u32 idx_generator;
504
505         for (;;) {
506                 struct hlist_node *entry;
507                 struct hlist_head *list;
508                 struct xfrm_policy *p;
509                 u32 idx;
510                 int found;
511
512                 idx = (idx_generator | dir);
513                 idx_generator += 8;
514                 if (idx == 0)
515                         idx = 8;
516                 list = net->xfrm.policy_byidx + idx_hash(net, idx);
517                 found = 0;
518                 hlist_for_each_entry(p, entry, list, byidx) {
519                         if (p->index == idx) {
520                                 found = 1;
521                                 break;
522                         }
523                 }
524                 if (!found)
525                         return idx;
526         }
527 }
528
529 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
530 {
531         u32 *p1 = (u32 *) s1;
532         u32 *p2 = (u32 *) s2;
533         int len = sizeof(struct xfrm_selector) / sizeof(u32);
534         int i;
535
536         for (i = 0; i < len; i++) {
537                 if (p1[i] != p2[i])
538                         return 1;
539         }
540
541         return 0;
542 }
543
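/*
 * Policies on a chain are kept sorted by ->priority (lower value wins), and
 * inserting a policy whose type, selector, mark and security context match
 * an existing one replaces that entry (or fails with -EEXIST when excl is
 * set).  As an illustrative sketch only - the real callers are the pfkeyv2
 * and netlink front ends, and the exact field setup below is assumed
 * rather than taken from them - a caller would do roughly:
 *
 *	struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL);
 *
 *	if (xp) {
 *		xp->family = AF_INET;
 *		xp->action = XFRM_POLICY_ALLOW;
 *		xp->selector.family = AF_INET;	 (plus addresses, ports, ...)
 *		xp->lft.hard_add_expires_seconds = 3600;
 *		err = xfrm_policy_insert(XFRM_POLICY_OUT, xp, 1);
 *	}
 */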
544 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
545 {
546         struct net *net = xp_net(policy);
547         struct xfrm_policy *pol;
548         struct xfrm_policy *delpol;
549         struct hlist_head *chain;
550         struct hlist_node *entry, *newpos;
551         u32 mark = policy->mark.v & policy->mark.m;
552
553         write_lock_bh(&xfrm_policy_lock);
554         chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
555         delpol = NULL;
556         newpos = NULL;
557         hlist_for_each_entry(pol, entry, chain, bydst) {
558                 if (pol->type == policy->type &&
559                     !selector_cmp(&pol->selector, &policy->selector) &&
560                     (mark & pol->mark.m) == pol->mark.v &&
561                     xfrm_sec_ctx_match(pol->security, policy->security) &&
562                     !WARN_ON(delpol)) {
563                         if (excl) {
564                                 write_unlock_bh(&xfrm_policy_lock);
565                                 return -EEXIST;
566                         }
567                         delpol = pol;
568                         if (policy->priority > pol->priority)
569                                 continue;
570                 } else if (policy->priority >= pol->priority) {
571                         newpos = &pol->bydst;
572                         continue;
573                 }
574                 if (delpol)
575                         break;
576         }
577         if (newpos)
578                 hlist_add_after(newpos, &policy->bydst);
579         else
580                 hlist_add_head(&policy->bydst, chain);
581         xfrm_pol_hold(policy);
582         net->xfrm.policy_count[dir]++;
583         atomic_inc(&flow_cache_genid);
584         if (delpol)
585                 __xfrm_policy_unlink(delpol, dir);
586         policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
587         hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
588         policy->curlft.add_time = get_seconds();
589         policy->curlft.use_time = 0;
590         if (!mod_timer(&policy->timer, jiffies + HZ))
591                 xfrm_pol_hold(policy);
592         list_add(&policy->walk.all, &net->xfrm.policy_all);
593         write_unlock_bh(&xfrm_policy_lock);
594
595         if (delpol)
596                 xfrm_policy_kill(delpol);
597         else if (xfrm_bydst_should_resize(net, dir, NULL))
598                 schedule_work(&net->xfrm.policy_hash_work);
599
600         return 0;
601 }
602 EXPORT_SYMBOL(xfrm_policy_insert);
603
604 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
605                                           int dir, struct xfrm_selector *sel,
606                                           struct xfrm_sec_ctx *ctx, int delete,
607                                           int *err)
608 {
609         struct xfrm_policy *pol, *ret;
610         struct hlist_head *chain;
611         struct hlist_node *entry;
612
613         *err = 0;
614         write_lock_bh(&xfrm_policy_lock);
615         chain = policy_hash_bysel(net, sel, sel->family, dir);
616         ret = NULL;
617         hlist_for_each_entry(pol, entry, chain, bydst) {
618                 if (pol->type == type &&
619                     (mark & pol->mark.m) == pol->mark.v &&
620                     !selector_cmp(sel, &pol->selector) &&
621                     xfrm_sec_ctx_match(ctx, pol->security)) {
622                         xfrm_pol_hold(pol);
623                         if (delete) {
624                                 *err = security_xfrm_policy_delete(
625                                                                 pol->security);
626                                 if (*err) {
627                                         write_unlock_bh(&xfrm_policy_lock);
628                                         return pol;
629                                 }
630                                 __xfrm_policy_unlink(pol, dir);
631                         }
632                         ret = pol;
633                         break;
634                 }
635         }
636         write_unlock_bh(&xfrm_policy_lock);
637
638         if (ret && delete)
639                 xfrm_policy_kill(ret);
640         return ret;
641 }
642 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
643
644 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
645                                      int dir, u32 id, int delete, int *err)
646 {
647         struct xfrm_policy *pol, *ret;
648         struct hlist_head *chain;
649         struct hlist_node *entry;
650
651         *err = -ENOENT;
652         if (xfrm_policy_id2dir(id) != dir)
653                 return NULL;
654
655         *err = 0;
656         write_lock_bh(&xfrm_policy_lock);
657         chain = net->xfrm.policy_byidx + idx_hash(net, id);
658         ret = NULL;
659         hlist_for_each_entry(pol, entry, chain, byidx) {
660                 if (pol->type == type && pol->index == id &&
661                     (mark & pol->mark.m) == pol->mark.v) {
662                         xfrm_pol_hold(pol);
663                         if (delete) {
664                                 *err = security_xfrm_policy_delete(
665                                                                 pol->security);
666                                 if (*err) {
667                                         write_unlock_bh(&xfrm_policy_lock);
668                                         return pol;
669                                 }
670                                 __xfrm_policy_unlink(pol, dir);
671                         }
672                         ret = pol;
673                         break;
674                 }
675         }
676         write_unlock_bh(&xfrm_policy_lock);
677
678         if (ret && delete)
679                 xfrm_policy_kill(ret);
680         return ret;
681 }
682 EXPORT_SYMBOL(xfrm_policy_byid);
683
684 #ifdef CONFIG_SECURITY_NETWORK_XFRM
685 static inline int
686 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
687 {
688         int dir, err = 0;
689
690         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
691                 struct xfrm_policy *pol;
692                 struct hlist_node *entry;
693                 int i;
694
695                 hlist_for_each_entry(pol, entry,
696                                      &net->xfrm.policy_inexact[dir], bydst) {
697                         if (pol->type != type)
698                                 continue;
699                         err = security_xfrm_policy_delete(pol->security);
700                         if (err) {
701                                 xfrm_audit_policy_delete(pol, 0,
702                                                          audit_info->loginuid,
703                                                          audit_info->sessionid,
704                                                          audit_info->secid);
705                                 return err;
706                         }
707                 }
708                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
709                         hlist_for_each_entry(pol, entry,
710                                              net->xfrm.policy_bydst[dir].table + i,
711                                              bydst) {
712                                 if (pol->type != type)
713                                         continue;
714                                 err = security_xfrm_policy_delete(
715                                                                 pol->security);
716                                 if (err) {
717                                         xfrm_audit_policy_delete(pol, 0,
718                                                         audit_info->loginuid,
719                                                         audit_info->sessionid,
720                                                         audit_info->secid);
721                                         return err;
722                                 }
723                         }
724                 }
725         }
726         return err;
727 }
728 #else
729 static inline int
730 xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
731 {
732         return 0;
733 }
734 #endif
735
736 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
737 {
738         int dir, err = 0, cnt = 0;
739
740         write_lock_bh(&xfrm_policy_lock);
741
742         err = xfrm_policy_flush_secctx_check(net, type, audit_info);
743         if (err)
744                 goto out;
745
746         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
747                 struct xfrm_policy *pol;
748                 struct hlist_node *entry;
749                 int i;
750
751         again1:
752                 hlist_for_each_entry(pol, entry,
753                                      &net->xfrm.policy_inexact[dir], bydst) {
754                         if (pol->type != type)
755                                 continue;
756                         __xfrm_policy_unlink(pol, dir);
757                         write_unlock_bh(&xfrm_policy_lock);
758                         cnt++;
759
760                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
761                                                  audit_info->sessionid,
762                                                  audit_info->secid);
763
764                         xfrm_policy_kill(pol);
765
766                         write_lock_bh(&xfrm_policy_lock);
767                         goto again1;
768                 }
769
770                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
771         again2:
772                         hlist_for_each_entry(pol, entry,
773                                              net->xfrm.policy_bydst[dir].table + i,
774                                              bydst) {
775                                 if (pol->type != type)
776                                         continue;
777                                 __xfrm_policy_unlink(pol, dir);
778                                 write_unlock_bh(&xfrm_policy_lock);
779                                 cnt++;
780
781                                 xfrm_audit_policy_delete(pol, 1,
782                                                          audit_info->loginuid,
783                                                          audit_info->sessionid,
784                                                          audit_info->secid);
785                                 xfrm_policy_kill(pol);
786
787                                 write_lock_bh(&xfrm_policy_lock);
788                                 goto again2;
789                         }
790                 }
791
792         }
793         if (!cnt)
794                 err = -ESRCH;
795 out:
796         write_unlock_bh(&xfrm_policy_lock);
797         return err;
798 }
799 EXPORT_SYMBOL(xfrm_policy_flush);
800
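/*
 * Resumable policy dump: the walker's own walk entry (marked dead so it is
 * skipped as a policy) is threaded into net->xfrm.policy_all, which lets an
 * interrupted dump continue from where it stopped on the next call.
 */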
801 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
802                      int (*func)(struct xfrm_policy *, int, int, void*),
803                      void *data)
804 {
805         struct xfrm_policy *pol;
806         struct xfrm_policy_walk_entry *x;
807         int error = 0;
808
809         if (walk->type >= XFRM_POLICY_TYPE_MAX &&
810             walk->type != XFRM_POLICY_TYPE_ANY)
811                 return -EINVAL;
812
813         if (list_empty(&walk->walk.all) && walk->seq != 0)
814                 return 0;
815
816         write_lock_bh(&xfrm_policy_lock);
817         if (list_empty(&walk->walk.all))
818                 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
819         else
820                 x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
821         list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
822                 if (x->dead)
823                         continue;
824                 pol = container_of(x, struct xfrm_policy, walk);
825                 if (walk->type != XFRM_POLICY_TYPE_ANY &&
826                     walk->type != pol->type)
827                         continue;
828                 error = func(pol, xfrm_policy_id2dir(pol->index),
829                              walk->seq, data);
830                 if (error) {
831                         list_move_tail(&walk->walk.all, &x->all);
832                         goto out;
833                 }
834                 walk->seq++;
835         }
836         if (walk->seq == 0) {
837                 error = -ENOENT;
838                 goto out;
839         }
840         list_del_init(&walk->walk.all);
841 out:
842         write_unlock_bh(&xfrm_policy_lock);
843         return error;
844 }
845 EXPORT_SYMBOL(xfrm_policy_walk);
846
847 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
848 {
849         INIT_LIST_HEAD(&walk->walk.all);
850         walk->walk.dead = 1;
851         walk->type = type;
852         walk->seq = 0;
853 }
854 EXPORT_SYMBOL(xfrm_policy_walk_init);
855
856 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
857 {
858         if (list_empty(&walk->walk.all))
859                 return;
860
861         write_lock_bh(&xfrm_policy_lock);
862         list_del(&walk->walk.all);
863         write_unlock_bh(&xfrm_policy_lock);
864 }
865 EXPORT_SYMBOL(xfrm_policy_walk_done);
866
867 /*
868  * Find policy to apply to this flow.
869  *
870  * Returns 0 if policy found, else an -errno.
871  */
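/* A return value of -ESRCH means "no match, keep searching"; any other
 * negative error aborts the lookup, and 0 means the selector matched and
 * the security policy check passed.
 */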
872 static int xfrm_policy_match(const struct xfrm_policy *pol,
873                              const struct flowi *fl,
874                              u8 type, u16 family, int dir)
875 {
876         const struct xfrm_selector *sel = &pol->selector;
877         int match, ret = -ESRCH;
878
879         if (pol->family != family ||
880             (fl->mark & pol->mark.m) != pol->mark.v ||
881             pol->type != type)
882                 return ret;
883
884         match = xfrm_selector_match(sel, fl, family);
885         if (match)
886                 ret = security_xfrm_policy_lookup(pol->security, fl->secid,
887                                                   dir);
888
889         return ret;
890 }
891
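/*
 * Lookup runs in two passes: first the hash chain selected by the flow's
 * addresses (policies whose selectors are hashed by exact addresses), then
 * the per-direction inexact chain.  Among the candidates the numerically
 * lowest ->priority wins, and the winner is returned with a reference held.
 */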
892 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
893                                                      const struct flowi *fl,
894                                                      u16 family, u8 dir)
895 {
896         int err;
897         struct xfrm_policy *pol, *ret;
898         const xfrm_address_t *daddr, *saddr;
899         struct hlist_node *entry;
900         struct hlist_head *chain;
901         u32 priority = ~0U;
902
903         daddr = xfrm_flowi_daddr(fl, family);
904         saddr = xfrm_flowi_saddr(fl, family);
905         if (unlikely(!daddr || !saddr))
906                 return NULL;
907
908         read_lock_bh(&xfrm_policy_lock);
909         chain = policy_hash_direct(net, daddr, saddr, family, dir);
910         ret = NULL;
911         hlist_for_each_entry(pol, entry, chain, bydst) {
912                 err = xfrm_policy_match(pol, fl, type, family, dir);
913                 if (err) {
914                         if (err == -ESRCH)
915                                 continue;
916                         else {
917                                 ret = ERR_PTR(err);
918                                 goto fail;
919                         }
920                 } else {
921                         ret = pol;
922                         priority = ret->priority;
923                         break;
924                 }
925         }
926         chain = &net->xfrm.policy_inexact[dir];
927         hlist_for_each_entry(pol, entry, chain, bydst) {
928                 err = xfrm_policy_match(pol, fl, type, family, dir);
929                 if (err) {
930                         if (err == -ESRCH)
931                                 continue;
932                         else {
933                                 ret = ERR_PTR(err);
934                                 goto fail;
935                         }
936                 } else if (pol->priority < priority) {
937                         ret = pol;
938                         break;
939                 }
940         }
941         if (ret)
942                 xfrm_pol_hold(ret);
943 fail:
944         read_unlock_bh(&xfrm_policy_lock);
945
946         return ret;
947 }
948
949 static struct xfrm_policy *
950 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
951 {
952 #ifdef CONFIG_XFRM_SUB_POLICY
953         struct xfrm_policy *pol;
954
955         pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
956         if (pol != NULL)
957                 return pol;
958 #endif
959         return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
960 }
961
962 static struct flow_cache_object *
963 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
964                    u8 dir, struct flow_cache_object *old_obj, void *ctx)
965 {
966         struct xfrm_policy *pol;
967
968         if (old_obj)
969                 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
970
971         pol = __xfrm_policy_lookup(net, fl, family, dir);
972         if (IS_ERR_OR_NULL(pol))
973                 return ERR_CAST(pol);
974
975         /* Resolver returns two references:
976          * one for cache and one for caller of flow_cache_lookup() */
977         xfrm_pol_hold(pol);
978
979         return &pol->flo;
980 }
981
982 static inline int policy_to_flow_dir(int dir)
983 {
984         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
985             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
986             XFRM_POLICY_FWD == FLOW_DIR_FWD)
987                 return dir;
988         switch (dir) {
989         default:
990         case XFRM_POLICY_IN:
991                 return FLOW_DIR_IN;
992         case XFRM_POLICY_OUT:
993                 return FLOW_DIR_OUT;
994         case XFRM_POLICY_FWD:
995                 return FLOW_DIR_FWD;
996         }
997 }
998
999 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
1000                                                  const struct flowi *fl)
1001 {
1002         struct xfrm_policy *pol;
1003
1004         read_lock_bh(&xfrm_policy_lock);
1005         if ((pol = sk->sk_policy[dir]) != NULL) {
1006                 int match = xfrm_selector_match(&pol->selector, fl,
1007                                                 sk->sk_family);
1008                 int err = 0;
1009
1010                 if (match) {
1011                         if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1012                                 pol = NULL;
1013                                 goto out;
1014                         }
1015                         err = security_xfrm_policy_lookup(pol->security,
1016                                                       fl->secid,
1017                                                       policy_to_flow_dir(dir));
1018                         if (!err)
1019                                 xfrm_pol_hold(pol);
1020                         else if (err == -ESRCH)
1021                                 pol = NULL;
1022                         else
1023                                 pol = ERR_PTR(err);
1024                 } else
1025                         pol = NULL;
1026         }
1027 out:
1028         read_unlock_bh(&xfrm_policy_lock);
1029         return pol;
1030 }
1031
1032 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1033 {
1034         struct net *net = xp_net(pol);
1035         struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1036                                                      pol->family, dir);
1037
1038         list_add(&pol->walk.all, &net->xfrm.policy_all);
1039         hlist_add_head(&pol->bydst, chain);
1040         hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1041         net->xfrm.policy_count[dir]++;
1042         xfrm_pol_hold(pol);
1043
1044         if (xfrm_bydst_should_resize(net, dir, NULL))
1045                 schedule_work(&net->xfrm.policy_hash_work);
1046 }
1047
1048 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1049                                                 int dir)
1050 {
1051         struct net *net = xp_net(pol);
1052
1053         if (hlist_unhashed(&pol->bydst))
1054                 return NULL;
1055
1056         hlist_del(&pol->bydst);
1057         hlist_del(&pol->byidx);
1058         list_del(&pol->walk.all);
1059         net->xfrm.policy_count[dir]--;
1060
1061         return pol;
1062 }
1063
1064 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1065 {
1066         write_lock_bh(&xfrm_policy_lock);
1067         pol = __xfrm_policy_unlink(pol, dir);
1068         write_unlock_bh(&xfrm_policy_lock);
1069         if (pol) {
1070                 xfrm_policy_kill(pol);
1071                 return 0;
1072         }
1073         return -ENOENT;
1074 }
1075 EXPORT_SYMBOL(xfrm_policy_delete);
1076
1077 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1078 {
1079         struct net *net = xp_net(pol);
1080         struct xfrm_policy *old_pol;
1081
1082 #ifdef CONFIG_XFRM_SUB_POLICY
1083         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1084                 return -EINVAL;
1085 #endif
1086
1087         write_lock_bh(&xfrm_policy_lock);
1088         old_pol = sk->sk_policy[dir];
1089         sk->sk_policy[dir] = pol;
1090         if (pol) {
1091                 pol->curlft.add_time = get_seconds();
1092                 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
1093                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1094         }
1095         if (old_pol)
1096                 /* Unlinking always succeeds. This is the only function
1097                  * allowed to delete or replace a socket policy.
1098                  */
1099                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1100         write_unlock_bh(&xfrm_policy_lock);
1101
1102         if (old_pol) {
1103                 xfrm_policy_kill(old_pol);
1104         }
1105         return 0;
1106 }
1107
1108 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1109 {
1110         struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1111
1112         if (newp) {
1113                 newp->selector = old->selector;
1114                 if (security_xfrm_policy_clone(old->security,
1115                                                &newp->security)) {
1116                         kfree(newp);
1117                         return NULL;  /* ENOMEM */
1118                 }
1119                 newp->lft = old->lft;
1120                 newp->curlft = old->curlft;
1121                 newp->mark = old->mark;
1122                 newp->action = old->action;
1123                 newp->flags = old->flags;
1124                 newp->xfrm_nr = old->xfrm_nr;
1125                 newp->index = old->index;
1126                 newp->type = old->type;
1127                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1128                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1129                 write_lock_bh(&xfrm_policy_lock);
1130                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1131                 write_unlock_bh(&xfrm_policy_lock);
1132                 xfrm_pol_put(newp);
1133         }
1134         return newp;
1135 }
1136
1137 int __xfrm_sk_clone_policy(struct sock *sk)
1138 {
1139         struct xfrm_policy *p0 = sk->sk_policy[0],
1140                            *p1 = sk->sk_policy[1];
1141
1142         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1143         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1144                 return -ENOMEM;
1145         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1146                 return -ENOMEM;
1147         return 0;
1148 }
1149
1150 static int
1151 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1152                unsigned short family)
1153 {
1154         int err;
1155         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1156
1157         if (unlikely(afinfo == NULL))
1158                 return -EINVAL;
1159         err = afinfo->get_saddr(net, local, remote);
1160         xfrm_policy_put_afinfo(afinfo);
1161         return err;
1162 }
1163
1164 /* Resolve list of templates for the flow, given policy. */
1165
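/* For tunnel and BEET templates the endpoint addresses come from the
 * template itself, with a missing local address filled in via
 * xfrm_get_saddr().  A state that cannot be found makes resolution fail
 * with -EAGAIN (unless the template is optional, in which case it is
 * skipped), and on failure all states acquired so far are released.
 */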
1166 static int
1167 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1168                       struct xfrm_state **xfrm, unsigned short family)
1169 {
1170         struct net *net = xp_net(policy);
1171         int nx;
1172         int i, error;
1173         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1174         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1175         xfrm_address_t tmp;
1176
1177         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1178                 struct xfrm_state *x;
1179                 xfrm_address_t *remote = daddr;
1180                 xfrm_address_t *local  = saddr;
1181                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1182
1183                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1184                     tmpl->mode == XFRM_MODE_BEET) {
1185                         remote = &tmpl->id.daddr;
1186                         local = &tmpl->saddr;
1187                         if (xfrm_addr_any(local, tmpl->encap_family)) {
1188                                 error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
1189                                 if (error)
1190                                         goto fail;
1191                                 local = &tmp;
1192                         }
1193                 }
1194
1195                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1196
1197                 if (x && x->km.state == XFRM_STATE_VALID) {
1198                         xfrm[nx++] = x;
1199                         daddr = remote;
1200                         saddr = local;
1201                         continue;
1202                 }
1203                 if (x) {
1204                         error = (x->km.state == XFRM_STATE_ERROR ?
1205                                  -EINVAL : -EAGAIN);
1206                         xfrm_state_put(x);
1207                 }
1208                 else if (error == -ESRCH)
1209                         error = -EAGAIN;
1210
1211                 if (!tmpl->optional)
1212                         goto fail;
1213         }
1214         return nx;
1215
1216 fail:
1217         for (nx--; nx>=0; nx--)
1218                 xfrm_state_put(xfrm[nx]);
1219         return error;
1220 }
1221
1222 static int
1223 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1224                   struct xfrm_state **xfrm, unsigned short family)
1225 {
1226         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1227         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1228         int cnx = 0;
1229         int error;
1230         int ret;
1231         int i;
1232
1233         for (i = 0; i < npols; i++) {
1234                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1235                         error = -ENOBUFS;
1236                         goto fail;
1237                 }
1238
1239                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1240                 if (ret < 0) {
1241                         error = ret;
1242                         goto fail;
1243                 } else
1244                         cnx += ret;
1245         }
1246
1247         /* found states are sorted for outbound processing */
1248         if (npols > 1)
1249                 xfrm_state_sort(xfrm, tpp, cnx, family);
1250
1251         return cnx;
1252
1253  fail:
1254         for (cnx--; cnx>=0; cnx--)
1255                 xfrm_state_put(tpp[cnx]);
1256         return error;
1257
1258 }
1259
1260 /* Check that the bundle accepts the flow and its components are
1261  * still valid.
1262  */
1263
1264 static inline int xfrm_get_tos(const struct flowi *fl, int family)
1265 {
1266         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1267         int tos;
1268
1269         if (!afinfo)
1270                 return -EINVAL;
1271
1272         tos = afinfo->get_tos(fl);
1273
1274         xfrm_policy_put_afinfo(afinfo);
1275
1276         return tos;
1277 }
1278
1279 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1280 {
1281         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1282         struct dst_entry *dst = &xdst->u.dst;
1283
1284         if (xdst->route == NULL) {
1285                 /* Dummy bundle - if it carries xfrms, we were unable to
1286                  * build the bundle because template resolution failed,
1287                  * so resolution needs to be retried. */
1288                 if (xdst->num_xfrms > 0)
1289                         return NULL;
1290         } else {
1291                 /* Real bundle */
1292                 if (stale_bundle(dst))
1293                         return NULL;
1294         }
1295
1296         dst_hold(dst);
1297         return flo;
1298 }
1299
1300 static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1301 {
1302         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1303         struct dst_entry *dst = &xdst->u.dst;
1304
1305         if (!xdst->route)
1306                 return 0;
1307         if (stale_bundle(dst))
1308                 return 0;
1309
1310         return 1;
1311 }
1312
1313 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1314 {
1315         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1316         struct dst_entry *dst = &xdst->u.dst;
1317
1318         dst_free(dst);
1319 }
1320
1321 static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1322         .get = xfrm_bundle_flo_get,
1323         .check = xfrm_bundle_flo_check,
1324         .delete = xfrm_bundle_flo_delete,
1325 };
1326
1327 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1328 {
1329         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1330         struct dst_ops *dst_ops;
1331         struct xfrm_dst *xdst;
1332
1333         if (!afinfo)
1334                 return ERR_PTR(-EINVAL);
1335
1336         switch (family) {
1337         case AF_INET:
1338                 dst_ops = &net->xfrm.xfrm4_dst_ops;
1339                 break;
1340 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1341         case AF_INET6:
1342                 dst_ops = &net->xfrm.xfrm6_dst_ops;
1343                 break;
1344 #endif
1345         default:
1346                 BUG();
1347         }
1348         xdst = dst_alloc(dst_ops, 0);
1349         xfrm_policy_put_afinfo(afinfo);
1350
1351         if (likely(xdst))
1352                 xdst->flo.ops = &xfrm_bundle_fc_ops;
1353         else
1354                 xdst = ERR_PTR(-ENOBUFS);
1355
1356         return xdst;
1357 }
1358
1359 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1360                                  int nfheader_len)
1361 {
1362         struct xfrm_policy_afinfo *afinfo =
1363                 xfrm_policy_get_afinfo(dst->ops->family);
1364         int err;
1365
1366         if (!afinfo)
1367                 return -EINVAL;
1368
1369         err = afinfo->init_path(path, dst, nfheader_len);
1370
1371         xfrm_policy_put_afinfo(afinfo);
1372
1373         return err;
1374 }
1375
1376 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1377                                 const struct flowi *fl)
1378 {
1379         struct xfrm_policy_afinfo *afinfo =
1380                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1381         int err;
1382
1383         if (!afinfo)
1384                 return -EINVAL;
1385
1386         err = afinfo->fill_dst(xdst, dev, fl);
1387
1388         xfrm_policy_put_afinfo(afinfo);
1389
1390         return err;
1391 }
1392
1393
1394 /* Allocate a chain of dst_entry's, attach the known xfrm's, calculate
1395  * all the metrics... In short, bundle a bundle.
1396  */
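/* The result is a chain of xfrm_dst entries linked through ->child, with
 * dst0->path and each xdst->route pointing at the underlying routing
 * entries; header and trailer lengths are accumulated so each level knows
 * how much space the transforms from that level inward will need.
 */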
1397
1398 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1399                                             struct xfrm_state **xfrm, int nx,
1400                                             const struct flowi *fl,
1401                                             struct dst_entry *dst)
1402 {
1403         struct net *net = xp_net(policy);
1404         unsigned long now = jiffies;
1405         struct net_device *dev;
1406         struct dst_entry *dst_prev = NULL;
1407         struct dst_entry *dst0 = NULL;
1408         int i = 0;
1409         int err;
1410         int header_len = 0;
1411         int nfheader_len = 0;
1412         int trailer_len = 0;
1413         int tos;
1414         int family = policy->selector.family;
1415         xfrm_address_t saddr, daddr;
1416
1417         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1418
1419         tos = xfrm_get_tos(fl, family);
1420         err = tos;
1421         if (tos < 0)
1422                 goto put_states;
1423
1424         dst_hold(dst);
1425
1426         for (; i < nx; i++) {
1427                 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1428                 struct dst_entry *dst1 = &xdst->u.dst;
1429
1430                 err = PTR_ERR(xdst);
1431                 if (IS_ERR(xdst)) {
1432                         dst_release(dst);
1433                         goto put_states;
1434                 }
1435
1436                 if (!dst_prev)
1437                         dst0 = dst1;
1438                 else {
1439                         dst_prev->child = dst_clone(dst1);
1440                         dst1->flags |= DST_NOHASH;
1441                 }
1442
1443                 xdst->route = dst;
1444                 dst_copy_metrics(dst1, dst);
1445
1446                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1447                         family = xfrm[i]->props.family;
1448                         dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1449                                               family);
1450                         err = PTR_ERR(dst);
1451                         if (IS_ERR(dst))
1452                                 goto put_states;
1453                 } else
1454                         dst_hold(dst);
1455
1456                 dst1->xfrm = xfrm[i];
1457                 xdst->xfrm_genid = xfrm[i]->genid;
1458
1459                 dst1->obsolete = -1;
1460                 dst1->flags |= DST_HOST;
1461                 dst1->lastuse = now;
1462
1463                 dst1->input = dst_discard;
1464                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1465
1466                 dst1->next = dst_prev;
1467                 dst_prev = dst1;
1468
1469                 header_len += xfrm[i]->props.header_len;
1470                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1471                         nfheader_len += xfrm[i]->props.header_len;
1472                 trailer_len += xfrm[i]->props.trailer_len;
1473         }
1474
1475         dst_prev->child = dst;
1476         dst0->path = dst;
1477
1478         err = -ENODEV;
1479         dev = dst->dev;
1480         if (!dev)
1481                 goto free_dst;
1482
1483         /* Copy neighbour for reachability confirmation */
1484         dst0->neighbour = neigh_clone(dst->neighbour);
1485
1486         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1487         xfrm_init_pmtu(dst_prev);
1488
1489         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1490                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1491
1492                 err = xfrm_fill_dst(xdst, dev, fl);
1493                 if (err)
1494                         goto free_dst;
1495
1496                 dst_prev->header_len = header_len;
1497                 dst_prev->trailer_len = trailer_len;
1498                 header_len -= xdst->u.dst.xfrm->props.header_len;
1499                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1500         }
1501
1502 out:
1503         return dst0;
1504
1505 put_states:
1506         for (; i < nx; i++)
1507                 xfrm_state_put(xfrm[i]);
1508 free_dst:
1509         if (dst0)
1510                 dst_free(dst0);
1511         dst0 = ERR_PTR(err);
1512         goto out;
1513 }
1514
1515 static int inline
1516 xfrm_dst_alloc_copy(void **target, const void *src, int size)
1517 {
1518         if (!*target) {
1519                 *target = kmalloc(size, GFP_ATOMIC);
1520                 if (!*target)
1521                         return -ENOMEM;
1522         }
1523         memcpy(*target, src, size);
1524         return 0;
1525 }
1526
1527 static int inline
1528 xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
1529 {
1530 #ifdef CONFIG_XFRM_SUB_POLICY
1531         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1532         return xfrm_dst_alloc_copy((void **)&(xdst->partner),
1533                                    sel, sizeof(*sel));
1534 #else
1535         return 0;
1536 #endif
1537 }
1538
1539 static inline int
1540 xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
1541 {
1542 #ifdef CONFIG_XFRM_SUB_POLICY
1543         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1544         return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
1545 #else
1546         return 0;
1547 #endif
1548 }
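
/* The copies stashed by the two helpers above (xdst->partner and
 * xdst->origin, CONFIG_XFRM_SUB_POLICY only) are what xfrm_bundle_ok()
 * later checks a flow against, via xfrm_selector_match() and
 * flow_cache_uli_match() respectively, before reusing a cached bundle.
 */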
1549
1550 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1551                                 struct xfrm_policy **pols,
1552                                 int *num_pols, int *num_xfrms)
1553 {
1554         int i;
1555
1556         if (*num_pols == 0 || !pols[0]) {
1557                 *num_pols = 0;
1558                 *num_xfrms = 0;
1559                 return 0;
1560         }
1561         if (IS_ERR(pols[0]))
1562                 return PTR_ERR(pols[0]);
1563
1564         *num_xfrms = pols[0]->xfrm_nr;
1565
1566 #ifdef CONFIG_XFRM_SUB_POLICY
1567         if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1568             pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1569                 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1570                                                     XFRM_POLICY_TYPE_MAIN,
1571                                                     fl, family,
1572                                                     XFRM_POLICY_OUT);
1573                 if (pols[1]) {
1574                         if (IS_ERR(pols[1])) {
1575                                 xfrm_pols_put(pols, *num_pols);
1576                                 return PTR_ERR(pols[1]);
1577                         }
1578                         (*num_pols)++;
1579                         (*num_xfrms) += pols[1]->xfrm_nr;
1580                 }
1581         }
1582 #endif
1583         for (i = 0; i < *num_pols; i++) {
1584                 if (pols[i]->action != XFRM_POLICY_ALLOW) {
1585                         *num_xfrms = -1;
1586                         break;
1587                 }
1588         }
1589
1590         return 0;
1591
1592 }
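
/* Note on the contract above: on a zero return, *num_pols is the number of
 * policy references held in pols[] (possibly bumped from 1 to 2 when a
 * sub-policy is backed by a main policy) and *num_xfrms is the total number
 * of templates, or -1 if any policy's action is not XFRM_POLICY_ALLOW.  On a
 * negative return no policy references are left held.
 */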
1593
1594 static struct xfrm_dst *
1595 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1596                                const struct flowi *fl, u16 family,
1597                                struct dst_entry *dst_orig)
1598 {
1599         struct net *net = xp_net(pols[0]);
1600         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1601         struct dst_entry *dst;
1602         struct xfrm_dst *xdst;
1603         int err;
1604
1605         /* Try to instantiate a bundle */
1606         err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1607         if (err <= 0) {
1608                 if (err != 0 && err != -EAGAIN)
1609                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1610                 return ERR_PTR(err);
1611         }
1612
1613         dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1614         if (IS_ERR(dst)) {
1615                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1616                 return ERR_CAST(dst);
1617         }
1618
1619         xdst = (struct xfrm_dst *)dst;
1620         xdst->num_xfrms = err;
1621         if (num_pols > 1)
1622                 err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1623         else
1624                 err = xfrm_dst_update_origin(dst, fl);
1625         if (unlikely(err)) {
1626                 dst_free(dst);
1627                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1628                 return ERR_PTR(err);
1629         }
1630
1631         xdst->num_pols = num_pols;
1632         memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
1633         xdst->policy_genid = atomic_read(&pols[0]->genid);
1634
1635         return xdst;
1636 }
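
/* xfrm_resolve_and_create_bundle() therefore has three outcomes for its
 * callers: a valid xfrm_dst, NULL when xfrm_tmpl_resolve() found nothing to
 * instantiate (err == 0 turns into ERR_PTR(0)), or a real ERR_PTR(), where
 * -EAGAIN means "states not resolved yet" rather than a hard failure.
 */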
1637
1638 static struct flow_cache_object *
1639 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1640                    struct flow_cache_object *oldflo, void *ctx)
1641 {
1642         struct dst_entry *dst_orig = (struct dst_entry *)ctx;
1643         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1644         struct xfrm_dst *xdst, *new_xdst;
1645         int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
1646
1647         /* Check if the policies from the old bundle are usable */
1648         xdst = NULL;
1649         if (oldflo) {
1650                 xdst = container_of(oldflo, struct xfrm_dst, flo);
1651                 num_pols = xdst->num_pols;
1652                 num_xfrms = xdst->num_xfrms;
1653                 pol_dead = 0;
1654                 for (i = 0; i < num_pols; i++) {
1655                         pols[i] = xdst->pols[i];
1656                         pol_dead |= pols[i]->walk.dead;
1657                 }
1658                 if (pol_dead) {
1659                         dst_free(&xdst->u.dst);
1660                         xdst = NULL;
1661                         num_pols = 0;
1662                         num_xfrms = 0;
1663                         oldflo = NULL;
1664                 }
1665         }
1666
1667         /* Resolve policies to use if we couldn't get them from
1668          * the previous cache entry */
1669         if (xdst == NULL) {
1670                 num_pols = 1;
1671                 pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
1672                 err = xfrm_expand_policies(fl, family, pols,
1673                                            &num_pols, &num_xfrms);
1674                 if (err < 0)
1675                         goto inc_error;
1676                 if (num_pols == 0)
1677                         return NULL;
1678                 if (num_xfrms <= 0)
1679                         goto make_dummy_bundle;
1680         }
1681
1682         new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
1683         if (IS_ERR(new_xdst)) {
1684                 err = PTR_ERR(new_xdst);
1685                 if (err != -EAGAIN)
1686                         goto error;
1687                 if (oldflo == NULL)
1688                         goto make_dummy_bundle;
1689                 dst_hold(&xdst->u.dst);
1690                 return oldflo;
1691         } else if (new_xdst == NULL) {
1692                 num_xfrms = 0;
1693                 if (oldflo == NULL)
1694                         goto make_dummy_bundle;
1695                 xdst->num_xfrms = 0;
1696                 dst_hold(&xdst->u.dst);
1697                 return oldflo;
1698         }
1699
1700         /* Kill the previous bundle */
1701         if (xdst) {
1702                 /* The policies were stolen for the newly generated bundle */
1703                 xdst->num_pols = 0;
1704                 dst_free(&xdst->u.dst);
1705         }
1706
1707         /* The flow cache does not take a reference, it dst_free()'s,
1708          * but we do need to return one reference for the original caller */
1709         dst_hold(&new_xdst->u.dst);
1710         return &new_xdst->flo;
1711
1712 make_dummy_bundle:
1713         /* We found policies, but there are no bundles to instantiate:
1714          * either the policy blocks, it has no transformations, or
1715          * we could not build a template (no xfrm_states). */
1716         xdst = xfrm_alloc_dst(net, family);
1717         if (IS_ERR(xdst)) {
1718                 xfrm_pols_put(pols, num_pols);
1719                 return ERR_CAST(xdst);
1720         }
1721         xdst->num_pols = num_pols;
1722         xdst->num_xfrms = num_xfrms;
1723         memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
1724
1725         dst_hold(&xdst->u.dst);
1726         return &xdst->flo;
1727
1728 inc_error:
1729         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1730 error:
1731         if (xdst != NULL)
1732                 dst_free(&xdst->u.dst);
1733         else
1734                 xfrm_pols_put(pols, num_pols);
1735         return ERR_PTR(err);
1736 }
1737
1738 /* Main function: finds/creates a bundle for a given flow.
1739  *
1740  * At the moment we eat a raw IP route, mostly to speed up lookups
1741  * on interfaces with IPsec disabled.
1742  */
1743 int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
1744                   const struct flowi *fl,
1745                   struct sock *sk, int flags)
1746 {
1747         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1748         struct flow_cache_object *flo;
1749         struct xfrm_dst *xdst;
1750         struct dst_entry *dst, *dst_orig = *dst_p, *route;
1751         u16 family = dst_orig->ops->family;
1752         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1753         int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
1754
1755 restart:
1756         dst = NULL;
1757         xdst = NULL;
1758         route = NULL;
1759
1760         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1761                 num_pols = 1;
1762                 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1763                 err = xfrm_expand_policies(fl, family, pols,
1764                                            &num_pols, &num_xfrms);
1765                 if (err < 0)
1766                         goto dropdst;
1767
1768                 if (num_pols) {
1769                         if (num_xfrms <= 0) {
1770                                 drop_pols = num_pols;
1771                                 goto no_transform;
1772                         }
1773
1774                         xdst = xfrm_resolve_and_create_bundle(
1775                                         pols, num_pols, fl,
1776                                         family, dst_orig);
1777                         if (IS_ERR(xdst)) {
1778                                 xfrm_pols_put(pols, num_pols);
1779                                 err = PTR_ERR(xdst);
1780                                 goto dropdst;
1781                         } else if (xdst == NULL) {
1782                                 num_xfrms = 0;
1783                                 drop_pols = num_pols;
1784                                 goto no_transform;
1785                         }
1786
1787                         spin_lock_bh(&xfrm_policy_sk_bundle_lock);
1788                         xdst->u.dst.next = xfrm_policy_sk_bundles;
1789                         xfrm_policy_sk_bundles = &xdst->u.dst;
1790                         spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
1791
1792                         route = xdst->route;
1793                 }
1794         }
1795
1796         if (xdst == NULL) {
1797                 /* To accelerate a bit...  */
1798                 if ((dst_orig->flags & DST_NOXFRM) ||
1799                     !net->xfrm.policy_count[XFRM_POLICY_OUT])
1800                         goto nopol;
1801
1802                 flo = flow_cache_lookup(net, fl, family, dir,
1803                                         xfrm_bundle_lookup, dst_orig);
1804                 if (flo == NULL)
1805                         goto nopol;
1806                 if (IS_ERR(flo)) {
1807                         err = PTR_ERR(flo);
1808                         goto dropdst;
1809                 }
1810                 xdst = container_of(flo, struct xfrm_dst, flo);
1811
1812                 num_pols = xdst->num_pols;
1813                 num_xfrms = xdst->num_xfrms;
1814                 memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
1815                 route = xdst->route;
1816         }
1817
1818         dst = &xdst->u.dst;
1819         if (route == NULL && num_xfrms > 0) {
1820                 /* The only case in which xfrm_bundle_lookup() returns a
1821                  * bundle with a null route is when the template could
1822                  * not be resolved. It means the policies are there, but
1823                  * the bundle could not be created, since we don't yet
1824                  * have the xfrm_states. We need to wait for the KM to
1825                  * negotiate new SAs or bail out with an error. */
1826                 if (net->xfrm.sysctl_larval_drop) {
1827                         /* EREMOTE tells the caller to generate
1828                          * a one-shot blackhole route. */
1829                         dst_release(dst);
1830                         xfrm_pols_put(pols, drop_pols);
1831                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1832                         return -EREMOTE;
1833                 }
1834                 if (flags & XFRM_LOOKUP_WAIT) {
1835                         DECLARE_WAITQUEUE(wait, current);
1836
1837                         add_wait_queue(&net->xfrm.km_waitq, &wait);
1838                         set_current_state(TASK_INTERRUPTIBLE);
1839                         schedule();
1840                         set_current_state(TASK_RUNNING);
1841                         remove_wait_queue(&net->xfrm.km_waitq, &wait);
1842
1843                         if (!signal_pending(current)) {
1844                                 dst_release(dst);
1845                                 goto restart;
1846                         }
1847
1848                         err = -ERESTART;
1849                 } else
1850                         err = -EAGAIN;
1851
1852                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1853                 goto error;
1854         }
1855
1856 no_transform:
1857         if (num_pols == 0)
1858                 goto nopol;
1859
1860         if ((flags & XFRM_LOOKUP_ICMP) &&
1861             !(pols[0]->flags & XFRM_POLICY_ICMP)) {
1862                 err = -ENOENT;
1863                 goto error;
1864         }
1865
1866         for (i = 0; i < num_pols; i++)
1867                 pols[i]->curlft.use_time = get_seconds();
1868
1869         if (num_xfrms < 0) {
1870                 /* Prohibit the flow */
1871                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1872                 err = -EPERM;
1873                 goto error;
1874         } else if (num_xfrms > 0) {
1875                 /* Flow transformed */
1876                 *dst_p = dst;
1877                 dst_release(dst_orig);
1878         } else {
1879                 /* Flow passes untransformed */
1880                 dst_release(dst);
1881         }
1882 ok:
1883         xfrm_pols_put(pols, drop_pols);
1884         return 0;
1885
1886 nopol:
1887         if (!(flags & XFRM_LOOKUP_ICMP))
1888                 goto ok;
1889         err = -ENOENT;
1890 error:
1891         dst_release(dst);
1892 dropdst:
1893         dst_release(dst_orig);
1894         *dst_p = NULL;
1895         xfrm_pols_put(pols, drop_pols);
1896         return err;
1897 }
1898 EXPORT_SYMBOL(__xfrm_lookup);
1899
1900 int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
1901                 const struct flowi *fl,
1902                 struct sock *sk, int flags)
1903 {
1904         int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
1905
1906         if (err == -EREMOTE) {
1907                 dst_release(*dst_p);
1908                 *dst_p = NULL;
1909                 err = -EAGAIN;
1910         }
1911
1912         return err;
1913 }
1914 EXPORT_SYMBOL(xfrm_lookup);
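
/* Typical use of xfrm_lookup(): the caller already holds a reference on a
 * plain route in *dst_p and asks the IPsec layer to replace it with a bundle
 * when policy requires transformation.  A minimal, hypothetical sketch (the
 * in-tree caller __xfrm_route_forward() below follows the same pattern):
 *
 *	struct dst_entry *dst;
 *	struct flowi fl;
 *
 *	if (xfrm_decode_session(skb, &fl, family) < 0)
 *		goto drop;
 *	skb_dst_force(skb);
 *	dst = skb_dst(skb);
 *	if (xfrm_lookup(net, &dst, &fl, NULL, 0) != 0)
 *		goto drop;	// on error *dst_p is released and set to NULL
 *	skb_dst_set(skb, dst);	// either the original route or the bundle head
 */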
1915
1916 static inline int
1917 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
1918 {
1919         struct xfrm_state *x;
1920
1921         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1922                 return 0;
1923         x = skb->sp->xvec[idx];
1924         if (!x->type->reject)
1925                 return 0;
1926         return x->type->reject(x, skb, fl);
1927 }
1928
1929 /* When the skb is transformed back to its "native" form, we have to
1930  * check policy restrictions. At the moment we do this in a maximally
1931  * stupid way. Shame on me. :-) Of course, connected sockets must
1932  * have the policy cached at them.
1933  */
1934
1935 static inline int
1936 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1937               unsigned short family)
1938 {
1939         if (xfrm_state_kern(x))
1940                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1941         return  x->id.proto == tmpl->id.proto &&
1942                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1943                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1944                 x->props.mode == tmpl->mode &&
1945                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
1946                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1947                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1948                   xfrm_state_addr_cmp(tmpl, x, family));
1949 }
1950
1951 /*
1952  * Zero or a positive value is returned when validation succeeds (either a
1953  * bypass because of an optional transport-mode template, or the next index
1954  * of the secpath state matched against the template).
1955  * -1 is returned when no matching template is found.
1956  * Otherwise "-2 - errored_index" is returned.
1957  */
1958 static inline int
1959 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1960                unsigned short family)
1961 {
1962         int idx = start;
1963
1964         if (tmpl->optional) {
1965                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1966                         return start;
1967         } else
1968                 start = -1;
1969         for (; idx < sp->len; idx++) {
1970                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1971                         return ++idx;
1972                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1973                         if (start == -1)
1974                                 start = -2-idx;
1975                         break;
1976                 }
1977         }
1978         return start;
1979 }
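
/* Worked example of the contract above, as used by __xfrm_policy_check():
 * with a secpath of { ESP tunnel } and one matching ESP tunnel template,
 * xfrm_policy_ok() returns idx + 1 and the scan continues from there.  Any
 * negative value makes the caller reject the packet; values below -1 encode
 * the offending state as "-2 - errored_index", which the caller decodes back
 * into xerr_idx before calling xfrm_secpath_reject().
 */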
1980
1981 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1982                           unsigned int family, int reverse)
1983 {
1984         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1985         int err;
1986
1987         if (unlikely(afinfo == NULL))
1988                 return -EAFNOSUPPORT;
1989
1990         afinfo->decode_session(skb, fl, reverse);
1991         err = security_xfrm_decode_session(skb, &fl->secid);
1992         xfrm_policy_put_afinfo(afinfo);
1993         return err;
1994 }
1995 EXPORT_SYMBOL(__xfrm_decode_session);
1996
1997 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1998 {
1999         for (; k < sp->len; k++) {
2000                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2001                         *idxp = k;
2002                         return 1;
2003                 }
2004         }
2005
2006         return 0;
2007 }
2008
2009 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
2010                         unsigned short family)
2011 {
2012         struct net *net = dev_net(skb->dev);
2013         struct xfrm_policy *pol;
2014         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2015         int npols = 0;
2016         int xfrm_nr;
2017         int pi;
2018         int reverse;
2019         struct flowi fl;
2020         u8 fl_dir;
2021         int xerr_idx = -1;
2022
2023         reverse = dir & ~XFRM_POLICY_MASK;
2024         dir &= XFRM_POLICY_MASK;
2025         fl_dir = policy_to_flow_dir(dir);
2026
2027         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
2028                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2029                 return 0;
2030         }
2031
2032         nf_nat_decode_session(skb, &fl, family);
2033
2034         /* First, check the used SAs against their selectors. */
2035         if (skb->sp) {
2036                 int i;
2037
2038                 for (i = skb->sp->len - 1; i >= 0; i--) {
2039                         struct xfrm_state *x = skb->sp->xvec[i];
2040                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
2041                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
2042                                 return 0;
2043                         }
2044                 }
2045         }
2046
2047         pol = NULL;
2048         if (sk && sk->sk_policy[dir]) {
2049                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2050                 if (IS_ERR(pol)) {
2051                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2052                         return 0;
2053                 }
2054         }
2055
2056         if (!pol) {
2057                 struct flow_cache_object *flo;
2058
2059                 flo = flow_cache_lookup(net, &fl, family, fl_dir,
2060                                         xfrm_policy_lookup, NULL);
2061                 if (IS_ERR_OR_NULL(flo))
2062                         pol = ERR_CAST(flo);
2063                 else
2064                         pol = container_of(flo, struct xfrm_policy, flo);
2065         }
2066
2067         if (IS_ERR(pol)) {
2068                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2069                 return 0;
2070         }
2071
2072         if (!pol) {
2073                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2074                         xfrm_secpath_reject(xerr_idx, skb, &fl);
2075                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2076                         return 0;
2077                 }
2078                 return 1;
2079         }
2080
2081         pol->curlft.use_time = get_seconds();
2082
2083         pols[0] = pol;
2084         npols++;
2085 #ifdef CONFIG_XFRM_SUB_POLICY
2086         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2087                 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2088                                                     &fl, family,
2089                                                     XFRM_POLICY_IN);
2090                 if (pols[1]) {
2091                         if (IS_ERR(pols[1])) {
2092                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2093                                 return 0;
2094                         }
2095                         pols[1]->curlft.use_time = get_seconds();
2096                         npols++;
2097                 }
2098         }
2099 #endif
2100
2101         if (pol->action == XFRM_POLICY_ALLOW) {
2102                 struct sec_path *sp;
2103                 static struct sec_path dummy;
2104                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2105                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2106                 struct xfrm_tmpl **tpp = tp;
2107                 int ti = 0;
2108                 int i, k;
2109
2110                 if ((sp = skb->sp) == NULL)
2111                         sp = &dummy;
2112
2113                 for (pi = 0; pi < npols; pi++) {
2114                         if (pols[pi] != pol &&
2115                             pols[pi]->action != XFRM_POLICY_ALLOW) {
2116                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2117                                 goto reject;
2118                         }
2119                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2120                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2121                                 goto reject_error;
2122                         }
2123                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
2124                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
2125                 }
2126                 xfrm_nr = ti;
2127                 if (npols > 1) {
2128                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
2129                         tpp = stp;
2130                 }
2131
2132                 /* For each tunnel xfrm, find the first matching tmpl.
2133                  * For each tmpl before that, find the corresponding xfrm.
2134                  * Order is _important_. Later we will implement
2135                  * some barriers, but at the moment barriers
2136                  * are implied between every two transformations.
2137                  */
2138                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2139                         k = xfrm_policy_ok(tpp[i], sp, k, family);
2140                         if (k < 0) {
2141                                 if (k < -1)
2142                                         /* "-2 - errored_index" returned */
2143                                         xerr_idx = -(2+k);
2144                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2145                                 goto reject;
2146                         }
2147                 }
2148
2149                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2150                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2151                         goto reject;
2152                 }
2153
2154                 xfrm_pols_put(pols, npols);
2155                 return 1;
2156         }
2157         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2158
2159 reject:
2160         xfrm_secpath_reject(xerr_idx, skb, &fl);
2161 reject_error:
2162         xfrm_pols_put(pols, npols);
2163         return 0;
2164 }
2165 EXPORT_SYMBOL(__xfrm_policy_check);
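
/* Callers normally reach this through the xfrm_policy_check() and
 * xfrm{4,6}_policy_check() inline wrappers (found in include/net/xfrm.h in
 * kernels of this vintage), which short-circuit the lookup when no policies
 * are loaded and the skb carries no secpath.
 */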
2166
2167 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2168 {
2169         struct net *net = dev_net(skb->dev);
2170         struct flowi fl;
2171         struct dst_entry *dst;
2172         int res;
2173
2174         if (xfrm_decode_session(skb, &fl, family) < 0) {
2175                 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2176                 return 0;
2177         }
2178
2179         skb_dst_force(skb);
2180         dst = skb_dst(skb);
2181
2182         res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
2183         skb_dst_set(skb, dst);
2184         return res;
2185 }
2186 EXPORT_SYMBOL(__xfrm_route_forward);
2187
2188 /* Optimize later using cookies and generation ids. */
2189
2190 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2191 {
2192         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2193          * to "-1" to force all XFRM destinations to get validated by
2194          * dst_ops->check on every use.  We do this because when a
2195          * normal route referenced by an XFRM dst is obsoleted we do
2196          * not go looking around for all the referencing parent XFRM dsts
2197          * so that we can invalidate them.  It is just too much work.
2198          * Instead we make the checks here on every use.  For example:
2199          *
2200          *      XFRM dst A --> IPv4 dst X
2201          *
2202          * X is the "xdst->route" of A (X is also the "dst->path" of A
2203          * in this example).  If X is marked obsolete, "A" will not
2204          * notice.  That's what we are validating here via the
2205          * stale_bundle() check.
2206          *
2207          * When a policy's bundle is pruned, we dst_free() the XFRM
2208          * dst which causes its ->obsolete field to be set to a
2209          * positive non-zero integer.  If an XFRM dst has been pruned
2210          * like this, we want to force a new route lookup.
2211          */
2212         if (dst->obsolete < 0 && !stale_bundle(dst))
2213                 return dst;
2214
2215         return NULL;
2216 }
2217
2218 static int stale_bundle(struct dst_entry *dst)
2219 {
2220         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC);
2221 }
2222
2223 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2224 {
2225         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2226                 dst->dev = dev_net(dev)->loopback_dev;
2227                 dev_hold(dst->dev);
2228                 dev_put(dev);
2229         }
2230 }
2231 EXPORT_SYMBOL(xfrm_dst_ifdown);
2232
2233 static void xfrm_link_failure(struct sk_buff *skb)
2234 {
2235         /* Impossible. Such a dst must be popped before it reaches the point of failure. */
2236 }
2237
2238 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2239 {
2240         if (dst) {
2241                 if (dst->obsolete) {
2242                         dst_release(dst);
2243                         dst = NULL;
2244                 }
2245         }
2246         return dst;
2247 }
2248
2249 static void __xfrm_garbage_collect(struct net *net)
2250 {
2251         struct dst_entry *head, *next;
2252
2253         flow_cache_flush();
2254
2255         spin_lock_bh(&xfrm_policy_sk_bundle_lock);
2256         head = xfrm_policy_sk_bundles;
2257         xfrm_policy_sk_bundles = NULL;
2258         spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
2259
2260         while (head) {
2261                 next = head->next;
2262                 dst_free(head);
2263                 head = next;
2264         }
2265 }
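
/* __xfrm_garbage_collect() is both the default afinfo->garbage_collect hook
 * installed by xfrm_policy_register_afinfo() below and the handler run on
 * NETDEV_DOWN via xfrm_dev_event(): it flushes the flow cache and then frees
 * the per-socket bundle list built up in __xfrm_lookup().
 */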
2266
2267 static void xfrm_init_pmtu(struct dst_entry *dst)
2268 {
2269         do {
2270                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2271                 u32 pmtu, route_mtu_cached;
2272
2273                 pmtu = dst_mtu(dst->child);
2274                 xdst->child_mtu_cached = pmtu;
2275
2276                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2277
2278                 route_mtu_cached = dst_mtu(xdst->route);
2279                 xdst->route_mtu_cached = route_mtu_cached;
2280
2281                 if (pmtu > route_mtu_cached)
2282                         pmtu = route_mtu_cached;
2283
2284                 dst_metric_set(dst, RTAX_MTU, pmtu);
2285         } while ((dst = dst->next));
2286 }
2287
2288 /* Check that the bundle accepts the flow and its components are
2289  * still valid.
2290  */
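
/* Besides validating every link of the chain, xfrm_bundle_ok() refreshes the
 * cached child and route MTUs and, via the "last" bookkeeping below,
 * propagates a changed MTU back up through the bundle's RTAX_MTU metrics.
 */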
2291
2292 static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2293                           const struct flowi *fl, int family)
2294 {
2295         struct dst_entry *dst = &first->u.dst;
2296         struct xfrm_dst *last;
2297         u32 mtu;
2298
2299         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2300             (dst->dev && !netif_running(dst->dev)))
2301                 return 0;
2302 #ifdef CONFIG_XFRM_SUB_POLICY
2303         if (fl) {
2304                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2305                         return 0;
2306                 if (first->partner &&
2307                     !xfrm_selector_match(first->partner, fl, family))
2308                         return 0;
2309         }
2310 #endif
2311
2312         last = NULL;
2313
2314         do {
2315                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2316
2317                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2318                         return 0;
2319                 if (fl && pol &&
2320                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2321                         return 0;
2322                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2323                         return 0;
2324                 if (xdst->xfrm_genid != dst->xfrm->genid)
2325                         return 0;
2326                 if (xdst->num_pols > 0 &&
2327                     xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2328                         return 0;
2329
2330                 mtu = dst_mtu(dst->child);
2331                 if (xdst->child_mtu_cached != mtu) {
2332                         last = xdst;
2333                         xdst->child_mtu_cached = mtu;
2334                 }
2335
2336                 if (!dst_check(xdst->route, xdst->route_cookie))
2337                         return 0;
2338                 mtu = dst_mtu(xdst->route);
2339                 if (xdst->route_mtu_cached != mtu) {
2340                         last = xdst;
2341                         xdst->route_mtu_cached = mtu;
2342                 }
2343
2344                 dst = dst->child;
2345         } while (dst->xfrm);
2346
2347         if (likely(!last))
2348                 return 1;
2349
2350         mtu = last->child_mtu_cached;
2351         for (;;) {
2352                 dst = &last->u.dst;
2353
2354                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2355                 if (mtu > last->route_mtu_cached)
2356                         mtu = last->route_mtu_cached;
2357                 dst_metric_set(dst, RTAX_MTU, mtu);
2358
2359                 if (last == first)
2360                         break;
2361
2362                 last = (struct xfrm_dst *)last->u.dst.next;
2363                 last->child_mtu_cached = mtu;
2364         }
2365
2366         return 1;
2367 }
2368
2369 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2370 {
2371         return dst_metric_advmss(dst->path);
2372 }
2373
2374 static unsigned int xfrm_default_mtu(const struct dst_entry *dst)
2375 {
2376         return dst_mtu(dst->path);
2377 }
2378
2379 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2380 {
2381         struct net *net;
2382         int err = 0;
2383         if (unlikely(afinfo == NULL))
2384                 return -EINVAL;
2385         if (unlikely(afinfo->family >= NPROTO))
2386                 return -EAFNOSUPPORT;
2387         write_lock_bh(&xfrm_policy_afinfo_lock);
2388         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2389                 err = -ENOBUFS;
2390         else {
2391                 struct dst_ops *dst_ops = afinfo->dst_ops;
2392                 if (likely(dst_ops->kmem_cachep == NULL))
2393                         dst_ops->kmem_cachep = xfrm_dst_cache;
2394                 if (likely(dst_ops->check == NULL))
2395                         dst_ops->check = xfrm_dst_check;
2396                 if (likely(dst_ops->default_advmss == NULL))
2397                         dst_ops->default_advmss = xfrm_default_advmss;
2398                 if (likely(dst_ops->default_mtu == NULL))
2399                         dst_ops->default_mtu = xfrm_default_mtu;
2400                 if (likely(dst_ops->negative_advice == NULL))
2401                         dst_ops->negative_advice = xfrm_negative_advice;
2402                 if (likely(dst_ops->link_failure == NULL))
2403                         dst_ops->link_failure = xfrm_link_failure;
2404                 if (likely(afinfo->garbage_collect == NULL))
2405                         afinfo->garbage_collect = __xfrm_garbage_collect;
2406                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2407         }
2408         write_unlock_bh(&xfrm_policy_afinfo_lock);
2409
2410         rtnl_lock();
2411         for_each_net(net) {
2412                 struct dst_ops *xfrm_dst_ops;
2413
2414                 switch (afinfo->family) {
2415                 case AF_INET:
2416                         xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
2417                         break;
2418 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2419                 case AF_INET6:
2420                         xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
2421                         break;
2422 #endif
2423                 default:
2424                         BUG();
2425                 }
2426                 *xfrm_dst_ops = *afinfo->dst_ops;
2427         }
2428         rtnl_unlock();
2429
2430         return err;
2431 }
2432 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
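
/* Sketch of how an address family typically hooks itself up, loosely modelled
 * on net/ipv4/xfrm4_policy.c; the field list is abridged and illustrative,
 * not a complete definition:
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family		= AF_INET,
 *		.dst_ops	= &xfrm4_dst_ops,
 *		.dst_lookup	= xfrm4_dst_lookup,
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 *
 * Any dst_ops hook the module leaves NULL is filled in with the generic
 * xfrm_* defaults above before the afinfo becomes visible in
 * xfrm_policy_afinfo[].
 */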
2433
2434 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2435 {
2436         int err = 0;
2437         if (unlikely(afinfo == NULL))
2438                 return -EINVAL;
2439         if (unlikely(afinfo->family >= NPROTO))
2440                 return -EAFNOSUPPORT;
2441         write_lock_bh(&xfrm_policy_afinfo_lock);
2442         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2443                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2444                         err = -EINVAL;
2445                 else {
2446                         struct dst_ops *dst_ops = afinfo->dst_ops;
2447                         xfrm_policy_afinfo[afinfo->family] = NULL;
2448                         dst_ops->kmem_cachep = NULL;
2449                         dst_ops->check = NULL;
2450                         dst_ops->negative_advice = NULL;
2451                         dst_ops->link_failure = NULL;
2452                         afinfo->garbage_collect = NULL;
2453                 }
2454         }
2455         write_unlock_bh(&xfrm_policy_afinfo_lock);
2456         return err;
2457 }
2458 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2459
2460 static void __net_init xfrm_dst_ops_init(struct net *net)
2461 {
2462         struct xfrm_policy_afinfo *afinfo;
2463
2464         read_lock_bh(&xfrm_policy_afinfo_lock);
2465         afinfo = xfrm_policy_afinfo[AF_INET];
2466         if (afinfo)
2467                 net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
2468 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2469         afinfo = xfrm_policy_afinfo[AF_INET6];
2470         if (afinfo)
2471                 net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
2472 #endif
2473         read_unlock_bh(&xfrm_policy_afinfo_lock);
2474 }
2475
2476 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2477 {
2478         struct xfrm_policy_afinfo *afinfo;
2479         if (unlikely(family >= NPROTO))
2480                 return NULL;
2481         read_lock(&xfrm_policy_afinfo_lock);
2482         afinfo = xfrm_policy_afinfo[family];
2483         if (unlikely(!afinfo))
2484                 read_unlock(&xfrm_policy_afinfo_lock);
2485         return afinfo;
2486 }
2487
2488 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2489 {
2490         read_unlock(&xfrm_policy_afinfo_lock);
2491 }
2492
2493 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2494 {
2495         struct net_device *dev = ptr;
2496
2497         switch (event) {
2498         case NETDEV_DOWN:
2499                 __xfrm_garbage_collect(dev_net(dev));
2500         }
2501         return NOTIFY_DONE;
2502 }
2503
2504 static struct notifier_block xfrm_dev_notifier = {
2505         .notifier_call  = xfrm_dev_event,
2506 };
2507
2508 #ifdef CONFIG_XFRM_STATISTICS
2509 static int __net_init xfrm_statistics_init(struct net *net)
2510 {
2511         int rv;
2512
2513         if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
2514                           sizeof(struct linux_xfrm_mib),
2515                           __alignof__(struct linux_xfrm_mib)) < 0)
2516                 return -ENOMEM;
2517         rv = xfrm_proc_init(net);
2518         if (rv < 0)
2519                 snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2520         return rv;
2521 }
2522
2523 static void xfrm_statistics_fini(struct net *net)
2524 {
2525         xfrm_proc_fini(net);
2526         snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2527 }
2528 #else
2529 static int __net_init xfrm_statistics_init(struct net *net)
2530 {
2531         return 0;
2532 }
2533
2534 static void xfrm_statistics_fini(struct net *net)
2535 {
2536 }
2537 #endif
2538
2539 static int __net_init xfrm_policy_init(struct net *net)
2540 {
2541         unsigned int hmask, sz;
2542         int dir;
2543
2544         if (net_eq(net, &init_net))
2545                 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2546                                            sizeof(struct xfrm_dst),
2547                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2548                                            NULL);
2549
2550         hmask = 8 - 1;
2551         sz = (hmask+1) * sizeof(struct hlist_head);
2552
2553         net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2554         if (!net->xfrm.policy_byidx)
2555                 goto out_byidx;
2556         net->xfrm.policy_idx_hmask = hmask;
2557
2558         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2559                 struct xfrm_policy_hash *htab;
2560
2561                 net->xfrm.policy_count[dir] = 0;
2562                 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2563
2564                 htab = &net->xfrm.policy_bydst[dir];
2565                 htab->table = xfrm_hash_alloc(sz);
2566                 if (!htab->table)
2567                         goto out_bydst;
2568                 htab->hmask = hmask;
2569         }
2570
2571         INIT_LIST_HEAD(&net->xfrm.policy_all);
2572         INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2573         if (net_eq(net, &init_net))
2574                 register_netdevice_notifier(&xfrm_dev_notifier);
2575         return 0;
2576
2577 out_bydst:
2578         for (dir--; dir >= 0; dir--) {
2579                 struct xfrm_policy_hash *htab;
2580
2581                 htab = &net->xfrm.policy_bydst[dir];
2582                 xfrm_hash_free(htab->table, sz);
2583         }
2584         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2585 out_byidx:
2586         return -ENOMEM;
2587 }
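
/* The per-direction "bydst" tables start out tiny (hmask = 7, i.e. eight
 * buckets); the policy_hash_work initialised above is what later grows them,
 * via xfrm_hash_resize(), once enough policies have been inserted.
 */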
2588
2589 static void xfrm_policy_fini(struct net *net)
2590 {
2591         struct xfrm_audit audit_info;
2592         unsigned int sz;
2593         int dir;
2594
2595         flush_work(&net->xfrm.policy_hash_work);
2596 #ifdef CONFIG_XFRM_SUB_POLICY
2597         audit_info.loginuid = -1;
2598         audit_info.sessionid = -1;
2599         audit_info.secid = 0;
2600         xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
2601 #endif
2602         audit_info.loginuid = -1;
2603         audit_info.sessionid = -1;
2604         audit_info.secid = 0;
2605         xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
2606
2607         WARN_ON(!list_empty(&net->xfrm.policy_all));
2608
2609         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2610                 struct xfrm_policy_hash *htab;
2611
2612                 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2613
2614                 htab = &net->xfrm.policy_bydst[dir];
2615                 sz = (htab->hmask + 1) * sizeof(struct hlist_head);
2616                 WARN_ON(!hlist_empty(htab->table));
2617                 xfrm_hash_free(htab->table, sz);
2618         }
2619
2620         sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2621         WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
2622         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2623 }
2624
2625 static int __net_init xfrm_net_init(struct net *net)
2626 {
2627         int rv;
2628
2629         rv = xfrm_statistics_init(net);
2630         if (rv < 0)
2631                 goto out_statistics;
2632         rv = xfrm_state_init(net);
2633         if (rv < 0)
2634                 goto out_state;
2635         rv = xfrm_policy_init(net);
2636         if (rv < 0)
2637                 goto out_policy;
2638         xfrm_dst_ops_init(net);
2639         rv = xfrm_sysctl_init(net);
2640         if (rv < 0)
2641                 goto out_sysctl;
2642         return 0;
2643
2644 out_sysctl:
2645         xfrm_policy_fini(net);
2646 out_policy:
2647         xfrm_state_fini(net);
2648 out_state:
2649         xfrm_statistics_fini(net);
2650 out_statistics:
2651         return rv;
2652 }
2653
2654 static void __net_exit xfrm_net_exit(struct net *net)
2655 {
2656         xfrm_sysctl_fini(net);
2657         xfrm_policy_fini(net);
2658         xfrm_state_fini(net);
2659         xfrm_statistics_fini(net);
2660 }
2661
2662 static struct pernet_operations __net_initdata xfrm_net_ops = {
2663         .init = xfrm_net_init,
2664         .exit = xfrm_net_exit,
2665 };
2666
2667 void __init xfrm_init(void)
2668 {
2669         register_pernet_subsys(&xfrm_net_ops);
2670         xfrm_input_init();
2671 }
2672
2673 #ifdef CONFIG_AUDITSYSCALL
2674 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2675                                          struct audit_buffer *audit_buf)
2676 {
2677         struct xfrm_sec_ctx *ctx = xp->security;
2678         struct xfrm_selector *sel = &xp->selector;
2679
2680         if (ctx)
2681                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2682                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2683
2684         switch (sel->family) {
2685         case AF_INET:
2686                 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2687                 if (sel->prefixlen_s != 32)
2688                         audit_log_format(audit_buf, " src_prefixlen=%d",
2689                                          sel->prefixlen_s);
2690                 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
2691                 if (sel->prefixlen_d != 32)
2692                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2693                                          sel->prefixlen_d);
2694                 break;
2695         case AF_INET6:
2696                 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
2697                 if (sel->prefixlen_s != 128)
2698                         audit_log_format(audit_buf, " src_prefixlen=%d",
2699                                          sel->prefixlen_s);
2700                 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
2701                 if (sel->prefixlen_d != 128)
2702                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2703                                          sel->prefixlen_d);
2704                 break;
2705         }
2706 }
2707
2708 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2709                            uid_t auid, u32 sessionid, u32 secid)
2710 {
2711         struct audit_buffer *audit_buf;
2712
2713         audit_buf = xfrm_audit_start("SPD-add");
2714         if (audit_buf == NULL)
2715                 return;
2716         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2717         audit_log_format(audit_buf, " res=%u", result);
2718         xfrm_audit_common_policyinfo(xp, audit_buf);
2719         audit_log_end(audit_buf);
2720 }
2721 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2722
2723 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2724                               uid_t auid, u32 sessionid, u32 secid)
2725 {
2726         struct audit_buffer *audit_buf;
2727
2728         audit_buf = xfrm_audit_start("SPD-delete");
2729         if (audit_buf == NULL)
2730                 return;
2731         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2732         audit_log_format(audit_buf, " res=%u", result);
2733         xfrm_audit_common_policyinfo(xp, audit_buf);
2734         audit_log_end(audit_buf);
2735 }
2736 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2737 #endif
2738
2739 #ifdef CONFIG_XFRM_MIGRATE
2740 static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
2741                                        const struct xfrm_selector *sel_tgt)
2742 {
2743         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2744                 if (sel_tgt->family == sel_cmp->family &&
2745                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2746                                   sel_cmp->family) == 0 &&
2747                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2748                                   sel_cmp->family) == 0 &&
2749                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2750                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2751                         return 1;
2752                 }
2753         } else {
2754                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2755                         return 1;
2756                 }
2757         }
2758         return 0;
2759 }
2760
2761 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
2762                                                      u8 dir, u8 type)
2763 {
2764         struct xfrm_policy *pol, *ret = NULL;
2765         struct hlist_node *entry;
2766         struct hlist_head *chain;
2767         u32 priority = ~0U;
2768
2769         read_lock_bh(&xfrm_policy_lock);
2770         chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
2771         hlist_for_each_entry(pol, entry, chain, bydst) {
2772                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2773                     pol->type == type) {
2774                         ret = pol;
2775                         priority = ret->priority;
2776                         break;
2777                 }
2778         }
2779         chain = &init_net.xfrm.policy_inexact[dir];
2780         hlist_for_each_entry(pol, entry, chain, bydst) {
2781                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2782                     pol->type == type &&
2783                     pol->priority < priority) {
2784                         ret = pol;
2785                         break;
2786                 }
2787         }
2788
2789         if (ret)
2790                 xfrm_pol_hold(ret);
2791
2792         read_unlock_bh(&xfrm_policy_lock);
2793
2794         return ret;
2795 }
2796
2797 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
2798 {
2799         int match = 0;
2800
2801         if (t->mode == m->mode && t->id.proto == m->proto &&
2802             (m->reqid == 0 || t->reqid == m->reqid)) {
2803                 switch (t->mode) {
2804                 case XFRM_MODE_TUNNEL:
2805                 case XFRM_MODE_BEET:
2806                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2807                                           m->old_family) == 0 &&
2808                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2809                                           m->old_family) == 0) {
2810                                 match = 1;
2811                         }
2812                         break;
2813                 case XFRM_MODE_TRANSPORT:
2814                         /* In transport mode, the template does not store
2815                            any IP addresses, hence we just compare the mode
2816                            and protocol. */
2817                         match = 1;
2818                         break;
2819                 default:
2820                         break;
2821                 }
2822         }
2823         return match;
2824 }
2825
2826 /* update endpoint address(es) of template(s) */
2827 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2828                                struct xfrm_migrate *m, int num_migrate)
2829 {
2830         struct xfrm_migrate *mp;
2831         int i, j, n = 0;
2832
2833         write_lock_bh(&pol->lock);
2834         if (unlikely(pol->walk.dead)) {
2835                 /* target policy has been deleted */
2836                 write_unlock_bh(&pol->lock);
2837                 return -ENOENT;
2838         }
2839
2840         for (i = 0; i < pol->xfrm_nr; i++) {
2841                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2842                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2843                                 continue;
2844                         n++;
2845                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2846                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2847                                 continue;
2848                         /* update endpoints */
2849                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2850                                sizeof(pol->xfrm_vec[i].id.daddr));
2851                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2852                                sizeof(pol->xfrm_vec[i].saddr));
2853                         pol->xfrm_vec[i].encap_family = mp->new_family;
2854                         /* flush bundles */
2855                         atomic_inc(&pol->genid);
2856                 }
2857         }
2858
2859         write_unlock_bh(&pol->lock);
2860
2861         if (!n)
2862                 return -ENODATA;
2863
2864         return 0;
2865 }
2866
2867 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
2868 {
2869         int i, j;
2870
2871         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2872                 return -EINVAL;
2873
2874         for (i = 0; i < num_migrate; i++) {
2875                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2876                                    m[i].old_family) == 0) &&
2877                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2878                                    m[i].old_family) == 0))
2879                         return -EINVAL;
2880                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2881                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2882                         return -EINVAL;
2883
2884                 /* check if there are any duplicated entries */
2885                 for (j = i + 1; j < num_migrate; j++) {
2886                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2887                                     sizeof(m[i].old_daddr)) &&
2888                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2889                                     sizeof(m[i].old_saddr)) &&
2890                             m[i].proto == m[j].proto &&
2891                             m[i].mode == m[j].mode &&
2892                             m[i].reqid == m[j].reqid &&
2893                             m[i].old_family == m[j].old_family)
2894                                 return -EINVAL;
2895                 }
2896         }
2897
2898         return 0;
2899 }
2900
2901 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2902                  struct xfrm_migrate *m, int num_migrate,
2903                  struct xfrm_kmaddress *k)
2904 {
2905         int i, err, nx_cur = 0, nx_new = 0;
2906         struct xfrm_policy *pol = NULL;
2907         struct xfrm_state *x, *xc;
2908         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2909         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2910         struct xfrm_migrate *mp;
2911
2912         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2913                 goto out;
2914
2915         /* Stage 1 - find policy */
2916         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2917                 err = -ENOENT;
2918                 goto out;
2919         }
2920
2921         /* Stage 2 - find and update state(s) */
2922         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2923                 if ((x = xfrm_migrate_state_find(mp))) {
2924                         x_cur[nx_cur] = x;
2925                         nx_cur++;
2926                         if ((xc = xfrm_state_migrate(x, mp))) {
2927                                 x_new[nx_new] = xc;
2928                                 nx_new++;
2929                         } else {
2930                                 err = -ENODATA;
2931                                 goto restore_state;
2932                         }
2933                 }
2934         }
2935
2936         /* Stage 3 - update policy */
2937         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2938                 goto restore_state;
2939
2940         /* Stage 4 - delete old state(s) */
2941         if (nx_cur) {
2942                 xfrm_states_put(x_cur, nx_cur);
2943                 xfrm_states_delete(x_cur, nx_cur);
2944         }
2945
2946         /* Stage 5 - announce */
2947         km_migrate(sel, dir, type, m, num_migrate, k);
2948
2949         xfrm_pol_put(pol);
2950
2951         return 0;
2952 out:
2953         return err;
2954
2955 restore_state:
2956         if (pol)
2957                 xfrm_pol_put(pol);
2958         if (nx_cur)
2959                 xfrm_states_put(x_cur, nx_cur);
2960         if (nx_new)
2961                 xfrm_states_delete(x_new, nx_new);
2962
2963         return err;
2964 }
2965 EXPORT_SYMBOL(xfrm_migrate);
2966 #endif