[NET]: Convert init_timer into setup_timer
[linux-2.6.git] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/slab.h>
17 #include <linux/kmod.h>
18 #include <linux/list.h>
19 #include <linux/spinlock.h>
20 #include <linux/workqueue.h>
21 #include <linux/notifier.h>
22 #include <linux/netdevice.h>
23 #include <linux/netfilter.h>
24 #include <linux/module.h>
25 #include <linux/cache.h>
26 #include <net/xfrm.h>
27 #include <net/ip.h>
28
29 #include "xfrm_hash.h"
30
31 int sysctl_xfrm_larval_drop __read_mostly;
32
33 DEFINE_MUTEX(xfrm_cfg_mutex);
34 EXPORT_SYMBOL(xfrm_cfg_mutex);
35
36 static DEFINE_RWLOCK(xfrm_policy_lock);
37
38 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
39 EXPORT_SYMBOL(xfrm_policy_count);
40
41 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
42 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
43
44 static struct kmem_cache *xfrm_dst_cache __read_mostly;
45
46 static struct work_struct xfrm_policy_gc_work;
47 static HLIST_HEAD(xfrm_policy_gc_list);
48 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49
50 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
52
53 static inline int
54 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
55 {
56         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
57                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
58                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
59                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
60                 (fl->proto == sel->proto || !sel->proto) &&
61                 (fl->oif == sel->ifindex || !sel->ifindex);
62 }
63
64 static inline int
65 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
66 {
67         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
68                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
69                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
70                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
71                 (fl->proto == sel->proto || !sel->proto) &&
72                 (fl->oif == sel->ifindex || !sel->ifindex);
73 }
74
75 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
76                     unsigned short family)
77 {
78         switch (family) {
79         case AF_INET:
80                 return __xfrm4_selector_match(sel, fl);
81         case AF_INET6:
82                 return __xfrm6_selector_match(sel, fl);
83         }
84         return 0;
85 }
86
87 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
88                     unsigned short family)
89 {
90         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
91         int err = 0;
92
93         if (unlikely(afinfo == NULL))
94                 return -EAFNOSUPPORT;
95
96         if (likely(afinfo->dst_lookup != NULL))
97                 err = afinfo->dst_lookup(dst, fl);
98         else
99                 err = -EINVAL;
100         xfrm_policy_put_afinfo(afinfo);
101         return err;
102 }
103 EXPORT_SYMBOL(xfrm_dst_lookup);
104
105 static inline unsigned long make_jiffies(long secs)
106 {
107         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
108                 return MAX_SCHEDULE_TIMEOUT-1;
109         else
110                 return secs*HZ;
111 }
112
113 static void xfrm_policy_timer(unsigned long data)
114 {
115         struct xfrm_policy *xp = (struct xfrm_policy*)data;
116         unsigned long now = get_seconds();
117         long next = LONG_MAX;
118         int warn = 0;
119         int dir;
120
121         read_lock(&xp->lock);
122
123         if (xp->dead)
124                 goto out;
125
126         dir = xfrm_policy_id2dir(xp->index);
127
128         if (xp->lft.hard_add_expires_seconds) {
129                 long tmo = xp->lft.hard_add_expires_seconds +
130                         xp->curlft.add_time - now;
131                 if (tmo <= 0)
132                         goto expired;
133                 if (tmo < next)
134                         next = tmo;
135         }
136         if (xp->lft.hard_use_expires_seconds) {
137                 long tmo = xp->lft.hard_use_expires_seconds +
138                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
139                 if (tmo <= 0)
140                         goto expired;
141                 if (tmo < next)
142                         next = tmo;
143         }
144         if (xp->lft.soft_add_expires_seconds) {
145                 long tmo = xp->lft.soft_add_expires_seconds +
146                         xp->curlft.add_time - now;
147                 if (tmo <= 0) {
148                         warn = 1;
149                         tmo = XFRM_KM_TIMEOUT;
150                 }
151                 if (tmo < next)
152                         next = tmo;
153         }
154         if (xp->lft.soft_use_expires_seconds) {
155                 long tmo = xp->lft.soft_use_expires_seconds +
156                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
157                 if (tmo <= 0) {
158                         warn = 1;
159                         tmo = XFRM_KM_TIMEOUT;
160                 }
161                 if (tmo < next)
162                         next = tmo;
163         }
164
165         if (warn)
166                 km_policy_expired(xp, dir, 0, 0);
167         if (next != LONG_MAX &&
168             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
169                 xfrm_pol_hold(xp);
170
171 out:
172         read_unlock(&xp->lock);
173         xfrm_pol_put(xp);
174         return;
175
176 expired:
177         read_unlock(&xp->lock);
178         if (!xfrm_policy_delete(xp, dir))
179                 km_policy_expired(xp, dir, 1, 0);
180         xfrm_pol_put(xp);
181 }
182
183
184 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
185  * SPD calls.
186  */
187
188 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
189 {
190         struct xfrm_policy *policy;
191
192         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
193
194         if (policy) {
195                 INIT_HLIST_NODE(&policy->bydst);
196                 INIT_HLIST_NODE(&policy->byidx);
197                 rwlock_init(&policy->lock);
198                 atomic_set(&policy->refcnt, 1);
199                 setup_timer(&policy->timer, xfrm_policy_timer,
200                                 (unsigned long)policy);
201         }
202         return policy;
203 }
204 EXPORT_SYMBOL(xfrm_policy_alloc);
205
206 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
207
208 void __xfrm_policy_destroy(struct xfrm_policy *policy)
209 {
210         BUG_ON(!policy->dead);
211
212         BUG_ON(policy->bundles);
213
214         if (del_timer(&policy->timer))
215                 BUG();
216
217         security_xfrm_policy_free(policy);
218         kfree(policy);
219 }
220 EXPORT_SYMBOL(__xfrm_policy_destroy);
221
222 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
223 {
224         struct dst_entry *dst;
225
226         while ((dst = policy->bundles) != NULL) {
227                 policy->bundles = dst->next;
228                 dst_free(dst);
229         }
230
231         if (del_timer(&policy->timer))
232                 atomic_dec(&policy->refcnt);
233
234         if (atomic_read(&policy->refcnt) > 1)
235                 flow_cache_flush();
236
237         xfrm_pol_put(policy);
238 }
239
240 static void xfrm_policy_gc_task(struct work_struct *work)
241 {
242         struct xfrm_policy *policy;
243         struct hlist_node *entry, *tmp;
244         struct hlist_head gc_list;
245
246         spin_lock_bh(&xfrm_policy_gc_lock);
247         gc_list.first = xfrm_policy_gc_list.first;
248         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
249         spin_unlock_bh(&xfrm_policy_gc_lock);
250
251         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
252                 xfrm_policy_gc_kill(policy);
253 }
254
255 /* Rule must be locked. Release descentant resources, announce
256  * entry dead. The rule must be unlinked from lists to the moment.
257  */
258
259 static void xfrm_policy_kill(struct xfrm_policy *policy)
260 {
261         int dead;
262
263         write_lock_bh(&policy->lock);
264         dead = policy->dead;
265         policy->dead = 1;
266         write_unlock_bh(&policy->lock);
267
268         if (unlikely(dead)) {
269                 WARN_ON(1);
270                 return;
271         }
272
273         spin_lock(&xfrm_policy_gc_lock);
274         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
275         spin_unlock(&xfrm_policy_gc_lock);
276
277         schedule_work(&xfrm_policy_gc_work);
278 }
279
280 struct xfrm_policy_hash {
281         struct hlist_head       *table;
282         unsigned int            hmask;
283 };
284
285 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
286 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
287 static struct hlist_head *xfrm_policy_byidx __read_mostly;
288 static unsigned int xfrm_idx_hmask __read_mostly;
289 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
290
291 static inline unsigned int idx_hash(u32 index)
292 {
293         return __idx_hash(index, xfrm_idx_hmask);
294 }
295
296 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
297 {
298         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
299         unsigned int hash = __sel_hash(sel, family, hmask);
300
301         return (hash == hmask + 1 ?
302                 &xfrm_policy_inexact[dir] :
303                 xfrm_policy_bydst[dir].table + hash);
304 }
305
306 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
307 {
308         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
309         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
310
311         return xfrm_policy_bydst[dir].table + hash;
312 }
313
314 static void xfrm_dst_hash_transfer(struct hlist_head *list,
315                                    struct hlist_head *ndsttable,
316                                    unsigned int nhashmask)
317 {
318         struct hlist_node *entry, *tmp;
319         struct xfrm_policy *pol;
320
321         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
322                 unsigned int h;
323
324                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
325                                 pol->family, nhashmask);
326                 hlist_add_head(&pol->bydst, ndsttable+h);
327         }
328 }
329
330 static void xfrm_idx_hash_transfer(struct hlist_head *list,
331                                    struct hlist_head *nidxtable,
332                                    unsigned int nhashmask)
333 {
334         struct hlist_node *entry, *tmp;
335         struct xfrm_policy *pol;
336
337         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
338                 unsigned int h;
339
340                 h = __idx_hash(pol->index, nhashmask);
341                 hlist_add_head(&pol->byidx, nidxtable+h);
342         }
343 }
344
/* Mask for a table twice the size of the one described by @old_hmask.
 *
 * For a mask of the form 2^k - 1 this yields 2^(k+1) - 1. The arithmetic
 * is done in unsigned int, so it wraps at that width.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled = (old_hmask << 1) | 1u;

	return doubled;
}
349
350 static void xfrm_bydst_resize(int dir)
351 {
352         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
353         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
354         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
355         struct hlist_head *odst = xfrm_policy_bydst[dir].table;
356         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
357         int i;
358
359         if (!ndst)
360                 return;
361
362         write_lock_bh(&xfrm_policy_lock);
363
364         for (i = hmask; i >= 0; i--)
365                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
366
367         xfrm_policy_bydst[dir].table = ndst;
368         xfrm_policy_bydst[dir].hmask = nhashmask;
369
370         write_unlock_bh(&xfrm_policy_lock);
371
372         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
373 }
374
375 static void xfrm_byidx_resize(int total)
376 {
377         unsigned int hmask = xfrm_idx_hmask;
378         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
379         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
380         struct hlist_head *oidx = xfrm_policy_byidx;
381         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
382         int i;
383
384         if (!nidx)
385                 return;
386
387         write_lock_bh(&xfrm_policy_lock);
388
389         for (i = hmask; i >= 0; i--)
390                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
391
392         xfrm_policy_byidx = nidx;
393         xfrm_idx_hmask = nhashmask;
394
395         write_unlock_bh(&xfrm_policy_lock);
396
397         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
398 }
399
400 static inline int xfrm_bydst_should_resize(int dir, int *total)
401 {
402         unsigned int cnt = xfrm_policy_count[dir];
403         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
404
405         if (total)
406                 *total += cnt;
407
408         if ((hmask + 1) < xfrm_policy_hashmax &&
409             cnt > hmask)
410                 return 1;
411
412         return 0;
413 }
414
415 static inline int xfrm_byidx_should_resize(int total)
416 {
417         unsigned int hmask = xfrm_idx_hmask;
418
419         if ((hmask + 1) < xfrm_policy_hashmax &&
420             total > hmask)
421                 return 1;
422
423         return 0;
424 }
425
426 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
427 {
428         read_lock_bh(&xfrm_policy_lock);
429         si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
430         si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
431         si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
432         si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
433         si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
434         si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
435         si->spdhcnt = xfrm_idx_hmask;
436         si->spdhmcnt = xfrm_policy_hashmax;
437         read_unlock_bh(&xfrm_policy_lock);
438 }
439 EXPORT_SYMBOL(xfrm_spd_getinfo);
440
441 static DEFINE_MUTEX(hash_resize_mutex);
442 static void xfrm_hash_resize(struct work_struct *__unused)
443 {
444         int dir, total;
445
446         mutex_lock(&hash_resize_mutex);
447
448         total = 0;
449         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
450                 if (xfrm_bydst_should_resize(dir, &total))
451                         xfrm_bydst_resize(dir);
452         }
453         if (xfrm_byidx_should_resize(total))
454                 xfrm_byidx_resize(total);
455
456         mutex_unlock(&hash_resize_mutex);
457 }
458
459 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
460
461 /* Generate new index... KAME seems to generate them ordered by cost
462  * of an absolute inpredictability of ordering of rules. This will not pass. */
463 static u32 xfrm_gen_index(u8 type, int dir)
464 {
465         static u32 idx_generator;
466
467         for (;;) {
468                 struct hlist_node *entry;
469                 struct hlist_head *list;
470                 struct xfrm_policy *p;
471                 u32 idx;
472                 int found;
473
474                 idx = (idx_generator | dir);
475                 idx_generator += 8;
476                 if (idx == 0)
477                         idx = 8;
478                 list = xfrm_policy_byidx + idx_hash(idx);
479                 found = 0;
480                 hlist_for_each_entry(p, entry, list, byidx) {
481                         if (p->index == idx) {
482                                 found = 1;
483                                 break;
484                         }
485                 }
486                 if (!found)
487                         return idx;
488         }
489 }
490
491 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
492 {
493         u32 *p1 = (u32 *) s1;
494         u32 *p2 = (u32 *) s2;
495         int len = sizeof(struct xfrm_selector) / sizeof(u32);
496         int i;
497
498         for (i = 0; i < len; i++) {
499                 if (p1[i] != p2[i])
500                         return 1;
501         }
502
503         return 0;
504 }
505
506 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
507 {
508         struct xfrm_policy *pol;
509         struct xfrm_policy *delpol;
510         struct hlist_head *chain;
511         struct hlist_node *entry, *newpos;
512         struct dst_entry *gc_list;
513
514         write_lock_bh(&xfrm_policy_lock);
515         chain = policy_hash_bysel(&policy->selector, policy->family, dir);
516         delpol = NULL;
517         newpos = NULL;
518         hlist_for_each_entry(pol, entry, chain, bydst) {
519                 if (pol->type == policy->type &&
520                     !selector_cmp(&pol->selector, &policy->selector) &&
521                     xfrm_sec_ctx_match(pol->security, policy->security) &&
522                     !WARN_ON(delpol)) {
523                         if (excl) {
524                                 write_unlock_bh(&xfrm_policy_lock);
525                                 return -EEXIST;
526                         }
527                         delpol = pol;
528                         if (policy->priority > pol->priority)
529                                 continue;
530                 } else if (policy->priority >= pol->priority) {
531                         newpos = &pol->bydst;
532                         continue;
533                 }
534                 if (delpol)
535                         break;
536         }
537         if (newpos)
538                 hlist_add_after(newpos, &policy->bydst);
539         else
540                 hlist_add_head(&policy->bydst, chain);
541         xfrm_pol_hold(policy);
542         xfrm_policy_count[dir]++;
543         atomic_inc(&flow_cache_genid);
544         if (delpol) {
545                 hlist_del(&delpol->bydst);
546                 hlist_del(&delpol->byidx);
547                 xfrm_policy_count[dir]--;
548         }
549         policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
550         hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
551         policy->curlft.add_time = get_seconds();
552         policy->curlft.use_time = 0;
553         if (!mod_timer(&policy->timer, jiffies + HZ))
554                 xfrm_pol_hold(policy);
555         write_unlock_bh(&xfrm_policy_lock);
556
557         if (delpol)
558                 xfrm_policy_kill(delpol);
559         else if (xfrm_bydst_should_resize(dir, NULL))
560                 schedule_work(&xfrm_hash_work);
561
562         read_lock_bh(&xfrm_policy_lock);
563         gc_list = NULL;
564         entry = &policy->bydst;
565         hlist_for_each_entry_continue(policy, entry, bydst) {
566                 struct dst_entry *dst;
567
568                 write_lock(&policy->lock);
569                 dst = policy->bundles;
570                 if (dst) {
571                         struct dst_entry *tail = dst;
572                         while (tail->next)
573                                 tail = tail->next;
574                         tail->next = gc_list;
575                         gc_list = dst;
576
577                         policy->bundles = NULL;
578                 }
579                 write_unlock(&policy->lock);
580         }
581         read_unlock_bh(&xfrm_policy_lock);
582
583         while (gc_list) {
584                 struct dst_entry *dst = gc_list;
585
586                 gc_list = dst->next;
587                 dst_free(dst);
588         }
589
590         return 0;
591 }
592 EXPORT_SYMBOL(xfrm_policy_insert);
593
594 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
595                                           struct xfrm_selector *sel,
596                                           struct xfrm_sec_ctx *ctx, int delete,
597                                           int *err)
598 {
599         struct xfrm_policy *pol, *ret;
600         struct hlist_head *chain;
601         struct hlist_node *entry;
602
603         *err = 0;
604         write_lock_bh(&xfrm_policy_lock);
605         chain = policy_hash_bysel(sel, sel->family, dir);
606         ret = NULL;
607         hlist_for_each_entry(pol, entry, chain, bydst) {
608                 if (pol->type == type &&
609                     !selector_cmp(sel, &pol->selector) &&
610                     xfrm_sec_ctx_match(ctx, pol->security)) {
611                         xfrm_pol_hold(pol);
612                         if (delete) {
613                                 *err = security_xfrm_policy_delete(pol);
614                                 if (*err) {
615                                         write_unlock_bh(&xfrm_policy_lock);
616                                         return pol;
617                                 }
618                                 hlist_del(&pol->bydst);
619                                 hlist_del(&pol->byidx);
620                                 xfrm_policy_count[dir]--;
621                         }
622                         ret = pol;
623                         break;
624                 }
625         }
626         write_unlock_bh(&xfrm_policy_lock);
627
628         if (ret && delete) {
629                 atomic_inc(&flow_cache_genid);
630                 xfrm_policy_kill(ret);
631         }
632         return ret;
633 }
634 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
635
636 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
637                                      int *err)
638 {
639         struct xfrm_policy *pol, *ret;
640         struct hlist_head *chain;
641         struct hlist_node *entry;
642
643         *err = -ENOENT;
644         if (xfrm_policy_id2dir(id) != dir)
645                 return NULL;
646
647         *err = 0;
648         write_lock_bh(&xfrm_policy_lock);
649         chain = xfrm_policy_byidx + idx_hash(id);
650         ret = NULL;
651         hlist_for_each_entry(pol, entry, chain, byidx) {
652                 if (pol->type == type && pol->index == id) {
653                         xfrm_pol_hold(pol);
654                         if (delete) {
655                                 *err = security_xfrm_policy_delete(pol);
656                                 if (*err) {
657                                         write_unlock_bh(&xfrm_policy_lock);
658                                         return pol;
659                                 }
660                                 hlist_del(&pol->bydst);
661                                 hlist_del(&pol->byidx);
662                                 xfrm_policy_count[dir]--;
663                         }
664                         ret = pol;
665                         break;
666                 }
667         }
668         write_unlock_bh(&xfrm_policy_lock);
669
670         if (ret && delete) {
671                 atomic_inc(&flow_cache_genid);
672                 xfrm_policy_kill(ret);
673         }
674         return ret;
675 }
676 EXPORT_SYMBOL(xfrm_policy_byid);
677
678 #ifdef CONFIG_SECURITY_NETWORK_XFRM
679 static inline int
680 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
681 {
682         int dir, err = 0;
683
684         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
685                 struct xfrm_policy *pol;
686                 struct hlist_node *entry;
687                 int i;
688
689                 hlist_for_each_entry(pol, entry,
690                                      &xfrm_policy_inexact[dir], bydst) {
691                         if (pol->type != type)
692                                 continue;
693                         err = security_xfrm_policy_delete(pol);
694                         if (err) {
695                                 xfrm_audit_policy_delete(pol, 0,
696                                                          audit_info->loginuid,
697                                                          audit_info->secid);
698                                 return err;
699                         }
700                 }
701                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
702                         hlist_for_each_entry(pol, entry,
703                                              xfrm_policy_bydst[dir].table + i,
704                                              bydst) {
705                                 if (pol->type != type)
706                                         continue;
707                                 err = security_xfrm_policy_delete(pol);
708                                 if (err) {
709                                         xfrm_audit_policy_delete(pol, 0,
710                                                         audit_info->loginuid,
711                                                         audit_info->secid);
712                                         return err;
713                                 }
714                         }
715                 }
716         }
717         return err;
718 }
719 #else
720 static inline int
721 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
722 {
723         return 0;
724 }
725 #endif
726
727 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
728 {
729         int dir, err = 0;
730
731         write_lock_bh(&xfrm_policy_lock);
732
733         err = xfrm_policy_flush_secctx_check(type, audit_info);
734         if (err)
735                 goto out;
736
737         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
738                 struct xfrm_policy *pol;
739                 struct hlist_node *entry;
740                 int i, killed;
741
742                 killed = 0;
743         again1:
744                 hlist_for_each_entry(pol, entry,
745                                      &xfrm_policy_inexact[dir], bydst) {
746                         if (pol->type != type)
747                                 continue;
748                         hlist_del(&pol->bydst);
749                         hlist_del(&pol->byidx);
750                         write_unlock_bh(&xfrm_policy_lock);
751
752                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
753                                                  audit_info->secid);
754
755                         xfrm_policy_kill(pol);
756                         killed++;
757
758                         write_lock_bh(&xfrm_policy_lock);
759                         goto again1;
760                 }
761
762                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
763         again2:
764                         hlist_for_each_entry(pol, entry,
765                                              xfrm_policy_bydst[dir].table + i,
766                                              bydst) {
767                                 if (pol->type != type)
768                                         continue;
769                                 hlist_del(&pol->bydst);
770                                 hlist_del(&pol->byidx);
771                                 write_unlock_bh(&xfrm_policy_lock);
772
773                                 xfrm_audit_policy_delete(pol, 1,
774                                                          audit_info->loginuid,
775                                                          audit_info->secid);
776                                 xfrm_policy_kill(pol);
777                                 killed++;
778
779                                 write_lock_bh(&xfrm_policy_lock);
780                                 goto again2;
781                         }
782                 }
783
784                 xfrm_policy_count[dir] -= killed;
785         }
786         atomic_inc(&flow_cache_genid);
787 out:
788         write_unlock_bh(&xfrm_policy_lock);
789         return err;
790 }
791 EXPORT_SYMBOL(xfrm_policy_flush);
792
793 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
794                      void *data)
795 {
796         struct xfrm_policy *pol, *last = NULL;
797         struct hlist_node *entry;
798         int dir, last_dir = 0, count, error;
799
800         read_lock_bh(&xfrm_policy_lock);
801         count = 0;
802
803         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
804                 struct hlist_head *table = xfrm_policy_bydst[dir].table;
805                 int i;
806
807                 hlist_for_each_entry(pol, entry,
808                                      &xfrm_policy_inexact[dir], bydst) {
809                         if (pol->type != type)
810                                 continue;
811                         if (last) {
812                                 error = func(last, last_dir % XFRM_POLICY_MAX,
813                                              count, data);
814                                 if (error)
815                                         goto out;
816                         }
817                         last = pol;
818                         last_dir = dir;
819                         count++;
820                 }
821                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
822                         hlist_for_each_entry(pol, entry, table + i, bydst) {
823                                 if (pol->type != type)
824                                         continue;
825                                 if (last) {
826                                         error = func(last, last_dir % XFRM_POLICY_MAX,
827                                                      count, data);
828                                         if (error)
829                                                 goto out;
830                                 }
831                                 last = pol;
832                                 last_dir = dir;
833                                 count++;
834                         }
835                 }
836         }
837         if (count == 0) {
838                 error = -ENOENT;
839                 goto out;
840         }
841         error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
842 out:
843         read_unlock_bh(&xfrm_policy_lock);
844         return error;
845 }
846 EXPORT_SYMBOL(xfrm_policy_walk);
847
848 /*
849  * Find policy to apply to this flow.
850  *
851  * Returns 0 if policy found, else an -errno.
852  */
853 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
854                              u8 type, u16 family, int dir)
855 {
856         struct xfrm_selector *sel = &pol->selector;
857         int match, ret = -ESRCH;
858
859         if (pol->family != family ||
860             pol->type != type)
861                 return ret;
862
863         match = xfrm_selector_match(sel, fl, family);
864         if (match)
865                 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
866
867         return ret;
868 }
869
870 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
871                                                      u16 family, u8 dir)
872 {
873         int err;
874         struct xfrm_policy *pol, *ret;
875         xfrm_address_t *daddr, *saddr;
876         struct hlist_node *entry;
877         struct hlist_head *chain;
878         u32 priority = ~0U;
879
880         daddr = xfrm_flowi_daddr(fl, family);
881         saddr = xfrm_flowi_saddr(fl, family);
882         if (unlikely(!daddr || !saddr))
883                 return NULL;
884
885         read_lock_bh(&xfrm_policy_lock);
886         chain = policy_hash_direct(daddr, saddr, family, dir);
887         ret = NULL;
888         hlist_for_each_entry(pol, entry, chain, bydst) {
889                 err = xfrm_policy_match(pol, fl, type, family, dir);
890                 if (err) {
891                         if (err == -ESRCH)
892                                 continue;
893                         else {
894                                 ret = ERR_PTR(err);
895                                 goto fail;
896                         }
897                 } else {
898                         ret = pol;
899                         priority = ret->priority;
900                         break;
901                 }
902         }
903         chain = &xfrm_policy_inexact[dir];
904         hlist_for_each_entry(pol, entry, chain, bydst) {
905                 err = xfrm_policy_match(pol, fl, type, family, dir);
906                 if (err) {
907                         if (err == -ESRCH)
908                                 continue;
909                         else {
910                                 ret = ERR_PTR(err);
911                                 goto fail;
912                         }
913                 } else if (pol->priority < priority) {
914                         ret = pol;
915                         break;
916                 }
917         }
918         if (ret)
919                 xfrm_pol_hold(ret);
920 fail:
921         read_unlock_bh(&xfrm_policy_lock);
922
923         return ret;
924 }
925
926 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
927                                void **objp, atomic_t **obj_refp)
928 {
929         struct xfrm_policy *pol;
930         int err = 0;
931
932 #ifdef CONFIG_XFRM_SUB_POLICY
933         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
934         if (IS_ERR(pol)) {
935                 err = PTR_ERR(pol);
936                 pol = NULL;
937         }
938         if (pol || err)
939                 goto end;
940 #endif
941         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
942         if (IS_ERR(pol)) {
943                 err = PTR_ERR(pol);
944                 pol = NULL;
945         }
946 #ifdef CONFIG_XFRM_SUB_POLICY
947 end:
948 #endif
949         if ((*objp = (void *) pol) != NULL)
950                 *obj_refp = &pol->refcnt;
951         return err;
952 }
953
954 static inline int policy_to_flow_dir(int dir)
955 {
956         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
957             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
958             XFRM_POLICY_FWD == FLOW_DIR_FWD)
959                 return dir;
960         switch (dir) {
961         default:
962         case XFRM_POLICY_IN:
963                 return FLOW_DIR_IN;
964         case XFRM_POLICY_OUT:
965                 return FLOW_DIR_OUT;
966         case XFRM_POLICY_FWD:
967                 return FLOW_DIR_FWD;
968         }
969 }
970
971 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
972 {
973         struct xfrm_policy *pol;
974
975         read_lock_bh(&xfrm_policy_lock);
976         if ((pol = sk->sk_policy[dir]) != NULL) {
977                 int match = xfrm_selector_match(&pol->selector, fl,
978                                                 sk->sk_family);
979                 int err = 0;
980
981                 if (match) {
982                         err = security_xfrm_policy_lookup(pol, fl->secid,
983                                         policy_to_flow_dir(dir));
984                         if (!err)
985                                 xfrm_pol_hold(pol);
986                         else if (err == -ESRCH)
987                                 pol = NULL;
988                         else
989                                 pol = ERR_PTR(err);
990                 } else
991                         pol = NULL;
992         }
993         read_unlock_bh(&xfrm_policy_lock);
994         return pol;
995 }
996
997 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
998 {
999         struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1000                                                      pol->family, dir);
1001
1002         hlist_add_head(&pol->bydst, chain);
1003         hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1004         xfrm_policy_count[dir]++;
1005         xfrm_pol_hold(pol);
1006
1007         if (xfrm_bydst_should_resize(dir, NULL))
1008                 schedule_work(&xfrm_hash_work);
1009 }
1010
1011 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1012                                                 int dir)
1013 {
1014         if (hlist_unhashed(&pol->bydst))
1015                 return NULL;
1016
1017         hlist_del(&pol->bydst);
1018         hlist_del(&pol->byidx);
1019         xfrm_policy_count[dir]--;
1020
1021         return pol;
1022 }
1023
1024 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1025 {
1026         write_lock_bh(&xfrm_policy_lock);
1027         pol = __xfrm_policy_unlink(pol, dir);
1028         write_unlock_bh(&xfrm_policy_lock);
1029         if (pol) {
1030                 if (dir < XFRM_POLICY_MAX)
1031                         atomic_inc(&flow_cache_genid);
1032                 xfrm_policy_kill(pol);
1033                 return 0;
1034         }
1035         return -ENOENT;
1036 }
1037 EXPORT_SYMBOL(xfrm_policy_delete);
1038
1039 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1040 {
1041         struct xfrm_policy *old_pol;
1042
1043 #ifdef CONFIG_XFRM_SUB_POLICY
1044         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1045                 return -EINVAL;
1046 #endif
1047
1048         write_lock_bh(&xfrm_policy_lock);
1049         old_pol = sk->sk_policy[dir];
1050         sk->sk_policy[dir] = pol;
1051         if (pol) {
1052                 pol->curlft.add_time = get_seconds();
1053                 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1054                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1055         }
1056         if (old_pol)
1057                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1058         write_unlock_bh(&xfrm_policy_lock);
1059
1060         if (old_pol) {
1061                 xfrm_policy_kill(old_pol);
1062         }
1063         return 0;
1064 }
1065
1066 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1067 {
1068         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1069
1070         if (newp) {
1071                 newp->selector = old->selector;
1072                 if (security_xfrm_policy_clone(old, newp)) {
1073                         kfree(newp);
1074                         return NULL;  /* ENOMEM */
1075                 }
1076                 newp->lft = old->lft;
1077                 newp->curlft = old->curlft;
1078                 newp->action = old->action;
1079                 newp->flags = old->flags;
1080                 newp->xfrm_nr = old->xfrm_nr;
1081                 newp->index = old->index;
1082                 newp->type = old->type;
1083                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1084                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1085                 write_lock_bh(&xfrm_policy_lock);
1086                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1087                 write_unlock_bh(&xfrm_policy_lock);
1088                 xfrm_pol_put(newp);
1089         }
1090         return newp;
1091 }
1092
1093 int __xfrm_sk_clone_policy(struct sock *sk)
1094 {
1095         struct xfrm_policy *p0 = sk->sk_policy[0],
1096                            *p1 = sk->sk_policy[1];
1097
1098         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1099         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1100                 return -ENOMEM;
1101         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1102                 return -ENOMEM;
1103         return 0;
1104 }
1105
1106 static int
1107 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1108                unsigned short family)
1109 {
1110         int err;
1111         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1112
1113         if (unlikely(afinfo == NULL))
1114                 return -EINVAL;
1115         err = afinfo->get_saddr(local, remote);
1116         xfrm_policy_put_afinfo(afinfo);
1117         return err;
1118 }
1119
1120 /* Resolve list of templates for the flow, given policy. */
1121
1122 static int
1123 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1124                       struct xfrm_state **xfrm,
1125                       unsigned short family)
1126 {
1127         int nx;
1128         int i, error;
1129         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1130         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1131         xfrm_address_t tmp;
1132
1133         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1134                 struct xfrm_state *x;
1135                 xfrm_address_t *remote = daddr;
1136                 xfrm_address_t *local  = saddr;
1137                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1138
1139                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1140                     tmpl->mode == XFRM_MODE_BEET) {
1141                         remote = &tmpl->id.daddr;
1142                         local = &tmpl->saddr;
1143                         family = tmpl->encap_family;
1144                         if (xfrm_addr_any(local, family)) {
1145                                 error = xfrm_get_saddr(&tmp, remote, family);
1146                                 if (error)
1147                                         goto fail;
1148                                 local = &tmp;
1149                         }
1150                 }
1151
1152                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1153
1154                 if (x && x->km.state == XFRM_STATE_VALID) {
1155                         xfrm[nx++] = x;
1156                         daddr = remote;
1157                         saddr = local;
1158                         continue;
1159                 }
1160                 if (x) {
1161                         error = (x->km.state == XFRM_STATE_ERROR ?
1162                                  -EINVAL : -EAGAIN);
1163                         xfrm_state_put(x);
1164                 }
1165
1166                 if (!tmpl->optional)
1167                         goto fail;
1168         }
1169         return nx;
1170
1171 fail:
1172         for (nx--; nx>=0; nx--)
1173                 xfrm_state_put(xfrm[nx]);
1174         return error;
1175 }
1176
1177 static int
1178 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1179                   struct xfrm_state **xfrm,
1180                   unsigned short family)
1181 {
1182         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1183         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1184         int cnx = 0;
1185         int error;
1186         int ret;
1187         int i;
1188
1189         for (i = 0; i < npols; i++) {
1190                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1191                         error = -ENOBUFS;
1192                         goto fail;
1193                 }
1194
1195                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1196                 if (ret < 0) {
1197                         error = ret;
1198                         goto fail;
1199                 } else
1200                         cnx += ret;
1201         }
1202
1203         /* found states are sorted for outbound processing */
1204         if (npols > 1)
1205                 xfrm_state_sort(xfrm, tpp, cnx, family);
1206
1207         return cnx;
1208
1209  fail:
1210         for (cnx--; cnx>=0; cnx--)
1211                 xfrm_state_put(tpp[cnx]);
1212         return error;
1213
1214 }
1215
1216 /* Check that the bundle accepts the flow and its components are
1217  * still valid.
1218  */
1219
1220 static struct dst_entry *
1221 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1222 {
1223         struct dst_entry *x;
1224         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1225         if (unlikely(afinfo == NULL))
1226                 return ERR_PTR(-EINVAL);
1227         x = afinfo->find_bundle(fl, policy);
1228         xfrm_policy_put_afinfo(afinfo);
1229         return x;
1230 }
1231
1232 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1233  * all the metrics... Shortly, bundle a bundle.
1234  */
1235
1236 static int
1237 xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
1238                    struct flowi *fl, struct dst_entry **dst_p,
1239                    unsigned short family)
1240 {
1241         int err;
1242         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1243         if (unlikely(afinfo == NULL))
1244                 return -EINVAL;
1245         err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
1246         xfrm_policy_put_afinfo(afinfo);
1247         return err;
1248 }
1249
1250 static int inline
1251 xfrm_dst_alloc_copy(void **target, void *src, int size)
1252 {
1253         if (!*target) {
1254                 *target = kmalloc(size, GFP_ATOMIC);
1255                 if (!*target)
1256                         return -ENOMEM;
1257         }
1258         memcpy(*target, src, size);
1259         return 0;
1260 }
1261
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of @sel in the bundle's
 * partner slot; otherwise do nothing.  Returns 0 or -ENOMEM.
 */
static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
        int err = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

        err = xfrm_dst_alloc_copy((void **)&(xdst->partner),
                                  sel, sizeof(*sel));
#endif
        return err;
}
1273
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of the flow in the bundle's
 * origin slot; otherwise do nothing.  Returns 0 or -ENOMEM.
 */
static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
        int err = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

        err = xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#endif
        return err;
}
1284
1285 static int stale_bundle(struct dst_entry *dst);
1286
1287 /* Main function: finds/creates a bundle for given flow.
1288  *
1289  * At the moment we eat a raw IP route. Mostly to speed up lookups
1290  * on interfaces with disabled IPsec.
1291  */
1292 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1293                   struct sock *sk, int flags)
1294 {
1295         struct xfrm_policy *policy;
1296         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1297         int npols;
1298         int pol_dead;
1299         int xfrm_nr;
1300         int pi;
1301         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1302         struct dst_entry *dst, *dst_orig = *dst_p;
1303         int nx = 0;
1304         int err;
1305         u32 genid;
1306         u16 family;
1307         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1308
1309 restart:
1310         genid = atomic_read(&flow_cache_genid);
1311         policy = NULL;
1312         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1313                 pols[pi] = NULL;
1314         npols = 0;
1315         pol_dead = 0;
1316         xfrm_nr = 0;
1317
1318         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1319                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1320                 err = PTR_ERR(policy);
1321                 if (IS_ERR(policy))
1322                         goto dropdst;
1323         }
1324
1325         if (!policy) {
1326                 /* To accelerate a bit...  */
1327                 if ((dst_orig->flags & DST_NOXFRM) ||
1328                     !xfrm_policy_count[XFRM_POLICY_OUT])
1329                         return 0;
1330
1331                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1332                                            dir, xfrm_policy_lookup);
1333                 err = PTR_ERR(policy);
1334                 if (IS_ERR(policy))
1335                         goto dropdst;
1336         }
1337
1338         if (!policy)
1339                 return 0;
1340
1341         family = dst_orig->ops->family;
1342         policy->curlft.use_time = get_seconds();
1343         pols[0] = policy;
1344         npols ++;
1345         xfrm_nr += pols[0]->xfrm_nr;
1346
1347         switch (policy->action) {
1348         default:
1349         case XFRM_POLICY_BLOCK:
1350                 /* Prohibit the flow */
1351                 err = -EPERM;
1352                 goto error;
1353
1354         case XFRM_POLICY_ALLOW:
1355 #ifndef CONFIG_XFRM_SUB_POLICY
1356                 if (policy->xfrm_nr == 0) {
1357                         /* Flow passes not transformed. */
1358                         xfrm_pol_put(policy);
1359                         return 0;
1360                 }
1361 #endif
1362
1363                 /* Try to find matching bundle.
1364                  *
1365                  * LATER: help from flow cache. It is optional, this
1366                  * is required only for output policy.
1367                  */
1368                 dst = xfrm_find_bundle(fl, policy, family);
1369                 if (IS_ERR(dst)) {
1370                         err = PTR_ERR(dst);
1371                         goto error;
1372                 }
1373
1374                 if (dst)
1375                         break;
1376
1377 #ifdef CONFIG_XFRM_SUB_POLICY
1378                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1379                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1380                                                             fl, family,
1381                                                             XFRM_POLICY_OUT);
1382                         if (pols[1]) {
1383                                 if (IS_ERR(pols[1])) {
1384                                         err = PTR_ERR(pols[1]);
1385                                         goto error;
1386                                 }
1387                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1388                                         err = -EPERM;
1389                                         goto error;
1390                                 }
1391                                 npols ++;
1392                                 xfrm_nr += pols[1]->xfrm_nr;
1393                         }
1394                 }
1395
1396                 /*
1397                  * Because neither flowi nor bundle information knows about
1398                  * transformation template size. On more than one policy usage
1399                  * we can realize whether all of them is bypass or not after
1400                  * they are searched. See above not-transformed bypass
1401                  * is surrounded by non-sub policy configuration, too.
1402                  */
1403                 if (xfrm_nr == 0) {
1404                         /* Flow passes not transformed. */
1405                         xfrm_pols_put(pols, npols);
1406                         return 0;
1407                 }
1408
1409 #endif
1410                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1411
1412                 if (unlikely(nx<0)) {
1413                         err = nx;
1414                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1415                                 /* EREMOTE tells the caller to generate
1416                                  * a one-shot blackhole route.
1417                                  */
1418                                 xfrm_pol_put(policy);
1419                                 return -EREMOTE;
1420                         }
1421                         if (err == -EAGAIN && flags) {
1422                                 DECLARE_WAITQUEUE(wait, current);
1423
1424                                 add_wait_queue(&km_waitq, &wait);
1425                                 set_current_state(TASK_INTERRUPTIBLE);
1426                                 schedule();
1427                                 set_current_state(TASK_RUNNING);
1428                                 remove_wait_queue(&km_waitq, &wait);
1429
1430                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1431
1432                                 if (nx == -EAGAIN && signal_pending(current)) {
1433                                         err = -ERESTART;
1434                                         goto error;
1435                                 }
1436                                 if (nx == -EAGAIN ||
1437                                     genid != atomic_read(&flow_cache_genid)) {
1438                                         xfrm_pols_put(pols, npols);
1439                                         goto restart;
1440                                 }
1441                                 err = nx;
1442                         }
1443                         if (err < 0)
1444                                 goto error;
1445                 }
1446                 if (nx == 0) {
1447                         /* Flow passes not transformed. */
1448                         xfrm_pols_put(pols, npols);
1449                         return 0;
1450                 }
1451
1452                 dst = dst_orig;
1453                 err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
1454
1455                 if (unlikely(err)) {
1456                         int i;
1457                         for (i=0; i<nx; i++)
1458                                 xfrm_state_put(xfrm[i]);
1459                         goto error;
1460                 }
1461
1462                 for (pi = 0; pi < npols; pi++) {
1463                         read_lock_bh(&pols[pi]->lock);
1464                         pol_dead |= pols[pi]->dead;
1465                         read_unlock_bh(&pols[pi]->lock);
1466                 }
1467
1468                 write_lock_bh(&policy->lock);
1469                 if (unlikely(pol_dead || stale_bundle(dst))) {
1470                         /* Wow! While we worked on resolving, this
1471                          * policy has gone. Retry. It is not paranoia,
1472                          * we just cannot enlist new bundle to dead object.
1473                          * We can't enlist stable bundles either.
1474                          */
1475                         write_unlock_bh(&policy->lock);
1476                         if (dst)
1477                                 dst_free(dst);
1478
1479                         err = -EHOSTUNREACH;
1480                         goto error;
1481                 }
1482
1483                 if (npols > 1)
1484                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1485                 else
1486                         err = xfrm_dst_update_origin(dst, fl);
1487                 if (unlikely(err)) {
1488                         write_unlock_bh(&policy->lock);
1489                         if (dst)
1490                                 dst_free(dst);
1491                         goto error;
1492                 }
1493
1494                 dst->next = policy->bundles;
1495                 policy->bundles = dst;
1496                 dst_hold(dst);
1497                 write_unlock_bh(&policy->lock);
1498         }
1499         *dst_p = dst;
1500         dst_release(dst_orig);
1501         xfrm_pols_put(pols, npols);
1502         return 0;
1503
1504 error:
1505         xfrm_pols_put(pols, npols);
1506 dropdst:
1507         dst_release(dst_orig);
1508         *dst_p = NULL;
1509         return err;
1510 }
1511 EXPORT_SYMBOL(__xfrm_lookup);
1512
1513 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1514                 struct sock *sk, int flags)
1515 {
1516         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1517
1518         if (err == -EREMOTE) {
1519                 dst_release(*dst_p);
1520                 *dst_p = NULL;
1521                 err = -EAGAIN;
1522         }
1523
1524         return err;
1525 }
1526 EXPORT_SYMBOL(xfrm_lookup);
1527
1528 static inline int
1529 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1530 {
1531         struct xfrm_state *x;
1532
1533         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1534                 return 0;
1535         x = skb->sp->xvec[idx];
1536         if (!x->type->reject)
1537                 return 0;
1538         return x->type->reject(x, skb, fl);
1539 }
1540
1541 /* When skb is transformed back to its "native" form, we have to
1542  * check policy restrictions. At the moment we make this in maximally
1543  * stupid way. Shame on me. :-) Of course, connected sockets must
1544  * have policy cached at them.
1545  */
1546
1547 static inline int
1548 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1549               unsigned short family)
1550 {
1551         if (xfrm_state_kern(x))
1552                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1553         return  x->id.proto == tmpl->id.proto &&
1554                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1555                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1556                 x->props.mode == tmpl->mode &&
1557                 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1558                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1559                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1560                   xfrm_state_addr_cmp(tmpl, x, family));
1561 }
1562
1563 /*
1564  * 0 or more than 0 is returned when validation is succeeded (either bypass
1565  * because of optional transport mode, or next index of the mathced secpath
1566  * state with the template.
1567  * -1 is returned when no matching template is found.
1568  * Otherwise "-2 - errored_index" is returned.
1569  */
1570 static inline int
1571 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1572                unsigned short family)
1573 {
1574         int idx = start;
1575
1576         if (tmpl->optional) {
1577                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1578                         return start;
1579         } else
1580                 start = -1;
1581         for (; idx < sp->len; idx++) {
1582                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1583                         return ++idx;
1584                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1585                         if (start == -1)
1586                                 start = -2-idx;
1587                         break;
1588                 }
1589         }
1590         return start;
1591 }
1592
1593 int
1594 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
1595 {
1596         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1597         int err;
1598
1599         if (unlikely(afinfo == NULL))
1600                 return -EAFNOSUPPORT;
1601
1602         afinfo->decode_session(skb, fl);
1603         err = security_xfrm_decode_session(skb, &fl->secid);
1604         xfrm_policy_put_afinfo(afinfo);
1605         return err;
1606 }
1607 EXPORT_SYMBOL(xfrm_decode_session);
1608
1609 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1610 {
1611         for (; k < sp->len; k++) {
1612                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1613                         *idxp = k;
1614                         return 1;
1615                 }
1616         }
1617
1618         return 0;
1619 }
1620
1621 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1622                         unsigned short family)
1623 {
1624         struct xfrm_policy *pol;
1625         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1626         int npols = 0;
1627         int xfrm_nr;
1628         int pi;
1629         struct flowi fl;
1630         u8 fl_dir = policy_to_flow_dir(dir);
1631         int xerr_idx = -1;
1632
1633         if (xfrm_decode_session(skb, &fl, family) < 0)
1634                 return 0;
1635         nf_nat_decode_session(skb, &fl, family);
1636
1637         /* First, check used SA against their selectors. */
1638         if (skb->sp) {
1639                 int i;
1640
1641                 for (i=skb->sp->len-1; i>=0; i--) {
1642                         struct xfrm_state *x = skb->sp->xvec[i];
1643                         if (!xfrm_selector_match(&x->sel, &fl, family))
1644                                 return 0;
1645                 }
1646         }
1647
1648         pol = NULL;
1649         if (sk && sk->sk_policy[dir]) {
1650                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1651                 if (IS_ERR(pol))
1652                         return 0;
1653         }
1654
1655         if (!pol)
1656                 pol = flow_cache_lookup(&fl, family, fl_dir,
1657                                         xfrm_policy_lookup);
1658
1659         if (IS_ERR(pol))
1660                 return 0;
1661
1662         if (!pol) {
1663                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1664                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1665                         return 0;
1666                 }
1667                 return 1;
1668         }
1669
1670         pol->curlft.use_time = get_seconds();
1671
1672         pols[0] = pol;
1673         npols ++;
1674 #ifdef CONFIG_XFRM_SUB_POLICY
1675         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1676                 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1677                                                     &fl, family,
1678                                                     XFRM_POLICY_IN);
1679                 if (pols[1]) {
1680                         if (IS_ERR(pols[1]))
1681                                 return 0;
1682                         pols[1]->curlft.use_time = get_seconds();
1683                         npols ++;
1684                 }
1685         }
1686 #endif
1687
1688         if (pol->action == XFRM_POLICY_ALLOW) {
1689                 struct sec_path *sp;
1690                 static struct sec_path dummy;
1691                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1692                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1693                 struct xfrm_tmpl **tpp = tp;
1694                 int ti = 0;
1695                 int i, k;
1696
1697                 if ((sp = skb->sp) == NULL)
1698                         sp = &dummy;
1699
1700                 for (pi = 0; pi < npols; pi++) {
1701                         if (pols[pi] != pol &&
1702                             pols[pi]->action != XFRM_POLICY_ALLOW)
1703                                 goto reject;
1704                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
1705                                 goto reject_error;
1706                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1707                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1708                 }
1709                 xfrm_nr = ti;
1710                 if (npols > 1) {
1711                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1712                         tpp = stp;
1713                 }
1714
1715                 /* For each tunnel xfrm, find the first matching tmpl.
1716                  * For each tmpl before that, find corresponding xfrm.
1717                  * Order is _important_. Later we will implement
1718                  * some barriers, but at the moment barriers
1719                  * are implied between each two transformations.
1720                  */
1721                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1722                         k = xfrm_policy_ok(tpp[i], sp, k, family);
1723                         if (k < 0) {
1724                                 if (k < -1)
1725                                         /* "-2 - errored_index" returned */
1726                                         xerr_idx = -(2+k);
1727                                 goto reject;
1728                         }
1729                 }
1730
1731                 if (secpath_has_nontransport(sp, k, &xerr_idx))
1732                         goto reject;
1733
1734                 xfrm_pols_put(pols, npols);
1735                 return 1;
1736         }
1737
1738 reject:
1739         xfrm_secpath_reject(xerr_idx, skb, &fl);
1740 reject_error:
1741         xfrm_pols_put(pols, npols);
1742         return 0;
1743 }
1744 EXPORT_SYMBOL(__xfrm_policy_check);
1745
1746 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1747 {
1748         struct flowi fl;
1749
1750         if (xfrm_decode_session(skb, &fl, family) < 0)
1751                 return 0;
1752
1753         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1754 }
1755 EXPORT_SYMBOL(__xfrm_route_forward);
1756
1757 /* Optimize later using cookies and generation ids. */
1758
1759 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1760 {
1761         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
1762          * to "-1" to force all XFRM destinations to get validated by
1763          * dst_ops->check on every use.  We do this because when a
1764          * normal route referenced by an XFRM dst is obsoleted we do
1765          * not go looking around for all parent referencing XFRM dsts
1766          * so that we can invalidate them.  It is just too much work.
1767          * Instead we make the checks here on every use.  For example:
1768          *
1769          *      XFRM dst A --> IPv4 dst X
1770          *
1771          * X is the "xdst->route" of A (X is also the "dst->path" of A
1772          * in this example).  If X is marked obsolete, "A" will not
1773          * notice.  That's what we are validating here via the
1774          * stale_bundle() check.
1775          *
1776          * When a policy's bundle is pruned, we dst_free() the XFRM
1777          * dst which causes it's ->obsolete field to be set to a
1778          * positive non-zero integer.  If an XFRM dst has been pruned
1779          * like this, we want to force a new route lookup.
1780          */
1781         if (dst->obsolete < 0 && !stale_bundle(dst))
1782                 return dst;
1783
1784         return NULL;
1785 }
1786
1787 static int stale_bundle(struct dst_entry *dst)
1788 {
1789         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
1790 }
1791
1792 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1793 {
1794         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
1795                 dst->dev = init_net.loopback_dev;
1796                 dev_hold(dst->dev);
1797                 dev_put(dev);
1798         }
1799 }
1800 EXPORT_SYMBOL(xfrm_dst_ifdown);
1801
/*
 * link_failure handler installed into the XFRM dst_ops; intentionally
 * does nothing.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
}
1807
1808 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
1809 {
1810         if (dst) {
1811                 if (dst->obsolete) {
1812                         dst_release(dst);
1813                         dst = NULL;
1814                 }
1815         }
1816         return dst;
1817 }
1818
1819 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
1820 {
1821         struct dst_entry *dst, **dstp;
1822
1823         write_lock(&pol->lock);
1824         dstp = &pol->bundles;
1825         while ((dst=*dstp) != NULL) {
1826                 if (func(dst)) {
1827                         *dstp = dst->next;
1828                         dst->next = *gc_list_p;
1829                         *gc_list_p = dst;
1830                 } else {
1831                         dstp = &dst->next;
1832                 }
1833         }
1834         write_unlock(&pol->lock);
1835 }
1836
1837 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
1838 {
1839         struct dst_entry *gc_list = NULL;
1840         int dir;
1841
1842         read_lock_bh(&xfrm_policy_lock);
1843         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
1844                 struct xfrm_policy *pol;
1845                 struct hlist_node *entry;
1846                 struct hlist_head *table;
1847                 int i;
1848
1849                 hlist_for_each_entry(pol, entry,
1850                                      &xfrm_policy_inexact[dir], bydst)
1851                         prune_one_bundle(pol, func, &gc_list);
1852
1853                 table = xfrm_policy_bydst[dir].table;
1854                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
1855                         hlist_for_each_entry(pol, entry, table + i, bydst)
1856                                 prune_one_bundle(pol, func, &gc_list);
1857                 }
1858         }
1859         read_unlock_bh(&xfrm_policy_lock);
1860
1861         while (gc_list) {
1862                 struct dst_entry *dst = gc_list;
1863                 gc_list = dst->next;
1864                 dst_free(dst);
1865         }
1866 }
1867
1868 static int unused_bundle(struct dst_entry *dst)
1869 {
1870         return !atomic_read(&dst->__refcnt);
1871 }
1872
1873 static void __xfrm_garbage_collect(void)
1874 {
1875         xfrm_prune_bundles(unused_bundle);
1876 }
1877
1878 static int xfrm_flush_bundles(void)
1879 {
1880         xfrm_prune_bundles(stale_bundle);
1881         return 0;
1882 }
1883
1884 void xfrm_init_pmtu(struct dst_entry *dst)
1885 {
1886         do {
1887                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1888                 u32 pmtu, route_mtu_cached;
1889
1890                 pmtu = dst_mtu(dst->child);
1891                 xdst->child_mtu_cached = pmtu;
1892
1893                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
1894
1895                 route_mtu_cached = dst_mtu(xdst->route);
1896                 xdst->route_mtu_cached = route_mtu_cached;
1897
1898                 if (pmtu > route_mtu_cached)
1899                         pmtu = route_mtu_cached;
1900
1901                 dst->metrics[RTAX_MTU-1] = pmtu;
1902         } while ((dst = dst->next));
1903 }
1904
1905 EXPORT_SYMBOL(xfrm_init_pmtu);
1906
1907 /* Check that the bundle accepts the flow and its components are
1908  * still valid.
1909  */
1910
1911 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
1912                 struct flowi *fl, int family, int strict)
1913 {
1914         struct dst_entry *dst = &first->u.dst;
1915         struct xfrm_dst *last;
1916         u32 mtu;
1917
1918         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
1919             (dst->dev && !netif_running(dst->dev)))
1920                 return 0;
1921 #ifdef CONFIG_XFRM_SUB_POLICY
1922         if (fl) {
1923                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
1924                         return 0;
1925                 if (first->partner &&
1926                     !xfrm_selector_match(first->partner, fl, family))
1927                         return 0;
1928         }
1929 #endif
1930
1931         last = NULL;
1932
1933         do {
1934                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1935
1936                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
1937                         return 0;
1938                 if (fl && pol &&
1939                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
1940                         return 0;
1941                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
1942                         return 0;
1943                 if (xdst->genid != dst->xfrm->genid)
1944                         return 0;
1945
1946                 if (strict && fl &&
1947                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
1948                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
1949                         return 0;
1950
1951                 mtu = dst_mtu(dst->child);
1952                 if (xdst->child_mtu_cached != mtu) {
1953                         last = xdst;
1954                         xdst->child_mtu_cached = mtu;
1955                 }
1956
1957                 if (!dst_check(xdst->route, xdst->route_cookie))
1958                         return 0;
1959                 mtu = dst_mtu(xdst->route);
1960                 if (xdst->route_mtu_cached != mtu) {
1961                         last = xdst;
1962                         xdst->route_mtu_cached = mtu;
1963                 }
1964
1965                 dst = dst->child;
1966         } while (dst->xfrm);
1967
1968         if (likely(!last))
1969                 return 1;
1970
1971         mtu = last->child_mtu_cached;
1972         for (;;) {
1973                 dst = &last->u.dst;
1974
1975                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
1976                 if (mtu > last->route_mtu_cached)
1977                         mtu = last->route_mtu_cached;
1978                 dst->metrics[RTAX_MTU-1] = mtu;
1979
1980                 if (last == first)
1981                         break;
1982
1983                 last = (struct xfrm_dst *)last->u.dst.next;
1984                 last->child_mtu_cached = mtu;
1985         }
1986
1987         return 1;
1988 }
1989
1990 EXPORT_SYMBOL(xfrm_bundle_ok);
1991
1992 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
1993 {
1994         int err = 0;
1995         if (unlikely(afinfo == NULL))
1996                 return -EINVAL;
1997         if (unlikely(afinfo->family >= NPROTO))
1998                 return -EAFNOSUPPORT;
1999         write_lock_bh(&xfrm_policy_afinfo_lock);
2000         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2001                 err = -ENOBUFS;
2002         else {
2003                 struct dst_ops *dst_ops = afinfo->dst_ops;
2004                 if (likely(dst_ops->kmem_cachep == NULL))
2005                         dst_ops->kmem_cachep = xfrm_dst_cache;
2006                 if (likely(dst_ops->check == NULL))
2007                         dst_ops->check = xfrm_dst_check;
2008                 if (likely(dst_ops->negative_advice == NULL))
2009                         dst_ops->negative_advice = xfrm_negative_advice;
2010                 if (likely(dst_ops->link_failure == NULL))
2011                         dst_ops->link_failure = xfrm_link_failure;
2012                 if (likely(afinfo->garbage_collect == NULL))
2013                         afinfo->garbage_collect = __xfrm_garbage_collect;
2014                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2015         }
2016         write_unlock_bh(&xfrm_policy_afinfo_lock);
2017         return err;
2018 }
2019 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2020
2021 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2022 {
2023         int err = 0;
2024         if (unlikely(afinfo == NULL))
2025                 return -EINVAL;
2026         if (unlikely(afinfo->family >= NPROTO))
2027                 return -EAFNOSUPPORT;
2028         write_lock_bh(&xfrm_policy_afinfo_lock);
2029         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2030                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2031                         err = -EINVAL;
2032                 else {
2033                         struct dst_ops *dst_ops = afinfo->dst_ops;
2034                         xfrm_policy_afinfo[afinfo->family] = NULL;
2035                         dst_ops->kmem_cachep = NULL;
2036                         dst_ops->check = NULL;
2037                         dst_ops->negative_advice = NULL;
2038                         dst_ops->link_failure = NULL;
2039                         afinfo->garbage_collect = NULL;
2040                 }
2041         }
2042         write_unlock_bh(&xfrm_policy_afinfo_lock);
2043         return err;
2044 }
2045 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2046
2047 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2048 {
2049         struct xfrm_policy_afinfo *afinfo;
2050         if (unlikely(family >= NPROTO))
2051                 return NULL;
2052         read_lock(&xfrm_policy_afinfo_lock);
2053         afinfo = xfrm_policy_afinfo[family];
2054         if (unlikely(!afinfo))
2055                 read_unlock(&xfrm_policy_afinfo_lock);
2056         return afinfo;
2057 }
2058
2059 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2060 {
2061         read_unlock(&xfrm_policy_afinfo_lock);
2062 }
2063
2064 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2065 {
2066         struct net_device *dev = ptr;
2067
2068         if (dev->nd_net != &init_net)
2069                 return NOTIFY_DONE;
2070
2071         switch (event) {
2072         case NETDEV_DOWN:
2073                 xfrm_flush_bundles();
2074         }
2075         return NOTIFY_DONE;
2076 }
2077
2078 static struct notifier_block xfrm_dev_notifier = {
2079         xfrm_dev_event,
2080         NULL,
2081         0
2082 };
2083
2084 static void __init xfrm_policy_init(void)
2085 {
2086         unsigned int hmask, sz;
2087         int dir;
2088
2089         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2090                                            sizeof(struct xfrm_dst),
2091                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2092                                            NULL);
2093
2094         hmask = 8 - 1;
2095         sz = (hmask+1) * sizeof(struct hlist_head);
2096
2097         xfrm_policy_byidx = xfrm_hash_alloc(sz);
2098         xfrm_idx_hmask = hmask;
2099         if (!xfrm_policy_byidx)
2100                 panic("XFRM: failed to allocate byidx hash\n");
2101
2102         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2103                 struct xfrm_policy_hash *htab;
2104
2105                 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2106
2107                 htab = &xfrm_policy_bydst[dir];
2108                 htab->table = xfrm_hash_alloc(sz);
2109                 htab->hmask = hmask;
2110                 if (!htab->table)
2111                         panic("XFRM: failed to allocate bydst hash\n");
2112         }
2113
2114         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2115         register_netdevice_notifier(&xfrm_dev_notifier);
2116 }
2117
2118 void __init xfrm_init(void)
2119 {
2120         xfrm_state_init();
2121         xfrm_policy_init();
2122         xfrm_input_init();
2123 }
2124
2125 #ifdef CONFIG_AUDITSYSCALL
2126 static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2127                                                 struct audit_buffer *audit_buf)
2128 {
2129         if (xp->security)
2130                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2131                                  xp->security->ctx_alg, xp->security->ctx_doi,
2132                                  xp->security->ctx_str);
2133
2134         switch(xp->selector.family) {
2135         case AF_INET:
2136                 audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
2137                                  NIPQUAD(xp->selector.saddr.a4),
2138                                  NIPQUAD(xp->selector.daddr.a4));
2139                 break;
2140         case AF_INET6:
2141                 {
2142                         struct in6_addr saddr6, daddr6;
2143
2144                         memcpy(&saddr6, xp->selector.saddr.a6,
2145                                 sizeof(struct in6_addr));
2146                         memcpy(&daddr6, xp->selector.daddr.a6,
2147                                 sizeof(struct in6_addr));
2148                         audit_log_format(audit_buf,
2149                                 " src=" NIP6_FMT " dst=" NIP6_FMT,
2150                                 NIP6(saddr6), NIP6(daddr6));
2151                 }
2152                 break;
2153         }
2154 }
2155
2156 void
2157 xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
2158 {
2159         struct audit_buffer *audit_buf;
2160         extern int audit_enabled;
2161
2162         if (audit_enabled == 0)
2163                 return;
2164         audit_buf = xfrm_audit_start(auid, sid);
2165         if (audit_buf == NULL)
2166                 return;
2167         audit_log_format(audit_buf, " op=SPD-add res=%u", result);
2168         xfrm_audit_common_policyinfo(xp, audit_buf);
2169         audit_log_end(audit_buf);
2170 }
2171 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2172
2173 void
2174 xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
2175 {
2176         struct audit_buffer *audit_buf;
2177         extern int audit_enabled;
2178
2179         if (audit_enabled == 0)
2180                 return;
2181         audit_buf = xfrm_audit_start(auid, sid);
2182         if (audit_buf == NULL)
2183                 return;
2184         audit_log_format(audit_buf, " op=SPD-delete res=%u", result);
2185         xfrm_audit_common_policyinfo(xp, audit_buf);
2186         audit_log_end(audit_buf);
2187 }
2188 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2189 #endif
2190
2191 #ifdef CONFIG_XFRM_MIGRATE
2192 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2193                                        struct xfrm_selector *sel_tgt)
2194 {
2195         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2196                 if (sel_tgt->family == sel_cmp->family &&
2197                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2198                                   sel_cmp->family) == 0 &&
2199                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2200                                   sel_cmp->family) == 0 &&
2201                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2202                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2203                         return 1;
2204                 }
2205         } else {
2206                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2207                         return 1;
2208                 }
2209         }
2210         return 0;
2211 }
2212
2213 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2214                                                      u8 dir, u8 type)
2215 {
2216         struct xfrm_policy *pol, *ret = NULL;
2217         struct hlist_node *entry;
2218         struct hlist_head *chain;
2219         u32 priority = ~0U;
2220
2221         read_lock_bh(&xfrm_policy_lock);
2222         chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2223         hlist_for_each_entry(pol, entry, chain, bydst) {
2224                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2225                     pol->type == type) {
2226                         ret = pol;
2227                         priority = ret->priority;
2228                         break;
2229                 }
2230         }
2231         chain = &xfrm_policy_inexact[dir];
2232         hlist_for_each_entry(pol, entry, chain, bydst) {
2233                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2234                     pol->type == type &&
2235                     pol->priority < priority) {
2236                         ret = pol;
2237                         break;
2238                 }
2239         }
2240
2241         if (ret)
2242                 xfrm_pol_hold(ret);
2243
2244         read_unlock_bh(&xfrm_policy_lock);
2245
2246         return ret;
2247 }
2248
2249 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2250 {
2251         int match = 0;
2252
2253         if (t->mode == m->mode && t->id.proto == m->proto &&
2254             (m->reqid == 0 || t->reqid == m->reqid)) {
2255                 switch (t->mode) {
2256                 case XFRM_MODE_TUNNEL:
2257                 case XFRM_MODE_BEET:
2258                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2259                                           m->old_family) == 0 &&
2260                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2261                                           m->old_family) == 0) {
2262                                 match = 1;
2263                         }
2264                         break;
2265                 case XFRM_MODE_TRANSPORT:
2266                         /* in case of transport mode, template does not store
2267                            any IP addresses, hence we just compare mode and
2268                            protocol */
2269                         match = 1;
2270                         break;
2271                 default:
2272                         break;
2273                 }
2274         }
2275         return match;
2276 }
2277
2278 /* update endpoint address(es) of template(s) */
2279 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2280                                struct xfrm_migrate *m, int num_migrate)
2281 {
2282         struct xfrm_migrate *mp;
2283         struct dst_entry *dst;
2284         int i, j, n = 0;
2285
2286         write_lock_bh(&pol->lock);
2287         if (unlikely(pol->dead)) {
2288                 /* target policy has been deleted */
2289                 write_unlock_bh(&pol->lock);
2290                 return -ENOENT;
2291         }
2292
2293         for (i = 0; i < pol->xfrm_nr; i++) {
2294                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2295                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2296                                 continue;
2297                         n++;
2298                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2299                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2300                                 continue;
2301                         /* update endpoints */
2302                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2303                                sizeof(pol->xfrm_vec[i].id.daddr));
2304                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2305                                sizeof(pol->xfrm_vec[i].saddr));
2306                         pol->xfrm_vec[i].encap_family = mp->new_family;
2307                         /* flush bundles */
2308                         while ((dst = pol->bundles) != NULL) {
2309                                 pol->bundles = dst->next;
2310                                 dst_free(dst);
2311                         }
2312                 }
2313         }
2314
2315         write_unlock_bh(&pol->lock);
2316
2317         if (!n)
2318                 return -ENODATA;
2319
2320         return 0;
2321 }
2322
2323 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2324 {
2325         int i, j;
2326
2327         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2328                 return -EINVAL;
2329
2330         for (i = 0; i < num_migrate; i++) {
2331                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2332                                    m[i].old_family) == 0) &&
2333                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2334                                    m[i].old_family) == 0))
2335                         return -EINVAL;
2336                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2337                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2338                         return -EINVAL;
2339
2340                 /* check if there is any duplicated entry */
2341                 for (j = i + 1; j < num_migrate; j++) {
2342                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2343                                     sizeof(m[i].old_daddr)) &&
2344                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2345                                     sizeof(m[i].old_saddr)) &&
2346                             m[i].proto == m[j].proto &&
2347                             m[i].mode == m[j].mode &&
2348                             m[i].reqid == m[j].reqid &&
2349                             m[i].old_family == m[j].old_family)
2350                                 return -EINVAL;
2351                 }
2352         }
2353
2354         return 0;
2355 }
2356
2357 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2358                  struct xfrm_migrate *m, int num_migrate)
2359 {
2360         int i, err, nx_cur = 0, nx_new = 0;
2361         struct xfrm_policy *pol = NULL;
2362         struct xfrm_state *x, *xc;
2363         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2364         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2365         struct xfrm_migrate *mp;
2366
2367         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2368                 goto out;
2369
2370         /* Stage 1 - find policy */
2371         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2372                 err = -ENOENT;
2373                 goto out;
2374         }
2375
2376         /* Stage 2 - find and update state(s) */
2377         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2378                 if ((x = xfrm_migrate_state_find(mp))) {
2379                         x_cur[nx_cur] = x;
2380                         nx_cur++;
2381                         if ((xc = xfrm_state_migrate(x, mp))) {
2382                                 x_new[nx_new] = xc;
2383                                 nx_new++;
2384                         } else {
2385                                 err = -ENODATA;
2386                                 goto restore_state;
2387                         }
2388                 }
2389         }
2390
2391         /* Stage 3 - update policy */
2392         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2393                 goto restore_state;
2394
2395         /* Stage 4 - delete old state(s) */
2396         if (nx_cur) {
2397                 xfrm_states_put(x_cur, nx_cur);
2398                 xfrm_states_delete(x_cur, nx_cur);
2399         }
2400
2401         /* Stage 5 - announce */
2402         km_migrate(sel, dir, type, m, num_migrate);
2403
2404         xfrm_pol_put(pol);
2405
2406         return 0;
2407 out:
2408         return err;
2409
2410 restore_state:
2411         if (pol)
2412                 xfrm_pol_put(pol);
2413         if (nx_cur)
2414                 xfrm_states_put(x_cur, nx_cur);
2415         if (nx_new)
2416                 xfrm_states_delete(x_new, nx_new);
2417
2418         return err;
2419 }
2420 EXPORT_SYMBOL(xfrm_migrate);
2421 #endif