/*
 * netfilter: ipset: references are protected by rwlock instead of mutex
 *
 * include/linux/netfilter/ipset/ip_set_ahash.h (from linux-2.6.git)
 */
1 #ifndef _IP_SET_AHASH_H
2 #define _IP_SET_AHASH_H
3
4 #include <linux/rcupdate.h>
5 #include <linux/jhash.h>
6 #include <linux/netfilter/ipset/ip_set_timeout.h>
7
8 /* Hashing which uses arrays to resolve clashing. The hash table is resized
9  * (doubled) when searching becomes too long.
10  * Internally jhash is used with the assumption that the size of the
11  * stored data is a multiple of sizeof(u32). If storage supports timeout,
12  * the timeout field must be the last one in the data structure - that field
13  * is ignored when computing the hash key.
14  *
15  * Readers and resizing
16  *
17  * Resizing can be triggered by userspace command only, and those
18  * are serialized by the nfnl mutex. During resizing the set is
19  * read-locked, so the only possible concurrent operations are
20  * the kernel side readers. Those must be protected by proper RCU locking.
21  */
22
23 /* Number of elements to store in an initial array block */
24 #define AHASH_INIT_SIZE                 4
25 /* Max number of elements to store in an array block */
26 #define AHASH_MAX_SIZE                  (3*4)
27
/* A hash bucket: clashing elements of one hash slot are stored
 * linearly in the 'value' array, which grows and shrinks in
 * AHASH_INIT_SIZE steps up to AHASH_MAX_SIZE slots. */
struct hbucket {
	void *value;		/* the array of the values */
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
};
34
/* The hash table: the table size stored here in order to make resizing easy.
 * The bucket array is allocated together with this header
 * (jhash_size(htable_bits) entries) and the whole table is swapped
 * via RCU on resize. */
struct htable {
	u8 htable_bits;		/* size of hash table == 2^htable_bits */
	struct hbucket bucket[0]; /* hashtable buckets */
};
40
41 #define hbucket(h, i)           &((h)->bucket[i])
42
/* Book-keeping of the prefixes added to the set: one slot per distinct
 * cidr value in use, maintained by add_cidr()/del_cidr() so that
 * larger cidr values (longer prefixes) come first. */
struct ip_set_hash_nets {
	u8 cidr;		/* the different cidr values in the set */
	u32 nets;		/* number of elements per cidr */
};
48
/* The generic ip_set hash structure.
 * 'table' is the RCU-protected pointer swapped on resize; see the
 * "Readers and resizing" note at the top of this file. */
struct ip_set_hash {
	struct htable *table;	/* the hash table */
	u32 maxelem;		/* max elements in the hash */
	u32 elements;		/* current element (vs timeout) */
	u32 initval;		/* random jhash init value */
	u32 timeout;		/* timeout value, if enabled */
	struct timer_list gc;	/* garbage collection when timeout enabled */
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;		/* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_NETS
	struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};
64
65 /* Compute htable_bits from the user input parameter hashsize */
66 static u8
67 htable_bits(u32 hashsize)
68 {
69         /* Assume that hashsize == 2^htable_bits */
70         u8 bits = fls(hashsize - 1);
71         if (jhash_size(bits) != hashsize)
72                 /* Round up to the first 2^n value */
73                 bits = fls(hashsize);
74
75         return bits;
76 }
77
78 #ifdef IP_SET_HASH_WITH_NETS
79
80 #define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)
81
/* Network cidr size book keeping when the hash stores different
 * sized networks */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	/* Per-cidr element counter; cidr is 1-based, slot is cidr-1 */
	++h->nets[cidr-1].nets;

	pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets > 1)
		/* This cidr size is already listed in h->nets[].cidr */
		return;

	/* New cidr size */
	for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
		/* Add in increasing prefix order, so larger cidr first:
		 * bubble the new value into place by repeated swaps,
		 * shifting the smaller cidrs one slot down. */
		if (h->nets[i].cidr < cidr)
			swap(h->nets[i].cidr, cidr);
	}
	if (i < host_mask)
		h->nets[i].cidr = cidr;
}
105
/* Remove one element of the given cidr size from the book-keeping;
 * when it was the last one, unlist the cidr from h->nets[]. */
static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	--h->nets[cidr-1].nets;

	pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets != 0)
		/* Other elements with this cidr size remain */
		return;

	/* All entries with this cidr size deleted, so cleanup h->cidr[] */
	for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
		/* Shift the following cidr values one slot up */
		if (h->nets[i].cidr == cidr)
			h->nets[i].cidr = cidr = h->nets[i+1].cidr;
	}
	/* NOTE(review): if the loop body never runs (i == 0) this writes
	 * h->nets[-1] — presumably unreachable because a listed cidr
	 * guarantees at least one non-zero entry; verify. */
	h->nets[i - 1].cidr = 0;
}
125 #endif
126
127 /* Destroy the hashtable part of the set */
128 static void
129 ahash_destroy(struct htable *t)
130 {
131         struct hbucket *n;
132         u32 i;
133
134         for (i = 0; i < jhash_size(t->htable_bits); i++) {
135                 n = hbucket(t, i);
136                 if (n->size)
137                         /* FIXME: use slab cache */
138                         kfree(n->value);
139         }
140
141         ip_set_free(t);
142 }
143
144 /* Calculate the actual memory size of the set data */
145 static size_t
146 ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
147 {
148         u32 i;
149         struct htable *t = h->table;
150         size_t memsize = sizeof(*h)
151                          + sizeof(*t)
152 #ifdef IP_SET_HASH_WITH_NETS
153                          + sizeof(struct ip_set_hash_nets) * host_mask
154 #endif
155                          + jhash_size(t->htable_bits) * sizeof(struct hbucket);
156
157         for (i = 0; i < jhash_size(t->htable_bits); i++)
158                         memsize += t->bucket[i].size * dsize;
159
160         return memsize;
161 }
162
163 /* Flush a hash type of set: destroy all elements */
164 static void
165 ip_set_hash_flush(struct ip_set *set)
166 {
167         struct ip_set_hash *h = set->data;
168         struct htable *t = h->table;
169         struct hbucket *n;
170         u32 i;
171
172         for (i = 0; i < jhash_size(t->htable_bits); i++) {
173                 n = hbucket(t, i);
174                 if (n->size) {
175                         n->size = n->pos = 0;
176                         /* FIXME: use slab cache */
177                         kfree(n->value);
178                 }
179         }
180 #ifdef IP_SET_HASH_WITH_NETS
181         memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
182                            * SET_HOST_MASK(set->family));
183 #endif
184         h->elements = 0;
185 }
186
187 /* Destroy a hash type of set */
188 static void
189 ip_set_hash_destroy(struct ip_set *set)
190 {
191         struct ip_set_hash *h = set->data;
192
193         if (with_timeout(h->timeout))
194                 del_timer_sync(&h->gc);
195
196         ahash_destroy(h->table);
197         kfree(h);
198
199         set->data = NULL;
200 }
201
/* Hash key of an element: jhash over the plain element body
 * (sizeof(struct type_pf_elem) words — a trailing timeout field in
 * the telem variant is therefore ignored), masked to the table size */
#define HKEY(data, initval, htable_bits)                                 \
(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
	& jhash_mask(htable_bits))
205
206 #endif /* _IP_SET_AHASH_H */
207
/* Token pasting helpers: build the per-TYPE/PF identifiers below.
 * Note that everything after the include guard's #endif above is
 * re-expanded for each inclusion of this file. */
#define CONCAT(a, b, c)		a##b##c
#define TOKEN(a, b, c)		CONCAT(a, b, c)

/* Type/family dependent function prototypes */

/* Element accessors supplied by the including type implementation */
#define type_pf_data_equal	TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull	TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy	TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out	TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask	TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list	TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist	TOKEN(TYPE, PF, _data_tlist)

/* Element types and timeout accessors (generated below) */
#define type_pf_elem		TOKEN(TYPE, PF, _elem)
#define type_pf_telem		TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout	TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired	TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

/* Methods of the flavour without timeout */
#define type_pf_elem_add	TOKEN(TYPE, PF, _elem_add)
#define type_pf_add		TOKEN(TYPE, PF, _add)
#define type_pf_del		TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs	TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test		TOKEN(TYPE, PF, _test)

/* Methods of the flavour with timeout support */
#define type_pf_elem_tadd	TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem	TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire		TOKEN(TYPE, PF, _expire)
#define type_pf_tadd		TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel		TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs	TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest		TOKEN(TYPE, PF, _ahash_ttest)

/* Common methods and the two variant descriptors */
#define type_pf_resize		TOKEN(TYPE, PF, _resize)
#define type_pf_tresize		TOKEN(TYPE, PF, _tresize)
#define type_pf_flush		ip_set_hash_flush
#define type_pf_destroy		ip_set_hash_destroy
#define type_pf_head		TOKEN(TYPE, PF, _head)
#define type_pf_list		TOKEN(TYPE, PF, _list)
#define type_pf_tlist		TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set	TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt		TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt		TOKEN(TYPE, PF, _uadt)
#define type_pf_gc		TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init		TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant		TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant	TOKEN(TYPE, PF, _tvariant)
255
/* Flavour without timeout */

/* Get the ith element from the array block n */
#define ahash_data(n, i)	\
	((struct type_pf_elem *)((n)->value) + (i))
261
262 /* Add an element to the hash table when resizing the set:
263  * we spare the maintenance of the internal counters. */
264 static int
265 type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
266 {
267         if (n->pos >= n->size) {
268                 void *tmp;
269
270                 if (n->size >= AHASH_MAX_SIZE)
271                         /* Trigger rehashing */
272                         return -EAGAIN;
273
274                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
275                               * sizeof(struct type_pf_elem),
276                               GFP_ATOMIC);
277                 if (!tmp)
278                         return -ENOMEM;
279                 if (n->size) {
280                         memcpy(tmp, n->value,
281                                sizeof(struct type_pf_elem) * n->size);
282                         kfree(n->value);
283                 }
284                 n->value = tmp;
285                 n->size += AHASH_INIT_SIZE;
286         }
287         type_pf_data_copy(ahash_data(n, n->pos++), value);
288         return 0;
289 }
290
/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures.
 *
 * Runs from userspace (serialized by the nfnl mutex, per the header
 * note); the set is read-locked to keep out concurrent writers while
 * kernel-side readers are handled by the RCU pointer switch below. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	/* Re-insert every element, re-hashed for the larger table */
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_add(m, data);
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* A bucket of the new table
					 * overflowed: double again */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table; readers see either old or new */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
348
/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	u32 key;

	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	/* The table pointer may be swapped by a concurrent resize:
	 * dereference it under RCU */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	/* Reject duplicates */
	for (i = 0; i < n->pos; i++)
		if (type_pf_data_equal(ahash_data(n, i), d)) {
			ret = -IPSET_ERR_EXIST;
			goto out;
		}

	ret = type_pf_elem_add(n, value);
	if (ret != 0)
		/* -EAGAIN (bucket full, resize needed) or -ENOMEM */
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
386
/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (!type_pf_data_equal(data, d))
			continue;
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_data(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		/* Shrink the bucket array when it became sparse enough */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_elem),
					    GFP_ATOMIC);
			if (!tmp)
				/* Element is deleted anyway; shrinking
				 * is only an optimization */
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_elem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
433
434 #ifdef IP_SET_HASH_WITH_NETS
435
436 /* Special test function which takes into account the different network
437  * sizes added to the set */
438 static int
439 type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
440 {
441         struct ip_set_hash *h = set->data;
442         struct htable *t = h->table;
443         struct hbucket *n;
444         const struct type_pf_elem *data;
445         int i, j = 0;
446         u32 key;
447         u8 host_mask = SET_HOST_MASK(set->family);
448
449         pr_debug("test by nets\n");
450         for (; j < host_mask && h->nets[j].cidr; j++) {
451                 type_pf_data_netmask(d, h->nets[j].cidr);
452                 key = HKEY(d, h->initval, t->htable_bits);
453                 n = hbucket(t, key);
454                 for (i = 0; i < n->pos; i++) {
455                         data = ahash_data(n, i);
456                         if (type_pf_data_equal(data, d))
457                                 return 1;
458                 }
459         }
460         return 0;
461 }
462 #endif
463
464 /* Test whether the element is added to the set */
465 static int
466 type_pf_test(struct ip_set *set, void *value, u32 timeout)
467 {
468         struct ip_set_hash *h = set->data;
469         struct htable *t = h->table;
470         struct type_pf_elem *d = value;
471         struct hbucket *n;
472         const struct type_pf_elem *data;
473         int i;
474         u32 key;
475
476 #ifdef IP_SET_HASH_WITH_NETS
477         /* If we test an IP address and not a network address,
478          * try all possible network sizes */
479         if (d->cidr == SET_HOST_MASK(set->family))
480                 return type_pf_test_cidrs(set, d, timeout);
481 #endif
482
483         key = HKEY(d, h->initval, t->htable_bits);
484         n = hbucket(t, key);
485         for (i = 0; i < n->pos; i++) {
486                 data = ahash_data(n, i);
487                 if (type_pf_data_equal(data, d))
488                         return 1;
489         }
490         return 0;
491 }
492
/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct ip_set_hash *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	/* Lock out writers while the memory size is computed */
	read_lock_bh(&set->lock);
	memsize = ahash_memsize(h, with_timeout(h->timeout)
					? sizeof(struct type_pf_telem)
					: sizeof(struct type_pf_elem),
				set->family == AF_INET ? 32 : 128);
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	/* The NLA_PUT_* macros jump to nla_put_failure on a full skb */
	NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
		      htonl(jhash_size(h->table->htable_bits)));
	NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK)
		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
	/* NOTE(review): reports one less than the raw refcount —
	 * presumably to hide an internal reference; verify. */
	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
	if (with_timeout(h->timeout))
		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}
528
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
	     struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];	/* bucket where this skb started */
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	/* cb->args[2] carries the bucket index across dump callbacks */
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, data);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Not even one bucket fits into
					 * this skb: give up */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_list(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Drop the partially added bucket; it is retried in the next skb */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
586
587 static int
588 type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
589              enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
590 static int
591 type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
592              enum ipset_adt adt, u32 *lineno, u32 flags);
593
/* Method table of the flavour without timeout support */
static const struct ip_set_type_variant type_pf_variant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_add,
		[IPSET_DEL] = type_pf_del,
		[IPSET_TEST] = type_pf_test,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_list,
	.resize	= type_pf_resize,
	.same_set = type_pf_same_set,
};
609
/* Flavour with timeout support */

/* Get the ith element from array block n as a plain element pointer;
 * the slots are type_pf_telem sized (element + timeout) */
#define ahash_tdata(n, i) \
	(struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
614
615 static inline u32
616 type_pf_data_timeout(const struct type_pf_elem *data)
617 {
618         const struct type_pf_telem *tdata =
619                 (const struct type_pf_telem *) data;
620
621         return tdata->timeout;
622 }
623
624 static inline bool
625 type_pf_data_expired(const struct type_pf_elem *data)
626 {
627         const struct type_pf_telem *tdata =
628                 (const struct type_pf_telem *) data;
629
630         return ip_set_timeout_expired(tdata->timeout);
631 }
632
633 static inline void
634 type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
635 {
636         struct type_pf_telem *tdata = (struct type_pf_telem *) data;
637
638         tdata->timeout = ip_set_timeout_set(timeout);
639 }
640
641 static int
642 type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
643                   u32 timeout)
644 {
645         struct type_pf_elem *data;
646
647         if (n->pos >= n->size) {
648                 void *tmp;
649
650                 if (n->size >= AHASH_MAX_SIZE)
651                         /* Trigger rehashing */
652                         return -EAGAIN;
653
654                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
655                               * sizeof(struct type_pf_telem),
656                               GFP_ATOMIC);
657                 if (!tmp)
658                         return -ENOMEM;
659                 if (n->size) {
660                         memcpy(tmp, n->value,
661                                sizeof(struct type_pf_telem) * n->size);
662                         kfree(n->value);
663                 }
664                 n->value = tmp;
665                 n->size += AHASH_INIT_SIZE;
666         }
667         data = ahash_tdata(n, n->pos++);
668         type_pf_data_copy(data, value);
669         type_pf_data_timeout_set(data, timeout);
670         return 0;
671 }
672
673 /* Delete expired elements from the hashtable */
674 static void
675 type_pf_expire(struct ip_set_hash *h)
676 {
677         struct htable *t = h->table;
678         struct hbucket *n;
679         struct type_pf_elem *data;
680         u32 i;
681         int j;
682
683         for (i = 0; i < jhash_size(t->htable_bits); i++) {
684                 n = hbucket(t, i);
685                 for (j = 0; j < n->pos; j++) {
686                         data = ahash_tdata(n, j);
687                         if (type_pf_data_expired(data)) {
688                                 pr_debug("expired %u/%u\n", i, j);
689 #ifdef IP_SET_HASH_WITH_NETS
690                                 del_cidr(h, data->cidr, HOST_MASK);
691 #endif
692                                 if (j != n->pos - 1)
693                                         /* Not last one */
694                                         type_pf_data_copy(data,
695                                                 ahash_tdata(n, n->pos - 1));
696                                 n->pos--;
697                                 h->elements--;
698                         }
699                 }
700                 if (n->pos + AHASH_INIT_SIZE < n->size) {
701                         void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
702                                             * sizeof(struct type_pf_telem),
703                                             GFP_ATOMIC);
704                         if (!tmp)
705                                 /* Still try to delete expired elements */
706                                 continue;
707                         n->size -= AHASH_INIT_SIZE;
708                         memcpy(tmp, n->value,
709                                n->size * sizeof(struct type_pf_telem));
710                         kfree(n->value);
711                         n->value = tmp;
712                 }
713         }
714 }
715
/* Resize a hash with timeout support: first try to make room by
 * expiring timed out entries, then double the table like
 * type_pf_resize() does. */
static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once */
	if (!retried) {
		i = h->elements;
		/* Expiring modifies the set: take the write lock */
		write_lock_bh(&set->lock);
		type_pf_expire(set->data);
		write_unlock_bh(&set->lock);
		if (h->elements <  i)
			/* Expiry freed room: no resize needed */
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* Keep out writers while the elements are copied over */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_tadd(m, data,
						type_pf_data_timeout(data));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* New table overflowed too:
					 * double again */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table; readers see either old or new */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}
777
/* Add an element to the hash with timeout handling: reuse the slot
 * of an expired element when one is found, otherwise append. */
static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	struct type_pf_elem *data;
	/* j: index of a reusable (expired) slot; the sentinel
	 * AHASH_MAX_SIZE + 1 means none found */
	int ret = 0, i, j = AHASH_MAX_SIZE + 1;
	u32 key;

	if (h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		type_pf_expire(h);
	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	/* The table pointer may be swapped by a concurrent resize:
	 * dereference it under RCU */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d)) {
			if (type_pf_data_expired(data))
				/* Same element, timed out: reuse its slot */
				j = i;
			else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		} else if (j == AHASH_MAX_SIZE + 1 &&
			   type_pf_data_expired(data))
			/* Remember the first expired slot for reuse */
			j = i;
	}
	if (j != AHASH_MAX_SIZE + 1) {
		/* Overwrite the expired element in place */
		data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, data->cidr, HOST_MASK);
		add_cidr(h, d->cidr, HOST_MASK);
#endif
		type_pf_data_copy(data, d);
		type_pf_data_timeout_set(data, timeout);
		goto out;
	}
	ret = type_pf_elem_tadd(n, d, timeout);
	if (ret != 0)
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
834
835 static int
836 type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
837 {
838         struct ip_set_hash *h = set->data;
839         struct htable *t = h->table;
840         const struct type_pf_elem *d = value;
841         struct hbucket *n;
842         int i, ret = 0;
843         struct type_pf_elem *data;
844         u32 key;
845
846         key = HKEY(value, h->initval, t->htable_bits);
847         n = hbucket(t, key);
848         for (i = 0; i < n->pos; i++) {
849                 data = ahash_tdata(n, i);
850                 if (!type_pf_data_equal(data, d))
851                         continue;
852                 if (type_pf_data_expired(data))
853                         ret = -IPSET_ERR_EXIST;
854                 if (i != n->pos - 1)
855                         /* Not last one */
856                         type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));
857
858                 n->pos--;
859                 h->elements--;
860 #ifdef IP_SET_HASH_WITH_NETS
861                 del_cidr(h, d->cidr, HOST_MASK);
862 #endif
863                 if (n->pos + AHASH_INIT_SIZE < n->size) {
864                         void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
865                                             * sizeof(struct type_pf_telem),
866                                             GFP_ATOMIC);
867                         if (!tmp)
868                                 return 0;
869                         n->size -= AHASH_INIT_SIZE;
870                         memcpy(tmp, n->value,
871                                n->size * sizeof(struct type_pf_telem));
872                         kfree(n->value);
873                         n->value = tmp;
874                 }
875                 return 0;
876         }
877
878         return -IPSET_ERR_EXIST;
879 }
880
#ifdef IP_SET_HASH_WITH_NETS
/* Test an element against every cidr value present in the set:
 * mask the element with each stored prefix length in turn and probe
 * the hash table.  Returns 1 if a live (non-expired) match is found,
 * 0 otherwise.
 */
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	u8 host_mask = SET_HOST_MASK(set->family);
	struct type_pf_elem *entry;
	struct hbucket *bucket;
	u32 hkey;
	int slot, net;

	/* One lookup per distinct cidr; the nets[] array is packed,
	 * so the scan stops at the first zero cidr. */
	for (net = 0; net < host_mask && h->nets[net].cidr; net++) {
		type_pf_data_netmask(d, h->nets[net].cidr);
		hkey = HKEY(d, h->initval, t->htable_bits);
		bucket = hbucket(t, hkey);
		for (slot = 0; slot < bucket->pos; slot++) {
			entry = ahash_tdata(bucket, slot);
			if (!type_pf_data_equal(entry, d))
				continue;
			/* Found: a match counts only while not timed out */
			return !type_pf_data_expired(entry);
		}
	}
	return 0;
}
#endif
906
907 static int
908 type_pf_ttest(struct ip_set *set, void *value, u32 timeout)
909 {
910         struct ip_set_hash *h = set->data;
911         struct htable *t = h->table;
912         struct type_pf_elem *data, *d = value;
913         struct hbucket *n;
914         int i;
915         u32 key;
916
917 #ifdef IP_SET_HASH_WITH_NETS
918         if (d->cidr == SET_HOST_MASK(set->family))
919                 return type_pf_ttest_cidrs(set, d, timeout);
920 #endif
921         key = HKEY(d, h->initval, t->htable_bits);
922         n = hbucket(t, key);
923         for (i = 0; i < n->pos; i++) {
924                 data = ahash_tdata(n, i);
925                 if (type_pf_data_equal(data, d))
926                         return !type_pf_data_expired(data);
927         }
928         return 0;
929 }
930
/* Reproduce the set content (timeout variant) into a netlink message,
 * skipping expired entries.  Supports resumable dumps: cb->args[2]
 * holds the bucket index to continue from, and is reset to 0 when the
 * listing is complete.  Returns 0, or -EMSGSIZE when even the first
 * bucket of this pass does not fit into the message.
 */
static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];	/* bucket this dump pass started at */
	/* We assume that one hash bucket fills into one page */
	void *incomplete;	/* skb tail before the current bucket, for trimming */
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Remember the tail so a partially written bucket can be
		 * trimmed off and re-dumped whole on the next pass. */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			/* Expired entries are invisible to userspace */
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Not even one bucket fits: give up */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Drop the half-written bucket; the dump resumes from it next time */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
987
/* Operations table for the timeout-capable variant of this set type;
 * installed into set->variant when the set is created with a timeout.
 */
static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,		/* kernel-side add/del/test */
	.uadt	= type_pf_uadt,		/* userspace add/del/test */
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,		/* header data for userspace */
	.list	= type_pf_tlist,	/* timeout-aware listing */
	.resize	= type_pf_tresize,
	.same_set = type_pf_same_set,
};
1003
1004 static void
1005 type_pf_gc(unsigned long ul_set)
1006 {
1007         struct ip_set *set = (struct ip_set *) ul_set;
1008         struct ip_set_hash *h = set->data;
1009
1010         pr_debug("called\n");
1011         write_lock_bh(&set->lock);
1012         type_pf_expire(h);
1013         write_unlock_bh(&set->lock);
1014
1015         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1016         add_timer(&h->gc);
1017 }
1018
1019 static void
1020 type_pf_gc_init(struct ip_set *set)
1021 {
1022         struct ip_set_hash *h = set->data;
1023
1024         init_timer(&h->gc);
1025         h->gc.data = (unsigned long) set;
1026         h->gc.function = type_pf_gc;
1027         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1028         add_timer(&h->gc);
1029         pr_debug("gc initialized, run in every %u\n",
1030                  IPSET_GC_PERIOD(h->timeout));
1031 }
1032
/* Clean up the template macro namespace so this header can be
 * re-included by another set type with fresh definitions. */

/* per-element data operations */
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

/* element/timeout accessors */
#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

/* plain (no-timeout) variant */
#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

/* timeout variant */
#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

/* shared set operations and variant tables */
#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant