ec9d9bea1e370e937aaf3006c70c157bc45eb7b4
[linux-2.6.git] / include / linux / netfilter / ipset / ip_set_ahash.h
1 #ifndef _IP_SET_AHASH_H
2 #define _IP_SET_AHASH_H
3
4 #include <linux/rcupdate.h>
5 #include <linux/jhash.h>
6 #include <linux/netfilter/ipset/ip_set_timeout.h>
7
8 /* Hashing which uses arrays to resolve clashing. The hash table is resized
9  * (doubled) when searching becomes too long.
10  * Internally jhash is used with the assumption that the size of the
11  * stored data is a multiple of sizeof(u32). If storage supports timeout,
12  * the timeout field must be the last one in the data structure - that field
13  * is ignored when computing the hash key.
14  *
15  * Readers and resizing
16  *
17  * Resizing can be triggered by userspace command only, and those
18  * are serialized by the nfnl mutex. During resizing the set is
19  * read-locked, so the only possible concurrent operations are
20  * the kernel side readers. Those must be protected by proper RCU locking.
21  */
22
23 /* Number of elements to store in an initial array block */
24 #define AHASH_INIT_SIZE                 4
25 /* Max number of elements to store in an array block */
26 #define AHASH_MAX_SIZE                  (3*4)
27
/* A hash bucket: clashing elements are kept in a dynamically sized
 * array (see AHASH_INIT_SIZE/AHASH_MAX_SIZE for the growth steps). */
struct hbucket {
	void *value;		/* the array of the values */
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
};
34
35 /* The hash table: the table size stored here in order to make resizing easy */
36 struct htable {
37         u8 htable_bits;         /* size of hash table == 2^htable_bits */
38         struct hbucket bucket[0]; /* hashtable buckets */
39 };
40
41 #define hbucket(h, i)           &((h)->bucket[i])
42
/* Book-keeping of the prefixes added to the set: one slot per cidr
 * value in use, kept sorted by add_cidr()/del_cidr(). */
struct ip_set_hash_nets {
	u8 cidr;		/* the different cidr values in the set */
	u32 nets;		/* number of elements per cidr */
};
48
49 /* The generic ip_set hash structure */
50 struct ip_set_hash {
51         struct htable *table;   /* the hash table */
52         u32 maxelem;            /* max elements in the hash */
53         u32 elements;           /* current element (vs timeout) */
54         u32 initval;            /* random jhash init value */
55         u32 timeout;            /* timeout value, if enabled */
56         struct timer_list gc;   /* garbage collection when timeout enabled */
57 #ifdef IP_SET_HASH_WITH_NETMASK
58         u8 netmask;             /* netmask value for subnets to store */
59 #endif
60 #ifdef IP_SET_HASH_WITH_NETS
61         struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
62 #endif
63 };
64
65 /* Compute htable_bits from the user input parameter hashsize */
66 static u8
67 htable_bits(u32 hashsize)
68 {
69         /* Assume that hashsize == 2^htable_bits */
70         u8 bits = fls(hashsize - 1);
71         if (jhash_size(bits) != hashsize)
72                 /* Round up to the first 2^n value */
73                 bits = fls(hashsize);
74
75         return bits;
76 }
77
78 #ifdef IP_SET_HASH_WITH_NETS
79
80 #define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)
81
82 /* Network cidr size book keeping when the hash stores different
83  * sized networks */
84 static void
85 add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
86 {
87         u8 i;
88
89         ++h->nets[cidr-1].nets;
90
91         pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);
92
93         if (h->nets[cidr-1].nets > 1)
94                 return;
95
96         /* New cidr size */
97         for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
98                 /* Add in increasing prefix order, so larger cidr first */
99                 if (h->nets[i].cidr < cidr)
100                         swap(h->nets[i].cidr, cidr);
101         }
102         if (i < host_mask)
103                 h->nets[i].cidr = cidr;
104 }
105
/* Drop one element with the given prefix length from the book-keeping;
 * when it was the last one, remove the cidr value from the sorted list. */
static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
	u8 i;

	--h->nets[cidr-1].nets;

	pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

	if (h->nets[cidr-1].nets != 0)
		return;		/* other elements still use this cidr */

	/* All entries with this cidr size deleted, so cleanup h->cidr[] */
	/* Shift the following cidr values down over the deleted slot,
	 * keeping the list sorted. */
	for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
		if (h->nets[i].cidr == cidr)
			h->nets[i].cidr = cidr = h->nets[i+1].cidr;
	}
	/* The deleted cidr was present in the list and host_mask is
	 * 32 or 128, so the loop above ran at least once: i >= 1. */
	h->nets[i - 1].cidr = 0;
}
125 #endif
126
127 /* Destroy the hashtable part of the set */
128 static void
129 ahash_destroy(struct htable *t)
130 {
131         struct hbucket *n;
132         u32 i;
133
134         for (i = 0; i < jhash_size(t->htable_bits); i++) {
135                 n = hbucket(t, i);
136                 if (n->size)
137                         /* FIXME: use slab cache */
138                         kfree(n->value);
139         }
140
141         ip_set_free(t);
142 }
143
144 /* Calculate the actual memory size of the set data */
145 static size_t
146 ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
147 {
148         u32 i;
149         struct htable *t = h->table;
150         size_t memsize = sizeof(*h)
151                          + sizeof(*t)
152 #ifdef IP_SET_HASH_WITH_NETS
153                          + sizeof(struct ip_set_hash_nets) * host_mask
154 #endif
155                          + jhash_size(t->htable_bits) * sizeof(struct hbucket);
156
157         for (i = 0; i < jhash_size(t->htable_bits); i++)
158                         memsize += t->bucket[i].size * dsize;
159
160         return memsize;
161 }
162
163 /* Flush a hash type of set: destroy all elements */
164 static void
165 ip_set_hash_flush(struct ip_set *set)
166 {
167         struct ip_set_hash *h = set->data;
168         struct htable *t = h->table;
169         struct hbucket *n;
170         u32 i;
171
172         for (i = 0; i < jhash_size(t->htable_bits); i++) {
173                 n = hbucket(t, i);
174                 if (n->size) {
175                         n->size = n->pos = 0;
176                         /* FIXME: use slab cache */
177                         kfree(n->value);
178                 }
179         }
180 #ifdef IP_SET_HASH_WITH_NETS
181         memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
182                            * SET_HOST_MASK(set->family));
183 #endif
184         h->elements = 0;
185 }
186
187 /* Destroy a hash type of set */
188 static void
189 ip_set_hash_destroy(struct ip_set *set)
190 {
191         struct ip_set_hash *h = set->data;
192
193         if (with_timeout(h->timeout))
194                 del_timer_sync(&h->gc);
195
196         ahash_destroy(h->table);
197         kfree(h);
198
199         set->data = NULL;
200 }
201
202 #define HKEY(data, initval, htable_bits)                                 \
203 (jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
204         & jhash_mask(htable_bits))
205
206 #endif /* _IP_SET_AHASH_H */
207
208 #define CONCAT(a, b, c)         a##b##c
209 #define TOKEN(a, b, c)          CONCAT(a, b, c)
210
211 /* Type/family dependent function prototypes */
212
213 #define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
214 #define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
215 #define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
216 #define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
217 #define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
218 #define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
219 #define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
220
221 #define type_pf_elem            TOKEN(TYPE, PF, _elem)
222 #define type_pf_telem           TOKEN(TYPE, PF, _telem)
223 #define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
224 #define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
225 #define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)
226
227 #define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
228 #define type_pf_add             TOKEN(TYPE, PF, _add)
229 #define type_pf_del             TOKEN(TYPE, PF, _del)
230 #define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
231 #define type_pf_test            TOKEN(TYPE, PF, _test)
232
233 #define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
234 #define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
235 #define type_pf_expire          TOKEN(TYPE, PF, _expire)
236 #define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
237 #define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
238 #define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
239 #define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)
240
241 #define type_pf_resize          TOKEN(TYPE, PF, _resize)
242 #define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
243 #define type_pf_flush           ip_set_hash_flush
244 #define type_pf_destroy         ip_set_hash_destroy
245 #define type_pf_head            TOKEN(TYPE, PF, _head)
246 #define type_pf_list            TOKEN(TYPE, PF, _list)
247 #define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
248 #define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
249 #define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
250 #define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
251 #define type_pf_gc              TOKEN(TYPE, PF, _gc)
252 #define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
253 #define type_pf_variant         TOKEN(TYPE, PF, _variant)
254 #define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)
255
256 /* Flavour without timeout */
257
258 /* Get the ith element from the array block n */
259 #define ahash_data(n, i)        \
260         ((struct type_pf_elem *)((n)->value) + (i))
261
262 /* Add an element to the hash table when resizing the set:
263  * we spare the maintenance of the internal counters. */
264 static int
265 type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
266 {
267         if (n->pos >= n->size) {
268                 void *tmp;
269
270                 if (n->size >= AHASH_MAX_SIZE)
271                         /* Trigger rehashing */
272                         return -EAGAIN;
273
274                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
275                               * sizeof(struct type_pf_elem),
276                               GFP_ATOMIC);
277                 if (!tmp)
278                         return -ENOMEM;
279                 if (n->size) {
280                         memcpy(tmp, n->value,
281                                sizeof(struct type_pf_elem) * n->size);
282                         kfree(n->value);
283                 }
284                 n->value = tmp;
285                 n->size += AHASH_INIT_SIZE;
286         }
287         type_pf_data_copy(ahash_data(n, n->pos++), value);
288         return 0;
289 }
290
/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* Read-locked while copying: writers are excluded, concurrent
	 * kernel side readers are protected by RCU (see file header). */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j);
			/* Rehash the element with the new table size */
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_add(m, data);
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* A bucket overflowed: double the
					 * table size again and restart */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table, then wait until no reader can hold a
	 * reference to the old one before destroying it. */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
348
/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	u32 key;

	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	/* The table pointer may be replaced by a concurrent resize,
	 * so fetch it under the RCU read lock. */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	/* Reject duplicates before inserting */
	for (i = 0; i < n->pos; i++)
		if (type_pf_data_equal(ahash_data(n, i), d)) {
			ret = -IPSET_ERR_EXIST;
			goto out;
		}

	ret = type_pf_elem_add(n, value);
	if (ret != 0)
		/* -EAGAIN asks the caller to resize the set and retry */
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
386
/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (!type_pf_data_equal(data, d))
			continue;
		/* Found: delete by overwriting with the last element */
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_data(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, d->cidr, HOST_MASK);
#endif
		/* Shrink the array when it became sparse enough; if the
		 * smaller allocation fails the deletion still succeeded,
		 * the array just stays oversized. */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_elem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_elem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
433
434 #ifdef IP_SET_HASH_WITH_NETS
435
436 /* Special test function which takes into account the different network
437  * sizes added to the set */
438 static int
439 type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
440 {
441         struct ip_set_hash *h = set->data;
442         struct htable *t = h->table;
443         struct hbucket *n;
444         const struct type_pf_elem *data;
445         int i, j = 0;
446         u32 key;
447         u8 host_mask = SET_HOST_MASK(set->family);
448
449         pr_debug("test by nets\n");
450         for (; j < host_mask && h->nets[j].cidr; j++) {
451                 type_pf_data_netmask(d, h->nets[j].cidr);
452                 key = HKEY(d, h->initval, t->htable_bits);
453                 n = hbucket(t, key);
454                 for (i = 0; i < n->pos; i++) {
455                         data = ahash_data(n, i);
456                         if (type_pf_data_equal(data, d))
457                                 return 1;
458                 }
459         }
460         return 0;
461 }
462 #endif
463
464 /* Test whether the element is added to the set */
465 static int
466 type_pf_test(struct ip_set *set, void *value, u32 timeout)
467 {
468         struct ip_set_hash *h = set->data;
469         struct htable *t = h->table;
470         struct type_pf_elem *d = value;
471         struct hbucket *n;
472         const struct type_pf_elem *data;
473         int i;
474         u32 key;
475
476 #ifdef IP_SET_HASH_WITH_NETS
477         /* If we test an IP address and not a network address,
478          * try all possible network sizes */
479         if (d->cidr == SET_HOST_MASK(set->family))
480                 return type_pf_test_cidrs(set, d, timeout);
481 #endif
482
483         key = HKEY(d, h->initval, t->htable_bits);
484         n = hbucket(t, key);
485         for (i = 0; i < n->pos; i++) {
486                 data = ahash_data(n, i);
487                 if (type_pf_data_equal(data, d))
488                         return 1;
489         }
490         return 0;
491 }
492
/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct ip_set_hash *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	/* Element size depends on whether timeouts are stored */
	read_lock_bh(&set->lock);
	memsize = ahash_memsize(h, with_timeout(h->timeout)
					? sizeof(struct type_pf_telem)
					: sizeof(struct type_pf_elem),
				set->family == AF_INET ? 32 : 128);
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	/* NOTE: the NLA_PUT_* macros jump to nla_put_failure when the
	 * message buffer is full. */
	NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
		      htonl(jhash_size(h->table->htable_bits)));
	NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK)
		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
#endif
	/* Do not count the reference held for this request itself */
	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
		      htonl(atomic_read(&set->ref) - 1));
	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
	if (with_timeout(h->timeout))
		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}
529
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
	     struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];	/* bucket this dump round started at */
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	/* cb->args[2] is the netlink dump resume point: the next
	 * bucket index to emit. */
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Remember the tail so a partially written bucket can
		 * be trimmed and re-dumped in the next message. */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, data);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Not even one bucket fits */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_list(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Message full: drop the partial bucket, it will be re-dumped
	 * from scratch in the next dump message. */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
587
/* Only declared here: the kadt/uadt handlers are defined by the
 * type-specific source file that includes this header. */
static int
type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
	     enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
static int
type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
	     enum ipset_adt adt, u32 *lineno, u32 flags);

/* Method table of the flavour without timeout support */
static const struct ip_set_type_variant type_pf_variant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_add,
		[IPSET_DEL] = type_pf_del,
		[IPSET_TEST] = type_pf_test,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_list,
	.resize	= type_pf_resize,
	.same_set = type_pf_same_set,
};
610
611 /* Flavour with timeout support */
612
613 #define ahash_tdata(n, i) \
614         (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
615
616 static inline u32
617 type_pf_data_timeout(const struct type_pf_elem *data)
618 {
619         const struct type_pf_telem *tdata =
620                 (const struct type_pf_telem *) data;
621
622         return tdata->timeout;
623 }
624
625 static inline bool
626 type_pf_data_expired(const struct type_pf_elem *data)
627 {
628         const struct type_pf_telem *tdata =
629                 (const struct type_pf_telem *) data;
630
631         return ip_set_timeout_expired(tdata->timeout);
632 }
633
634 static inline void
635 type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
636 {
637         struct type_pf_telem *tdata = (struct type_pf_telem *) data;
638
639         tdata->timeout = ip_set_timeout_set(timeout);
640 }
641
642 static int
643 type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
644                   u32 timeout)
645 {
646         struct type_pf_elem *data;
647
648         if (n->pos >= n->size) {
649                 void *tmp;
650
651                 if (n->size >= AHASH_MAX_SIZE)
652                         /* Trigger rehashing */
653                         return -EAGAIN;
654
655                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
656                               * sizeof(struct type_pf_telem),
657                               GFP_ATOMIC);
658                 if (!tmp)
659                         return -ENOMEM;
660                 if (n->size) {
661                         memcpy(tmp, n->value,
662                                sizeof(struct type_pf_telem) * n->size);
663                         kfree(n->value);
664                 }
665                 n->value = tmp;
666                 n->size += AHASH_INIT_SIZE;
667         }
668         data = ahash_tdata(n, n->pos++);
669         type_pf_data_copy(data, value);
670         type_pf_data_timeout_set(data, timeout);
671         return 0;
672 }
673
674 /* Delete expired elements from the hashtable */
675 static void
676 type_pf_expire(struct ip_set_hash *h)
677 {
678         struct htable *t = h->table;
679         struct hbucket *n;
680         struct type_pf_elem *data;
681         u32 i;
682         int j;
683
684         for (i = 0; i < jhash_size(t->htable_bits); i++) {
685                 n = hbucket(t, i);
686                 for (j = 0; j < n->pos; j++) {
687                         data = ahash_tdata(n, j);
688                         if (type_pf_data_expired(data)) {
689                                 pr_debug("expired %u/%u\n", i, j);
690 #ifdef IP_SET_HASH_WITH_NETS
691                                 del_cidr(h, data->cidr, HOST_MASK);
692 #endif
693                                 if (j != n->pos - 1)
694                                         /* Not last one */
695                                         type_pf_data_copy(data,
696                                                 ahash_tdata(n, n->pos - 1));
697                                 n->pos--;
698                                 h->elements--;
699                         }
700                 }
701                 if (n->pos + AHASH_INIT_SIZE < n->size) {
702                         void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
703                                             * sizeof(struct type_pf_telem),
704                                             GFP_ATOMIC);
705                         if (!tmp)
706                                 /* Still try to delete expired elements */
707                                 continue;
708                         n->size -= AHASH_INIT_SIZE;
709                         memcpy(tmp, n->value,
710                                n->size * sizeof(struct type_pf_telem));
711                         kfree(n->value);
712                         n->value = tmp;
713                 }
714         }
715 }
716
/* Resize the timeout-aware hash: first try to make room by expiring
 * elements, otherwise double the table size like type_pf_resize(). */
static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once */
	if (!retried) {
		i = h->elements;
		write_lock_bh(&set->lock);
		type_pf_expire(set->data);
		write_unlock_bh(&set->lock);
		if (h->elements <  i)
			/* Expiring freed some room: no resize needed */
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	if (!htable_bits)
		/* In case we have plenty of memory :-) */
		return -IPSET_ERR_HASH_FULL;
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* Read-locked while copying: writers are excluded, concurrent
	 * kernel side readers are protected by RCU. */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			/* The timeout value is carried over unchanged */
			ret = type_pf_elem_tadd(m, data,
						type_pf_data_timeout(data));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* Bucket overflow: double again */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table, wait out existing readers, then free
	 * the old one. */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}
778
/* Add an element to the timeout-aware hash: an expired entry found in
 * the target bucket is reused instead of growing the array. */
static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	struct type_pf_elem *data;
	int ret = 0, i, j = AHASH_MAX_SIZE + 1;
	u32 key;

	if (h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		type_pf_expire(h);
	if (h->elements >= h->maxelem)
		return -IPSET_ERR_HASH_FULL;

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	/* j marks a reusable expired slot; the sentinel value
	 * AHASH_MAX_SIZE + 1 means none was found. */
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d)) {
			if (type_pf_data_expired(data))
				j = i;
			else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		} else if (j == AHASH_MAX_SIZE + 1 &&
			   type_pf_data_expired(data))
			j = i;
	}
	if (j != AHASH_MAX_SIZE + 1) {
		/* Reuse the expired slot for the new element */
		data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, data->cidr, HOST_MASK);
		add_cidr(h, d->cidr, HOST_MASK);
#endif
		type_pf_data_copy(data, d);
		type_pf_data_timeout_set(data, timeout);
		goto out;
	}
	ret = type_pf_elem_tadd(n, d, timeout);
	if (ret != 0)
		/* -EAGAIN asks the caller to resize the set and retry */
		goto out;

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, d->cidr, HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
835
836 static int
837 type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
838 {
839         struct ip_set_hash *h = set->data;
840         struct htable *t = h->table;
841         const struct type_pf_elem *d = value;
842         struct hbucket *n;
843         int i, ret = 0;
844         struct type_pf_elem *data;
845         u32 key;
846
847         key = HKEY(value, h->initval, t->htable_bits);
848         n = hbucket(t, key);
849         for (i = 0; i < n->pos; i++) {
850                 data = ahash_tdata(n, i);
851                 if (!type_pf_data_equal(data, d))
852                         continue;
853                 if (type_pf_data_expired(data))
854                         ret = -IPSET_ERR_EXIST;
855                 if (i != n->pos - 1)
856                         /* Not last one */
857                         type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));
858
859                 n->pos--;
860                 h->elements--;
861 #ifdef IP_SET_HASH_WITH_NETS
862                 del_cidr(h, d->cidr, HOST_MASK);
863 #endif
864                 if (n->pos + AHASH_INIT_SIZE < n->size) {
865                         void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
866                                             * sizeof(struct type_pf_telem),
867                                             GFP_ATOMIC);
868                         if (!tmp)
869                                 return 0;
870                         n->size -= AHASH_INIT_SIZE;
871                         memcpy(tmp, n->value,
872                                n->size * sizeof(struct type_pf_telem));
873                         kfree(n->value);
874                         n->value = tmp;
875                 }
876                 return 0;
877         }
878
879         return -IPSET_ERR_EXIST;
880 }
881
#ifdef IP_SET_HASH_WITH_NETS
/* Membership test trying every prefix length present in the set:
 * mask the element with each recorded cidr value in turn and look it
 * up.  An entry only counts as a match if it has not expired yet. */
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data;
	struct hbucket *n;
	u8 host_mask = SET_HOST_MASK(set->family);
	u32 key;
	int i, j;

	/* The nets[] book-keeping is terminated by a zero cidr */
	for (j = 0; j < host_mask && h->nets[j].cidr; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			if (!type_pf_data_equal(data, d))
				continue;
			return !type_pf_data_expired(data);
		}
	}
	return 0;
}
#endif
907
908 static int
909 type_pf_ttest(struct ip_set *set, void *value, u32 timeout)
910 {
911         struct ip_set_hash *h = set->data;
912         struct htable *t = h->table;
913         struct type_pf_elem *data, *d = value;
914         struct hbucket *n;
915         int i;
916         u32 key;
917
918 #ifdef IP_SET_HASH_WITH_NETS
919         if (d->cidr == SET_HOST_MASK(set->family))
920                 return type_pf_ttest_cidrs(set, d, timeout);
921 #endif
922         key = HKEY(d, h->initval, t->htable_bits);
923         n = hbucket(t, key);
924         for (i = 0; i < n->pos; i++) {
925                 data = ahash_tdata(n, i);
926                 if (type_pf_data_equal(data, d))
927                         return !type_pf_data_expired(data);
928         }
929         return 0;
930 }
931
/* Dump the (timeout aware) elements of the set to userspace via a
 * netlink message.  The dump may span several callbacks: cb->args[2]
 * holds the index of the next bucket to list and is restored from the
 * previous invocation; "first" remembers where this invocation
 * started so a full-message condition can be told apart from an
 * empty-progress one. */
static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Rollback point: trim here if the bucket doesn't fit */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			/* Expired entries are skipped silently */
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Nothing listed yet in this
					 * message: give up completely */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Message full: drop the partially added bucket and continue
	 * from it in the next callback */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		/* A single bucket did not fit into an empty message:
		 * no progress is possible at all */
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
988
/* Operations table for the timeout-supporting variant of the set:
 * add/del/test and list are wired to the t-prefixed implementations,
 * the rest is shared with the plain variant. */
static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize = type_pf_tresize,
	.same_set = type_pf_same_set,
};
1004
1005 static void
1006 type_pf_gc(unsigned long ul_set)
1007 {
1008         struct ip_set *set = (struct ip_set *) ul_set;
1009         struct ip_set_hash *h = set->data;
1010
1011         pr_debug("called\n");
1012         write_lock_bh(&set->lock);
1013         type_pf_expire(h);
1014         write_unlock_bh(&set->lock);
1015
1016         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1017         add_timer(&h->gc);
1018 }
1019
1020 static void
1021 type_pf_gc_init(struct ip_set *set)
1022 {
1023         struct ip_set_hash *h = set->data;
1024
1025         init_timer(&h->gc);
1026         h->gc.data = (unsigned long) set;
1027         h->gc.function = type_pf_gc;
1028         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1029         add_timer(&h->gc);
1030         pr_debug("gc initialized, run in every %u\n",
1031                  IPSET_GC_PERIOD(h->timeout));
1032 }
1033
/* Undefine all the per-type template macros so this header can be
 * re-included by a set type module with a fresh set of definitions. */
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_list
#undef type_pf_data_tlist

#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_netmask
#undef type_pf_data_timeout_set

#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

#undef type_pf_elem_tadd
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant