4449dc52edf5bcd4094c49dc2ece8fe5de2a87b7
[linux-2.6.git] / net / sunrpc / cache.c
1 /*
2  * net/sunrpc/cache.c
3  *
4  * Generic code for various authentication-related caches
5  * used by sunrpc clients and servers.
6  *
7  * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
8  *
9  * Released under terms in GPL version 2.  See COPYING.
10  *
11  */
12
13 #include <linux/types.h>
14 #include <linux/fs.h>
15 #include <linux/file.h>
16 #include <linux/slab.h>
17 #include <linux/signal.h>
18 #include <linux/sched.h>
19 #include <linux/kmod.h>
20 #include <linux/list.h>
21 #include <linux/module.h>
22 #include <linux/ctype.h>
23 #include <asm/uaccess.h>
24 #include <linux/poll.h>
25 #include <linux/seq_file.h>
26 #include <linux/proc_fs.h>
27 #include <linux/net.h>
28 #include <linux/workqueue.h>
29 #include <linux/mutex.h>
30 #include <asm/ioctls.h>
31 #include <linux/sunrpc/types.h>
32 #include <linux/sunrpc/cache.h>
33 #include <linux/sunrpc/stats.h>
34
35 #define  RPCDBG_FACILITY RPCDBG_CACHE
36
37 static void cache_defer_req(struct cache_req *req, struct cache_head *item);
38 static void cache_revisit_request(struct cache_head *item);
39
40 void cache_init(struct cache_head *h)
41 {
42         time_t now = get_seconds();
43         h->next = NULL;
44         h->flags = 0;
45         atomic_set(&h->refcnt, 1);
46         h->expiry_time = now + CACHE_NEW_EXPIRY;
47         h->last_refresh = now;
48 }
49
50 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
51                                        struct cache_head *key, int hash)
52 {
53         struct cache_head **head,  **hp;
54         struct cache_head *new = NULL;
55
56         head = &detail->hash_table[hash];
57
58         read_lock(&detail->hash_lock);
59
60         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
61                 struct cache_head *tmp = *hp;
62                 if (detail->match(tmp, key)) {
63                         cache_get(tmp);
64                         read_unlock(&detail->hash_lock);
65                         return tmp;
66                 }
67         }
68         read_unlock(&detail->hash_lock);
69         /* Didn't find anything, insert an empty entry */
70
71         new = detail->alloc();
72         if (!new)
73                 return NULL;
74         cache_init(new);
75
76         write_lock(&detail->hash_lock);
77
78         /* check if entry appeared while we slept */
79         for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
80                 struct cache_head *tmp = *hp;
81                 if (detail->match(tmp, key)) {
82                         cache_get(tmp);
83                         write_unlock(&detail->hash_lock);
84                         detail->cache_put(new, detail);
85                         return tmp;
86                 }
87         }
88         detail->init(new, key);
89         new->next = *head;
90         *head = new;
91         detail->entries++;
92         cache_get(new);
93         write_unlock(&detail->hash_lock);
94
95         return new;
96 }
97 EXPORT_SYMBOL(sunrpc_cache_lookup);
98
99 struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
100                                        struct cache_head *new, struct cache_head *old, int hash)
101 {
102         /* The 'old' entry is to be replaced by 'new'.
103          * If 'old' is not VALID, we update it directly,
104          * otherwise we need to replace it
105          */
106         struct cache_head **head;
107         struct cache_head *tmp;
108
109         if (!test_bit(CACHE_VALID, &old->flags)) {
110                 write_lock(&detail->hash_lock);
111                 if (!test_bit(CACHE_VALID, &old->flags)) {
112                         if (test_bit(CACHE_NEGATIVE, &new->flags))
113                                 set_bit(CACHE_NEGATIVE, &old->flags);
114                         else
115                                 detail->update(old, new);
116                         /* FIXME cache_fresh should come first */
117                         write_unlock(&detail->hash_lock);
118                         cache_fresh(detail, old, new->expiry_time);
119                         return old;
120                 }
121                 write_unlock(&detail->hash_lock);
122         }
123         /* We need to insert a new entry */
124         tmp = detail->alloc();
125         if (!tmp) {
126                 detail->cache_put(old, detail);
127                 return NULL;
128         }
129         cache_init(tmp);
130         detail->init(tmp, old);
131         head = &detail->hash_table[hash];
132
133         write_lock(&detail->hash_lock);
134         if (test_bit(CACHE_NEGATIVE, &new->flags))
135                 set_bit(CACHE_NEGATIVE, &tmp->flags);
136         else
137                 detail->update(tmp, new);
138         tmp->next = *head;
139         *head = tmp;
140         cache_get(tmp);
141         write_unlock(&detail->hash_lock);
142         cache_fresh(detail, tmp, new->expiry_time);
143         cache_fresh(detail, old, 0);
144         detail->cache_put(old, detail);
145         return tmp;
146 }
147 EXPORT_SYMBOL(sunrpc_cache_update);
148
149 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
150 /*
151  * This is the generic cache management routine for all
152  * the authentication caches.
153  * It checks the currency of a cache item and will (later)
154  * initiate an upcall to fill it if needed.
155  *
156  *
157  * Returns 0 if the cache_head can be used, or cache_puts it and returns
158  * -EAGAIN if upcall is pending,
159  * -ENOENT if cache entry was negative
160  */
161 int cache_check(struct cache_detail *detail,
162                     struct cache_head *h, struct cache_req *rqstp)
163 {
164         int rv;
165         long refresh_age, age;
166
167         /* First decide return status as best we can */
168         if (!test_bit(CACHE_VALID, &h->flags) ||
169             h->expiry_time < get_seconds())
170                 rv = -EAGAIN;
171         else if (detail->flush_time > h->last_refresh)
172                 rv = -EAGAIN;
173         else {
174                 /* entry is valid */
175                 if (test_bit(CACHE_NEGATIVE, &h->flags))
176                         rv = -ENOENT;
177                 else rv = 0;
178         }
179
180         /* now see if we want to start an upcall */
181         refresh_age = (h->expiry_time - h->last_refresh);
182         age = get_seconds() - h->last_refresh;
183
184         if (rqstp == NULL) {
185                 if (rv == -EAGAIN)
186                         rv = -ENOENT;
187         } else if (rv == -EAGAIN || age > refresh_age/2) {
188                 dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age);
189                 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
190                         switch (cache_make_upcall(detail, h)) {
191                         case -EINVAL:
192                                 clear_bit(CACHE_PENDING, &h->flags);
193                                 if (rv == -EAGAIN) {
194                                         set_bit(CACHE_NEGATIVE, &h->flags);
195                                         cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY);
196                                         rv = -ENOENT;
197                                 }
198                                 break;
199
200                         case -EAGAIN:
201                                 clear_bit(CACHE_PENDING, &h->flags);
202                                 cache_revisit_request(h);
203                                 break;
204                         }
205                 }
206         }
207
208         if (rv == -EAGAIN)
209                 cache_defer_req(rqstp, h);
210
211         if (rv && h)
212                 detail->cache_put(h, detail);
213         return rv;
214 }
215
216 static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
217
218 void cache_fresh(struct cache_detail *detail,
219                  struct cache_head *head, time_t expiry)
220 {
221
222         head->expiry_time = expiry;
223         head->last_refresh = get_seconds();
224         if (!test_and_set_bit(CACHE_VALID, &head->flags))
225                 cache_revisit_request(head);
226         if (test_and_clear_bit(CACHE_PENDING, &head->flags))
227                 queue_loose(detail, head);
228 }
229
230 /*
231  * caches need to be periodically cleaned.
232  * For this we maintain a list of cache_detail and
233  * a current pointer into that list and into the table
234  * for that entry.
235  *
236  * Each time cache_clean is called it finds the next non-empty entry
237  * in the current table and walks the list in that entry
238  * looking for entries that can be removed.
239  *
240  * An entry gets removed if:
241  * - The expiry is before current time
242  * - The last_refresh time is before the flush_time for that cache
243  *
244  * later we might drop old entries with non-NEVER expiry if that table
245  * is getting 'full' for some definition of 'full'
246  *
247  * The question of "how often to scan a table" is an interesting one
248  * and is answered in part by the use of the "nextcheck" field in the
249  * cache_detail.
250  * When a scan of a table begins, the nextcheck field is set to a time
251  * that is well into the future.
252  * While scanning, if an expiry time is found that is earlier than the
253  * current nextcheck time, nextcheck is set to that expiry time.
254  * If the flush_time is ever set to a time earlier than the nextcheck
255  * time, the nextcheck time is then set to that flush_time.
256  *
257  * A table is then only scanned if the current time is at least
258  * the nextcheck time.
259  * 
260  */
261
262 static LIST_HEAD(cache_list);
263 static DEFINE_SPINLOCK(cache_list_lock);
264 static struct cache_detail *current_detail;
265 static int current_index;
266
267 static struct file_operations cache_file_operations;
268 static struct file_operations content_file_operations;
269 static struct file_operations cache_flush_operations;
270
271 static void do_cache_clean(void *data);
272 static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
273
274 void cache_register(struct cache_detail *cd)
275 {
276         cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
277         if (cd->proc_ent) {
278                 struct proc_dir_entry *p;
279                 cd->proc_ent->owner = cd->owner;
280                 cd->channel_ent = cd->content_ent = NULL;
281                 
282                 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
283                                       cd->proc_ent);
284                 cd->flush_ent =  p;
285                 if (p) {
286                         p->proc_fops = &cache_flush_operations;
287                         p->owner = cd->owner;
288                         p->data = cd;
289                 }
290  
291                 if (cd->cache_request || cd->cache_parse) {
292                         p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
293                                               cd->proc_ent);
294                         cd->channel_ent = p;
295                         if (p) {
296                                 p->proc_fops = &cache_file_operations;
297                                 p->owner = cd->owner;
298                                 p->data = cd;
299                         }
300                 }
301                 if (cd->cache_show) {
302                         p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
303                                               cd->proc_ent);
304                         cd->content_ent = p;
305                         if (p) {
306                                 p->proc_fops = &content_file_operations;
307                                 p->owner = cd->owner;
308                                 p->data = cd;
309                         }
310                 }
311         }
312         rwlock_init(&cd->hash_lock);
313         INIT_LIST_HEAD(&cd->queue);
314         spin_lock(&cache_list_lock);
315         cd->nextcheck = 0;
316         cd->entries = 0;
317         atomic_set(&cd->readers, 0);
318         cd->last_close = 0;
319         cd->last_warn = -1;
320         list_add(&cd->others, &cache_list);
321         spin_unlock(&cache_list_lock);
322
323         /* start the cleaning process */
324         schedule_work(&cache_cleaner);
325 }
326
327 int cache_unregister(struct cache_detail *cd)
328 {
329         cache_purge(cd);
330         spin_lock(&cache_list_lock);
331         write_lock(&cd->hash_lock);
332         if (cd->entries || atomic_read(&cd->inuse)) {
333                 write_unlock(&cd->hash_lock);
334                 spin_unlock(&cache_list_lock);
335                 return -EBUSY;
336         }
337         if (current_detail == cd)
338                 current_detail = NULL;
339         list_del_init(&cd->others);
340         write_unlock(&cd->hash_lock);
341         spin_unlock(&cache_list_lock);
342         if (cd->proc_ent) {
343                 if (cd->flush_ent)
344                         remove_proc_entry("flush", cd->proc_ent);
345                 if (cd->channel_ent)
346                         remove_proc_entry("channel", cd->proc_ent);
347                 if (cd->content_ent)
348                         remove_proc_entry("content", cd->proc_ent);
349
350                 cd->proc_ent = NULL;
351                 remove_proc_entry(cd->name, proc_net_rpc);
352         }
353         if (list_empty(&cache_list)) {
354                 /* module must be being unloaded so its safe to kill the worker */
355                 cancel_delayed_work(&cache_cleaner);
356                 flush_scheduled_work();
357         }
358         return 0;
359 }
360
361 /* clean cache tries to find something to clean
362  * and cleans it.
363  * It returns 1 if it cleaned something,
364  *            0 if it didn't find anything this time
365  *           -1 if it fell off the end of the list.
366  */
367 static int cache_clean(void)
368 {
369         int rv = 0;
370         struct list_head *next;
371
372         spin_lock(&cache_list_lock);
373
374         /* find a suitable table if we don't already have one */
375         while (current_detail == NULL ||
376             current_index >= current_detail->hash_size) {
377                 if (current_detail)
378                         next = current_detail->others.next;
379                 else
380                         next = cache_list.next;
381                 if (next == &cache_list) {
382                         current_detail = NULL;
383                         spin_unlock(&cache_list_lock);
384                         return -1;
385                 }
386                 current_detail = list_entry(next, struct cache_detail, others);
387                 if (current_detail->nextcheck > get_seconds())
388                         current_index = current_detail->hash_size;
389                 else {
390                         current_index = 0;
391                         current_detail->nextcheck = get_seconds()+30*60;
392                 }
393         }
394
395         /* find a non-empty bucket in the table */
396         while (current_detail &&
397                current_index < current_detail->hash_size &&
398                current_detail->hash_table[current_index] == NULL)
399                 current_index++;
400
401         /* find a cleanable entry in the bucket and clean it, or set to next bucket */
402         
403         if (current_detail && current_index < current_detail->hash_size) {
404                 struct cache_head *ch, **cp;
405                 struct cache_detail *d;
406                 
407                 write_lock(&current_detail->hash_lock);
408
409                 /* Ok, now to clean this strand */
410                         
411                 cp = & current_detail->hash_table[current_index];
412                 ch = *cp;
413                 for (; ch; cp= & ch->next, ch= *cp) {
414                         if (current_detail->nextcheck > ch->expiry_time)
415                                 current_detail->nextcheck = ch->expiry_time+1;
416                         if (ch->expiry_time >= get_seconds()
417                             && ch->last_refresh >= current_detail->flush_time
418                                 )
419                                 continue;
420                         if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
421                                 queue_loose(current_detail, ch);
422
423                         if (atomic_read(&ch->refcnt) == 1)
424                                 break;
425                 }
426                 if (ch) {
427                         *cp = ch->next;
428                         ch->next = NULL;
429                         current_detail->entries--;
430                         rv = 1;
431                 }
432                 write_unlock(&current_detail->hash_lock);
433                 d = current_detail;
434                 if (!ch)
435                         current_index ++;
436                 spin_unlock(&cache_list_lock);
437                 if (ch)
438                         d->cache_put(ch, d);
439         } else
440                 spin_unlock(&cache_list_lock);
441
442         return rv;
443 }
444
445 /*
446  * We want to regularly clean the cache, so we need to schedule some work ...
447  */
448 static void do_cache_clean(void *data)
449 {
450         int delay = 5;
451         if (cache_clean() == -1)
452                 delay = 30*HZ;
453
454         if (list_empty(&cache_list))
455                 delay = 0;
456
457         if (delay)
458                 schedule_delayed_work(&cache_cleaner, delay);
459 }
460
461
/*
 * Clean all caches promptly.  cache_clean() is driven to the end of the
 * cache list twice, so that every cache has had a chance to be fully
 * cleaned, rescheduling between steps.
 */
void cache_flush(void)
{
        int pass;

        for (pass = 0; pass < 2; pass++) {
                while (cache_clean() != -1)
                        cond_resched();
        }
}
474
475 void cache_purge(struct cache_detail *detail)
476 {
477         detail->flush_time = LONG_MAX;
478         detail->nextcheck = get_seconds();
479         cache_flush();
480         detail->flush_time = 1;
481 }
482
483
484
485 /*
486  * Deferral and Revisiting of Requests.
487  *
488  * If a cache lookup finds a pending entry, we
489  * need to defer the request and revisit it later.
490  * All deferred requests are stored in a hash table,
491  * indexed by "struct cache_head *".
492  * As it may be wasteful to store a whole request
493  * structure, we allow the request to provide a 
494  * deferred form, which must contain a
495  * 'struct cache_deferred_req'
496  * This cache_deferred_req contains a method to allow
497  * it to be revisited when cache info is available
498  */
499
/* Number of buckets in the deferred-request hash: one page of list heads. */
#define DFR_HASHSIZE    (PAGE_SIZE/sizeof(struct list_head))
/*
 * Bucket index for a cache item, derived from its address.  The argument
 * is parenthesised so that an expression such as 'p + 1' is converted as
 * a whole rather than having the cast bind to 'p' alone.
 */
#define DFR_HASH(item)  (((((long)(item)) >> 4) ^ (((long)(item)) >> 13)) % DFR_HASHSIZE)

/* Upper bound on the number of deferred requests kept at once. */
#define DFR_MAX 300     /* ??? */
504
505 static DEFINE_SPINLOCK(cache_defer_lock);
506 static LIST_HEAD(cache_defer_list);
507 static struct list_head cache_defer_hash[DFR_HASHSIZE];
508 static int cache_defer_cnt;
509
510 static void cache_defer_req(struct cache_req *req, struct cache_head *item)
511 {
512         struct cache_deferred_req *dreq;
513         int hash = DFR_HASH(item);
514
515         dreq = req->defer(req);
516         if (dreq == NULL)
517                 return;
518
519         dreq->item = item;
520         dreq->recv_time = get_seconds();
521
522         spin_lock(&cache_defer_lock);
523
524         list_add(&dreq->recent, &cache_defer_list);
525
526         if (cache_defer_hash[hash].next == NULL)
527                 INIT_LIST_HEAD(&cache_defer_hash[hash]);
528         list_add(&dreq->hash, &cache_defer_hash[hash]);
529
530         /* it is in, now maybe clean up */
531         dreq = NULL;
532         if (++cache_defer_cnt > DFR_MAX) {
533                 /* too much in the cache, randomly drop
534                  * first or last
535                  */
536                 if (net_random()&1) 
537                         dreq = list_entry(cache_defer_list.next,
538                                           struct cache_deferred_req,
539                                           recent);
540                 else
541                         dreq = list_entry(cache_defer_list.prev,
542                                           struct cache_deferred_req,
543                                           recent);
544                 list_del(&dreq->recent);
545                 list_del(&dreq->hash);
546                 cache_defer_cnt--;
547         }
548         spin_unlock(&cache_defer_lock);
549
550         if (dreq) {
551                 /* there was one too many */
552                 dreq->revisit(dreq, 1);
553         }
554         if (test_bit(CACHE_VALID, &item->flags)) {
555                 /* must have just been validated... */
556                 cache_revisit_request(item);
557         }
558 }
559
560 static void cache_revisit_request(struct cache_head *item)
561 {
562         struct cache_deferred_req *dreq;
563         struct list_head pending;
564
565         struct list_head *lp;
566         int hash = DFR_HASH(item);
567
568         INIT_LIST_HEAD(&pending);
569         spin_lock(&cache_defer_lock);
570         
571         lp = cache_defer_hash[hash].next;
572         if (lp) {
573                 while (lp != &cache_defer_hash[hash]) {
574                         dreq = list_entry(lp, struct cache_deferred_req, hash);
575                         lp = lp->next;
576                         if (dreq->item == item) {
577                                 list_del(&dreq->hash);
578                                 list_move(&dreq->recent, &pending);
579                                 cache_defer_cnt--;
580                         }
581                 }
582         }
583         spin_unlock(&cache_defer_lock);
584
585         while (!list_empty(&pending)) {
586                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
587                 list_del_init(&dreq->recent);
588                 dreq->revisit(dreq, 0);
589         }
590 }
591
592 void cache_clean_deferred(void *owner)
593 {
594         struct cache_deferred_req *dreq, *tmp;
595         struct list_head pending;
596
597
598         INIT_LIST_HEAD(&pending);
599         spin_lock(&cache_defer_lock);
600         
601         list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
602                 if (dreq->owner == owner) {
603                         list_del(&dreq->hash);
604                         list_move(&dreq->recent, &pending);
605                         cache_defer_cnt--;
606                 }
607         }
608         spin_unlock(&cache_defer_lock);
609
610         while (!list_empty(&pending)) {
611                 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
612                 list_del_init(&dreq->recent);
613                 dreq->revisit(dreq, 1);
614         }
615 }
616
/*
 * communicate with user-space
 *
 * We have a magic /proc file - /proc/sunrpc/cache
 * On read, you get a full request, or block
 * On write, an update request is processed
 * Poll works if anything to read, and always allows write
 *
 * Implemented by a linked list of requests.  Each open file has
 * a ->private that also exists in this list.  New requests are added
 * to the end and may wake up any preceding readers.
 * New readers are added to the head.  If, on read, an item is found with
 * CACHE_UPCALLING clear, we free it from the list.
 *
 */

/* Protects the request/reader queues and cache_request.readers. */
static DEFINE_SPINLOCK(queue_lock);
/* Serialises channel reads and writes; also protects write_buf. */
static DEFINE_MUTEX(queue_io_mutex);
635
636 struct cache_queue {
637         struct list_head        list;
638         int                     reader; /* if 0, then request */
639 };
640 struct cache_request {
641         struct cache_queue      q;
642         struct cache_head       *item;
643         char                    * buf;
644         int                     len;
645         int                     readers;
646 };
647 struct cache_reader {
648         struct cache_queue      q;
649         int                     offset; /* if non-0, we have a refcnt on next request */
650 };
651
652 static ssize_t
653 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
654 {
655         struct cache_reader *rp = filp->private_data;
656         struct cache_request *rq;
657         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
658         int err;
659
660         if (count == 0)
661                 return 0;
662
663         mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
664                               * readers on this file */
665  again:
666         spin_lock(&queue_lock);
667         /* need to find next request */
668         while (rp->q.list.next != &cd->queue &&
669                list_entry(rp->q.list.next, struct cache_queue, list)
670                ->reader) {
671                 struct list_head *next = rp->q.list.next;
672                 list_move(&rp->q.list, next);
673         }
674         if (rp->q.list.next == &cd->queue) {
675                 spin_unlock(&queue_lock);
676                 mutex_unlock(&queue_io_mutex);
677                 BUG_ON(rp->offset);
678                 return 0;
679         }
680         rq = container_of(rp->q.list.next, struct cache_request, q.list);
681         BUG_ON(rq->q.reader);
682         if (rp->offset == 0)
683                 rq->readers++;
684         spin_unlock(&queue_lock);
685
686         if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
687                 err = -EAGAIN;
688                 spin_lock(&queue_lock);
689                 list_move(&rp->q.list, &rq->q.list);
690                 spin_unlock(&queue_lock);
691         } else {
692                 if (rp->offset + count > rq->len)
693                         count = rq->len - rp->offset;
694                 err = -EFAULT;
695                 if (copy_to_user(buf, rq->buf + rp->offset, count))
696                         goto out;
697                 rp->offset += count;
698                 if (rp->offset >= rq->len) {
699                         rp->offset = 0;
700                         spin_lock(&queue_lock);
701                         list_move(&rp->q.list, &rq->q.list);
702                         spin_unlock(&queue_lock);
703                 }
704                 err = 0;
705         }
706  out:
707         if (rp->offset == 0) {
708                 /* need to release rq */
709                 spin_lock(&queue_lock);
710                 rq->readers--;
711                 if (rq->readers == 0 &&
712                     !test_bit(CACHE_PENDING, &rq->item->flags)) {
713                         list_del(&rq->q.list);
714                         spin_unlock(&queue_lock);
715                         cd->cache_put(rq->item, cd);
716                         kfree(rq->buf);
717                         kfree(rq);
718                 } else
719                         spin_unlock(&queue_lock);
720         }
721         if (err == -EAGAIN)
722                 goto again;
723         mutex_unlock(&queue_io_mutex);
724         return err ? err :  count;
725 }
726
727 static char write_buf[8192]; /* protected by queue_io_mutex */
728
729 static ssize_t
730 cache_write(struct file *filp, const char __user *buf, size_t count,
731             loff_t *ppos)
732 {
733         int err;
734         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
735
736         if (count == 0)
737                 return 0;
738         if (count >= sizeof(write_buf))
739                 return -EINVAL;
740
741         mutex_lock(&queue_io_mutex);
742
743         if (copy_from_user(write_buf, buf, count)) {
744                 mutex_unlock(&queue_io_mutex);
745                 return -EFAULT;
746         }
747         write_buf[count] = '\0';
748         if (cd->cache_parse)
749                 err = cd->cache_parse(cd, write_buf, count);
750         else
751                 err = -EINVAL;
752
753         mutex_unlock(&queue_io_mutex);
754         return err ? err : count;
755 }
756
757 static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
758
759 static unsigned int
760 cache_poll(struct file *filp, poll_table *wait)
761 {
762         unsigned int mask;
763         struct cache_reader *rp = filp->private_data;
764         struct cache_queue *cq;
765         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
766
767         poll_wait(filp, &queue_wait, wait);
768
769         /* alway allow write */
770         mask = POLL_OUT | POLLWRNORM;
771
772         if (!rp)
773                 return mask;
774
775         spin_lock(&queue_lock);
776
777         for (cq= &rp->q; &cq->list != &cd->queue;
778              cq = list_entry(cq->list.next, struct cache_queue, list))
779                 if (!cq->reader) {
780                         mask |= POLLIN | POLLRDNORM;
781                         break;
782                 }
783         spin_unlock(&queue_lock);
784         return mask;
785 }
786
787 static int
788 cache_ioctl(struct inode *ino, struct file *filp,
789             unsigned int cmd, unsigned long arg)
790 {
791         int len = 0;
792         struct cache_reader *rp = filp->private_data;
793         struct cache_queue *cq;
794         struct cache_detail *cd = PDE(ino)->data;
795
796         if (cmd != FIONREAD || !rp)
797                 return -EINVAL;
798
799         spin_lock(&queue_lock);
800
801         /* only find the length remaining in current request,
802          * or the length of the next request
803          */
804         for (cq= &rp->q; &cq->list != &cd->queue;
805              cq = list_entry(cq->list.next, struct cache_queue, list))
806                 if (!cq->reader) {
807                         struct cache_request *cr =
808                                 container_of(cq, struct cache_request, q);
809                         len = cr->len - rp->offset;
810                         break;
811                 }
812         spin_unlock(&queue_lock);
813
814         return put_user(len, (int __user *)arg);
815 }
816
817 static int
818 cache_open(struct inode *inode, struct file *filp)
819 {
820         struct cache_reader *rp = NULL;
821
822         nonseekable_open(inode, filp);
823         if (filp->f_mode & FMODE_READ) {
824                 struct cache_detail *cd = PDE(inode)->data;
825
826                 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
827                 if (!rp)
828                         return -ENOMEM;
829                 rp->offset = 0;
830                 rp->q.reader = 1;
831                 atomic_inc(&cd->readers);
832                 spin_lock(&queue_lock);
833                 list_add(&rp->q.list, &cd->queue);
834                 spin_unlock(&queue_lock);
835         }
836         filp->private_data = rp;
837         return 0;
838 }
839
840 static int
841 cache_release(struct inode *inode, struct file *filp)
842 {
843         struct cache_reader *rp = filp->private_data;
844         struct cache_detail *cd = PDE(inode)->data;
845
846         if (rp) {
847                 spin_lock(&queue_lock);
848                 if (rp->offset) {
849                         struct cache_queue *cq;
850                         for (cq= &rp->q; &cq->list != &cd->queue;
851                              cq = list_entry(cq->list.next, struct cache_queue, list))
852                                 if (!cq->reader) {
853                                         container_of(cq, struct cache_request, q)
854                                                 ->readers--;
855                                         break;
856                                 }
857                         rp->offset = 0;
858                 }
859                 list_del(&rp->q.list);
860                 spin_unlock(&queue_lock);
861
862                 filp->private_data = NULL;
863                 kfree(rp);
864
865                 cd->last_close = get_seconds();
866                 atomic_dec(&cd->readers);
867         }
868         return 0;
869 }
870
871
872
873 static struct file_operations cache_file_operations = {
874         .owner          = THIS_MODULE,
875         .llseek         = no_llseek,
876         .read           = cache_read,
877         .write          = cache_write,
878         .poll           = cache_poll,
879         .ioctl          = cache_ioctl, /* for FIONREAD */
880         .open           = cache_open,
881         .release        = cache_release,
882 };
883
884
885 static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
886 {
887         struct cache_queue *cq;
888         spin_lock(&queue_lock);
889         list_for_each_entry(cq, &detail->queue, list)
890                 if (!cq->reader) {
891                         struct cache_request *cr = container_of(cq, struct cache_request, q);
892                         if (cr->item != ch)
893                                 continue;
894                         if (cr->readers != 0)
895                                 break;
896                         list_del(&cr->q.list);
897                         spin_unlock(&queue_lock);
898                         detail->cache_put(cr->item, detail);
899                         kfree(cr->buf);
900                         kfree(cr);
901                         return;
902                 }
903         spin_unlock(&queue_lock);
904 }
905
906 /*
907  * Support routines for text-based upcalls.
908  * Fields are separated by spaces.
909  * Fields are either mangled to quote space tab newline slosh with slosh
910  * or are hexified with a leading \x
911  * Record is terminated with newline.
912  *
913  */
914
/*
 * Append 'str' as one field to the buffer at *bpp, followed by a space.
 *
 * @bpp: in/out cursor into the output buffer
 * @lp:  in/out count of bytes still available at *bpp; set to -1 when
 *       the field (plus its trailing space) does not fit
 * @str: NUL-terminated text to add
 *
 * Space, tab, newline and backslash are written as a backslash followed
 * by three octal digits; every other byte is copied unchanged.  If *lp
 * is already negative the call does nothing, so a sequence of adds can
 * be checked once at the end.
 */
void qword_add(char **bpp, int *lp, char *str)
{
        char *bp = *bpp;
        int len = *lp;
        char c;

        if (len < 0) return;

        /*
         * Stop as soon as the space is used up.  The test must be
         * "len > 0": an escape that does not fit drives len negative,
         * and a plain "len" test would then keep copying bytes past
         * the end of the buffer.
         */
        while ((c=*str++) && len > 0)
                switch(c) {
                case ' ':
                case '\t':
                case '\n':
                case '\\':
                        if (len >= 4) {
                                *bp++ = '\\';
                                *bp++ = '0' + ((c & 0300)>>6);
                                *bp++ = '0' + ((c & 0070)>>3);
                                *bp++ = '0' + ((c & 0007)>>0);
                        }
                        len -= 4;
                        break;
                default:
                        *bp++ = c;
                        len--;
                }
        /* input left over, or no room for the separator: overflow */
        if (c || len <1) len = -1;
        else {
                *bp++ = ' ';
                len--;
        }
        *bpp = bp;
        *lp = len;
}
949
/*
 * Append 'blen' bytes from 'buf' as one hex field to the buffer at
 * *bpp: a leading "\x", two lower-case hex digits per byte, then a
 * space.
 *
 * @bpp:  in/out cursor into the output buffer
 * @lp:   in/out count of bytes still available; set to -1 if the field
 *        does not fit.  A negative value on entry makes this a no-op.
 * @buf:  bytes to encode
 * @blen: number of bytes in 'buf'
 */
void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{
        static const char hexdigits[] = "0123456789abcdef";
        char *out = *bpp;
        int room = *lp;

        if (room < 0)
                return;

        if (room > 2) {
                *out++ = '\\';
                *out++ = 'x';
                room -= 2;
                for (; blen && room >= 2; blen--, room -= 2) {
                        unsigned char byte = *buf++;

                        *out++ = hexdigits[byte >> 4];
                        *out++ = hexdigits[byte & 0x0f];
                }
        }

        if (blen || room < 1) {
                /* bytes left over, or no room for the separator */
                room = -1;
        } else {
                *out++ = ' ';
                room--;
        }

        *bpp = out;
        *lp = room;
}
977
978 static void warn_no_listener(struct cache_detail *detail)
979 {
980         if (detail->last_warn != detail->last_close) {
981                 detail->last_warn = detail->last_close;
982                 if (detail->warn_no_listener)
983                         detail->warn_no_listener(detail);
984         }
985 }
986
987 /*
988  * register an upcall request to user-space.
989  * Each request is at most one page long.
990  */
991 static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
992 {
993
994         char *buf;
995         struct cache_request *crq;
996         char *bp;
997         int len;
998
999         if (detail->cache_request == NULL)
1000                 return -EINVAL;
1001
1002         if (atomic_read(&detail->readers) == 0 &&
1003             detail->last_close < get_seconds() - 30) {
1004                         warn_no_listener(detail);
1005                         return -EINVAL;
1006         }
1007
1008         buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1009         if (!buf)
1010                 return -EAGAIN;
1011
1012         crq = kmalloc(sizeof (*crq), GFP_KERNEL);
1013         if (!crq) {
1014                 kfree(buf);
1015                 return -EAGAIN;
1016         }
1017
1018         bp = buf; len = PAGE_SIZE;
1019
1020         detail->cache_request(detail, h, &bp, &len);
1021
1022         if (len < 0) {
1023                 kfree(buf);
1024                 kfree(crq);
1025                 return -EAGAIN;
1026         }
1027         crq->q.reader = 0;
1028         crq->item = cache_get(h);
1029         crq->buf = buf;
1030         crq->len = PAGE_SIZE - len;
1031         crq->readers = 0;
1032         spin_lock(&queue_lock);
1033         list_add_tail(&crq->q.list, &detail->queue);
1034         spin_unlock(&queue_lock);
1035         wake_up(&queue_wait);
1036         return 0;
1037 }
1038
1039 /*
1040  * parse a message from user-space and pass it
1041  * to an appropriate cache
1042  * Messages are, like requests, separated into fields by
1043  * spaces and dequoted as \xHEXSTRING or embedded \nnn octal
1044  *
1045  * Message is 
1046  *   reply cachename expiry key ... content....
1047  *
1048  * key and content are both parsed by cache 
1049  */
1050
/* true for the octal digits '0'..'7' */
#define isodigit(c) (isdigit(c) && (c) <= '7')

/*
 * Extract the next field from the text at *bpp into 'dest'.
 *
 * @bpp:     in/out cursor into the message; on success it is advanced
 *           past the field and any spaces that follow it
 * @dest:    output buffer; on success it holds the decoded bytes
 *           followed by a terminating NUL
 * @bufsize: size of 'dest' in bytes, including room for the NUL
 *
 * Leading spaces are skipped.  A field starting with "\x" is decoded
 * as pairs of hex digits; any other field is copied as text with
 * \nnn octal escapes (first digit 0-3) decoded.  A field ends at a
 * space, a newline or the end of the string.
 *
 * Returns the number of bytes stored (not counting the NUL), or -1 if
 * the field is malformed or does not fit in bufsize - 1 bytes.  On -1
 * *bpp is left unchanged and 'dest' is not NUL-terminated.
 */
int qword_get(char **bpp, char *dest, int bufsize)
{
        /* return bytes copied, or -1 on error */
        char *bp = *bpp;
        int len = 0;

        while (*bp == ' ') bp++;

        if (bp[0] == '\\' && bp[1] == 'x') {
                /* HEX STRING */
                bp += 2;
                /*
                 * Stop at bufsize-1, like the text branch below, so
                 * that the terminating NUL stays inside dest.
                 */
                while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize-1) {
                        int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
                        bp++;
                        byte <<= 4;
                        byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
                        *dest++ = byte;
                        bp++;
                        len++;
                }
        } else {
                /* text with \nnn octal quoting */
                while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
                        if (*bp == '\\' &&
                            isodigit(bp[1]) && (bp[1] <= '3') &&
                            isodigit(bp[2]) &&
                            isodigit(bp[3])) {
                                int byte = (*++bp -'0');
                                bp++;
                                byte = (byte << 3) | (*bp++ - '0');
                                byte = (byte << 3) | (*bp++ - '0');
                                *dest++ = byte;
                                len++;
                        } else {
                                *dest++ = *bp++;
                                len++;
                        }
                }
        }

        /* anything other than a field terminator here means the field
         * was malformed or too long for the buffer */
        if (*bp != ' ' && *bp != '\n' && *bp != '\0')
                return -1;
        while (*bp == ' ') bp++;
        *bpp = bp;
        *dest = '\0';
        return len;
}
1099
1100
1101 /*
1102  * support /proc/sunrpc/cache/$CACHENAME/content
1103  * as a seqfile.
1104  * We call ->cache_show passing NULL for the item to
1105  * get a header, then pass each real item in the cache
1106  */
1107
/*
 * Private data hung off the seq_file for a "content" file:
 * it only records which cache the open file is showing.
 */
struct handle {
        struct cache_detail     *cd;    /* cache being listed */
};
1111
1112 static void *c_start(struct seq_file *m, loff_t *pos)
1113 {
1114         loff_t n = *pos;
1115         unsigned hash, entry;
1116         struct cache_head *ch;
1117         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1118         
1119
1120         read_lock(&cd->hash_lock);
1121         if (!n--)
1122                 return SEQ_START_TOKEN;
1123         hash = n >> 32;
1124         entry = n & ((1LL<<32) - 1);
1125
1126         for (ch=cd->hash_table[hash]; ch; ch=ch->next)
1127                 if (!entry--)
1128                         return ch;
1129         n &= ~((1LL<<32) - 1);
1130         do {
1131                 hash++;
1132                 n += 1LL<<32;
1133         } while(hash < cd->hash_size && 
1134                 cd->hash_table[hash]==NULL);
1135         if (hash >= cd->hash_size)
1136                 return NULL;
1137         *pos = n+1;
1138         return cd->hash_table[hash];
1139 }
1140
1141 static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1142 {
1143         struct cache_head *ch = p;
1144         int hash = (*pos >> 32);
1145         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1146
1147         if (p == SEQ_START_TOKEN)
1148                 hash = 0;
1149         else if (ch->next == NULL) {
1150                 hash++;
1151                 *pos += 1LL<<32;
1152         } else {
1153                 ++*pos;
1154                 return ch->next;
1155         }
1156         *pos &= ~((1LL<<32) - 1);
1157         while (hash < cd->hash_size &&
1158                cd->hash_table[hash] == NULL) {
1159                 hash++;
1160                 *pos += 1LL<<32;
1161         }
1162         if (hash >= cd->hash_size)
1163                 return NULL;
1164         ++*pos;
1165         return cd->hash_table[hash];
1166 }
1167
1168 static void c_stop(struct seq_file *m, void *p)
1169 {
1170         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1171         read_unlock(&cd->hash_lock);
1172 }
1173
1174 static int c_show(struct seq_file *m, void *p)
1175 {
1176         struct cache_head *cp = p;
1177         struct cache_detail *cd = ((struct handle*)m->private)->cd;
1178
1179         if (p == SEQ_START_TOKEN)
1180                 return cd->cache_show(m, cd, NULL);
1181
1182         ifdebug(CACHE)
1183                 seq_printf(m, "# expiry=%ld refcnt=%d\n",
1184                            cp->expiry_time, atomic_read(&cp->refcnt));
1185         cache_get(cp);
1186         if (cache_check(cd, cp, NULL))
1187                 /* cache_check does a cache_put on failure */
1188                 seq_printf(m, "# ");
1189         else
1190                 cache_put(cp, cd);
1191
1192         return cd->cache_show(m, cd, cp);
1193 }
1194
1195 static struct seq_operations cache_content_op = {
1196         .start  = c_start,
1197         .next   = c_next,
1198         .stop   = c_stop,
1199         .show   = c_show,
1200 };
1201
1202 static int content_open(struct inode *inode, struct file *file)
1203 {
1204         int res;
1205         struct handle *han;
1206         struct cache_detail *cd = PDE(inode)->data;
1207
1208         han = kmalloc(sizeof(*han), GFP_KERNEL);
1209         if (han == NULL)
1210                 return -ENOMEM;
1211
1212         han->cd = cd;
1213
1214         res = seq_open(file, &cache_content_op);
1215         if (res)
1216                 kfree(han);
1217         else
1218                 ((struct seq_file *)file->private_data)->private = han;
1219
1220         return res;
1221 }
1222 static int content_release(struct inode *inode, struct file *file)
1223 {
1224         struct seq_file *m = (struct seq_file *)file->private_data;
1225         struct handle *han = m->private;
1226         kfree(han);
1227         m->private = NULL;
1228         return seq_release(inode, file);
1229 }
1230
1231 static struct file_operations content_file_operations = {
1232         .open           = content_open,
1233         .read           = seq_read,
1234         .llseek         = seq_lseek,
1235         .release        = content_release,
1236 };
1237
1238 static ssize_t read_flush(struct file *file, char __user *buf,
1239                             size_t count, loff_t *ppos)
1240 {
1241         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1242         char tbuf[20];
1243         unsigned long p = *ppos;
1244         int len;
1245
1246         sprintf(tbuf, "%lu\n", cd->flush_time);
1247         len = strlen(tbuf);
1248         if (p >= len)
1249                 return 0;
1250         len -= p;
1251         if (len > count) len = count;
1252         if (copy_to_user(buf, (void*)(tbuf+p), len))
1253                 len = -EFAULT;
1254         else
1255                 *ppos += len;
1256         return len;
1257 }
1258
1259 static ssize_t write_flush(struct file * file, const char __user * buf,
1260                              size_t count, loff_t *ppos)
1261 {
1262         struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1263         char tbuf[20];
1264         char *ep;
1265         long flushtime;
1266         if (*ppos || count > sizeof(tbuf)-1)
1267                 return -EINVAL;
1268         if (copy_from_user(tbuf, buf, count))
1269                 return -EFAULT;
1270         tbuf[count] = 0;
1271         flushtime = simple_strtoul(tbuf, &ep, 0);
1272         if (*ep && *ep != '\n')
1273                 return -EINVAL;
1274
1275         cd->flush_time = flushtime;
1276         cd->nextcheck = get_seconds();
1277         cache_flush();
1278
1279         *ppos += count;
1280         return count;
1281 }
1282
1283 static struct file_operations cache_flush_operations = {
1284         .open           = nonseekable_open,
1285         .read           = read_flush,
1286         .write          = write_flush,
1287 };