[PATCH] splice: add support for SPLICE_F_MOVE flag
[linux-2.6.git] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
38 /* Drop the inode semaphore and wait for a pipe event, atomically */
39 void pipe_wait(struct inode * inode)
40 {
41         DEFINE_WAIT(wait);
42
43         /*
44          * Pipes are system-local resources, so sleeping on them
45          * is considered a noninteractive wait:
46          */
47         prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
48         mutex_unlock(PIPE_MUTEX(*inode));
49         schedule();
50         finish_wait(PIPE_WAIT(*inode), &wait);
51         mutex_lock(PIPE_MUTEX(*inode));
52 }
53
54 static int
55 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56 {
57         unsigned long copy;
58
59         while (len > 0) {
60                 while (!iov->iov_len)
61                         iov++;
62                 copy = min_t(unsigned long, len, iov->iov_len);
63
64                 if (copy_from_user(to, iov->iov_base, copy))
65                         return -EFAULT;
66                 to += copy;
67                 len -= copy;
68                 iov->iov_base += copy;
69                 iov->iov_len -= copy;
70         }
71         return 0;
72 }
73
74 static int
75 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76 {
77         unsigned long copy;
78
79         while (len > 0) {
80                 while (!iov->iov_len)
81                         iov++;
82                 copy = min_t(unsigned long, len, iov->iov_len);
83
84                 if (copy_to_user(iov->iov_base, from, copy))
85                         return -EFAULT;
86                 from += copy;
87                 len -= copy;
88                 iov->iov_base += copy;
89                 iov->iov_len -= copy;
90         }
91         return 0;
92 }
93
94 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95 {
96         struct page *page = buf->page;
97
98         /*
99          * If nobody else uses this page, and we don't already have a
100          * temporary page, let's keep track of it as a one-deep
101          * allocation cache
102          */
103         if (page_count(page) == 1 && !info->tmp_page) {
104                 info->tmp_page = page;
105                 return;
106         }
107
108         /*
109          * Otherwise just release our reference to it
110          */
111         page_cache_release(page);
112 }
113
114 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
115 {
116         return kmap(buf->page);
117 }
118
119 static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
120 {
121         kunmap(buf->page);
122 }
123
124 static int anon_pipe_buf_steal(struct pipe_inode_info *info,
125                                struct pipe_buffer *buf)
126 {
127         buf->stolen = 1;
128         return 0;
129 }
130
131 static struct pipe_buf_operations anon_pipe_buf_ops = {
132         .can_merge = 1,
133         .map = anon_pipe_buf_map,
134         .unmap = anon_pipe_buf_unmap,
135         .release = anon_pipe_buf_release,
136         .steal = anon_pipe_buf_steal,
137 };
138
139 static ssize_t
140 pipe_readv(struct file *filp, const struct iovec *_iov,
141            unsigned long nr_segs, loff_t *ppos)
142 {
143         struct inode *inode = filp->f_dentry->d_inode;
144         struct pipe_inode_info *info;
145         int do_wakeup;
146         ssize_t ret;
147         struct iovec *iov = (struct iovec *)_iov;
148         size_t total_len;
149
150         total_len = iov_length(iov, nr_segs);
151         /* Null read succeeds. */
152         if (unlikely(total_len == 0))
153                 return 0;
154
155         do_wakeup = 0;
156         ret = 0;
157         mutex_lock(PIPE_MUTEX(*inode));
158         info = inode->i_pipe;
159         for (;;) {
160                 int bufs = info->nrbufs;
161                 if (bufs) {
162                         int curbuf = info->curbuf;
163                         struct pipe_buffer *buf = info->bufs + curbuf;
164                         struct pipe_buf_operations *ops = buf->ops;
165                         void *addr;
166                         size_t chars = buf->len;
167                         int error;
168
169                         if (chars > total_len)
170                                 chars = total_len;
171
172                         addr = ops->map(filp, info, buf);
173                         if (IS_ERR(addr)) {
174                                 if (!ret)
175                                         ret = PTR_ERR(addr);
176                                 break;
177                         }
178                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
179                         ops->unmap(info, buf);
180                         if (unlikely(error)) {
181                                 if (!ret) ret = -EFAULT;
182                                 break;
183                         }
184                         ret += chars;
185                         buf->offset += chars;
186                         buf->len -= chars;
187                         if (!buf->len) {
188                                 buf->ops = NULL;
189                                 ops->release(info, buf);
190                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
191                                 info->curbuf = curbuf;
192                                 info->nrbufs = --bufs;
193                                 do_wakeup = 1;
194                         }
195                         total_len -= chars;
196                         if (!total_len)
197                                 break;  /* common path: read succeeded */
198                 }
199                 if (bufs)       /* More to do? */
200                         continue;
201                 if (!PIPE_WRITERS(*inode))
202                         break;
203                 if (!PIPE_WAITING_WRITERS(*inode)) {
204                         /* syscall merging: Usually we must not sleep
205                          * if O_NONBLOCK is set, or if we got some data.
206                          * But if a writer sleeps in kernel space, then
207                          * we can wait for that data without violating POSIX.
208                          */
209                         if (ret)
210                                 break;
211                         if (filp->f_flags & O_NONBLOCK) {
212                                 ret = -EAGAIN;
213                                 break;
214                         }
215                 }
216                 if (signal_pending(current)) {
217                         if (!ret) ret = -ERESTARTSYS;
218                         break;
219                 }
220                 if (do_wakeup) {
221                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
222                         kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
223                 }
224                 pipe_wait(inode);
225         }
226         mutex_unlock(PIPE_MUTEX(*inode));
227         /* Signal writers asynchronously that there is more room.  */
228         if (do_wakeup) {
229                 wake_up_interruptible(PIPE_WAIT(*inode));
230                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
231         }
232         if (ret > 0)
233                 file_accessed(filp);
234         return ret;
235 }
236
237 static ssize_t
238 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
239 {
240         struct iovec iov = { .iov_base = buf, .iov_len = count };
241         return pipe_readv(filp, &iov, 1, ppos);
242 }
243
244 static ssize_t
245 pipe_writev(struct file *filp, const struct iovec *_iov,
246             unsigned long nr_segs, loff_t *ppos)
247 {
248         struct inode *inode = filp->f_dentry->d_inode;
249         struct pipe_inode_info *info;
250         ssize_t ret;
251         int do_wakeup;
252         struct iovec *iov = (struct iovec *)_iov;
253         size_t total_len;
254         ssize_t chars;
255
256         total_len = iov_length(iov, nr_segs);
257         /* Null write succeeds. */
258         if (unlikely(total_len == 0))
259                 return 0;
260
261         do_wakeup = 0;
262         ret = 0;
263         mutex_lock(PIPE_MUTEX(*inode));
264         info = inode->i_pipe;
265
266         if (!PIPE_READERS(*inode)) {
267                 send_sig(SIGPIPE, current, 0);
268                 ret = -EPIPE;
269                 goto out;
270         }
271
272         /* We try to merge small writes */
273         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
274         if (info->nrbufs && chars != 0) {
275                 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
276                 struct pipe_buffer *buf = info->bufs + lastbuf;
277                 struct pipe_buf_operations *ops = buf->ops;
278                 int offset = buf->offset + buf->len;
279                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
280                         void *addr;
281                         int error;
282
283                         addr = ops->map(filp, info, buf);
284                         if (IS_ERR(addr)) {
285                                 error = PTR_ERR(addr);
286                                 goto out;
287                         }
288                         error = pipe_iov_copy_from_user(offset + addr, iov,
289                                                         chars);
290                         ops->unmap(info, buf);
291                         ret = error;
292                         do_wakeup = 1;
293                         if (error)
294                                 goto out;
295                         buf->len += chars;
296                         total_len -= chars;
297                         ret = chars;
298                         if (!total_len)
299                                 goto out;
300                 }
301         }
302
303         for (;;) {
304                 int bufs;
305                 if (!PIPE_READERS(*inode)) {
306                         send_sig(SIGPIPE, current, 0);
307                         if (!ret) ret = -EPIPE;
308                         break;
309                 }
310                 bufs = info->nrbufs;
311                 if (bufs < PIPE_BUFFERS) {
312                         int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
313                         struct pipe_buffer *buf = info->bufs + newbuf;
314                         struct page *page = info->tmp_page;
315                         int error;
316
317                         if (!page) {
318                                 page = alloc_page(GFP_HIGHUSER);
319                                 if (unlikely(!page)) {
320                                         ret = ret ? : -ENOMEM;
321                                         break;
322                                 }
323                                 info->tmp_page = page;
324                         }
325                         /* Always wakeup, even if the copy fails. Otherwise
326                          * we lock up (O_NONBLOCK-)readers that sleep due to
327                          * syscall merging.
328                          * FIXME! Is this really true?
329                          */
330                         do_wakeup = 1;
331                         chars = PAGE_SIZE;
332                         if (chars > total_len)
333                                 chars = total_len;
334
335                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
336                         kunmap(page);
337                         if (unlikely(error)) {
338                                 if (!ret) ret = -EFAULT;
339                                 break;
340                         }
341                         ret += chars;
342
343                         /* Insert it into the buffer array */
344                         buf->page = page;
345                         buf->ops = &anon_pipe_buf_ops;
346                         buf->offset = 0;
347                         buf->len = chars;
348                         info->nrbufs = ++bufs;
349                         info->tmp_page = NULL;
350
351                         total_len -= chars;
352                         if (!total_len)
353                                 break;
354                 }
355                 if (bufs < PIPE_BUFFERS)
356                         continue;
357                 if (filp->f_flags & O_NONBLOCK) {
358                         if (!ret) ret = -EAGAIN;
359                         break;
360                 }
361                 if (signal_pending(current)) {
362                         if (!ret) ret = -ERESTARTSYS;
363                         break;
364                 }
365                 if (do_wakeup) {
366                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
367                         kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
368                         do_wakeup = 0;
369                 }
370                 PIPE_WAITING_WRITERS(*inode)++;
371                 pipe_wait(inode);
372                 PIPE_WAITING_WRITERS(*inode)--;
373         }
374 out:
375         mutex_unlock(PIPE_MUTEX(*inode));
376         if (do_wakeup) {
377                 wake_up_interruptible(PIPE_WAIT(*inode));
378                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
379         }
380         if (ret > 0)
381                 file_update_time(filp);
382         return ret;
383 }
384
385 static ssize_t
386 pipe_write(struct file *filp, const char __user *buf,
387            size_t count, loff_t *ppos)
388 {
389         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
390         return pipe_writev(filp, &iov, 1, ppos);
391 }
392
393 static ssize_t
394 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
395 {
396         return -EBADF;
397 }
398
399 static ssize_t
400 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
401 {
402         return -EBADF;
403 }
404
405 static int
406 pipe_ioctl(struct inode *pino, struct file *filp,
407            unsigned int cmd, unsigned long arg)
408 {
409         struct inode *inode = filp->f_dentry->d_inode;
410         struct pipe_inode_info *info;
411         int count, buf, nrbufs;
412
413         switch (cmd) {
414                 case FIONREAD:
415                         mutex_lock(PIPE_MUTEX(*inode));
416                         info =  inode->i_pipe;
417                         count = 0;
418                         buf = info->curbuf;
419                         nrbufs = info->nrbufs;
420                         while (--nrbufs >= 0) {
421                                 count += info->bufs[buf].len;
422                                 buf = (buf+1) & (PIPE_BUFFERS-1);
423                         }
424                         mutex_unlock(PIPE_MUTEX(*inode));
425                         return put_user(count, (int __user *)arg);
426                 default:
427                         return -EINVAL;
428         }
429 }
430
431 /* No kernel lock held - fine */
432 static unsigned int
433 pipe_poll(struct file *filp, poll_table *wait)
434 {
435         unsigned int mask;
436         struct inode *inode = filp->f_dentry->d_inode;
437         struct pipe_inode_info *info = inode->i_pipe;
438         int nrbufs;
439
440         poll_wait(filp, PIPE_WAIT(*inode), wait);
441
442         /* Reading only -- no need for acquiring the semaphore.  */
443         nrbufs = info->nrbufs;
444         mask = 0;
445         if (filp->f_mode & FMODE_READ) {
446                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
447                 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
448                         mask |= POLLHUP;
449         }
450
451         if (filp->f_mode & FMODE_WRITE) {
452                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
453                 /*
454                  * Most Unices do not set POLLERR for FIFOs but on Linux they
455                  * behave exactly like pipes for poll().
456                  */
457                 if (!PIPE_READERS(*inode))
458                         mask |= POLLERR;
459         }
460
461         return mask;
462 }
463
464 static int
465 pipe_release(struct inode *inode, int decr, int decw)
466 {
467         mutex_lock(PIPE_MUTEX(*inode));
468         PIPE_READERS(*inode) -= decr;
469         PIPE_WRITERS(*inode) -= decw;
470         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
471                 free_pipe_info(inode);
472         } else {
473                 wake_up_interruptible(PIPE_WAIT(*inode));
474                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
475                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
476         }
477         mutex_unlock(PIPE_MUTEX(*inode));
478
479         return 0;
480 }
481
482 static int
483 pipe_read_fasync(int fd, struct file *filp, int on)
484 {
485         struct inode *inode = filp->f_dentry->d_inode;
486         int retval;
487
488         mutex_lock(PIPE_MUTEX(*inode));
489         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
490         mutex_unlock(PIPE_MUTEX(*inode));
491
492         if (retval < 0)
493                 return retval;
494
495         return 0;
496 }
497
498
499 static int
500 pipe_write_fasync(int fd, struct file *filp, int on)
501 {
502         struct inode *inode = filp->f_dentry->d_inode;
503         int retval;
504
505         mutex_lock(PIPE_MUTEX(*inode));
506         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
507         mutex_unlock(PIPE_MUTEX(*inode));
508
509         if (retval < 0)
510                 return retval;
511
512         return 0;
513 }
514
515
516 static int
517 pipe_rdwr_fasync(int fd, struct file *filp, int on)
518 {
519         struct inode *inode = filp->f_dentry->d_inode;
520         int retval;
521
522         mutex_lock(PIPE_MUTEX(*inode));
523
524         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
525
526         if (retval >= 0)
527                 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
528
529         mutex_unlock(PIPE_MUTEX(*inode));
530
531         if (retval < 0)
532                 return retval;
533
534         return 0;
535 }
536
537
/*
 * release() for read-only ends: take the file off the reader fasync list,
 * then drop one reader from the pipe.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
        const int readers_gone = 1;
        const int writers_gone = 0;

        pipe_read_fasync(-1, filp, 0);

        return pipe_release(inode, readers_gone, writers_gone);
}
544
/*
 * release() for write-only ends: take the file off the writer fasync list,
 * then drop one writer from the pipe.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
        const int readers_gone = 0;
        const int writers_gone = 1;

        pipe_write_fasync(-1, filp, 0);

        return pipe_release(inode, readers_gone, writers_gone);
}
551
552 static int
553 pipe_rdwr_release(struct inode *inode, struct file *filp)
554 {
555         int decr, decw;
556
557         pipe_rdwr_fasync(-1, filp, 0);
558         decr = (filp->f_mode & FMODE_READ) != 0;
559         decw = (filp->f_mode & FMODE_WRITE) != 0;
560         return pipe_release(inode, decr, decw);
561 }
562
563 static int
564 pipe_read_open(struct inode *inode, struct file *filp)
565 {
566         /* We could have perhaps used atomic_t, but this and friends
567            below are the only places.  So it doesn't seem worthwhile.  */
568         mutex_lock(PIPE_MUTEX(*inode));
569         PIPE_READERS(*inode)++;
570         mutex_unlock(PIPE_MUTEX(*inode));
571
572         return 0;
573 }
574
575 static int
576 pipe_write_open(struct inode *inode, struct file *filp)
577 {
578         mutex_lock(PIPE_MUTEX(*inode));
579         PIPE_WRITERS(*inode)++;
580         mutex_unlock(PIPE_MUTEX(*inode));
581
582         return 0;
583 }
584
585 static int
586 pipe_rdwr_open(struct inode *inode, struct file *filp)
587 {
588         mutex_lock(PIPE_MUTEX(*inode));
589         if (filp->f_mode & FMODE_READ)
590                 PIPE_READERS(*inode)++;
591         if (filp->f_mode & FMODE_WRITE)
592                 PIPE_WRITERS(*inode)++;
593         mutex_unlock(PIPE_MUTEX(*inode));
594
595         return 0;
596 }
597
598 /*
599  * The file_operations structs are not static because they
600  * are also used in linux/fs/fifo.c to do operations on FIFOs.
601  */
602 const struct file_operations read_fifo_fops = {
603         .llseek         = no_llseek,
604         .read           = pipe_read,
605         .readv          = pipe_readv,
606         .write          = bad_pipe_w,
607         .poll           = pipe_poll,
608         .ioctl          = pipe_ioctl,
609         .open           = pipe_read_open,
610         .release        = pipe_read_release,
611         .fasync         = pipe_read_fasync,
612 };
613
614 const struct file_operations write_fifo_fops = {
615         .llseek         = no_llseek,
616         .read           = bad_pipe_r,
617         .write          = pipe_write,
618         .writev         = pipe_writev,
619         .poll           = pipe_poll,
620         .ioctl          = pipe_ioctl,
621         .open           = pipe_write_open,
622         .release        = pipe_write_release,
623         .fasync         = pipe_write_fasync,
624 };
625
626 const struct file_operations rdwr_fifo_fops = {
627         .llseek         = no_llseek,
628         .read           = pipe_read,
629         .readv          = pipe_readv,
630         .write          = pipe_write,
631         .writev         = pipe_writev,
632         .poll           = pipe_poll,
633         .ioctl          = pipe_ioctl,
634         .open           = pipe_rdwr_open,
635         .release        = pipe_rdwr_release,
636         .fasync         = pipe_rdwr_fasync,
637 };
638
639 static struct file_operations read_pipe_fops = {
640         .llseek         = no_llseek,
641         .read           = pipe_read,
642         .readv          = pipe_readv,
643         .write          = bad_pipe_w,
644         .poll           = pipe_poll,
645         .ioctl          = pipe_ioctl,
646         .open           = pipe_read_open,
647         .release        = pipe_read_release,
648         .fasync         = pipe_read_fasync,
649 };
650
651 static struct file_operations write_pipe_fops = {
652         .llseek         = no_llseek,
653         .read           = bad_pipe_r,
654         .write          = pipe_write,
655         .writev         = pipe_writev,
656         .poll           = pipe_poll,
657         .ioctl          = pipe_ioctl,
658         .open           = pipe_write_open,
659         .release        = pipe_write_release,
660         .fasync         = pipe_write_fasync,
661 };
662
663 static struct file_operations rdwr_pipe_fops = {
664         .llseek         = no_llseek,
665         .read           = pipe_read,
666         .readv          = pipe_readv,
667         .write          = pipe_write,
668         .writev         = pipe_writev,
669         .poll           = pipe_poll,
670         .ioctl          = pipe_ioctl,
671         .open           = pipe_rdwr_open,
672         .release        = pipe_rdwr_release,
673         .fasync         = pipe_rdwr_fasync,
674 };
675
676 void free_pipe_info(struct inode *inode)
677 {
678         int i;
679         struct pipe_inode_info *info = inode->i_pipe;
680
681         inode->i_pipe = NULL;
682         for (i = 0; i < PIPE_BUFFERS; i++) {
683                 struct pipe_buffer *buf = info->bufs + i;
684                 if (buf->ops)
685                         buf->ops->release(info, buf);
686         }
687         if (info->tmp_page)
688                 __free_page(info->tmp_page);
689         kfree(info);
690 }
691
692 struct inode* pipe_new(struct inode* inode)
693 {
694         struct pipe_inode_info *info;
695
696         info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
697         if (!info)
698                 goto fail_page;
699         inode->i_pipe = info;
700
701         init_waitqueue_head(PIPE_WAIT(*inode));
702         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
703
704         return inode;
705 fail_page:
706         return NULL;
707 }
708
/* Internal pipefs mount; set by init_pipe_fs() and used to create pipe inodes and dentries. */
static struct vfsmount *pipe_mnt __read_mostly;
/*
 * d_delete callback for pipefs dentries: unconditionally returns 1.
 */
static int pipefs_delete_dentry(struct dentry *dentry)
{
        const int always = 1;

        return always;
}
/* Dentry operations installed on every pipe dentry created by do_pipe(). */
static struct dentry_operations pipefs_dentry_operations = {
        .d_delete       = pipefs_delete_dentry,
};
717
718 static struct inode * get_pipe_inode(void)
719 {
720         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
721
722         if (!inode)
723                 goto fail_inode;
724
725         if(!pipe_new(inode))
726                 goto fail_iput;
727         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
728         inode->i_fop = &rdwr_pipe_fops;
729
730         /*
731          * Mark the inode dirty from the very beginning,
732          * that way it will never be moved to the dirty
733          * list because "mark_inode_dirty()" will think
734          * that it already _is_ on the dirty list.
735          */
736         inode->i_state = I_DIRTY;
737         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
738         inode->i_uid = current->fsuid;
739         inode->i_gid = current->fsgid;
740         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
741         inode->i_blksize = PAGE_SIZE;
742         return inode;
743
744 fail_iput:
745         iput(inode);
746 fail_inode:
747         return NULL;
748 }
749
750 int do_pipe(int *fd)
751 {
752         struct qstr this;
753         char name[32];
754         struct dentry *dentry;
755         struct inode * inode;
756         struct file *f1, *f2;
757         int error;
758         int i,j;
759
760         error = -ENFILE;
761         f1 = get_empty_filp();
762         if (!f1)
763                 goto no_files;
764
765         f2 = get_empty_filp();
766         if (!f2)
767                 goto close_f1;
768
769         inode = get_pipe_inode();
770         if (!inode)
771                 goto close_f12;
772
773         error = get_unused_fd();
774         if (error < 0)
775                 goto close_f12_inode;
776         i = error;
777
778         error = get_unused_fd();
779         if (error < 0)
780                 goto close_f12_inode_i;
781         j = error;
782
783         error = -ENOMEM;
784         sprintf(name, "[%lu]", inode->i_ino);
785         this.name = name;
786         this.len = strlen(name);
787         this.hash = inode->i_ino; /* will go */
788         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
789         if (!dentry)
790                 goto close_f12_inode_i_j;
791         dentry->d_op = &pipefs_dentry_operations;
792         d_add(dentry, inode);
793         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
794         f1->f_dentry = f2->f_dentry = dget(dentry);
795         f1->f_mapping = f2->f_mapping = inode->i_mapping;
796
797         /* read file */
798         f1->f_pos = f2->f_pos = 0;
799         f1->f_flags = O_RDONLY;
800         f1->f_op = &read_pipe_fops;
801         f1->f_mode = FMODE_READ;
802         f1->f_version = 0;
803
804         /* write file */
805         f2->f_flags = O_WRONLY;
806         f2->f_op = &write_pipe_fops;
807         f2->f_mode = FMODE_WRITE;
808         f2->f_version = 0;
809
810         fd_install(i, f1);
811         fd_install(j, f2);
812         fd[0] = i;
813         fd[1] = j;
814         return 0;
815
816 close_f12_inode_i_j:
817         put_unused_fd(j);
818 close_f12_inode_i:
819         put_unused_fd(i);
820 close_f12_inode:
821         free_pipe_info(inode);
822         iput(inode);
823 close_f12:
824         put_filp(f2);
825 close_f1:
826         put_filp(f1);
827 no_files:
828         return error;   
829 }
830
831 /*
832  * pipefs should _never_ be mounted by userland - too much of security hassle,
833  * no real gain from having the whole whorehouse mounted. So we don't need
834  * any operations on the root directory. However, we need a non-trivial
835  * d_name - pipe: will go nicely and kill the special-casing in procfs.
836  */
837
838 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
839         int flags, const char *dev_name, void *data)
840 {
841         return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
842 }
843
/* Filesystem type for pipefs; registered and kern-mounted by init_pipe_fs(). */
static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
        .get_sb         = pipefs_get_sb,
        .kill_sb        = kill_anon_super,
};
849
850 static int __init init_pipe_fs(void)
851 {
852         int err = register_filesystem(&pipe_fs_type);
853         if (!err) {
854                 pipe_mnt = kern_mount(&pipe_fs_type);
855                 if (IS_ERR(pipe_mnt)) {
856                         err = PTR_ERR(pipe_mnt);
857                         unregister_filesystem(&pipe_fs_type);
858                 }
859         }
860         return err;
861 }
862
/* Undo init_pipe_fs(): unregister pipefs and drop the internal mount. */
static void __exit exit_pipe_fs(void)
{
        unregister_filesystem(&pipe_fs_type);
        mntput(pipe_mnt);
}
868
/* Hook the pipefs setup and teardown routines into kernel init and module exit. */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);