[PATCH] Introduce sys_splice() system call
[linux-2.6.git] / fs / pipe.c
1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <linux/highmem.h>
18 #include <linux/pagemap.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/ioctls.h>
22
23 /*
24  * We use a start+len construction, which provides full use of the 
25  * allocated memory.
26  * -- Florian Coosmann (FGC)
27  * 
28  * Reads with count = 0 should always return 0.
29  * -- Julian Bradfield 1999-06-07.
30  *
31  * FIFOs and Pipes now generate SIGIO for both readers and writers.
32  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33  *
34  * pipe_read & write cleanup
35  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36  */
37
38 /* Drop the inode semaphore and wait for a pipe event, atomically */
39 void pipe_wait(struct inode * inode)
40 {
41         DEFINE_WAIT(wait);
42
43         /*
44          * Pipes are system-local resources, so sleeping on them
45          * is considered a noninteractive wait:
46          */
47         prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
48         mutex_unlock(PIPE_MUTEX(*inode));
49         schedule();
50         finish_wait(PIPE_WAIT(*inode), &wait);
51         mutex_lock(PIPE_MUTEX(*inode));
52 }
53
54 static int
55 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56 {
57         unsigned long copy;
58
59         while (len > 0) {
60                 while (!iov->iov_len)
61                         iov++;
62                 copy = min_t(unsigned long, len, iov->iov_len);
63
64                 if (copy_from_user(to, iov->iov_base, copy))
65                         return -EFAULT;
66                 to += copy;
67                 len -= copy;
68                 iov->iov_base += copy;
69                 iov->iov_len -= copy;
70         }
71         return 0;
72 }
73
74 static int
75 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76 {
77         unsigned long copy;
78
79         while (len > 0) {
80                 while (!iov->iov_len)
81                         iov++;
82                 copy = min_t(unsigned long, len, iov->iov_len);
83
84                 if (copy_to_user(iov->iov_base, from, copy))
85                         return -EFAULT;
86                 from += copy;
87                 len -= copy;
88                 iov->iov_base += copy;
89                 iov->iov_len -= copy;
90         }
91         return 0;
92 }
93
94 static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95 {
96         struct page *page = buf->page;
97
98         /*
99          * If nobody else uses this page, and we don't already have a
100          * temporary page, let's keep track of it as a one-deep
101          * allocation cache
102          */
103         if (page_count(page) == 1 && !info->tmp_page) {
104                 info->tmp_page = page;
105                 return;
106         }
107
108         /*
109          * Otherwise just release our reference to it
110          */
111         page_cache_release(page);
112 }
113
114 static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
115 {
116         return kmap(buf->page);
117 }
118
119 static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
120 {
121         kunmap(buf->page);
122 }
123
124 static struct pipe_buf_operations anon_pipe_buf_ops = {
125         .can_merge = 1,
126         .map = anon_pipe_buf_map,
127         .unmap = anon_pipe_buf_unmap,
128         .release = anon_pipe_buf_release,
129 };
130
131 static ssize_t
132 pipe_readv(struct file *filp, const struct iovec *_iov,
133            unsigned long nr_segs, loff_t *ppos)
134 {
135         struct inode *inode = filp->f_dentry->d_inode;
136         struct pipe_inode_info *info;
137         int do_wakeup;
138         ssize_t ret;
139         struct iovec *iov = (struct iovec *)_iov;
140         size_t total_len;
141
142         total_len = iov_length(iov, nr_segs);
143         /* Null read succeeds. */
144         if (unlikely(total_len == 0))
145                 return 0;
146
147         do_wakeup = 0;
148         ret = 0;
149         mutex_lock(PIPE_MUTEX(*inode));
150         info = inode->i_pipe;
151         for (;;) {
152                 int bufs = info->nrbufs;
153                 if (bufs) {
154                         int curbuf = info->curbuf;
155                         struct pipe_buffer *buf = info->bufs + curbuf;
156                         struct pipe_buf_operations *ops = buf->ops;
157                         void *addr;
158                         size_t chars = buf->len;
159                         int error;
160
161                         if (chars > total_len)
162                                 chars = total_len;
163
164                         addr = ops->map(filp, info, buf);
165                         if (IS_ERR(addr)) {
166                                 if (!ret)
167                                         ret = PTR_ERR(addr);
168                                 break;
169                         }
170                         error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
171                         ops->unmap(info, buf);
172                         if (unlikely(error)) {
173                                 if (!ret) ret = -EFAULT;
174                                 break;
175                         }
176                         ret += chars;
177                         buf->offset += chars;
178                         buf->len -= chars;
179                         if (!buf->len) {
180                                 buf->ops = NULL;
181                                 ops->release(info, buf);
182                                 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
183                                 info->curbuf = curbuf;
184                                 info->nrbufs = --bufs;
185                                 do_wakeup = 1;
186                         }
187                         total_len -= chars;
188                         if (!total_len)
189                                 break;  /* common path: read succeeded */
190                 }
191                 if (bufs)       /* More to do? */
192                         continue;
193                 if (!PIPE_WRITERS(*inode))
194                         break;
195                 if (!PIPE_WAITING_WRITERS(*inode)) {
196                         /* syscall merging: Usually we must not sleep
197                          * if O_NONBLOCK is set, or if we got some data.
198                          * But if a writer sleeps in kernel space, then
199                          * we can wait for that data without violating POSIX.
200                          */
201                         if (ret)
202                                 break;
203                         if (filp->f_flags & O_NONBLOCK) {
204                                 ret = -EAGAIN;
205                                 break;
206                         }
207                 }
208                 if (signal_pending(current)) {
209                         if (!ret) ret = -ERESTARTSYS;
210                         break;
211                 }
212                 if (do_wakeup) {
213                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
214                         kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
215                 }
216                 pipe_wait(inode);
217         }
218         mutex_unlock(PIPE_MUTEX(*inode));
219         /* Signal writers asynchronously that there is more room.  */
220         if (do_wakeup) {
221                 wake_up_interruptible(PIPE_WAIT(*inode));
222                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
223         }
224         if (ret > 0)
225                 file_accessed(filp);
226         return ret;
227 }
228
229 static ssize_t
230 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
231 {
232         struct iovec iov = { .iov_base = buf, .iov_len = count };
233         return pipe_readv(filp, &iov, 1, ppos);
234 }
235
236 static ssize_t
237 pipe_writev(struct file *filp, const struct iovec *_iov,
238             unsigned long nr_segs, loff_t *ppos)
239 {
240         struct inode *inode = filp->f_dentry->d_inode;
241         struct pipe_inode_info *info;
242         ssize_t ret;
243         int do_wakeup;
244         struct iovec *iov = (struct iovec *)_iov;
245         size_t total_len;
246         ssize_t chars;
247
248         total_len = iov_length(iov, nr_segs);
249         /* Null write succeeds. */
250         if (unlikely(total_len == 0))
251                 return 0;
252
253         do_wakeup = 0;
254         ret = 0;
255         mutex_lock(PIPE_MUTEX(*inode));
256         info = inode->i_pipe;
257
258         if (!PIPE_READERS(*inode)) {
259                 send_sig(SIGPIPE, current, 0);
260                 ret = -EPIPE;
261                 goto out;
262         }
263
264         /* We try to merge small writes */
265         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
266         if (info->nrbufs && chars != 0) {
267                 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
268                 struct pipe_buffer *buf = info->bufs + lastbuf;
269                 struct pipe_buf_operations *ops = buf->ops;
270                 int offset = buf->offset + buf->len;
271                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
272                         void *addr;
273                         int error;
274
275                         addr = ops->map(filp, info, buf);
276                         if (IS_ERR(addr)) {
277                                 error = PTR_ERR(addr);
278                                 goto out;
279                         }
280                         error = pipe_iov_copy_from_user(offset + addr, iov,
281                                                         chars);
282                         ops->unmap(info, buf);
283                         ret = error;
284                         do_wakeup = 1;
285                         if (error)
286                                 goto out;
287                         buf->len += chars;
288                         total_len -= chars;
289                         ret = chars;
290                         if (!total_len)
291                                 goto out;
292                 }
293         }
294
295         for (;;) {
296                 int bufs;
297                 if (!PIPE_READERS(*inode)) {
298                         send_sig(SIGPIPE, current, 0);
299                         if (!ret) ret = -EPIPE;
300                         break;
301                 }
302                 bufs = info->nrbufs;
303                 if (bufs < PIPE_BUFFERS) {
304                         int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
305                         struct pipe_buffer *buf = info->bufs + newbuf;
306                         struct page *page = info->tmp_page;
307                         int error;
308
309                         if (!page) {
310                                 page = alloc_page(GFP_HIGHUSER);
311                                 if (unlikely(!page)) {
312                                         ret = ret ? : -ENOMEM;
313                                         break;
314                                 }
315                                 info->tmp_page = page;
316                         }
317                         /* Always wakeup, even if the copy fails. Otherwise
318                          * we lock up (O_NONBLOCK-)readers that sleep due to
319                          * syscall merging.
320                          * FIXME! Is this really true?
321                          */
322                         do_wakeup = 1;
323                         chars = PAGE_SIZE;
324                         if (chars > total_len)
325                                 chars = total_len;
326
327                         error = pipe_iov_copy_from_user(kmap(page), iov, chars);
328                         kunmap(page);
329                         if (unlikely(error)) {
330                                 if (!ret) ret = -EFAULT;
331                                 break;
332                         }
333                         ret += chars;
334
335                         /* Insert it into the buffer array */
336                         buf->page = page;
337                         buf->ops = &anon_pipe_buf_ops;
338                         buf->offset = 0;
339                         buf->len = chars;
340                         info->nrbufs = ++bufs;
341                         info->tmp_page = NULL;
342
343                         total_len -= chars;
344                         if (!total_len)
345                                 break;
346                 }
347                 if (bufs < PIPE_BUFFERS)
348                         continue;
349                 if (filp->f_flags & O_NONBLOCK) {
350                         if (!ret) ret = -EAGAIN;
351                         break;
352                 }
353                 if (signal_pending(current)) {
354                         if (!ret) ret = -ERESTARTSYS;
355                         break;
356                 }
357                 if (do_wakeup) {
358                         wake_up_interruptible_sync(PIPE_WAIT(*inode));
359                         kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
360                         do_wakeup = 0;
361                 }
362                 PIPE_WAITING_WRITERS(*inode)++;
363                 pipe_wait(inode);
364                 PIPE_WAITING_WRITERS(*inode)--;
365         }
366 out:
367         mutex_unlock(PIPE_MUTEX(*inode));
368         if (do_wakeup) {
369                 wake_up_interruptible(PIPE_WAIT(*inode));
370                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
371         }
372         if (ret > 0)
373                 file_update_time(filp);
374         return ret;
375 }
376
377 static ssize_t
378 pipe_write(struct file *filp, const char __user *buf,
379            size_t count, loff_t *ppos)
380 {
381         struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
382         return pipe_writev(filp, &iov, 1, ppos);
383 }
384
385 static ssize_t
386 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
387 {
388         return -EBADF;
389 }
390
391 static ssize_t
392 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
393 {
394         return -EBADF;
395 }
396
397 static int
398 pipe_ioctl(struct inode *pino, struct file *filp,
399            unsigned int cmd, unsigned long arg)
400 {
401         struct inode *inode = filp->f_dentry->d_inode;
402         struct pipe_inode_info *info;
403         int count, buf, nrbufs;
404
405         switch (cmd) {
406                 case FIONREAD:
407                         mutex_lock(PIPE_MUTEX(*inode));
408                         info =  inode->i_pipe;
409                         count = 0;
410                         buf = info->curbuf;
411                         nrbufs = info->nrbufs;
412                         while (--nrbufs >= 0) {
413                                 count += info->bufs[buf].len;
414                                 buf = (buf+1) & (PIPE_BUFFERS-1);
415                         }
416                         mutex_unlock(PIPE_MUTEX(*inode));
417                         return put_user(count, (int __user *)arg);
418                 default:
419                         return -EINVAL;
420         }
421 }
422
423 /* No kernel lock held - fine */
424 static unsigned int
425 pipe_poll(struct file *filp, poll_table *wait)
426 {
427         unsigned int mask;
428         struct inode *inode = filp->f_dentry->d_inode;
429         struct pipe_inode_info *info = inode->i_pipe;
430         int nrbufs;
431
432         poll_wait(filp, PIPE_WAIT(*inode), wait);
433
434         /* Reading only -- no need for acquiring the semaphore.  */
435         nrbufs = info->nrbufs;
436         mask = 0;
437         if (filp->f_mode & FMODE_READ) {
438                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
439                 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
440                         mask |= POLLHUP;
441         }
442
443         if (filp->f_mode & FMODE_WRITE) {
444                 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
445                 /*
446                  * Most Unices do not set POLLERR for FIFOs but on Linux they
447                  * behave exactly like pipes for poll().
448                  */
449                 if (!PIPE_READERS(*inode))
450                         mask |= POLLERR;
451         }
452
453         return mask;
454 }
455
456 static int
457 pipe_release(struct inode *inode, int decr, int decw)
458 {
459         mutex_lock(PIPE_MUTEX(*inode));
460         PIPE_READERS(*inode) -= decr;
461         PIPE_WRITERS(*inode) -= decw;
462         if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
463                 free_pipe_info(inode);
464         } else {
465                 wake_up_interruptible(PIPE_WAIT(*inode));
466                 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
467                 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
468         }
469         mutex_unlock(PIPE_MUTEX(*inode));
470
471         return 0;
472 }
473
474 static int
475 pipe_read_fasync(int fd, struct file *filp, int on)
476 {
477         struct inode *inode = filp->f_dentry->d_inode;
478         int retval;
479
480         mutex_lock(PIPE_MUTEX(*inode));
481         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
482         mutex_unlock(PIPE_MUTEX(*inode));
483
484         if (retval < 0)
485                 return retval;
486
487         return 0;
488 }
489
490
491 static int
492 pipe_write_fasync(int fd, struct file *filp, int on)
493 {
494         struct inode *inode = filp->f_dentry->d_inode;
495         int retval;
496
497         mutex_lock(PIPE_MUTEX(*inode));
498         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
499         mutex_unlock(PIPE_MUTEX(*inode));
500
501         if (retval < 0)
502                 return retval;
503
504         return 0;
505 }
506
507
508 static int
509 pipe_rdwr_fasync(int fd, struct file *filp, int on)
510 {
511         struct inode *inode = filp->f_dentry->d_inode;
512         int retval;
513
514         mutex_lock(PIPE_MUTEX(*inode));
515
516         retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
517
518         if (retval >= 0)
519                 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
520
521         mutex_unlock(PIPE_MUTEX(*inode));
522
523         if (retval < 0)
524                 return retval;
525
526         return 0;
527 }
528
529
/* Release a read-only end: drop its fasync entry, then one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);
	return rc;
}
536
/* Release a write-only end: drop its fasync entry, then one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);
	return rc;
}
543
544 static int
545 pipe_rdwr_release(struct inode *inode, struct file *filp)
546 {
547         int decr, decw;
548
549         pipe_rdwr_fasync(-1, filp, 0);
550         decr = (filp->f_mode & FMODE_READ) != 0;
551         decw = (filp->f_mode & FMODE_WRITE) != 0;
552         return pipe_release(inode, decr, decw);
553 }
554
555 static int
556 pipe_read_open(struct inode *inode, struct file *filp)
557 {
558         /* We could have perhaps used atomic_t, but this and friends
559            below are the only places.  So it doesn't seem worthwhile.  */
560         mutex_lock(PIPE_MUTEX(*inode));
561         PIPE_READERS(*inode)++;
562         mutex_unlock(PIPE_MUTEX(*inode));
563
564         return 0;
565 }
566
567 static int
568 pipe_write_open(struct inode *inode, struct file *filp)
569 {
570         mutex_lock(PIPE_MUTEX(*inode));
571         PIPE_WRITERS(*inode)++;
572         mutex_unlock(PIPE_MUTEX(*inode));
573
574         return 0;
575 }
576
577 static int
578 pipe_rdwr_open(struct inode *inode, struct file *filp)
579 {
580         mutex_lock(PIPE_MUTEX(*inode));
581         if (filp->f_mode & FMODE_READ)
582                 PIPE_READERS(*inode)++;
583         if (filp->f_mode & FMODE_WRITE)
584                 PIPE_WRITERS(*inode)++;
585         mutex_unlock(PIPE_MUTEX(*inode));
586
587         return 0;
588 }
589
590 /*
591  * The file_operations structs are not static because they
592  * are also used in linux/fs/fifo.c to do operations on FIFOs.
593  */
594 const struct file_operations read_fifo_fops = {
595         .llseek         = no_llseek,
596         .read           = pipe_read,
597         .readv          = pipe_readv,
598         .write          = bad_pipe_w,
599         .poll           = pipe_poll,
600         .ioctl          = pipe_ioctl,
601         .open           = pipe_read_open,
602         .release        = pipe_read_release,
603         .fasync         = pipe_read_fasync,
604 };
605
606 const struct file_operations write_fifo_fops = {
607         .llseek         = no_llseek,
608         .read           = bad_pipe_r,
609         .write          = pipe_write,
610         .writev         = pipe_writev,
611         .poll           = pipe_poll,
612         .ioctl          = pipe_ioctl,
613         .open           = pipe_write_open,
614         .release        = pipe_write_release,
615         .fasync         = pipe_write_fasync,
616 };
617
618 const struct file_operations rdwr_fifo_fops = {
619         .llseek         = no_llseek,
620         .read           = pipe_read,
621         .readv          = pipe_readv,
622         .write          = pipe_write,
623         .writev         = pipe_writev,
624         .poll           = pipe_poll,
625         .ioctl          = pipe_ioctl,
626         .open           = pipe_rdwr_open,
627         .release        = pipe_rdwr_release,
628         .fasync         = pipe_rdwr_fasync,
629 };
630
631 static struct file_operations read_pipe_fops = {
632         .llseek         = no_llseek,
633         .read           = pipe_read,
634         .readv          = pipe_readv,
635         .write          = bad_pipe_w,
636         .poll           = pipe_poll,
637         .ioctl          = pipe_ioctl,
638         .open           = pipe_read_open,
639         .release        = pipe_read_release,
640         .fasync         = pipe_read_fasync,
641 };
642
643 static struct file_operations write_pipe_fops = {
644         .llseek         = no_llseek,
645         .read           = bad_pipe_r,
646         .write          = pipe_write,
647         .writev         = pipe_writev,
648         .poll           = pipe_poll,
649         .ioctl          = pipe_ioctl,
650         .open           = pipe_write_open,
651         .release        = pipe_write_release,
652         .fasync         = pipe_write_fasync,
653 };
654
655 static struct file_operations rdwr_pipe_fops = {
656         .llseek         = no_llseek,
657         .read           = pipe_read,
658         .readv          = pipe_readv,
659         .write          = pipe_write,
660         .writev         = pipe_writev,
661         .poll           = pipe_poll,
662         .ioctl          = pipe_ioctl,
663         .open           = pipe_rdwr_open,
664         .release        = pipe_rdwr_release,
665         .fasync         = pipe_rdwr_fasync,
666 };
667
668 void free_pipe_info(struct inode *inode)
669 {
670         int i;
671         struct pipe_inode_info *info = inode->i_pipe;
672
673         inode->i_pipe = NULL;
674         for (i = 0; i < PIPE_BUFFERS; i++) {
675                 struct pipe_buffer *buf = info->bufs + i;
676                 if (buf->ops)
677                         buf->ops->release(info, buf);
678         }
679         if (info->tmp_page)
680                 __free_page(info->tmp_page);
681         kfree(info);
682 }
683
684 struct inode* pipe_new(struct inode* inode)
685 {
686         struct pipe_inode_info *info;
687
688         info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
689         if (!info)
690                 goto fail_page;
691         inode->i_pipe = info;
692
693         init_waitqueue_head(PIPE_WAIT(*inode));
694         PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
695
696         return inode;
697 fail_page:
698         return NULL;
699 }
700
701 static struct vfsmount *pipe_mnt __read_mostly;
702 static int pipefs_delete_dentry(struct dentry *dentry)
703 {
704         return 1;
705 }
706 static struct dentry_operations pipefs_dentry_operations = {
707         .d_delete       = pipefs_delete_dentry,
708 };
709
710 static struct inode * get_pipe_inode(void)
711 {
712         struct inode *inode = new_inode(pipe_mnt->mnt_sb);
713
714         if (!inode)
715                 goto fail_inode;
716
717         if(!pipe_new(inode))
718                 goto fail_iput;
719         PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
720         inode->i_fop = &rdwr_pipe_fops;
721
722         /*
723          * Mark the inode dirty from the very beginning,
724          * that way it will never be moved to the dirty
725          * list because "mark_inode_dirty()" will think
726          * that it already _is_ on the dirty list.
727          */
728         inode->i_state = I_DIRTY;
729         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
730         inode->i_uid = current->fsuid;
731         inode->i_gid = current->fsgid;
732         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
733         inode->i_blksize = PAGE_SIZE;
734         return inode;
735
736 fail_iput:
737         iput(inode);
738 fail_inode:
739         return NULL;
740 }
741
742 int do_pipe(int *fd)
743 {
744         struct qstr this;
745         char name[32];
746         struct dentry *dentry;
747         struct inode * inode;
748         struct file *f1, *f2;
749         int error;
750         int i,j;
751
752         error = -ENFILE;
753         f1 = get_empty_filp();
754         if (!f1)
755                 goto no_files;
756
757         f2 = get_empty_filp();
758         if (!f2)
759                 goto close_f1;
760
761         inode = get_pipe_inode();
762         if (!inode)
763                 goto close_f12;
764
765         error = get_unused_fd();
766         if (error < 0)
767                 goto close_f12_inode;
768         i = error;
769
770         error = get_unused_fd();
771         if (error < 0)
772                 goto close_f12_inode_i;
773         j = error;
774
775         error = -ENOMEM;
776         sprintf(name, "[%lu]", inode->i_ino);
777         this.name = name;
778         this.len = strlen(name);
779         this.hash = inode->i_ino; /* will go */
780         dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
781         if (!dentry)
782                 goto close_f12_inode_i_j;
783         dentry->d_op = &pipefs_dentry_operations;
784         d_add(dentry, inode);
785         f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
786         f1->f_dentry = f2->f_dentry = dget(dentry);
787         f1->f_mapping = f2->f_mapping = inode->i_mapping;
788
789         /* read file */
790         f1->f_pos = f2->f_pos = 0;
791         f1->f_flags = O_RDONLY;
792         f1->f_op = &read_pipe_fops;
793         f1->f_mode = FMODE_READ;
794         f1->f_version = 0;
795
796         /* write file */
797         f2->f_flags = O_WRONLY;
798         f2->f_op = &write_pipe_fops;
799         f2->f_mode = FMODE_WRITE;
800         f2->f_version = 0;
801
802         fd_install(i, f1);
803         fd_install(j, f2);
804         fd[0] = i;
805         fd[1] = j;
806         return 0;
807
808 close_f12_inode_i_j:
809         put_unused_fd(j);
810 close_f12_inode_i:
811         put_unused_fd(i);
812 close_f12_inode:
813         free_pipe_info(inode);
814         iput(inode);
815 close_f12:
816         put_filp(f2);
817 close_f1:
818         put_filp(f1);
819 no_files:
820         return error;   
821 }
822
823 /*
824  * pipefs should _never_ be mounted by userland - too much of security hassle,
825  * no real gain from having the whole whorehouse mounted. So we don't need
826  * any operations on the root directory. However, we need a non-trivial
827  * d_name - pipe: will go nicely and kill the special-casing in procfs.
828  */
829
830 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
831         int flags, const char *dev_name, void *data)
832 {
833         return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
834 }
835
836 static struct file_system_type pipe_fs_type = {
837         .name           = "pipefs",
838         .get_sb         = pipefs_get_sb,
839         .kill_sb        = kill_anon_super,
840 };
841
842 static int __init init_pipe_fs(void)
843 {
844         int err = register_filesystem(&pipe_fs_type);
845         if (!err) {
846                 pipe_mnt = kern_mount(&pipe_fs_type);
847                 if (IS_ERR(pipe_mnt)) {
848                         err = PTR_ERR(pipe_mnt);
849                         unregister_filesystem(&pipe_fs_type);
850                 }
851         }
852         return err;
853 }
854
855 static void __exit exit_pipe_fs(void)
856 {
857         unregister_filesystem(&pipe_fs_type);
858         mntput(pipe_mnt);
859 }
860
861 fs_initcall(init_pipe_fs);
862 module_exit(exit_pipe_fs);