]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - fs/pipe.c
IB/mlx4: Fix shutdown crash accessing a non-existent bitmap
[linux-2.6.git] / fs / pipe.c
index db6eaaba0dd81fb777f2b64f1e385d40eb3f1819..0e0be1dc0f8ef67e433cefb9d46c708b2a2ca52d 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
 
 /*
  * The max size that a non-root user is allowed to grow the pipe. Can
- * be set by root in /proc/sys/fs/pipe-max-pages
+ * be set by root in /proc/sys/fs/pipe-max-size
  */
-unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
+unsigned int pipe_max_size = 1048576;
+
+/*
+ * Minimum pipe size, as required by POSIX
+ */
+unsigned int pipe_min_size = PAGE_SIZE;
 
 /*
  * We use a start+len construction, which provides full use of the 
@@ -377,7 +382,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        error = ops->confirm(pipe, buf);
                        if (error) {
                                if (!ret)
-                                       error = ret;
+                                       ret = error;
                                break;
                        }
 
@@ -436,7 +441,7 @@ redo:
                        break;
                }
                if (do_wakeup) {
-                       wake_up_interruptible_sync(&pipe->wait);
+                       wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
                        kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
                }
                pipe_wait(pipe);
@@ -445,7 +450,7 @@ redo:
 
        /* Signal writers asynchronously that there is more room. */
        if (do_wakeup) {
-               wake_up_interruptible_sync(&pipe->wait);
+               wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }
        if (ret > 0)
@@ -607,7 +612,7 @@ redo2:
                        break;
                }
                if (do_wakeup) {
-                       wake_up_interruptible_sync(&pipe->wait);
+                       wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                        do_wakeup = 0;
                }
@@ -618,7 +623,7 @@ redo2:
 out:
        mutex_unlock(&inode->i_mutex);
        if (do_wakeup) {
-               wake_up_interruptible_sync(&pipe->wait);
+               wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }
        if (ret > 0)
@@ -710,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw)
        if (!pipe->readers && !pipe->writers) {
                free_pipe_info(inode);
        } else {
-               wake_up_interruptible_sync(&pipe->wait);
+               wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }
@@ -943,12 +948,14 @@ static const struct dentry_operations pipefs_dentry_operations = {
 
 static struct inode * get_pipe_inode(void)
 {
-       struct inode *inode = new_inode(pipe_mnt->mnt_sb);
+       struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
        struct pipe_inode_info *pipe;
 
        if (!inode)
                goto fail_inode;
 
+       inode->i_ino = get_next_ino();
+
        pipe = alloc_pipe_info(inode);
        if (!pipe)
                goto fail_iput;
@@ -992,12 +999,11 @@ struct file *create_write_pipe(int flags)
                goto err;
 
        err = -ENOMEM;
-       path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+       path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
        if (!path.dentry)
                goto err_inode;
        path.mnt = mntget(pipe_mnt);
 
-       path.dentry->d_op = &pipefs_dentry_operations;
        d_instantiate(path.dentry, inode);
 
        err = -ENFILE;
@@ -1118,26 +1124,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
  */
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
 {
        struct pipe_buffer *bufs;
 
-       /*
-        * Must be a power-of-2 currently
-        */
-       if (!is_power_of_2(arg))
-               return -EINVAL;
-
        /*
         * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
         * expect a lot of shrink+grow operations, just free and allocate
         * again like we would do for growing. If the pipe currently
         * contains more buffers than arg, then return busy.
         */
-       if (arg < pipe->nrbufs)
+       if (nr_pages < pipe->nrbufs)
                return -EBUSY;
 
-       bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+       bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
        if (unlikely(!bufs))
                return -ENOMEM;
 
@@ -1146,20 +1146,68 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
         * and adjust the indexes.
         */
        if (pipe->nrbufs) {
-               const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
-               const unsigned int head = pipe->nrbufs - tail;
+               unsigned int tail;
+               unsigned int head;
+
+               tail = pipe->curbuf + pipe->nrbufs;
+               if (tail < pipe->buffers)
+                       tail = 0;
+               else
+                       tail &= (pipe->buffers - 1);
 
+               head = pipe->nrbufs - tail;
                if (head)
                        memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
                if (tail)
-                       memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+                       memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
        }
 
        pipe->curbuf = 0;
        kfree(pipe->bufs);
        pipe->bufs = bufs;
-       pipe->buffers = arg;
-       return arg;
+       pipe->buffers = nr_pages;
+       return nr_pages * PAGE_SIZE;
+}
+
+/*
+ * Currently we rely on the pipe array holding a power-of-2 number
+ * of pages.
+ */
+static inline unsigned int round_pipe_size(unsigned int size)
+{
+       unsigned long nr_pages;
+
+       nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+}
+
+/*
+ * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
+ * will return an error.
+ */
+int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
+                size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
+       if (ret < 0 || !write)
+               return ret;
+
+       pipe_max_size = round_pipe_size(pipe_max_size);
+       return ret;
+}
+
+/*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+struct pipe_inode_info *get_pipe_info(struct file *file)
+{
+       struct inode *i = file->f_path.dentry->d_inode;
+
+       return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
 }
 
 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1167,30 +1215,32 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
        struct pipe_inode_info *pipe;
        long ret;
 
-       pipe = file->f_path.dentry->d_inode->i_pipe;
+       pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
 
        mutex_lock(&pipe->inode->i_mutex);
 
        switch (cmd) {
-       case F_SETPIPE_SZ:
-               if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) {
-                       ret = -EINVAL;
+       case F_SETPIPE_SZ: {
+               unsigned int size, nr_pages;
+
+               size = round_pipe_size(arg);
+               nr_pages = size >> PAGE_SHIFT;
+
+               ret = -EINVAL;
+               if (!nr_pages)
                        goto out;
-               }
-               /*
-                * The pipe needs to be at least 2 pages large to
-                * guarantee POSIX behaviour.
-                */
-               if (arg < 2) {
-                       ret = -EINVAL;
+
+               if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
+                       ret = -EPERM;
                        goto out;
                }
-               ret = pipe_set_size(pipe, arg);
+               ret = pipe_set_size(pipe, nr_pages);
                break;
+               }
        case F_GETPIPE_SZ:
-               ret = pipe->buffers;
+               ret = pipe->buffers * PAGE_SIZE;
                break;
        default:
                ret = -EINVAL;
@@ -1202,22 +1252,26 @@ out:
        return ret;
 }
 
+static const struct super_operations pipefs_ops = {
+       .destroy_inode = free_inode_nonrcu,
+};
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pipe: will go nicely and kill the special-casing in procfs.
  */
-static int pipefs_get_sb(struct file_system_type *fs_type,
-                        int flags, const char *dev_name, void *data,
-                        struct vfsmount *mnt)
+static struct dentry *pipefs_mount(struct file_system_type *fs_type,
+                        int flags, const char *dev_name, void *data)
 {
-       return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
+       return mount_pseudo(fs_type, "pipe:", &pipefs_ops,
+                       &pipefs_dentry_operations, PIPEFS_MAGIC);
 }
 
 static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
-       .get_sb         = pipefs_get_sb,
+       .mount          = pipefs_mount,
        .kill_sb        = kill_anon_super,
 };
 
@@ -1237,8 +1291,8 @@ static int __init init_pipe_fs(void)
 
 static void __exit exit_pipe_fs(void)
 {
+       kern_unmount(pipe_mnt);
        unregister_filesystem(&pipe_fs_type);
-       mntput(pipe_mnt);
 }
 
 fs_initcall(init_pipe_fs);