splice: fix kmaps in default_file_splice_write()
Miklos Szeredi [Tue, 19 May 2009 09:37:46 +0000 (11:37 +0200)]
Unfortunately multiple kmap() within a single thread are deadlockable,
so writing out multiple buffers with writev() isn't possible.

Change the implementation so that it does a separate write() for each
buffer.  This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.

This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations.  In the future it may be
worth to implement default_file_splice_write() in a more efficient way
on configs that allow it.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

fs/splice.c

index 41179c0..73766d2 100644 (file)
@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
        return res;
 }
 
-static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
-                           unsigned long vlen, loff_t *ppos)
+static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+                           loff_t pos)
 {
        mm_segment_t old_fs;
        ssize_t res;
@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
        old_fs = get_fs();
        set_fs(get_ds());
        /* The cast to a user pointer is valid due to the set_fs() */
-       res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos);
+       res = vfs_write(file, (const char __user *)buf, count, &pos);
        set_fs(old_fs);
 
        return res;
@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
-static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n)
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                         struct splice_desc *sd)
 {
-       return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
+       int ret;
+       void *data;
+
+       ret = buf->ops->confirm(pipe, buf);
+       if (ret)
+               return ret;
+
+       data = buf->ops->map(pipe, buf, 0);
+       ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+       buf->ops->unmap(pipe, buf, data);
+
+       return ret;
 }
 
 static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
                                         struct file *out, loff_t *ppos,
                                         size_t len, unsigned int flags)
 {
-       ssize_t ret = 0;
-       ssize_t total_len = 0;
-       int do_wakeup = 0;
-
-       pipe_lock(pipe);
-       while (len) {
-               struct pipe_buffer *buf;
-               void *data[PIPE_BUFFERS];
-               struct iovec vec[PIPE_BUFFERS];
-               unsigned int nr_pages = 0;
-               unsigned int write_len = 0;
-               unsigned int now_len = len;
-               unsigned int this_len;
-               int i;
-
-               BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
-               for (i = 0; i < pipe->nrbufs && now_len; i++) {
-                       buf = nth_pipe_buf(pipe, i);
-
-                       ret = buf->ops->confirm(pipe, buf);
-                       if (ret)
-                               break;
-
-                       data[i] = buf->ops->map(pipe, buf, 0);
-                       this_len = min(buf->len, now_len);
-                       vec[i].iov_base = (void __user *) data[i] + buf->offset;
-                       vec[i].iov_len = this_len;
-                       now_len -= this_len;
-                       write_len += this_len;
-                       nr_pages++;
-               }
-
-               if (nr_pages) {
-                       ret = kernel_writev(out, vec, nr_pages, ppos);
-                       if (ret == 0)
-                               ret = -EIO;
-                       if (ret > 0) {
-                               len -= ret;
-                               total_len += ret;
-                       }
-               }
-
-               for (i = 0; i < nr_pages; i++) {
-                       buf = nth_pipe_buf(pipe, i);
-                       buf->ops->unmap(pipe, buf, data[i]);
-
-                       if (ret > 0) {
-                               this_len = min_t(unsigned, vec[i].iov_len, ret);
-                               buf->offset += this_len;
-                               buf->len -= this_len;
-                               ret -= this_len;
-                       }
-               }
-
-               if (ret < 0)
-                       break;
-
-               while (pipe->nrbufs) {
-                       const struct pipe_buf_operations *ops;
-
-                       buf = nth_pipe_buf(pipe, 0);
-                       if (buf->len)
-                               break;
-
-                       ops = buf->ops;
-                       buf->ops = NULL;
-                       ops->release(pipe, buf);
-                       pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
-                       pipe->nrbufs--;
-                       if (pipe->inode)
-                               do_wakeup = 1;
-               }
-
-               if (pipe->nrbufs)
-                       continue;
-               if (!pipe->writers)
-                       break;
-               if (!pipe->waiting_writers) {
-                       if (total_len)
-                               break;
-               }
-
-               if (flags & SPLICE_F_NONBLOCK) {
-                       ret = -EAGAIN;
-                       break;
-               }
-
-               if (signal_pending(current)) {
-                       ret = -ERESTARTSYS;
-                       break;
-               }
-
-               if (do_wakeup) {
-                       wakeup_pipe_writers(pipe);
-                       do_wakeup = 0;
-               }
-
-               pipe_wait(pipe);
-       }
-       pipe_unlock(pipe);
+       ssize_t ret;
 
-       if (do_wakeup)
-               wakeup_pipe_writers(pipe);
+       ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+       if (ret > 0)
+               *ppos += ret;
 
-       return total_len ? total_len : ret;
+       return ret;
 }
 
 /**