[PATCH] Kill PF_SYNCWRITE flag

A process flag to indicate whether we are doing sync io is incredibly
ugly. It also causes performance problems when one does a lot of async
io and then proceeds to sync it. Part of the io will go out as async,
and the other part as sync. This causes a disconnect between the
previously submitted io and the synced io. For io schedulers such as CFQ,
this will cause us lost merges and suboptimal behaviour in scheduling.

Remove PF_SYNCWRITE completely from the fsync/msync paths, and let
the O_DIRECT path just directly indicate that the writes are sync
by using WRITE_SYNC instead.

Signed-off-by: Jens Axboe <axboe@suse.de>

commit: b31dc66a54ad986b6b73bdc49c8efc17cbad1833 [log] [tgz]
author: Jens Axboe <axboe@suse.de> Tue Jun 13 08:26:10 2006 +0200
committer: Jens Axboe <axboe@nelson.home.kernel.dk> Fri Jun 23 17:10:39 2006 +0200
tree: 5591383c1cbffe11512da889c971f899333f1a44
parent: 271f18f102c789f59644bb6c53a69da1df72b2f4 [diff]
diff --git a/fs/buffer.c b/fs/buffer.c
index 23f1f3a..373bb62 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -331,7 +331,6 @@
 		goto out;
 	}
 
-	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
 	/*
@@ -346,7 +345,6 @@
 	err = filemap_fdatawait(mapping);
 	if (!ret)
 		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
 out:
 	return ret;
 }

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b05d1b2..538fb04 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c

@@ -162,7 +162,7 @@
 		NULL);				/* vmas */
 	up_read(&current->mm->mmap_sem);
 
-	if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(dio->curr_user_address);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
@@ -535,7 +535,7 @@
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
-		create = dio->rw == WRITE;
+		create = dio->rw & WRITE;
 		if (dio->lock_type == DIO_LOCKING) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
@@ -867,7 +867,7 @@
 				loff_t i_size_aligned;
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
-				if (dio->rw == WRITE) {
+				if (dio->rw & WRITE) {
 					page_cache_release(page);
 					return -ENOTBLK;
 				}
@@ -1045,7 +1045,7 @@
 		}
 	} /* end iovec loop */
 
-	if (ret == -ENOTBLK && rw == WRITE) {
+	if (ret == -ENOTBLK && (rw & WRITE)) {
 		/*
 		 * The remaining part of the request will be
 		 * be handled by buffered I/O when we return
@@ -1089,7 +1089,7 @@
 	if (dio->is_async) {
 		int should_wait = 0;
 
-		if (dio->result < dio->size && rw == WRITE) {
+		if (dio->result < dio->size && (rw & WRITE)) {
 			dio->waiter = current;
 			should_wait = 1;
 		}
@@ -1142,7 +1142,7 @@
 			ret = transferred;
 
 		/* We could have also come here on an AIO file extend */
-		if (!is_sync_kiocb(iocb) && rw == WRITE &&
+		if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
 		    ret >= 0 && dio->result == dio->size)
 			/*
 			 * For AIO writes where we have completed the
@@ -1194,7 +1194,7 @@
 	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
-		current->flags |= PF_SYNCWRITE;
+		rw = WRITE_SYNC;
 
 	if (bdev)
 		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1270,7 +1270,7 @@
 	 * even for AIO, we need to wait for i/o to complete before
 	 * returning in this case.
 	 */
-	dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) &&
+	dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
 		(end > i_size_read(inode)));
 
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
@@ -1284,8 +1284,6 @@
 		mutex_unlock(&inode->i_mutex);
 	else if (acquire_i_mutex)
 		mutex_lock(&inode->i_mutex);
-	if (rw & WRITE)
-		current->flags &= ~PF_SYNCWRITE;
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6db95cf..031b27a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -623,7 +623,6 @@
 	int need_write_inode_now = 0;
 	int err2;
 
-	current->flags |= PF_SYNCWRITE;
 	if (what & OSYNC_DATA)
 		err = filemap_fdatawrite(mapping);
 	if (what & (OSYNC_METADATA|OSYNC_DATA)) {
@@ -636,7 +635,6 @@
 		if (!err)
 			err = err2;
 	}
-	current->flags &= ~PF_SYNCWRITE;
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & I_DIRTY) &&
commit	b31dc66a54ad986b6b73bdc49c8efc17cbad1833	[log] [tgz]
author	Jens Axboe <axboe@suse.de>	Tue Jun 13 08:26:10 2006 +0200
committer	Jens Axboe <axboe@nelson.home.kernel.dk>	Fri Jun 23 17:10:39 2006 +0200
tree	5591383c1cbffe11512da889c971f899333f1a44
parent	271f18f102c789f59644bb6c53a69da1df72b2f4 [diff]