Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
[linux-2.6.git] / fs / jbd2 / commit.c
index d6aeb1f..7f21cf3 100644 (file)
@@ -26,7 +26,9 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/bitops.h>
 #include <trace/events/jbd2.h>
+#include <asm/system.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -103,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
        int ret;
        struct timespec now = current_kernel_time();
 
+       *cbh = NULL;
+
        if (is_journal_aborted(journal))
                return 0;
 
@@ -134,25 +138,11 @@ static int journal_submit_commit_record(journal_t *journal,
 
        if (journal->j_flags & JBD2_BARRIER &&
            !JBD2_HAS_INCOMPAT_FEATURE(journal,
-                                      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-               ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
-               if (ret == -EOPNOTSUPP) {
-                       printk(KERN_WARNING
-                              "JBD2: Disabling barriers on %s, "
-                              "not supported by device\n", journal->j_devname);
-                       write_lock(&journal->j_state_lock);
-                       journal->j_flags &= ~JBD2_BARRIER;
-                       write_unlock(&journal->j_state_lock);
+                                      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
+               ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
+       else
+               ret = submit_bh(WRITE_SYNC, bh);
 
-                       /* And try again, without the barrier */
-                       lock_buffer(bh);
-                       set_buffer_uptodate(bh);
-                       clear_buffer_dirty(bh);
-                       ret = submit_bh(WRITE_SYNC_PLUG, bh);
-               }
-       } else {
-               ret = submit_bh(WRITE_SYNC_PLUG, bh);
-       }
        *cbh = bh;
        return ret;
 }
@@ -166,29 +156,8 @@ static int journal_wait_on_commit_record(journal_t *journal,
 {
        int ret = 0;
 
-retry:
        clear_buffer_dirty(bh);
        wait_on_buffer(bh);
-       if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
-               printk(KERN_WARNING
-                      "JBD2: %s: disabling barries on %s - not supported "
-                      "by device\n", __func__, journal->j_devname);
-               write_lock(&journal->j_state_lock);
-               journal->j_flags &= ~JBD2_BARRIER;
-               write_unlock(&journal->j_state_lock);
-
-               lock_buffer(bh);
-               clear_buffer_dirty(bh);
-               set_buffer_uptodate(bh);
-               bh->b_end_io = journal_end_buffer_io_sync;
-
-               ret = submit_bh(WRITE_SYNC_PLUG, bh);
-               if (ret) {
-                       unlock_buffer(bh);
-                       return ret;
-               }
-               goto retry;
-       }
 
        if (unlikely(!buffer_uptodate(bh)))
                ret = -EIO;
@@ -236,7 +205,7 @@ static int journal_submit_data_buffers(journal_t *journal,
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
                mapping = jinode->i_vfs_inode->i_mapping;
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
                spin_unlock(&journal->j_list_lock);
                /*
                 * submit the inode data buffers. We use writepage
@@ -250,8 +219,8 @@ static int journal_submit_data_buffers(journal_t *journal,
                        ret = err;
                spin_lock(&journal->j_list_lock);
                J_ASSERT(jinode->i_transaction == commit_transaction);
-               commit_transaction->t_flushed_data_blocks = 1;
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+               smp_mb__after_clear_bit();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
        spin_unlock(&journal->j_list_lock);
@@ -272,7 +241,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
        /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
                spin_unlock(&journal->j_list_lock);
                err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
                if (err) {
@@ -288,7 +257,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                                ret = err;
                }
                spin_lock(&journal->j_list_lock);
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+               smp_mb__after_clear_bit();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
 
@@ -360,19 +330,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        int tag_bytes = journal_tag_bytes(journal);
        struct buffer_head *cbh = NULL; /* For transactional checksums */
        __u32 crc32_sum = ~0;
-       int write_op = WRITE;
+       struct blk_plug plug;
 
        /*
         * First job: lock down the current transaction and wait for
         * all outstanding updates to complete.
         */
 
-#ifdef COMMIT_STATS
-       spin_lock(&journal->j_list_lock);
-       summarise_journal_usage(journal);
-       spin_unlock(&journal->j_list_lock);
-#endif
-
        /* Do we need to erase the effects of a prior jbd2_journal_flush? */
        if (journal->j_flags & JBD2_FLUSHED) {
                jbd_debug(3, "super block updated\n");
@@ -394,13 +358,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        write_lock(&journal->j_state_lock);
        commit_transaction->t_state = T_LOCKED;
 
-       /*
-        * Use plugged writes here, since we want to submit several before
-        * we unplug the device. We don't do explicit unplugging in here,
-        * instead we rely on sync_buffer() doing the unplug for us.
-        */
-       if (commit_transaction->t_synchronous_commit)
-               write_op = WRITE_SYNC_PLUG;
        trace_jbd2_commit_locking(journal, commit_transaction);
        stats.run.rs_wait = commit_transaction->t_max_wait;
        stats.run.rs_locked = jiffies;
@@ -441,7 +398,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * we do not require it to remember exactly which old buffers it
         * has reserved.  This is consistent with the existing behaviour
         * that multiple jbd2_journal_get_write_access() calls to the same
-        * buffer are perfectly permissable.
+        * buffer are perfectly permissible.
         */
        while (commit_transaction->t_reserved_list) {
                jh = commit_transaction->t_reserved_list;
@@ -500,8 +457,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        if (err)
                jbd2_journal_abort(journal, err);
 
+       blk_start_plug(&plug);
        jbd2_journal_write_revoke_records(journal, commit_transaction,
-                                         write_op);
+                                         WRITE_SYNC);
+       blk_finish_plug(&plug);
 
        jbd_debug(3, "JBD: commit phase 2\n");
 
@@ -528,6 +487,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        err = 0;
        descriptor = NULL;
        bufs = 0;
+       blk_start_plug(&plug);
        while (commit_transaction->t_buffers) {
 
                /* Find the next buffer to be journaled... */
@@ -689,7 +649,7 @@ start_journal_io:
                                clear_buffer_dirty(bh);
                                set_buffer_uptodate(bh);
                                bh->b_end_io = journal_end_buffer_io_sync;
-                               submit_bh(write_op, bh);
+                               submit_bh(WRITE_SYNC, bh);
                        }
                        cond_resched();
                        stats.run.rs_blocks_logged += bufs;
@@ -711,16 +671,19 @@ start_journal_io:
                err = 0;
        }
 
+       write_lock(&journal->j_state_lock);
+       J_ASSERT(commit_transaction->t_state == T_COMMIT);
+       commit_transaction->t_state = T_COMMIT_DFLUSH;
+       write_unlock(&journal->j_state_lock);
        /* 
         * If the journal is not located on the file system device,
         * then we must flush the file system device before we issue
         * the commit record
         */
-       if (commit_transaction->t_flushed_data_blocks &&
+       if (commit_transaction->t_need_data_flush &&
            (journal->j_fs_dev != journal->j_dev) &&
            (journal->j_flags & JBD2_BARRIER))
-               blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
-                       BLKDEV_IFL_WAIT);
+               blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
 
        /* Done it all: now write the commit record asynchronously. */
        if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -731,6 +694,8 @@ start_journal_io:
                        __jbd2_journal_abort_hard(journal);
        }
 
+       blk_finish_plug(&plug);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
@@ -792,8 +757,13 @@ wait_for_iobuf:
                    required. */
                JBUFFER_TRACE(jh, "file as BJ_Forget");
                jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
-               /* Wake up any transactions which were waiting for this
-                  IO to complete */
+               /*
+                * Wake up any transactions which were waiting for this IO to
+                * complete. The barrier must be here so that changes by
+                * jbd2_journal_file_buffer() take effect before wake_up_bit()
+                * does the waitqueue check.
+                */
+               smp_mb();
                wake_up_bit(&bh->b_state, BH_Unshadow);
                JBUFFER_TRACE(jh, "brelse shadowed buffer");
                __brelse(bh);
@@ -832,6 +802,10 @@ wait_for_iobuf:
                jbd2_journal_abort(journal, err);
 
        jbd_debug(3, "JBD: commit phase 5\n");
+       write_lock(&journal->j_state_lock);
+       J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
+       commit_transaction->t_state = T_COMMIT_JFLUSH;
+       write_unlock(&journal->j_state_lock);
 
        if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -840,13 +814,12 @@ wait_for_iobuf:
                if (err)
                        __jbd2_journal_abort_hard(journal);
        }
-       if (!err && !is_journal_aborted(journal))
+       if (cbh)
                err = journal_wait_on_commit_record(journal, cbh);
        if (JBD2_HAS_INCOMPAT_FEATURE(journal,
                                      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
            journal->j_flags & JBD2_BARRIER) {
-               blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
-                                  BLKDEV_IFL_WAIT);
+               blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
        }
 
        if (err)
@@ -988,7 +961,7 @@ restart_loop:
 
        jbd_debug(3, "JBD: commit phase 7\n");
 
-       J_ASSERT(commit_transaction->t_state == T_COMMIT);
+       J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
 
        commit_transaction->t_start = jiffies;
        stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,