[PATCH] Fix race in do_get_write_access()
Jan Kara [Tue, 6 Sep 2005 22:19:17 +0000 (15:19 -0700)]
  attached patch should fix the following race:
     Proc 1                               Proc 2

     __flush_batch()
       ll_rw_block()
                                        do_get_write_access()
   lock_buffer
                                             jh is only waiting for checkpoint
     -> b_transaction == NULL ->
     do nothing
                                           unlock_buffer
    test_set_buffer_locked()
    test_clear_buffer_dirty()
                                           __journal_file_buffer()
                                        change the data
    submit_bh()

and we have sent wrong data to disk...  We now clean the dirty buffer flag
under buffer lock in all cases and hence we know that whenever a buffer is
starting to be journaled we either finish the pending write-out before
attaching a buffer to a transaction or we won't write the buffer until the
transaction is going to be committed.

The test in jbd_unexpected_dirty_buffer() is redundant - remove it.
Furthermore we have to clear the buffer dirty bit under the buffer lock to
prevent races with buffer write-out (and hence prevent returning a buffer with
IO happening).

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

fs/jbd/transaction.c

index 77b7662..c6ec66f 100644 (file)
@@ -490,23 +490,21 @@ void journal_unlock_updates (journal_t *journal)
  */
 static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 {
-       struct buffer_head *bh = jh2bh(jh);
        int jlist;
 
-       if (buffer_dirty(bh)) {
-               /* If this buffer is one which might reasonably be dirty
-                * --- ie. data, or not part of this journal --- then
-                * we're OK to leave it alone, but otherwise we need to
-                * move the dirty bit to the journal's own internal
-                * JBDDirty bit. */
-               jlist = jh->b_jlist;
-
-               if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-                   jlist == BJ_Shadow || jlist == BJ_Forget) {
-                       if (test_clear_buffer_dirty(jh2bh(jh))) {
-                               set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-                       }
-               }
+       /* If this buffer is one which might reasonably be dirty
+        * --- ie. data, or not part of this journal --- then
+        * we're OK to leave it alone, but otherwise we need to
+        * move the dirty bit to the journal's own internal
+        * JBDDirty bit. */
+       jlist = jh->b_jlist;
+
+       if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+           jlist == BJ_Shadow || jlist == BJ_Forget) {
+               struct buffer_head *bh = jh2bh(jh);
+
+               if (test_clear_buffer_dirty(bh))
+                       set_buffer_jbddirty(bh);
        }
 }
 
@@ -574,9 +572,14 @@ repeat:
                        if (jh->b_next_transaction)
                                J_ASSERT_JH(jh, jh->b_next_transaction ==
                                                        transaction);
-                       JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-                       jbd_unexpected_dirty_buffer(jh);
-               }
+               }
+               /*
+                * In any case we need to clean the dirty flag and we must
+                * do it under the buffer lock to be sure we don't race
+                * with running write-out.
+                */
+               JBUFFER_TRACE(jh, "Unexpected dirty buffer");
+               jbd_unexpected_dirty_buffer(jh);
        }
 
        unlock_buffer(bh);