backing-dev: ensure wakeup_timer is deleted
[linux-2.6.git] / mm / backing-dev.c
index 2ef0dc9..253b071 100644 (file)
@@ -45,6 +45,17 @@ static struct timer_list sync_supers_timer;
 static int bdi_sync_supers(void *);
 static void sync_supers_timer_fn(unsigned long);
 
+void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
+{
+       if (wb1 < wb2) {
+               spin_lock(&wb1->list_lock);
+               spin_lock_nested(&wb2->list_lock, 1);
+       } else {
+               spin_lock(&wb2->list_lock);
+               spin_lock_nested(&wb1->list_lock, 1);
+       }
+}
+
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -67,34 +78,42 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
        struct inode *inode;
 
        nr_dirty = nr_io = nr_more_io = 0;
-       spin_lock(&inode_wb_list_lock);
+       spin_lock(&wb->list_lock);
        list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
                nr_dirty++;
        list_for_each_entry(inode, &wb->b_io, i_wb_list)
                nr_io++;
        list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
                nr_more_io++;
-       spin_unlock(&inode_wb_list_lock);
+       spin_unlock(&wb->list_lock);
 
        global_dirty_limits(&background_thresh, &dirty_thresh);
        bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
 
 #define K(x) ((x) << (PAGE_SHIFT - 10))
        seq_printf(m,
-                  "BdiWriteback:     %8lu kB\n"
-                  "BdiReclaimable:   %8lu kB\n"
-                  "BdiDirtyThresh:   %8lu kB\n"
-                  "DirtyThresh:      %8lu kB\n"
-                  "BackgroundThresh: %8lu kB\n"
-                  "b_dirty:          %8lu\n"
-                  "b_io:             %8lu\n"
-                  "b_more_io:        %8lu\n"
-                  "bdi_list:         %8u\n"
-                  "state:            %8lx\n",
+                  "BdiWriteback:       %10lu kB\n"
+                  "BdiReclaimable:     %10lu kB\n"
+                  "BdiDirtyThresh:     %10lu kB\n"
+                  "DirtyThresh:        %10lu kB\n"
+                  "BackgroundThresh:   %10lu kB\n"
+                  "BdiWritten:         %10lu kB\n"
+                  "BdiWriteBandwidth:  %10lu kBps\n"
+                  "b_dirty:            %10lu\n"
+                  "b_io:               %10lu\n"
+                  "b_more_io:          %10lu\n"
+                  "bdi_list:           %10u\n"
+                  "state:              %10lx\n",
                   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
                   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
-                  K(bdi_thresh), K(dirty_thresh),
-                  K(background_thresh), nr_dirty, nr_io, nr_more_io,
+                  K(bdi_thresh),
+                  K(dirty_thresh),
+                  K(background_thresh),
+                  (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
+                  (unsigned long) K(bdi->write_bandwidth),
+                  nr_dirty,
+                  nr_io,
+                  nr_more_io,
                   !list_empty(&bdi->bdi_list), bdi->state);
 #undef K
 
@@ -249,18 +268,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
        return wb_has_dirty_io(&bdi->wb);
 }
 
-static void bdi_flush_io(struct backing_dev_info *bdi)
-{
-       struct writeback_control wbc = {
-               .sync_mode              = WB_SYNC_NONE,
-               .older_than_this        = NULL,
-               .range_cyclic           = 1,
-               .nr_to_write            = 1024,
-       };
-
-       writeback_inodes_wb(&bdi->wb, &wbc);
-}
-
 /*
  * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
  * or we risk deadlocking on ->s_umount. The longer term solution would be
@@ -352,6 +359,17 @@ static unsigned long bdi_longest_inactive(void)
        return max(5UL * 60 * HZ, interval);
 }
 
+/*
+ * Clear pending bit and wakeup anybody waiting for flusher thread creation or
+ * shutdown
+ */
+static void bdi_clear_pending(struct backing_dev_info *bdi)
+{
+       clear_bit(BDI_pending, &bdi->state);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&bdi->state, BDI_pending);
+}
+
 static int bdi_forker_thread(void *ptr)
 {
        struct bdi_writeback *me = ptr;
@@ -383,6 +401,13 @@ static int bdi_forker_thread(void *ptr)
                }
 
                spin_lock_bh(&bdi_lock);
+               /*
+                * In the following loop we are going to check whether we have
+                * some work to do without any synchronization with tasks
+                * waking us up to do work for them. So we have to set task
+                * state already here so that we don't miss wakeups coming
+                * after we verify some condition.
+                */
                set_current_state(TASK_INTERRUPTIBLE);
 
                list_for_each_entry(bdi, &bdi_list, bdi_list) {
@@ -446,9 +471,10 @@ static int bdi_forker_thread(void *ptr)
                        if (IS_ERR(task)) {
                                /*
                                 * If thread creation fails, force writeout of
-                                * the bdi from the thread.
+                                * the bdi from the thread. Hopefully 1024 is
+                                * large enough for efficient IO.
                                 */
-                               bdi_flush_io(bdi);
+                               writeback_inodes_wb(&bdi->wb, 1024);
                        } else {
                                /*
                                 * The spinlock makes sure we do not lose
@@ -461,11 +487,13 @@ static int bdi_forker_thread(void *ptr)
                                spin_unlock_bh(&bdi->wb_lock);
                                wake_up_process(task);
                        }
+                       bdi_clear_pending(bdi);
                        break;
 
                case KILL_THREAD:
                        __set_current_state(TASK_RUNNING);
                        kthread_stop(task);
+                       bdi_clear_pending(bdi);
                        break;
 
                case NO_ACTION:
@@ -481,16 +509,8 @@ static int bdi_forker_thread(void *ptr)
                        else
                                schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
                        try_to_freeze();
-                       /* Back to the main loop */
-                       continue;
+                       break;
                }
-
-               /*
-                * Clear pending bit and wakeup anybody waiting to tear us down.
-                */
-               clear_bit(BDI_pending, &bdi->state);
-               smp_mb__after_clear_bit();
-               wake_up_bit(&bdi->state, BDI_pending);
        }
 
        return 0;
@@ -606,6 +626,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
        if (bdi->dev) {
+               bdi_set_min_ratio(bdi, 0);
                trace_writeback_bdi_unregister(bdi);
                bdi_prune_sb(bdi);
                del_timer_sync(&bdi->wb.wakeup_timer);
@@ -628,9 +649,15 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
        INIT_LIST_HEAD(&wb->b_dirty);
        INIT_LIST_HEAD(&wb->b_io);
        INIT_LIST_HEAD(&wb->b_more_io);
+       spin_lock_init(&wb->list_lock);
        setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
 }
 
+/*
+ * Initial write bandwidth: 100 MB/s
+ */
+#define INIT_BW                (100 << (20 - PAGE_SHIFT))
+
 int bdi_init(struct backing_dev_info *bdi)
 {
        int i, err;
@@ -653,6 +680,13 @@ int bdi_init(struct backing_dev_info *bdi)
        }
 
        bdi->dirty_exceeded = 0;
+
+       bdi->bw_time_stamp = jiffies;
+       bdi->written_stamp = 0;
+
+       bdi->write_bandwidth = INIT_BW;
+       bdi->avg_write_bandwidth = INIT_BW;
+
        err = prop_local_init_percpu(&bdi->completions);
 
        if (err) {
@@ -676,15 +710,24 @@ void bdi_destroy(struct backing_dev_info *bdi)
        if (bdi_has_dirty_io(bdi)) {
                struct bdi_writeback *dst = &default_backing_dev_info.wb;
 
-               spin_lock(&inode_wb_list_lock);
+               bdi_lock_two(&bdi->wb, dst);
                list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
                list_splice(&bdi->wb.b_io, &dst->b_io);
                list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
-               spin_unlock(&inode_wb_list_lock);
+               spin_unlock(&bdi->wb.list_lock);
+               spin_unlock(&dst->list_lock);
        }
 
        bdi_unregister(bdi);
 
+       /*
+        * If bdi_unregister() had already been called earlier, the
+        * wakeup_timer could still be armed because bdi_prune_sb()
+        * can race with the bdi_wakeup_thread_delayed() calls from
+        * __mark_inode_dirty().
+        */
+       del_timer_sync(&bdi->wb.wakeup_timer);
+
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
                percpu_counter_destroy(&bdi->bdi_stat[i]);