Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
[linux-2.6.git] / mm / page-writeback.c
index 997186c..dd73d29 100644 (file)
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES    1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
 /* End of sysctl-exported parameters */
 
 
-static void background_writeout(unsigned long _min_pages);
-
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
  *
@@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
  *
  */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
        int ret = 0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&bdi_lock, flags);
+       spin_lock(&bdi_lock);
        if (min_ratio > bdi->max_ratio) {
                ret = -EINVAL;
        } else {
@@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
                        ret = -EINVAL;
                }
        }
-       spin_unlock_irqrestore(&bdi_lock, flags);
+       spin_unlock(&bdi_lock);
 
        return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-       unsigned long flags;
        int ret = 0;
 
        if (max_ratio > 100)
                return -EINVAL;
 
-       spin_lock_irqsave(&bdi_lock, flags);
+       spin_lock(&bdi_lock);
        if (bdi->min_ratio > max_ratio) {
                ret = -EINVAL;
        } else {
                bdi->max_ratio = max_ratio;
                bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
        }
-       spin_unlock_irqrestore(&bdi_lock, flags);
+       spin_unlock(&bdi_lock);
 
        return ret;
 }
@@ -546,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
                 * up.
                 */
                if (bdi_nr_reclaimable > bdi_thresh) {
-                       writeback_inodes(&wbc);
+                       writeback_inodes_wbc(&wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
@@ -575,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
-               congestion_wait(BLK_RW_ASYNC, HZ/10);
+               schedule_timeout(1);
        }
 
        if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
         * background_thresh, to keep the amount of dirty memory low.
         */
        if ((laptop_mode && pages_written) ||
-                       (!laptop_mode && (global_page_state(NR_FILE_DIRTY)
-                                         + global_page_state(NR_UNSTABLE_NFS)
-                                         > background_thresh)))
-               pdflush_operation(background_writeout, 0);
+           (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+                                         + global_page_state(NR_UNSTABLE_NFS))
+                                         > background_thresh))) {
+               struct writeback_control wbc = {
+                       .bdi            = bdi,
+                       .sync_mode      = WB_SYNC_NONE,
+                       .nr_to_write    = nr_writeback,
+               };
+
+
+               bdi_start_writeback(&wbc);
+       }
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -682,153 +676,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
         }
 }
 
-/*
- * writeback at least _min_pages, and keep writing until the amount of dirty
- * memory is less than the background threshold, or until we're all clean.
- */
-static void background_writeout(unsigned long _min_pages)
-{
-       long min_pages = _min_pages;
-       struct writeback_control wbc = {
-               .bdi            = NULL,
-               .sync_mode      = WB_SYNC_NONE,
-               .older_than_this = NULL,
-               .nr_to_write    = 0,
-               .nonblocking    = 1,
-               .range_cyclic   = 1,
-       };
-
-       for ( ; ; ) {
-               unsigned long background_thresh;
-               unsigned long dirty_thresh;
-
-               get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-               if (global_page_state(NR_FILE_DIRTY) +
-                       global_page_state(NR_UNSTABLE_NFS) < background_thresh
-                               && min_pages <= 0)
-                       break;
-               wbc.more_io = 0;
-               wbc.encountered_congestion = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-               wbc.pages_skipped = 0;
-               writeback_inodes(&wbc);
-               min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-               if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-                       /* Wrote less than expected */
-                       if (wbc.encountered_congestion || wbc.more_io)
-                               congestion_wait(BLK_RW_ASYNC, HZ/10);
-                       else
-                               break;
-               }
-       }
-}
-
-/*
- * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
- * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
- * -1 if all pdflush threads were busy.
- */
-int wakeup_pdflush(long nr_pages)
-{
-       if (nr_pages == 0)
-               nr_pages = global_page_state(NR_FILE_DIRTY) +
-                               global_page_state(NR_UNSTABLE_NFS);
-       return pdflush_operation(background_writeout, nr_pages);
-}
-
-static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
 static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
- * Periodic writeback of "old" data.
- *
- * Define "old": the first time one of an inode's pages is dirtied, we mark the
- * dirtying-time in the inode's address_space.  So this periodic writeback code
- * just walks the superblock inode list, writing back any inodes which are
- * older than a specific point in time.
- *
- * Try to run once per dirty_writeback_interval.  But if a writeback event
- * takes longer than a dirty_writeback_interval interval, then leave a
- * one-second gap.
- *
- * older_than_this takes precedence over nr_to_write.  So we'll only write back
- * all dirty pages if they are all attached to "old" mappings.
- */
-static void wb_kupdate(unsigned long arg)
-{
-       unsigned long oldest_jif;
-       unsigned long start_jif;
-       unsigned long next_jif;
-       long nr_to_write;
-       struct writeback_control wbc = {
-               .bdi            = NULL,
-               .sync_mode      = WB_SYNC_NONE,
-               .older_than_this = &oldest_jif,
-               .nr_to_write    = 0,
-               .nonblocking    = 1,
-               .for_kupdate    = 1,
-               .range_cyclic   = 1,
-       };
-
-       sync_supers();
-
-       oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-       start_jif = jiffies;
-       next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
-       nr_to_write = global_page_state(NR_FILE_DIRTY) +
-                       global_page_state(NR_UNSTABLE_NFS) +
-                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-       while (nr_to_write > 0) {
-               wbc.more_io = 0;
-               wbc.encountered_congestion = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-               writeback_inodes(&wbc);
-               if (wbc.nr_to_write > 0) {
-                       if (wbc.encountered_congestion || wbc.more_io)
-                               congestion_wait(BLK_RW_ASYNC, HZ/10);
-                       else
-                               break;  /* All the old data is written */
-               }
-               nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-       }
-       if (time_before(next_jif, jiffies + HZ))
-               next_jif = jiffies + HZ;
-       if (dirty_writeback_interval)
-               mod_timer(&wb_timer, next_jif);
-}
-
-/*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
        struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, file, buffer, length, ppos);
-       if (dirty_writeback_interval)
-               mod_timer(&wb_timer, jiffies +
-                       msecs_to_jiffies(dirty_writeback_interval * 10));
-       else
-               del_timer(&wb_timer);
        return 0;
 }
 
-static void wb_timer_fn(unsigned long unused)
-{
-       if (pdflush_operation(wb_kupdate, 0) < 0)
-               mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
-}
-
-static void laptop_flush(unsigned long unused)
+static void do_laptop_sync(struct work_struct *work)
 {
-       sys_sync();
+       wakeup_flusher_threads(0);
+       kfree(work);
 }
 
 static void laptop_timer_fn(unsigned long unused)
 {
-       pdflush_operation(laptop_flush, 0);
+       struct work_struct *work;
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (work) {
+               INIT_WORK(work, do_laptop_sync);
+               schedule_work(work);
+       }
 }
 
 /*
@@ -911,8 +787,6 @@ void __init page_writeback_init(void)
 {
        int shift;
 
-       mod_timer(&wb_timer,
-                 jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
        writeback_set_ratelimit();
        register_cpu_notifier(&ratelimit_nb);