diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 69b5fbabc8bd4e9248f54f460d0f01b4e75f8e86..0c6258bd1ba37ff62884f914dc77b4c057dbb28c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -495,7 +496,6 @@ static void balance_dirty_pages(struct address_space *mapping,
 
        for (;;) {
                struct writeback_control wbc = {
-                       .bdi            = bdi,
                        .sync_mode      = WB_SYNC_NONE,
                        .older_than_this = NULL,
                        .nr_to_write    = write_chunk,
@@ -536,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * threshold otherwise wait until the disk writes catch
                 * up.
                 */
+               trace_wbc_balance_dirty_start(&wbc, bdi);
                if (bdi_nr_reclaimable > bdi_thresh) {
-                       writeback_inodes_wbc(&wbc);
+                       writeback_inodes_wb(&bdi->wb, &wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
+                       trace_wbc_balance_dirty_written(&wbc, bdi);
                }
 
                /*
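
The trace_wbc_balance_dirty_* calls added in this function come with the writeback tracing series; include/trace/events/writeback.h generates them from a shared event class that records the wbc state plus the bdi name. Roughly, as a sketch of the pattern from that series (not part of this file, details may differ):

        #define DEFINE_WBC_EVENT(name) \
        DEFINE_EVENT(wbc_class, name, \
                TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
                TP_ARGS(wbc, bdi))
        DEFINE_WBC_EVENT(wbc_balance_dirty_start);
        DEFINE_WBC_EVENT(wbc_balance_dirty_written);
        DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
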
@@ -566,7 +568,9 @@ static void balance_dirty_pages(struct address_space *mapping,
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
-               schedule_timeout_interruptible(pause);
+               trace_wbc_balance_dirty_wait(&wbc, bdi);
+               __set_current_state(TASK_INTERRUPTIBLE);
+               io_schedule_timeout(pause);
 
                /*
                 * Increase the delay for each loop, up to our previous
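
The open-coded sleep above is intentional: schedule_timeout_interruptible() in kernel/timer.c is simply

        signed long __sched schedule_timeout_interruptible(signed long timeout)
        {
                __set_current_state(TASK_INTERRUPTIBLE);
                return schedule_timeout(timeout);
        }

so swapping schedule_timeout() for io_schedule_timeout() keeps the same interruptible sleep but accounts the pause as iowait, which is what a throttled dirtier effectively is.
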
@@ -596,7 +600,7 @@ static void balance_dirty_pages(struct address_space *mapping,
            (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
                               + global_page_state(NR_UNSTABLE_NFS))
                                          > background_thresh)))
-               bdi_start_writeback(bdi, 0);
+               bdi_start_background_writeback(bdi);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
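
bdi_start_background_writeback() replaces the old convention of passing nr_pages == 0 to bdi_start_writeback() to mean "write back to the background threshold". As a sketch, the helper in fs/fs-writeback.c of this era looks roughly like the following (the __bdi_start_writeback argument names are recalled from the companion patch and may differ):

        void bdi_start_background_writeback(struct backing_dev_info *bdi)
        {
                /* nr_pages = LONG_MAX, range_cyclic = true, for_background = true */
                __bdi_start_writeback(bdi, LONG_MAX, true, true);
        }
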
@@ -682,10 +686,6 @@ void throttle_vm_writeout(gfp_t gfp_mask)
         }
 }
 
-static void laptop_timer_fn(unsigned long unused);
-
-static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
-
 /*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
@@ -693,24 +693,23 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, buffer, length, ppos);
+       bdi_arm_supers_timer();
        return 0;
 }
 
-static void do_laptop_sync(struct work_struct *work)
-{
-       wakeup_flusher_threads(0);
-       kfree(work);
-}
-
-static void laptop_timer_fn(unsigned long unused)
+#ifdef CONFIG_BLOCK
+void laptop_mode_timer_fn(unsigned long data)
 {
-       struct work_struct *work;
+       struct request_queue *q = (struct request_queue *)data;
+       int nr_pages = global_page_state(NR_FILE_DIRTY) +
+               global_page_state(NR_UNSTABLE_NFS);
 
-       work = kmalloc(sizeof(*work), GFP_ATOMIC);
-       if (work) {
-               INIT_WORK(work, do_laptop_sync);
-               schedule_work(work);
-       }
+       /*
+        * We want to write everything out, not just down to the dirty
+        * threshold
+        */
+       if (bdi_has_dirty_io(&q->backing_dev_info))
+               bdi_start_writeback(&q->backing_dev_info, nr_pages);
 }
 
 /*
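
The new bdi_arm_supers_timer() call makes a sysctl write take effect immediately by re-arming the timer that drives the sync_supers thread; because the helper bails out on a zero interval, writing 0 to dirty_writeback_centisecs now genuinely stops periodic writeback. A sketch of the helper from mm/backing-dev.c (recalled from the same series, details may differ):

        void bdi_arm_supers_timer(void)
        {
                unsigned long next;

                if (!dirty_writeback_interval)
                        return;

                next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
                mod_timer(&sync_supers_timer, round_jiffies_up(next));
        }
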
@@ -718,9 +717,9 @@ static void laptop_timer_fn(unsigned long unused)
  * of all dirty data a few seconds from now.  If the flush is already scheduled
  * then push it back - the user is still using the disk.
  */
-void laptop_io_completion(void)
+void laptop_io_completion(struct backing_dev_info *info)
 {
-       mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode);
+       mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
 }
 
 /*
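
With the timer now embedded in struct backing_dev_info, each request queue arms its own instance. The block-layer half of this change wires it up in blk_alloc_queue_node(), roughly (a sketch from the companion block patch):

        setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);

so request completion can call laptop_io_completion(&q->backing_dev_info), and only the device that actually saw I/O has its flush pushed back.
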
@@ -730,8 +729,16 @@ void laptop_io_completion(void)
  */
 void laptop_sync_completion(void)
 {
-       del_timer(&laptop_mode_wb_timer);
+       struct backing_dev_info *bdi;
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
+               del_timer(&bdi->laptop_mode_wb_timer);
+
+       rcu_read_unlock();
 }
+#endif
 
 /*
  * If ratelimit_pages is too high then we can get into dirty-data overload
@@ -801,6 +808,41 @@ void __init page_writeback_init(void)
        prop_descriptor_init(&vm_dirties, shift);
 }
 
+/**
+ * tag_pages_for_writeback - tag pages to be written by write_cache_pages
+ * @mapping: address space structure to write
+ * @start: starting page index
+ * @end: ending page index (inclusive)
+ *
+ * This function scans the page range from @start to @end (inclusive) and tags
+ * all pages that have the DIRTY tag set with a special TOWRITE tag. The idea
+ * is that write_cache_pages (or whoever calls this function) will then use
+ * the TOWRITE tag to identify pages eligible for writeback.  This mechanism is
+ * used to avoid livelocking of writeback by a process steadily creating new
+ * dirty pages in the file (thus it is important for this function to be quick
+ * so that it can tag pages faster than a dirtying process can create them).
+ */
+/*
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ */
+#define WRITEBACK_TAG_BATCH 4096
+void tag_pages_for_writeback(struct address_space *mapping,
+                            pgoff_t start, pgoff_t end)
+{
+       unsigned long tagged;
+
+       do {
+               spin_lock_irq(&mapping->tree_lock);
+               tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
+                               &start, end, WRITEBACK_TAG_BATCH,
+                               PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+               spin_unlock_irq(&mapping->tree_lock);
+               WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
+               cond_resched();
+       } while (tagged >= WRITEBACK_TAG_BATCH);
+}
+EXPORT_SYMBOL(tag_pages_for_writeback);
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
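
PAGECACHE_TAG_TOWRITE is a third radix-tree tag alongside DIRTY and WRITEBACK, added to include/linux/fs.h by the same series:

        #define PAGECACHE_TAG_DIRTY     0
        #define PAGECACHE_TAG_WRITEBACK 1
        #define PAGECACHE_TAG_TOWRITE   2

The batching leans on radix_tree_range_tag_if_tagged() from the companion radix-tree patch: it tags at most nr_to_tag slots and advances *first_indexp, so the loop above resumes exactly where the previous batch stopped. Its signature, as recalled from that patch:

        unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
                        unsigned long *first_indexp, unsigned long last_index,
                        unsigned long nr_to_tag, unsigned int iftag,
                        unsigned int settag);
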
@@ -815,12 +857,18 @@ void __init page_writeback_init(void)
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * To avoid livelocks (when another process dirties new pages), we first tag
+ * pages which should be written back with the TOWRITE tag and only then start
+ * writing them. For data-integrity sync we have to be careful so that we do
+ * not miss some pages (e.g., because another process has cleared the TOWRITE
+ * tag we set). The rule we follow is that the TOWRITE tag can be cleared only
+ * by the process clearing the DIRTY tag (and submitting the page for IO).
  */
 int write_cache_pages(struct address_space *mapping,
                      struct writeback_control *wbc, writepage_t writepage,
                      void *data)
 {
-       struct backing_dev_info *bdi = mapping->backing_dev_info;
        int ret = 0;
        int done = 0;
        struct pagevec pvec;
@@ -831,12 +879,7 @@ int write_cache_pages(struct address_space *mapping,
        pgoff_t done_index;
        int cycled;
        int range_whole = 0;
-       long nr_to_write = wbc->nr_to_write;
-
-       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-               wbc->encountered_congestion = 1;
-               return 0;
-       }
+       int tag;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -854,13 +897,18 @@ int write_cache_pages(struct address_space *mapping,
                        range_whole = 1;
                cycled = 1; /* ignore range_cyclic tests */
        }
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
 retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        done_index = index;
        while (!done && (index <= end)) {
                int i;
 
-               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY,
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
                        break;
@@ -918,6 +966,7 @@ continue_unlock:
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
+                       trace_wbc_writepage(wbc, mapping->backing_dev_info);
                        ret = (*writepage)(page, wbc, data);
                        if (unlikely(ret)) {
                                if (ret == AOP_WRITEPAGE_ACTIVATE) {
@@ -936,11 +985,10 @@ continue_unlock:
                                        done = 1;
                                        break;
                                }
-                       }
+                       }
 
-                       if (nr_to_write > 0) {
-                               nr_to_write--;
-                               if (nr_to_write == 0 &&
+                       if (wbc->nr_to_write > 0) {
+                               if (--wbc->nr_to_write == 0 &&
                                    wbc->sync_mode == WB_SYNC_NONE) {
                                        /*
                                         * We stop writing back only if we are
@@ -956,12 +1004,6 @@ continue_unlock:
                                        break;
                                }
                        }
-
-                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-                               wbc->encountered_congestion = 1;
-                               done = 1;
-                               break;
-                       }
                }
                pagevec_release(&pvec);
                cond_resched();
@@ -977,11 +1019,8 @@ continue_unlock:
                end = writeback_index - 1;
                goto retry;
        }
-       if (!wbc->no_nrwrite_index_update) {
-               if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
-                       mapping->writeback_index = done_index;
-               wbc->nr_to_write = nr_to_write;
-       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = done_index;
 
        return ret;
 }
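
For context, the most common caller of write_cache_pages() is generic_writepages(), further down in this file and untouched by this diff; it supplies a trivial writepage_t callback that just invokes the filesystem's ->writepage:

        static int __writepage(struct page *page, struct writeback_control *wbc,
                               void *data)
        {
                struct address_space *mapping = data;
                int ret = mapping->a_ops->writepage(page, wbc);
                mapping_set_error(mapping, ret);
                return ret;
        }

        int generic_writepages(struct address_space *mapping,
                               struct writeback_control *wbc)
        {
                /* deal with chardevs and other special files */
                if (!mapping->a_ops->writepage)
                        return 0;

                return write_cache_pages(mapping, wbc, __writepage, mapping);
        }
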
@@ -1326,6 +1365,9 @@ int test_set_page_writeback(struct page *page)
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
+               radix_tree_tag_clear(&mapping->page_tree,
+                                    page_index(page),
+                                    PAGECACHE_TAG_TOWRITE);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);