Fix balance_dirty_pages() calculations with CONFIG_HIGHMEM
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c0d4ce144dec41130e552723c5b3ecd4dca1876a..be0efbde4994f5454198384ce274b62eb827adc4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1,5 +1,5 @@
 /*
- * mm/page-writeback.c.
+ * mm/page-writeback.c
  *
  * Copyright (C) 2002, Linus Torvalds.
  *
@@ -21,6 +21,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/backing-dev.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
 #include <linux/rmap.h>
@@ -132,11 +133,9 @@ get_dirty_limits(long *pbackground, long *pdirty,
 
 #ifdef CONFIG_HIGHMEM
        /*
-        * If this mapping can only allocate from low memory,
-        * we exclude high memory from our count.
+        * We always exclude high memory from our count.
         */
-       if (mapping && !(mapping_gfp_mask(mapping) & __GFP_HIGHMEM))
-               available_memory -= totalhigh_pages;
+       available_memory -= totalhigh_pages;
 #endif
 
 
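For context: the hunk above changes what counts as "dirtyable" memory, and the dirty thresholds are then simple percentages of that figure. A simplified sketch of the arithmetic follows (illustrative only, not the verbatim get_dirty_limits() body; the real function also clamps the ratio against the fraction of unmapped memory. vm_total_pages, totalhigh_pages, dirty_background_ratio and vm_dirty_ratio are the real kernel symbols, the helper name is made up):

/*
 * Illustrative sketch of how the dirty thresholds are derived.
 * Assumes kernel context (mm internals); not the exact code.
 */
static void sketch_dirty_limits(long *pbackground, long *pdirty)
{
	unsigned long available_memory = vm_total_pages;

#ifdef CONFIG_HIGHMEM
	/*
	 * After this change highmem is always excluded, regardless of the
	 * mapping's gfp mask, since buffer_heads and other writeback
	 * metadata have to live in lowmem.
	 */
	available_memory -= totalhigh_pages;
#endif

	/* Thresholds are percentages of the dirtyable (non-highmem) pages. */
	*pbackground = (dirty_background_ratio * available_memory) / 100;
	*pdirty      = (vm_dirty_ratio * available_memory) / 100;
}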
@@ -222,7 +221,7 @@ static void balance_dirty_pages(struct address_space *mapping)
                        if (pages_written >= write_chunk)
                                break;          /* We've done our duty */
                }
-               blk_congestion_wait(WRITE, HZ/10);
+               congestion_wait(WRITE, HZ/10);
        }
 
        if (nr_reclaimable + global_page_state(NR_WRITEBACK)
@@ -314,7 +313,7 @@ void throttle_vm_writeout(void)
                 if (global_page_state(NR_UNSTABLE_NFS) +
                        global_page_state(NR_WRITEBACK) <= dirty_thresh)
                                break;
-                blk_congestion_wait(WRITE, HZ/10);
+                congestion_wait(WRITE, HZ/10);
         }
 }
 
@@ -351,7 +350,7 @@ static void background_writeout(unsigned long _min_pages)
                min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
                        /* Wrote less than expected */
-                       blk_congestion_wait(WRITE, HZ/10);
+                       congestion_wait(WRITE, HZ/10);
                        if (!wbc.encountered_congestion)
                                break;
                }
@@ -422,7 +421,7 @@ static void wb_kupdate(unsigned long arg)
                writeback_inodes(&wbc);
                if (wbc.nr_to_write > 0) {
                        if (wbc.encountered_congestion)
-                               blk_congestion_wait(WRITE, HZ/10);
+                               congestion_wait(WRITE, HZ/10);
                        else
                                break;  /* All the old data is written */
                }
@@ -525,28 +524,25 @@ static struct notifier_block __cpuinitdata ratelimit_nb = {
 };
 
 /*
- * If the machine has a large highmem:lowmem ratio then scale back the default
- * dirty memory thresholds: allowing too much dirty highmem pins an excessive
- * number of buffer_heads.
+ * Called early on to tune the page writeback dirty limits.
+ *
+ * We used to scale dirty pages according to how total memory
+ * related to pages that could be allocated for buffers (by
+ * comparing nr_free_buffer_pages() to vm_total_pages).
+ *
+ * However, that was when we used "dirty_ratio" to scale with
+ * all memory, and we don't do that any more. "dirty_ratio"
+ * is now applied to total non-HIGHMEM memory (by subtracting
+ * totalhigh_pages from vm_total_pages), and as such we can't
+ * get into the old insane situation any more where we had
+ * large amounts of dirty pages compared to a small amount of
+ * non-HIGHMEM memory.
+ *
+ * But we might still want to scale the dirty_ratio by how
+ * much memory the box has..
  */
 void __init page_writeback_init(void)
 {
-       long buffer_pages = nr_free_buffer_pages();
-       long correction;
-
-       correction = (100 * 4 * buffer_pages) / vm_total_pages;
-
-       if (correction < 100) {
-               dirty_background_ratio *= correction;
-               dirty_background_ratio /= 100;
-               vm_dirty_ratio *= correction;
-               vm_dirty_ratio /= 100;
-
-               if (dirty_background_ratio <= 0)
-                       dirty_background_ratio = 1;
-               if (vm_dirty_ratio <= 0)
-                       vm_dirty_ratio = 1;
-       }
        mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
        writeback_set_ratelimit();
        register_cpu_notifier(&ratelimit_nb);
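To make the removed scaling concrete, a worked example (illustrative values, not guaranteed defaults): with nr_free_buffer_pages() at 1/8 of vm_total_pages, as on a large-highmem i386 box, correction = (100 * 4 * buffer_pages) / vm_total_pages = (100 * 4) / 8 = 50, so a vm_dirty_ratio of 40 was halved to 20 and a dirty_background_ratio of 10 to 5. That correction is no longer needed because get_dirty_limits() now applies the ratios to non-highmem memory in the first place.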
@@ -761,23 +757,24 @@ int __set_page_dirty_nobuffers(struct page *page)
                struct address_space *mapping = page_mapping(page);
                struct address_space *mapping2;
 
-               if (mapping) {
-                       write_lock_irq(&mapping->tree_lock);
-                       mapping2 = page_mapping(page);
-                       if (mapping2) { /* Race with truncate? */
-                               BUG_ON(mapping2 != mapping);
-                               if (mapping_cap_account_dirty(mapping))
-                                       __inc_zone_page_state(page,
-                                                               NR_FILE_DIRTY);
-                               radix_tree_tag_set(&mapping->page_tree,
-                                       page_index(page), PAGECACHE_TAG_DIRTY);
-                       }
-                       write_unlock_irq(&mapping->tree_lock);
-                       if (mapping->host) {
-                               /* !PageAnon && !swapper_space */
-                               __mark_inode_dirty(mapping->host,
-                                                       I_DIRTY_PAGES);
+               if (!mapping)
+                       return 1;
+
+               write_lock_irq(&mapping->tree_lock);
+               mapping2 = page_mapping(page);
+               if (mapping2) { /* Race with truncate? */
+                       BUG_ON(mapping2 != mapping);
+                       if (mapping_cap_account_dirty(mapping)) {
+                               __inc_zone_page_state(page, NR_FILE_DIRTY);
+                               task_io_account_write(PAGE_CACHE_SIZE);
                        }
+                       radix_tree_tag_set(&mapping->page_tree,
+                               page_index(page), PAGECACHE_TAG_DIRTY);
+               }
+               write_unlock_irq(&mapping->tree_lock);
+               if (mapping->host) {
+                       /* !PageAnon && !swapper_space */
+                       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
                }
                return 1;
        }
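The new task_io_account_write() call ties page dirtying into the per-task I/O accounting pulled in by the task_io_accounting_ops.h include above. Roughly, and assuming CONFIG_TASK_IO_ACCOUNTING is enabled (a sketch of the idea, not a copy of the header):

/* Sketch: charge the dirtied bytes to the current task's write counter. */
static inline void task_io_account_write(size_t bytes)
{
	current->ioac.write_bytes += bytes;
}

With this in place, a task that dirties a page cache page gets PAGE_CACHE_SIZE added to its accounted write bytes at dirtying time, even though the actual disk I/O happens later from writeback.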
@@ -842,39 +839,6 @@ int set_page_dirty_lock(struct page *page)
 }
 EXPORT_SYMBOL(set_page_dirty_lock);
 
-/*
- * Clear a page's dirty flag, while caring for dirty memory accounting. 
- * Returns true if the page was previously dirty.
- */
-int test_clear_page_dirty(struct page *page)
-{
-       struct address_space *mapping = page_mapping(page);
-       unsigned long flags;
-
-       if (mapping) {
-               write_lock_irqsave(&mapping->tree_lock, flags);
-               if (TestClearPageDirty(page)) {
-                       radix_tree_tag_clear(&mapping->page_tree,
-                                               page_index(page),
-                                               PAGECACHE_TAG_DIRTY);
-                       write_unlock_irqrestore(&mapping->tree_lock, flags);
-                       /*
-                        * We can continue to use `mapping' here because the
-                        * page is locked, which pins the address_space
-                        */
-                       if (mapping_cap_account_dirty(mapping)) {
-                               page_mkclean(page);
-                               dec_zone_page_state(page, NR_FILE_DIRTY);
-                       }
-                       return 1;
-               }
-               write_unlock_irqrestore(&mapping->tree_lock, flags);
-               return 0;
-       }
-       return TestClearPageDirty(page);
-}
-EXPORT_SYMBOL(test_clear_page_dirty);
-
 /*
  * Clear a page's dirty flag, while caring for dirty memory accounting.
  * Returns true if the page was previously dirty.
@@ -893,12 +857,41 @@ int clear_page_dirty_for_io(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
 
-       if (mapping) {
+       if (mapping && mapping_cap_account_dirty(mapping)) {
+               /*
+                * Yes, Virginia, this is indeed insane.
+                *
+                * We use this sequence to make sure that
+                *  (a) we account for dirty stats properly
+                *  (b) we tell the low-level filesystem to
+                *      mark the whole page dirty if it was
+                *      dirty in a pagetable. Only to then
+                *  (c) clean the page again and return 1 to
+                *      cause the writeback.
+                *
+                * This way we avoid all nasty races with the
+                * dirty bit in multiple places and clearing
+                * them concurrently from different threads.
+                *
+                * Note! Normally the "set_page_dirty(page)"
+                * has no effect on the actual dirty bit - since
+                * that will already usually be set. But we
+                * need the side effects, and it can help us
+                * avoid races.
+                *
+                * We basically use the page "master dirty bit"
+                * as a serialization point for all the different
+                * threads doing their things.
+                *
+                * FIXME! We still have a race here: if somebody
+                * adds the page back to the page tables in
+                * between the "page_mkclean()" and the "TestClearPageDirty()",
+                * we might have it mapped without the dirty bit set.
+                */
+               if (page_mkclean(page))
+                       set_page_dirty(page);
                if (TestClearPageDirty(page)) {
-                       if (mapping_cap_account_dirty(mapping)) {
-                               page_mkclean(page);
-                               dec_zone_page_state(page, NR_FILE_DIRTY);
-                       }
+                       dec_zone_page_state(page, NR_FILE_DIRTY);
                        return 1;
                }
                return 0;
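For reference on how the return value is consumed: the writeback path uses clear_page_dirty_for_io() as the gate before starting I/O on a locked page. A simplified, hypothetical sketch of that caller-side sequence (not any particular filesystem's code; in practice it is split between write_cache_pages()-style iterators and ->writepage):

/* Hypothetical, simplified illustration of the writeback sequence. */
static int write_one_dirty_page(struct page *page)
{
	if (clear_page_dirty_for_io(page)) {
		/* The page was dirty: mark it under writeback and start I/O. */
		set_page_writeback(page);
		unlock_page(page);
		/* ... submit the write; end_page_writeback() runs at completion ... */
		return 0;
	}
	/* Somebody else already cleaned the page; nothing to write. */
	unlock_page(page);
	return 0;
}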
@@ -955,15 +948,6 @@ int test_set_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(test_set_page_writeback);
 
-/*
- * Wakes up tasks that are being throttled due to writeback congestion
- */
-void writeback_congestion_end(void)
-{
-       blk_congestion_end(WRITE);
-}
-EXPORT_SYMBOL(writeback_congestion_end);
-
 /*
 * Return true if any of the pages in the mapping are marked with the
  * passed tag.