[PATCH] overcommit: add calculate_totalreserve_pages()
authorHideo AOKI <haoki@redhat.com>
Tue, 11 Apr 2006 05:52:59 +0000 (22:52 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Tue, 11 Apr 2006 13:18:32 +0000 (06:18 -0700)
These patches are an enhancement of OVERCOMMIT_GUESS algorithm in
__vm_enough_memory().

- why the kernel needed patching

  When the kernel can't allocate anonymous pages in practice, currnet
  OVERCOMMIT_GUESS could return success. This implementation might be
  the cause of oom kill in memory pressure situation.

  If the Linux runs with page reservation features like
  /proc/sys/vm/lowmem_reserve_ratio and without swap region, I think
  the oom kill occurs easily.

- the overall design approach in the patch

  When the OVERCOMMET_GUESS algorithm calculates number of free pages,
  the reserved free pages are regarded as non-free pages.

  This change helps to avoid the pitfall that the number of free pages
  become less than the number which the kernel tries to keep free.

- testing results

  I tested the patches using my test kernel module.

  If the patches aren't applied to the kernel, __vm_enough_memory()
  returns success in the situation but autual page allocation is
  failed.

  On the other hand, if the patches are applied to the kernel, memory
  allocation failure is avoided since __vm_enough_memory() returns
  failure in the situation.

  I checked that on i386 SMP 16GB memory machine. I haven't tested on
  nommu environment currently.

This patch adds totalreserve_pages for __vm_enough_memory().

Calculate_totalreserve_pages() checks maximum lowmem_reserve pages and
pages_high in each zone. Finally, the function stores the sum of each
zone to totalreserve_pages.

The totalreserve_pages is calculated when the VM is initilized.
And the variable is updated when /proc/sys/vm/lowmem_reserve_raito
or /proc/sys/vm/min_free_kbytes are changed.

Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
include/linux/swap.h
mm/page_alloc.c

index 54eac8a39a4c11d8c760064a8f2fa987ce45d13a..5b1fdf1cff4f057a67eb1c81e487acad031a89b2 100644 (file)
@@ -155,6 +155,7 @@ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
+extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
 extern unsigned int nr_free_pages(void);
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
index b8165e037dee9e4110cdb30e60822a9be2de9491..97d6827c7d669529fb7e5607cad4ba838d1847bf 100644 (file)
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -2476,6 +2477,38 @@ void __init page_alloc_init(void)
        hotcpu_notifier(page_alloc_cpu_notify, 0);
 }
 
+/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *     or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+       struct pglist_data *pgdat;
+       unsigned long reserve_pages = 0;
+       int i, j;
+
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       struct zone *zone = pgdat->node_zones + i;
+                       unsigned long max = 0;
+
+                       /* Find valid and maximum lowmem_reserve in the zone */
+                       for (j = i; j < MAX_NR_ZONES; j++) {
+                               if (zone->lowmem_reserve[j] > max)
+                                       max = zone->lowmem_reserve[j];
+                       }
+
+                       /* we treat pages_high as reserved pages. */
+                       max += zone->pages_high;
+
+                       if (max > zone->present_pages)
+                               max = zone->present_pages;
+                       reserve_pages += max;
+               }
+       }
+       totalreserve_pages = reserve_pages;
+}
+
 /*
  * setup_per_zone_lowmem_reserve - called whenever
  *     sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
@@ -2507,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
                        }
                }
        }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
 }
 
 /*
@@ -2561,6 +2597,9 @@ void setup_per_zone_pages_min(void)
                zone->pages_high  = zone->pages_min + tmp / 2;
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
 }
 
 /*