hugetlb: fix pool shrinking while in restricted cpuset
authorNishanth Aravamudan <nacc@us.ibm.com>
Tue, 4 Mar 2008 22:29:42 +0000 (14:29 -0800)
committerLinus Torvalds <torvalds@woody.linux-foundation.org>
Wed, 5 Mar 2008 00:35:18 +0000 (16:35 -0800)
Adam Litke noticed that currently we grow the hugepage pool independent of any
cpuset the running process may be in, but when shrinking the pool, the cpuset
is checked.  This leads to inconsistency when shrinking the pool in a
restricted cpuset -- an administrator may have been able to grow the pool on a
node restricted by a containing cpuset, but they cannot shrink it there.

There are two options: either prevent growing of the pool outside of the
cpuset or allow shrinking outside of the cpuset.  >From previous discussions
on linux-mm, /proc/sys/vm/nr_hugepages is an administrative interface that
should not be restricted by cpusets.  So allow shrinking the pool by removing
pages from nodes outside of current's cpuset.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: William Irwin <wli@holomorphy.com>
Cc: Lee Schermerhorn <Lee.Schermerhonr@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/hugetlb.c

index 20e04c64468dd9b42b32d9af70a4386bd8336786..dcacc811e70ede9cda49d4d2cf56a6d5d5404a56 100644 (file)
@@ -71,7 +71,25 @@ static void enqueue_huge_page(struct page *page)
        free_huge_pages_node[nid]++;
 }
 
        free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+static struct page *dequeue_huge_page(void)
+{
+       int nid;
+       struct page *page = NULL;
+
+       for (nid = 0; nid < MAX_NUMNODES; ++nid) {
+               if (!list_empty(&hugepage_freelists[nid])) {
+                       page = list_entry(hugepage_freelists[nid].next,
+                                         struct page, lru);
+                       list_del(&page->lru);
+                       free_huge_pages--;
+                       free_huge_pages_node[nid]--;
+                       break;
+               }
+       }
+       return page;
+}
+
+static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
                                unsigned long address)
 {
        int nid;
                                unsigned long address)
 {
        int nid;
@@ -410,7 +428,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
        struct page *page;
 
        spin_lock(&hugetlb_lock);
        struct page *page;
 
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page(vma, addr);
+       page = dequeue_huge_page_vma(vma, addr);
        spin_unlock(&hugetlb_lock);
        return page ? page : ERR_PTR(-VM_FAULT_OOM);
 }
        spin_unlock(&hugetlb_lock);
        return page ? page : ERR_PTR(-VM_FAULT_OOM);
 }
@@ -425,7 +443,7 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
 
        spin_lock(&hugetlb_lock);
        if (free_huge_pages > resv_huge_pages)
 
        spin_lock(&hugetlb_lock);
        if (free_huge_pages > resv_huge_pages)
-               page = dequeue_huge_page(vma, addr);
+               page = dequeue_huge_page_vma(vma, addr);
        spin_unlock(&hugetlb_lock);
        if (!page) {
                page = alloc_buddy_huge_page(vma, addr);
        spin_unlock(&hugetlb_lock);
        if (!page) {
                page = alloc_buddy_huge_page(vma, addr);
@@ -578,7 +596,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
        min_count = max(count, min_count);
        try_to_free_low(min_count);
        while (min_count < persistent_huge_pages) {
        min_count = max(count, min_count);
        try_to_free_low(min_count);
        while (min_count < persistent_huge_pages) {
-               struct page *page = dequeue_huge_page(NULL, 0);
+               struct page *page = dequeue_huge_page();
                if (!page)
                        break;
                update_and_free_page(page);
                if (!page)
                        break;
                update_and_free_page(page);