Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6
[sfrench/cifs-2.6.git] / arch / powerpc / mm / numa.c
index a8397bbad3d4d54a5cd50ae9ee201118c767d1d4..5ac08b8ab654f50391ebc8087c57a2b84c48aa52 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/notifier.h>
 #include <linux/lmb.h>
 #include <linux/of.h>
+#include <linux/pfn.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/system.h>
@@ -822,42 +823,50 @@ static void __init dump_numa_memory_topology(void)
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
- * Returns the physical address of the memory.
+ * Returns the virtual address of the zeroed memory.
  */
-static void __init *careful_allocation(int nid, unsigned long size,
+static void __init *careful_zallocation(int nid, unsigned long size,
                                       unsigned long align,
                                       unsigned long end_pfn)
 {
+       void *ret;
        int new_nid;
-       unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+       unsigned long ret_paddr;
+
+       ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
        /* retry over all memory */
-       if (!ret)
-               ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+       if (!ret_paddr)
+               ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
-       if (!ret)
-               panic("numa.c: cannot allocate %lu bytes on node %d",
+       if (!ret_paddr)
+               panic("numa.c: cannot allocate %lu bytes for node %d",
                      size, nid);
 
+       ret = __va(ret_paddr);
+
        /*
-        * If the memory came from a previously allocated node, we must
-        * retry with the bootmem allocator.
+        * We initialize the nodes in numeric order: 0, 1, 2...
+        * and hand over control from the LMB allocator to the
+        * bootmem allocator.  If this function is called for
+        * node 5, then we know that all nodes <5 are using the
+        * bootmem allocator instead of the LMB allocator.
+        *
+        * So, check the nid from which this allocation came
+        * and double check to see if we need to use bootmem
+        * instead of the LMB.  We don't free the LMB memory
+        * since it would be useless.
         */
-       new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+       new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
        if (new_nid < nid) {
-               ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
+               ret = __alloc_bootmem_node(NODE_DATA(new_nid),
                                size, align, 0);
 
-               if (!ret)
-                       panic("numa.c: cannot allocate %lu bytes on node %d",
-                             size, new_nid);
-
-               ret = __pa(ret);
-
-               dbg("alloc_bootmem %lx %lx\n", ret, size);
+               dbg("alloc_bootmem %p %lx\n", ret, size);
        }
 
-       return (void *)ret;
+       memset(ret, 0, size);
+       return ret;
 }
 
 static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -874,7 +883,7 @@ static void mark_reserved_regions_for_nid(int nid)
                unsigned long physbase = lmb.reserved.region[i].base;
                unsigned long size = lmb.reserved.region[i].size;
                unsigned long start_pfn = physbase >> PAGE_SHIFT;
-               unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
+               unsigned long end_pfn = PFN_UP(physbase + size);
                struct node_active_region node_ar;
                unsigned long node_end_pfn = node->node_start_pfn +
                                             node->node_spanned_pages;
@@ -900,11 +909,18 @@ static void mark_reserved_regions_for_nid(int nid)
                         */
                        if (end_pfn > node_ar.end_pfn)
                                reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-                                       - (start_pfn << PAGE_SHIFT);
-                       dbg("reserve_bootmem %lx %lx nid=%d\n", physbase,
-                               reserve_size, node_ar.nid);
-                       reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase,
-                                               reserve_size, BOOTMEM_DEFAULT);
+                                       - physbase;
+                       /*
+                        * Only worry about *this* node, others may not
+                        * yet have valid NODE_DATA().
+                        */
+                       if (node_ar.nid == nid) {
+                               dbg("reserve_bootmem %lx %lx nid=%d\n",
+                                       physbase, reserve_size, node_ar.nid);
+                               reserve_bootmem_node(NODE_DATA(node_ar.nid),
+                                               physbase, reserve_size,
+                                               BOOTMEM_DEFAULT);
+                       }
                        /*
                         * if reserved region is contained in the active region
                         * then done.
@@ -929,7 +945,6 @@ static void mark_reserved_regions_for_nid(int nid)
 void __init do_init_bootmem(void)
 {
        int nid;
-       unsigned int i;
 
        min_low_pfn = 0;
        max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
@@ -946,7 +961,7 @@ void __init do_init_bootmem(void)
 
        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;
-               unsigned long bootmem_paddr;
+               void *bootmem_vaddr;
                unsigned long bootmap_pages;
 
                get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -958,11 +973,9 @@ void __init do_init_bootmem(void)
                 * previous nodes' bootmem to be initialized and have
                 * all reserved areas marked.
                 */
-               NODE_DATA(nid) = careful_allocation(nid,
+               NODE_DATA(nid) = careful_zallocation(nid,
                                        sizeof(struct pglist_data),
                                        SMP_CACHE_BYTES, end_pfn);
-               NODE_DATA(nid) = __va(NODE_DATA(nid));
-               memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
                dbg("node %d\n", nid);
                dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -978,20 +991,20 @@ void __init do_init_bootmem(void)
                dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
                bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-               bootmem_paddr = (unsigned long)careful_allocation(nid,
+               bootmem_vaddr = careful_zallocation(nid,
                                        bootmap_pages << PAGE_SHIFT,
                                        PAGE_SIZE, end_pfn);
-               memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
 
-               dbg("bootmap_paddr = %lx\n", bootmem_paddr);
+               dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
 
-               init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+               init_bootmem_node(NODE_DATA(nid),
+                                 __pa(bootmem_vaddr) >> PAGE_SHIFT,
                                  start_pfn, end_pfn);
 
                free_bootmem_with_active_regions(nid, end_pfn);
                /*
                 * Be very careful about moving this around.  Future
-                * calls to careful_allocation() depend on this getting
+                * calls to careful_zallocation() depend on this getting
                 * done correctly.
                 */
                mark_reserved_regions_for_nid(nid);