Merge git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 426c269e552eec77bbee18cb086f900716cffb4a..7370f9f33e2943d038025a6d05ba6b4e02755beb 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -133,58 +133,63 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
        return __pte(old);
 }
 
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
-       if (len & ~HPAGE_MASK)
-               return -EINVAL;
-       if (addr & ~HPAGE_MASK)
-               return -EINVAL;
-       if (! (within_hugepage_low_range(addr, len)
-              || within_hugepage_high_range(addr, len)) )
-               return -EINVAL;
-       return 0;
-}
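+/* Argument block for the IPI flush handlers below: the mm whose
+ * context has changed, and the mask of newly-opened hugepage areas. */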
+struct slb_flush_info {
+       struct mm_struct *mm;
+       u16 newareas;
+};
 
 static void flush_low_segments(void *parm)
 {
-       u16 areas = (unsigned long) parm;
+       struct slb_flush_info *fi = parm;
        unsigned long i;
 
-       asm volatile("isync" : : : "memory");
+       BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
+
+       /* Only need to do anything if this CPU is working in the same
+        * mm as the one which has changed */
+       if (current->active_mm != fi->mm)
+               return;
 
-       BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
 
+       /* update the paca copy of the context struct */
+       get_paca()->context = current->active_mm->context;
+
+       asm volatile("isync" : : : "memory");
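+       /* Toss any SLB entries for segments in the newly-opened areas.
+        * SLBIE_C supplies the class bit the user entries carry. */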
        for (i = 0; i < NUM_LOW_AREAS; i++) {
-               if (! (areas & (1U << i)))
+               if (! (fi->newareas & (1U << i)))
                        continue;
                asm volatile("slbie %0"
                             : : "r" ((i << SID_SHIFT) | SLBIE_C));
        }
-
        asm volatile("isync" : : : "memory");
 }
 
 static void flush_high_segments(void *parm)
 {
-       u16 areas = (unsigned long) parm;
+       struct slb_flush_info *fi = parm;
        unsigned long i, j;
 
-       asm volatile("isync" : : : "memory");
 
-       BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+       BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
+
+       /* Only need to do anything if this CPU is working in the same
+        * mm as the one which has changed */
+       if (current->active_mm != fi->mm)
+               return;
 
+       /* update the paca copy of the context struct */
+       get_paca()->context = current->active_mm->context;
+
+       asm volatile("isync" : : : "memory");
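+       /* A high area spans multiple 256M segments (HTLB_AREA_SHIFT >
+        * SID_SHIFT), so slbie every segment of each opened area. */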
        for (i = 0; i < NUM_HIGH_AREAS; i++) {
-               if (! (areas & (1U << i)))
+               if (! (fi->newareas & (1U << i)))
                        continue;
                for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
                        asm volatile("slbie %0"
                                     :: "r" (((i << HTLB_AREA_SHIFT)
-                                            + (j << SID_SHIFT)) | SLBIE_C));
+                                             + (j << SID_SHIFT)) | SLBIE_C));
        }
-
        asm volatile("isync" : : : "memory");
 }
 
@@ -229,6 +234,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
 static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
 {
        unsigned long i;
+       struct slb_flush_info fi;
 
        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
        BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
@@ -244,19 +250,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
 
        mm->context.low_htlb_areas |= newareas;
 
-       /* update the paca copy of the context struct */
-       get_paca()->context = mm->context;
-
        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
-       on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
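+       /* fi can live on the stack: on_each_cpu() is called with
+        * wait == 1, so it does not return until every handler has
+        * finished with fi. */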
+       fi.mm = mm;
+       fi.newareas = newareas;
+       on_each_cpu(flush_low_segments, &fi, 0, 1);
 
        return 0;
 }
 
 static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
 {
+       struct slb_flush_info fi;
        unsigned long i;
 
        BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
@@ -280,22 +287,25 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
        /* the context change must make it to memory before the flush,
         * so that further SLB misses do the right thing. */
        mb();
-       on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
+
+       fi.mm = mm;
+       fi.newareas = newareas;
+       on_each_cpu(flush_high_segments, &fi, 0, 1);
 
        return 0;
 }
 
 int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
-       int err;
+       int err = 0;
 
        if ( (addr+len) < addr )
                return -EINVAL;
 
-       if ((addr + len) < 0x100000000UL)
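+       /* A range may straddle the 4GB boundary, in which case both a
+        * low and a high area must be opened; hence two independent
+        * checks rather than if/else (the second open is skipped if
+        * the first one already failed). */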
+       if (addr < 0x100000000UL)
                err = open_low_hpage_areas(current->mm,
                                          LOW_ESID_MASK(addr, len));
-       else
+       if (err == 0 && (addr + len) > 0x100000000UL)
                err = open_high_hpage_areas(current->mm,
                                            HTLB_AREA_MASK(addr, len));
        if (err) {
@@ -524,6 +534,17 @@ fail:
        return addr;
 }
 
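+/* Check whether the hinted address range [addr, addr+len) collides
+ * with an existing mapping; returns 0 if the hint is usable. */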
+static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
+{
+       struct vm_area_struct *vma;
+
+       vma = find_vma(current->mm, addr);
+       if (!vma || ((addr + len) <= vma->vm_start))
+               return 0;
+
+       return -ENOMEM;
+}
+
 static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
 {
        unsigned long addr = 0;
@@ -593,15 +614,28 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -EINVAL;
 
+       /* Paranoia, caller should have dealt with this */
+       BUG_ON((addr + len) < addr);
+
        if (test_thread_flag(TIF_32BIT)) {
+               /* Paranoia, caller should have dealt with this */
+               BUG_ON((addr + len) > 0x100000000UL);
+
                curareas = current->mm->context.low_htlb_areas;
 
-               /* First see if we can do the mapping in the existing
-                * low areas */
+               /* First see if we can use the hint address */
+               if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
+                       areamask = LOW_ESID_MASK(addr, len);
+                       if (open_low_hpage_areas(current->mm, areamask) == 0)
+                               return addr;
+               }
+
+               /* Next see if we can map in the existing low areas */
                addr = htlb_get_low_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;
 
+               /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
                     ! lastshift; areamask >>=1) {
@@ -616,12 +650,22 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        } else {
                curareas = current->mm->context.high_htlb_areas;
 
-               /* First see if we can do the mapping in the existing
-                * high areas */
+               /* First see if we can use the hint address; we
+                * discourage 64-bit processes from making hugepage
+                * mappings below 4GB (they must use MAP_FIXED) */
+               if ((addr >= 0x100000000UL)
+                   && (htlb_check_hinted_area(addr, len) == 0)) {
+                       areamask = HTLB_AREA_MASK(addr, len);
+                       if (open_high_hpage_areas(current->mm, areamask) == 0)
+                               return addr;
+               }
+
+               /* Next see if we can map in the existing high areas */
                addr = htlb_get_high_area(len, curareas);
                if (addr != -ENOMEM)
                        return addr;
 
+               /* Finally go looking for areas to open */
                lastshift = 0;
                for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
                     ! lastshift; areamask >>=1) {
@@ -639,8 +683,36 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        return -ENOMEM;
 }
 
+/*
+ * Lazy icache flush for hugepages: called from hash_huge_page()
+ * below on the hash fault path.
+ */
+static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
+                                                 pte_t pte, int trap)
+{
+       struct page *page;
+       int i;
+
+       if (!pfn_valid(pte_pfn(pte)))
+               return rflags;
+
+       page = pte_page(pte);
+
+       /* page not yet flushed for the icache (PG_arch_1 clear) */
+       if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
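+               /* trap 0x400 is an instruction storage (exec) fault:
+                * flush every sub-page of the huge page, then mark it
+                * clean with PG_arch_1. */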
+               if (trap == 0x400) {
+                       for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+                               __flush_dcache_icache(page_address(page+i));
+                       set_bit(PG_arch_1, &page->flags);
+               } else {
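+                       /* Data fault: just map no-execute; a later
+                        * exec fault will come back through here and
+                        * do the flush. */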
+                       rflags |= HPTE_R_N;
+               }
+       }
+       return rflags;
+}
+
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
-                  unsigned long ea, unsigned long vsid, int local)
+                  unsigned long ea, unsigned long vsid, int local,
+                  unsigned long trap)
 {
        pte_t *ptep;
        unsigned long old_pte, new_pte;
@@ -691,6 +763,11 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
        rflags = 0x2 | (!(new_pte & _PAGE_RW));
        /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
        rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+       if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+               /* No CPU supports hugepages yet lacks no-execute, so
+                * we don't need to worry about that case */
+               rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
+                                                      trap);
 
        /* Check if pte already has an hpte (case 2) */
        if (unlikely(old_pte & _PAGE_HASHPTE)) {
@@ -703,7 +780,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-               if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
+               if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+                                        local) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }
 
@@ -754,9 +832,7 @@ repeat:
        }
 
        /*
-        * No need to use ldarx/stdcx here because all who
-        * might be updating the pte will hold the
-        * page_table_lock
+        * No need to use ldarx/stdcx here: the pte is guarded by
+        * the _PAGE_BUSY bit we clear below
         */
        *ptep = __pte(new_pte & ~_PAGE_BUSY);