arch/powerpc/mm/subpage-prot.c

   1 /*
   2  * Copyright 2007-2008 Paul Mackerras, IBM Corp.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License
   6  * as published by the Free Software Foundation; either version
   7  * 2 of the License, or (at your option) any later version.
   8  */
   9
  10 #include <linux/errno.h>
  11 #include <linux/kernel.h>
  12 #include <linux/gfp.h>
  13 #include <linux/types.h>
  14 #include <linux/mm.h>
  15 #include <linux/hugetlb.h>
  16 #include <linux/syscalls.h>
  17
  18 #include <asm/pgtable.h>
  19 #include <linux/uaccess.h>
  20 #include <asm/tlbflush.h>
  21
  22 /*
  23  * Free all pages allocated for subpage protection maps and pointers.
  24  * Also makes sure that the subpage_prot_table structure is
  25  * reinitialized for the next user.
  26  */
  27 void subpage_prot_free(struct mm_struct *mm)
  28 {
  29         struct subpage_prot_table *spt = &mm->context.spt;
  30         unsigned long i, j, addr;
  31         u32 **p;
  32
  33         for (i = 0; i < 4; ++i) {
  34                 if (spt->low_prot[i]) {
  35                         free_page((unsigned long)spt->low_prot[i]);
  36                         spt->low_prot[i] = NULL;
  37                 }
  38         }
  39         addr = 0;
  40         for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
  41                 p = spt->protptrs[i];
  42                 if (!p)
  43                         continue;
  44                 spt->protptrs[i] = NULL;
  45                 for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
  46                      ++j, addr += PAGE_SIZE)
  47                         if (p[j])
  48                                 free_page((unsigned long)p[j]);
  49                 free_page((unsigned long)p);
  50         }
  51         spt->maxaddr = 0;
  52 }
  53
  54 void subpage_prot_init_new_context(struct mm_struct *mm)
  55 {
  56         struct subpage_prot_table *spt = &mm->context.spt;
  57
  58         memset(spt, 0, sizeof(*spt));
  59 }
  60
  61 static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
  62                              int npages)
  63 {
  64         pgd_t *pgd;
  65         pud_t *pud;
  66         pmd_t *pmd;
  67         pte_t *pte;
  68         spinlock_t *ptl;
  69
  70         pgd = pgd_offset(mm, addr);
  71         if (pgd_none(*pgd))
  72                 return;
  73         pud = pud_offset(pgd, addr);
  74         if (pud_none(*pud))
  75                 return;
  76         pmd = pmd_offset(pud, addr);
  77         if (pmd_none(*pmd))
  78                 return;
  79         pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
  80         arch_enter_lazy_mmu_mode();
  81         for (; npages > 0; --npages) {
  82                 pte_update(mm, addr, pte, 0, 0, 0);
  83                 addr += PAGE_SIZE;
  84                 ++pte;
  85         }
  86         arch_leave_lazy_mmu_mode();
  87         pte_unmap_unlock(pte - 1, ptl);
  88 }
  89
  90 /*
  91  * Clear the subpage protection map for an address range, allowing
  92  * all accesses that are allowed by the pte permissions.
  93  */
  94 static void subpage_prot_clear(unsigned long addr, unsigned long len)
  95 {
  96         struct mm_struct *mm = current->mm;
  97         struct subpage_prot_table *spt = &mm->context.spt;
  98         u32 **spm, *spp;
  99         unsigned long i;
 100         size_t nw;
 101         unsigned long next, limit;
 102
 103         down_write(&mm->mmap_sem);
 104         limit = addr + len;
 105         if (limit > spt->maxaddr)
 106                 limit = spt->maxaddr;
 107         for (; addr < limit; addr = next) {
 108                 next = pmd_addr_end(addr, limit);
 109                 if (addr < 0x100000000UL) {
 110                         spm = spt->low_prot;
 111                 } else {
 112                         spm = spt->protptrs[addr >> SBP_L3_SHIFT];
 113                         if (!spm)
 114                                 continue;
 115                 }
 116                 spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
 117                 if (!spp)
 118                         continue;
 119                 spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
 120
 121                 i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 122                 nw = PTRS_PER_PTE - i;
 123                 if (addr + (nw << PAGE_SHIFT) > next)
 124                         nw = (next - addr) >> PAGE_SHIFT;
 125
 126                 memset(spp, 0, nw * sizeof(u32));
 127
 128                 /* now flush any existing HPTEs for the range */
 129                 hpte_flush_range(mm, addr, nw);
 130         }
 131         up_write(&mm->mmap_sem);
 132 }
 133
 134 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 135 static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 136                                   unsigned long end, struct mm_walk *walk)
 137 {
 138         struct vm_area_struct *vma = walk->vma;
 139         split_huge_pmd(vma, pmd, addr);
 140         return 0;
 141 }
 142
 143 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 144                                     unsigned long len)
 145 {
 146         struct vm_area_struct *vma;
 147         struct mm_walk subpage_proto_walk = {
 148                 .mm = mm,
 149                 .pmd_entry = subpage_walk_pmd_entry,
 150         };
 151
 152         /*
 153          * We don't try too hard, we just mark all the vma in that range
 154          * VM_NOHUGEPAGE and split them.
 155          */
 156         vma = find_vma(mm, addr);
 157         /*
 158          * If the range is in unmapped range, just return
 159          */
 160         if (vma && ((addr + len) <= vma->vm_start))
 161                 return;
 162
 163         while (vma) {
 164                 if (vma->vm_start >= (addr + len))
 165                         break;
 166                 vma->vm_flags |= VM_NOHUGEPAGE;
 167                 walk_page_vma(vma, &subpage_proto_walk);
 168                 vma = vma->vm_next;
 169         }
 170 }
 171 #else
 172 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 173                                     unsigned long len)
 174 {
 175         return;
 176 }
 177 #endif
 178
 179 /*
 180  * Copy in a subpage protection map for an address range.
 181  * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
 182  * Each 2-bit field is 0 to allow any access, 1 to prevent writes,
 183  * 2 or 3 to prevent all accesses.
 184  * Note that the normal page protections also apply; the subpage
 185  * protection mechanism is an additional constraint, so putting 0
 186  * in a 2-bit field won't allow writes to a page that is otherwise
 187  * write-protected.
 188  */
 189 SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
 190                 unsigned long, len, u32 __user *, map)
 191 {
 192         struct mm_struct *mm = current->mm;
 193         struct subpage_prot_table *spt = &mm->context.spt;
 194         u32 **spm, *spp;
 195         unsigned long i;
 196         size_t nw;
 197         unsigned long next, limit;
 198         int err;
 199
 200         if (radix_enabled())
 201                 return -ENOENT;
 202
 203         /* Check parameters */
 204         if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
 205             addr >= mm->task_size || len >= mm->task_size ||
 206             addr + len > mm->task_size)
 207                 return -EINVAL;
 208
 209         if (is_hugepage_only_range(mm, addr, len))
 210                 return -EINVAL;
 211
 212         if (!map) {
 213                 /* Clear out the protection map for the address range */
 214                 subpage_prot_clear(addr, len);
 215                 return 0;
 216         }
 217
 218         if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32)))
 219                 return -EFAULT;
 220
 221         down_write(&mm->mmap_sem);
 222         subpage_mark_vma_nohuge(mm, addr, len);
 223         for (limit = addr + len; addr < limit; addr = next) {
 224                 next = pmd_addr_end(addr, limit);
 225                 err = -ENOMEM;
 226                 if (addr < 0x100000000UL) {
 227                         spm = spt->low_prot;
 228                 } else {
 229                         spm = spt->protptrs[addr >> SBP_L3_SHIFT];
 230                         if (!spm) {
 231                                 spm = (u32 **)get_zeroed_page(GFP_KERNEL);
 232                                 if (!spm)
 233                                         goto out;
 234                                 spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
 235                         }
 236                 }
 237                 spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
 238                 spp = *spm;
 239                 if (!spp) {
 240                         spp = (u32 *)get_zeroed_page(GFP_KERNEL);
 241                         if (!spp)
 242                                 goto out;
 243                         *spm = spp;
 244                 }
 245                 spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
 246
 247                 local_irq_disable();
 248                 demote_segment_4k(mm, addr);
 249                 local_irq_enable();
 250
 251                 i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 252                 nw = PTRS_PER_PTE - i;
 253                 if (addr + (nw << PAGE_SHIFT) > next)
 254                         nw = (next - addr) >> PAGE_SHIFT;
 255
 256                 up_write(&mm->mmap_sem);
 257                 if (__copy_from_user(spp, map, nw * sizeof(u32)))
 258                         return -EFAULT;
 259                 map += nw;
 260                 down_write(&mm->mmap_sem);
 261
 262                 /* now flush any existing HPTEs for the range */
 263                 hpte_flush_range(mm, addr, nw);
 264         }
 265         if (limit > spt->maxaddr)
 266                 spt->maxaddr = limit;
 267         err = 0;
 268  out:
 269         up_write(&mm->mmap_sem);
 270         return err;
 271 }