/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
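
/*
 * mem_list_mutex serializes additions, removals and reference count
 * changes on the per-mm list of preregistered regions; readers walk
 * the list with RCU/lockless primitives instead.
 */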
static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)
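
/*
 * One preregistered region of userspace memory: either pinned pages
 * whose host physical addresses are cached in hpas[], or a device
 * memory range starting at dev_hpa (hpas stays NULL in that case).
 */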
struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;	/* number of active references */
	atomic64_t mapped;	/* mappings in flight; 0 disables new ones */
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas[] */
	u64 *hpas;		/* vmalloc'ed */
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};
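
/*
 * Charges or uncharges @npages against RLIMIT_MEMLOCK of @mm. An
 * increment fails with -ENOMEM if it would exceed the limit and the
 * task does not have CAP_IPC_LOCK.
 */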
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return ret;

	down_write(&mm->mmap_sem);

	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

/*
 * Taken from alloc_migrate_target with changes to remove CMA allocations
 */
struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
{
	gfp_t gfp_mask = GFP_USER;
	struct page *new_page;

	if (PageCompound(page))
		return NULL;

	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	/*
	 * We don't want the allocation to force an OOM if possible
	 */
	new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
	return new_page;
}
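
/*
 * Tries to migrate one base page out of the CMA zone before long-term
 * pinning. Compound pages are skipped and failure is not fatal: the
 * caller simply keeps the CMA page pinned.
 */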
static int mm_iommu_move_page_from_cma(struct page *page)
{
	int ret = 0;
	LIST_HEAD(cma_migrate_pages);

	/* Ignore huge pages for now */
	if (PageCompound(page))
		return -EBUSY;

	lru_add_drain();
	ret = isolate_lru_page(page);
	if (ret)
		return ret;

	list_add(&page->lru, &cma_migrate_pages);
	put_page(page); /* Drop the gup reference */

	ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
				NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
	/* Put back whatever migrate_pages() left on the list */
	if (!list_empty(&cma_migrate_pages))
		putback_movable_pages(&cma_migrate_pages);

	return 0;
}
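
/*
 * Common preregistration path: rejects overlaps with existing regions,
 * charges locked memory, then either records a device memory range
 * (when dev_hpa is given) or pins each page and caches its host
 * physical address in hpas[].
 */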
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem;
	long i, j, ret = 0, locked_entries = 0;
	unsigned int pageshift;
	unsigned long flags;
	unsigned long cur_ua;
	struct page *page = NULL;

	mutex_lock(&mem_list_mutex);

	/* Reject requests overlapping an already preregistered region */
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem->ua +
				       (mem->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			goto unlock_exit;
		}
	}

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
		if (ret)
			goto unlock_exit;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * For a starting point for a maximum page size calculation
	 * we use @ua and @entries natural alignment to allow IOMMU pages
	 * smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	for (i = 0; i < entries; ++i) {
		cur_ua = ua + (i << PAGE_SHIFT);
		if (1 != get_user_pages_fast(cur_ua,
					1/* pages */, 1/* iswrite */, &page)) {
			ret = -EFAULT;
			/* Undo the pinning done so far */
			for (j = 0; j < i; ++j)
				put_page(pfn_to_page(mem->hpas[j] >>
						PAGE_SHIFT));
			vfree(mem->hpas);
			kfree(mem);
			goto unlock_exit;
		}
		/*
		 * If we get a page from the CMA zone, since we are going to
		 * be pinning these entries, we might as well move them out
		 * of the CMA zone if possible. NOTE: faulting in + migration
		 * can be expensive. Batching can be considered later
		 */
		if (is_migrate_cma_page(page)) {
			if (mm_iommu_move_page_from_cma(page))
				goto populate;
			if (1 != get_user_pages_fast(cur_ua,
						1/* pages */, 1/* iswrite */,
						&page)) {
				ret = -EFAULT;
				for (j = 0; j < i; ++j)
					put_page(pfn_to_page(mem->hpas[j] >>
								PAGE_SHIFT));
				vfree(mem->hpas);
				kfree(mem);
				goto unlock_exit;
			}
		}
populate:
		pageshift = PAGE_SHIFT;
		if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
			pte_t *pte;
			struct page *head = compound_head(page);
			unsigned int compshift = compound_order(head);
			unsigned int pteshift;

			local_irq_save(flags); /* disables as well */
			pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);

			/* Double check it is still the same pinned page */
			if (pte && pte_page(*pte) == head &&
			    pteshift == compshift + PAGE_SHIFT)
				pageshift = max_t(unsigned int, pteshift,
						PAGE_SHIFT);
			local_irq_restore(flags);
		}
		mem->pageshift = min(mem->pageshift, pageshift);
		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;
	*pmem = mem;

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

unlock_exit:
	if (locked_entries && ret)
		mm_iommu_adjust_locked_vm(mm, locked_entries, false);

	mutex_unlock(&mem_list_mutex);

	return ret;
}
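
/*
 * mm_iommu_new() preregisters and pins normal userspace memory;
 * mm_iommu_newdev() registers a device memory range which requires
 * no pinning.
 */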
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
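
/*
 * Drops the page references taken at preregistration time, converting
 * the software dirty bit in hpas[] into PageDirty on the way out.
 */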
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	/* Device memory regions have nothing pinned */
	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		put_page(page);
		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}
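
/*
 * Drops one reference to the region; the final put tears the region
 * down, provided no mappings remain, and uncharges locked memory.
 */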
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long entries, dev_hpa;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* @mapped became 0 so now mappings are disabled, release the region */
	entries = mem->entries;
	dev_hpa = mem->dev_hpa;
	mm_iommu_release(mem);

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		mm_iommu_adjust_locked_vm(mm, entries, false);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
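
/*
 * Finds a preregistered region fully covering [ua, ua + size).
 */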
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
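
/*
 * Real-mode variant of mm_iommu_lookup(): walks the list with the
 * lockless primitive as the usual RCU accessors are not safe to use
 * from real mode.
 */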
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
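
/*
 * Finds an exact [ua, entries) match and takes a reference on it.
 */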
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);
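
/*
 * Translates a userspace address inside a preregistered region into a
 * host physical address; fails if @pageshift is larger than the biggest
 * IOMMU page size the region can back.
 */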
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		/* Device memory: compute the address directly */
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
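
/*
 * Real-mode variant: hpas[] is vmalloc'ed, so the entry is reached via
 * its physical address rather than its virtual address.
 */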
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
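
/*
 * Real-mode helper to record that a page of the region was written to;
 * mm_iommu_unpin() later turns this flag into PageDirty.
 */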
void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
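
/*
 * Reports whether @hpa belongs to a registered device memory range and,
 * if so, how much of that range starting at @hpa is preregistered.
 */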
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
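
/*
 * The mapping counter: incrementing fails once the last mm_iommu_put()
 * has dropped it to zero, and decrementing never goes below one, the
 * reference released by that final put.
 */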
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
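
/*
 * Initializes the per-mm (empty) list of preregistered regions.
 */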
void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}