/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

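/*
 * One preregistered chunk of userspace memory. It is either backed by
 * pinned system pages (hpages/hpas) or describes device memory
 * (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA), in which case nothing is pinned.
 */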
struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * in mm_iommu_get we temporarily use this to store
	 * struct page address.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

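/*
 * Account pinned pages against the RLIMIT_MEMLOCK limit of the owning mm,
 * in either direction; the increment fails with -ENOMEM when it would
 * exceed the limit and the task lacks CAP_IPC_LOCK.
 */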
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

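/*
 * Pin @entries pages starting at @ua and remember their physical addresses
 * so that real-mode code can translate userspace addresses without touching
 * struct page. When @dev_hpa is a valid device memory base address, nothing
 * is pinned and only the address range is recorded.
 */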
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * For a starting point for a maximum page size calculation
	 * we use @ua and @entries natural alignment to allow IOMMU pages
	 * smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	down_read(&mm->mmap_sem);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE, mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	up_read(&mm->mmap_sem);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

	pageshift = PAGE_SHIFT;
	for (i = 0; i < entries; ++i) {
		struct page *page = mem->hpages[i];

		/*
		 * Allow to use larger than 64k IOMMU pages. Only do that
		 * if we are backed by hugetlb.
		 */
		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
			struct page *head = compound_head(page);

			pageshift = compound_order(head) + PAGE_SHIFT;
		}
		mem->pageshift = min(mem->pageshift, pageshift);
		/*
		 * We don't need struct page reference any more, switch
		 * to physical address.
		 */
		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* free the reference taken */
	for (i = 0; i < pinned; i++)
		put_page(mem->hpages[i]);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	mm_iommu_adjust_locked_vm(mm, locked_entries, false);

	return ret;
}

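/* Preregister a range of ordinary userspace memory (pins the pages) */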
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

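/*
 * Drop the page references taken at preregistration time, transferring the
 * MM_IOMMU_TABLE_GROUP_PAGE_DIRTY flag to PageDirty on the way out.
 */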
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);
		put_page(page);
		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

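/*
 * Drop one reference to a preregistered region; the last reference releases
 * the region unless it is still mapped by an IOMMU table group.
 */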
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

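/* Find a preregistered region fully covering [ua, ua + size) */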
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

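/*
 * Real-mode variant of mm_iommu_lookup() which must walk the list without
 * the regular RCU list helpers.
 */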
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

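/*
 * Translate a userspace address within a preregistered region into a host
 * physical address, refusing requests for an IOMMU page size larger than
 * the region can back.
 */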
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}

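/*
 * Real-mode helper that flags a preregistered page as dirty; the flag is
 * turned into PageDirty when the region is unpinned.
 */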
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

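/*
 * Check whether @hpa falls into preregistered device memory; if so, report
 * how much of it (up to 1 << pageshift) is covered starting at @hpa.
 */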
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

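/*
 * Mapping counter helpers: a region with a non-zero @mapped count cannot be
 * released; mm_iommu_put() drops @mapped from 1 to 0 to disable new mappings.
 */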
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}