arch/arm/mm/dma-mapping.c

   1 /*
   2  *  linux/arch/arm/mm/dma-mapping.c
   3  *
   4  *  Copyright (C) 2000-2004 Russell King
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  *
  10  *  DMA uncached mapping support.
  11  */
  12 #include <linux/module.h>
  13 #include <linux/mm.h>
  14 #include <linux/genalloc.h>
  15 #include <linux/gfp.h>
  16 #include <linux/errno.h>
  17 #include <linux/list.h>
  18 #include <linux/init.h>
  19 #include <linux/device.h>
  20 #include <linux/dma-mapping.h>
  21 #include <linux/dma-contiguous.h>
  22 #include <linux/highmem.h>
  23 #include <linux/memblock.h>
  24 #include <linux/slab.h>
  25 #include <linux/iommu.h>
  26 #include <linux/io.h>
  27 #include <linux/vmalloc.h>
  28 #include <linux/sizes.h>
  29 #include <linux/cma.h>
  30
  31 #include <asm/memory.h>
  32 #include <asm/highmem.h>
  33 #include <asm/cacheflush.h>
  34 #include <asm/tlbflush.h>
  35 #include <asm/mach/arch.h>
  36 #include <asm/dma-iommu.h>
  37 #include <asm/mach/map.h>
  38 #include <asm/system_info.h>
  39 #include <asm/dma-contiguous.h>
  40
  41 #include "dma.h"
  42 #include "mm.h"
  43
  44 struct arm_dma_alloc_args {
  45         struct device *dev;
  46         size_t size;
  47         gfp_t gfp;
  48         pgprot_t prot;
  49         const void *caller;
  50         bool want_vaddr;
  51         int coherent_flag;
  52 };
  53
  54 struct arm_dma_free_args {
  55         struct device *dev;
  56         size_t size;
  57         void *cpu_addr;
  58         struct page *page;
  59         bool want_vaddr;
  60 };
  61
  62 #define NORMAL      0
  63 #define COHERENT    1
  64
  65 struct arm_dma_allocator {
  66         void *(*alloc)(struct arm_dma_alloc_args *args,
  67                        struct page **ret_page);
  68         void (*free)(struct arm_dma_free_args *args);
  69 };
  70
  71 struct arm_dma_buffer {
  72         struct list_head list;
  73         void *virt;
  74         struct arm_dma_allocator *allocator;
  75 };
  76
  77 static LIST_HEAD(arm_dma_bufs);
  78 static DEFINE_SPINLOCK(arm_dma_bufs_lock);
  79
  80 static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
  81 {
  82         struct arm_dma_buffer *buf, *found = NULL;
  83         unsigned long flags;
  84
  85         spin_lock_irqsave(&arm_dma_bufs_lock, flags);
  86         list_for_each_entry(buf, &arm_dma_bufs, list) {
  87                 if (buf->virt == virt) {
  88                         list_del(&buf->list);
  89                         found = buf;
  90                         break;
  91                 }
  92         }
  93         spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
  94         return found;
  95 }
  96
  97 /*
  98  * The DMA API is built upon the notion of "buffer ownership".  A buffer
  99  * is either exclusively owned by the CPU (and therefore may be accessed
 100  * by it) or exclusively owned by the DMA device.  These helper functions
 101  * represent the transitions between these two ownership states.
 102  *
 103  * Note, however, that on later ARMs, this notion does not work due to
 104  * speculative prefetches.  We model our approach on the assumption that
 105  * the CPU does do speculative prefetches, which means we clean caches
 106  * before transfers and delay cache invalidation until transfer completion.
 107  *
 108  */
 109 static void __dma_page_cpu_to_dev(struct page *, unsigned long,
 110                 size_t, enum dma_data_direction);
 111 static void __dma_page_dev_to_cpu(struct page *, unsigned long,
 112                 size_t, enum dma_data_direction);
 113
 114 /**
 115  * arm_dma_map_page - map a portion of a page for streaming DMA
 116  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 117  * @page: page that buffer resides in
 118  * @offset: offset into page for start of buffer
 119  * @size: size of buffer to map
 120  * @dir: DMA transfer direction
 121  *
 122  * Ensure that any data held in the cache is appropriately discarded
 123  * or written back.
 124  *
 125  * The device owns this memory once this call has completed.  The CPU
 126  * can regain ownership by calling dma_unmap_page().
 127  */
 128 static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
 129              unsigned long offset, size_t size, enum dma_data_direction dir,
 130              unsigned long attrs)
 131 {
 132         if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 133                 __dma_page_cpu_to_dev(page, offset, size, dir);
 134         return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 135 }
 136
 137 static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
 138              unsigned long offset, size_t size, enum dma_data_direction dir,
 139              unsigned long attrs)
 140 {
 141         return pfn_to_dma(dev, page_to_pfn(page)) + offset;
 142 }
 143
 144 /**
 145  * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
 146  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 147  * @handle: DMA address of buffer
 148  * @size: size of buffer (same as passed to dma_map_page)
 149  * @dir: DMA transfer direction (same as passed to dma_map_page)
 150  *
 151  * Unmap a page streaming mode DMA translation.  The handle and size
 152  * must match what was provided in the previous dma_map_page() call.
 153  * All other usages are undefined.
 154  *
 155  * After this call, reads by the CPU to the buffer are guaranteed to see
 156  * whatever the device wrote there.
 157  */
 158 static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
 159                 size_t size, enum dma_data_direction dir, unsigned long attrs)
 160 {
 161         if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 162                 __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
 163                                       handle & ~PAGE_MASK, size, dir);
 164 }
 165
 166 static void arm_dma_sync_single_for_cpu(struct device *dev,
 167                 dma_addr_t handle, size_t size, enum dma_data_direction dir)
 168 {
 169         unsigned int offset = handle & (PAGE_SIZE - 1);
 170         struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
 171         __dma_page_dev_to_cpu(page, offset, size, dir);
 172 }
 173
 174 static void arm_dma_sync_single_for_device(struct device *dev,
 175                 dma_addr_t handle, size_t size, enum dma_data_direction dir)
 176 {
 177         unsigned int offset = handle & (PAGE_SIZE - 1);
 178         struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
 179         __dma_page_cpu_to_dev(page, offset, size, dir);
 180 }
 181
 182 const struct dma_map_ops arm_dma_ops = {
 183         .alloc                  = arm_dma_alloc,
 184         .free                   = arm_dma_free,
 185         .mmap                   = arm_dma_mmap,
 186         .get_sgtable            = arm_dma_get_sgtable,
 187         .map_page               = arm_dma_map_page,
 188         .unmap_page             = arm_dma_unmap_page,
 189         .map_sg                 = arm_dma_map_sg,
 190         .unmap_sg               = arm_dma_unmap_sg,
 191         .sync_single_for_cpu    = arm_dma_sync_single_for_cpu,
 192         .sync_single_for_device = arm_dma_sync_single_for_device,
 193         .sync_sg_for_cpu        = arm_dma_sync_sg_for_cpu,
 194         .sync_sg_for_device     = arm_dma_sync_sg_for_device,
 195         .dma_supported          = arm_dma_supported,
 196 };
 197 EXPORT_SYMBOL(arm_dma_ops);
 198
 199 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 200         dma_addr_t *handle, gfp_t gfp, unsigned long attrs);
 201 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
 202                                   dma_addr_t handle, unsigned long attrs);
 203 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 204                  void *cpu_addr, dma_addr_t dma_addr, size_t size,
 205                  unsigned long attrs);
 206
 207 const struct dma_map_ops arm_coherent_dma_ops = {
 208         .alloc                  = arm_coherent_dma_alloc,
 209         .free                   = arm_coherent_dma_free,
 210         .mmap                   = arm_coherent_dma_mmap,
 211         .get_sgtable            = arm_dma_get_sgtable,
 212         .map_page               = arm_coherent_dma_map_page,
 213         .map_sg                 = arm_dma_map_sg,
 214         .dma_supported          = arm_dma_supported,
 215 };
 216 EXPORT_SYMBOL(arm_coherent_dma_ops);
 217
 218 static int __dma_supported(struct device *dev, u64 mask, bool warn)
 219 {
 220         unsigned long max_dma_pfn;
 221
 222         /*
 223          * If the mask allows for more memory than we can address,
 224          * and we actually have that much memory, then we must
 225          * indicate that DMA to this device is not supported.
 226          */
 227         if (sizeof(mask) != sizeof(dma_addr_t) &&
 228             mask > (dma_addr_t)~0 &&
 229             dma_to_pfn(dev, ~0) < max_pfn - 1) {
 230                 if (warn) {
 231                         dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
 232                                  mask);
 233                         dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
 234                 }
 235                 return 0;
 236         }
 237
 238         max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
 239
 240         /*
 241          * Translate the device's DMA mask to a PFN limit.  This
 242          * PFN number includes the page which we can DMA to.
 243          */
 244         if (dma_to_pfn(dev, mask) < max_dma_pfn) {
 245                 if (warn)
 246                         dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n",
 247                                  mask,
 248                                  dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1,
 249                                  max_dma_pfn + 1);
 250                 return 0;
 251         }
 252
 253         return 1;
 254 }
 255
 256 static u64 get_coherent_dma_mask(struct device *dev)
 257 {
 258         u64 mask = (u64)DMA_BIT_MASK(32);
 259
 260         if (dev) {
 261                 mask = dev->coherent_dma_mask;
 262
 263                 /*
 264                  * Sanity check the DMA mask - it must be non-zero, and
 265                  * must be able to be satisfied by a DMA allocation.
 266                  */
 267                 if (mask == 0) {
 268                         dev_warn(dev, "coherent DMA mask is unset\n");
 269                         return 0;
 270                 }
 271
 272                 if (!__dma_supported(dev, mask, true))
 273                         return 0;
 274         }
 275
 276         return mask;
 277 }
 278
 279 static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
 280 {
 281         /*
 282          * Ensure that the allocated pages are zeroed, and that any data
 283          * lurking in the kernel direct-mapped region is invalidated.
 284          */
 285         if (PageHighMem(page)) {
 286                 phys_addr_t base = __pfn_to_phys(page_to_pfn(page));
 287                 phys_addr_t end = base + size;
 288                 while (size > 0) {
 289                         void *ptr = kmap_atomic(page);
 290                         memset(ptr, 0, PAGE_SIZE);
 291                         if (coherent_flag != COHERENT)
 292                                 dmac_flush_range(ptr, ptr + PAGE_SIZE);
 293                         kunmap_atomic(ptr);
 294                         page++;
 295                         size -= PAGE_SIZE;
 296                 }
 297                 if (coherent_flag != COHERENT)
 298                         outer_flush_range(base, end);
 299         } else {
 300                 void *ptr = page_address(page);
 301                 memset(ptr, 0, size);
 302                 if (coherent_flag != COHERENT) {
 303                         dmac_flush_range(ptr, ptr + size);
 304                         outer_flush_range(__pa(ptr), __pa(ptr) + size);
 305                 }
 306         }
 307 }
 308
 309 /*
 310  * Allocate a DMA buffer for 'dev' of size 'size' using the
 311  * specified gfp mask.  Note that 'size' must be page aligned.
 312  */
 313 static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
 314                                        gfp_t gfp, int coherent_flag)
 315 {
 316         unsigned long order = get_order(size);
 317         struct page *page, *p, *e;
 318
 319         page = alloc_pages(gfp, order);
 320         if (!page)
 321                 return NULL;
 322
 323         /*
 324          * Now split the huge page and free the excess pages
 325          */
 326         split_page(page, order);
 327         for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
 328                 __free_page(p);
 329
 330         __dma_clear_buffer(page, size, coherent_flag);
 331
 332         return page;
 333 }
 334
 335 /*
 336  * Free a DMA buffer.  'size' must be page aligned.
 337  */
 338 static void __dma_free_buffer(struct page *page, size_t size)
 339 {
 340         struct page *e = page + (size >> PAGE_SHIFT);
 341
 342         while (page < e) {
 343                 __free_page(page);
 344                 page++;
 345         }
 346 }
 347
 348 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 349                                      pgprot_t prot, struct page **ret_page,
 350                                      const void *caller, bool want_vaddr,
 351                                      int coherent_flag, gfp_t gfp);
 352
 353 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 354                                  pgprot_t prot, struct page **ret_page,
 355                                  const void *caller, bool want_vaddr);
 356
 357 static void *
 358 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 359         const void *caller)
 360 {
 361         /*
 362          * DMA allocation can be mapped to user space, so lets
 363          * set VM_USERMAP flags too.
 364          */
 365         return dma_common_contiguous_remap(page, size,
 366                         VM_ARM_DMA_CONSISTENT | VM_USERMAP,
 367                         prot, caller);
 368 }
 369
 370 static void __dma_free_remap(void *cpu_addr, size_t size)
 371 {
 372         dma_common_free_remap(cpu_addr, size,
 373                         VM_ARM_DMA_CONSISTENT | VM_USERMAP);
 374 }
 375
 376 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
 377 static struct gen_pool *atomic_pool __ro_after_init;
 378
 379 static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
 380
 381 static int __init early_coherent_pool(char *p)
 382 {
 383         atomic_pool_size = memparse(p, &p);
 384         return 0;
 385 }
 386 early_param("coherent_pool", early_coherent_pool);
 387
 388 /*
 389  * Initialise the coherent pool for atomic allocations.
 390  */
 391 static int __init atomic_pool_init(void)
 392 {
 393         pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);
 394         gfp_t gfp = GFP_KERNEL | GFP_DMA;
 395         struct page *page;
 396         void *ptr;
 397
 398         atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
 399         if (!atomic_pool)
 400                 goto out;
 401         /*
 402          * The atomic pool is only used for non-coherent allocations
 403          * so we must pass NORMAL for coherent_flag.
 404          */
 405         if (dev_get_cma_area(NULL))
 406                 ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
 407                                       &page, atomic_pool_init, true, NORMAL,
 408                                       GFP_KERNEL);
 409         else
 410                 ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
 411                                            &page, atomic_pool_init, true);
 412         if (ptr) {
 413                 int ret;
 414
 415                 ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
 416                                         page_to_phys(page),
 417                                         atomic_pool_size, -1);
 418                 if (ret)
 419                         goto destroy_genpool;
 420
 421                 gen_pool_set_algo(atomic_pool,
 422                                 gen_pool_first_fit_order_align,
 423                                 NULL);
 424                 pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
 425                        atomic_pool_size / 1024);
 426                 return 0;
 427         }
 428
 429 destroy_genpool:
 430         gen_pool_destroy(atomic_pool);
 431         atomic_pool = NULL;
 432 out:
 433         pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
 434                atomic_pool_size / 1024);
 435         return -ENOMEM;
 436 }
 437 /*
 438  * CMA is activated by core_initcall, so we must be called after it.
 439  */
 440 postcore_initcall(atomic_pool_init);
 441
 442 struct dma_contig_early_reserve {
 443         phys_addr_t base;
 444         unsigned long size;
 445 };
 446
 447 static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
 448
 449 static int dma_mmu_remap_num __initdata;
 450
 451 void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 452 {
 453         dma_mmu_remap[dma_mmu_remap_num].base = base;
 454         dma_mmu_remap[dma_mmu_remap_num].size = size;
 455         dma_mmu_remap_num++;
 456 }
 457
 458 void __init dma_contiguous_remap(void)
 459 {
 460         int i;
 461         for (i = 0; i < dma_mmu_remap_num; i++) {
 462                 phys_addr_t start = dma_mmu_remap[i].base;
 463                 phys_addr_t end = start + dma_mmu_remap[i].size;
 464                 struct map_desc map;
 465                 unsigned long addr;
 466
 467                 if (end > arm_lowmem_limit)
 468                         end = arm_lowmem_limit;
 469                 if (start >= end)
 470                         continue;
 471
 472                 map.pfn = __phys_to_pfn(start);
 473                 map.virtual = __phys_to_virt(start);
 474                 map.length = end - start;
 475                 map.type = MT_MEMORY_DMA_READY;
 476
 477                 /*
 478                  * Clear previous low-memory mapping to ensure that the
 479                  * TLB does not see any conflicting entries, then flush
 480                  * the TLB of the old entries before creating new mappings.
 481                  *
 482                  * This ensures that any speculatively loaded TLB entries
 483                  * (even though they may be rare) can not cause any problems,
 484                  * and ensures that this code is architecturally compliant.
 485                  */
 486                 for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
 487                      addr += PMD_SIZE)
 488                         pmd_clear(pmd_off_k(addr));
 489
 490                 flush_tlb_kernel_range(__phys_to_virt(start),
 491                                        __phys_to_virt(end));
 492
 493                 iotable_init(&map, 1);
 494         }
 495 }
 496
 497 static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
 498                             void *data)
 499 {
 500         struct page *page = virt_to_page(addr);
 501         pgprot_t prot = *(pgprot_t *)data;
 502
 503         set_pte_ext(pte, mk_pte(page, prot), 0);
 504         return 0;
 505 }
 506
 507 static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
 508 {
 509         unsigned long start = (unsigned long) page_address(page);
 510         unsigned end = start + size;
 511
 512         apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
 513         flush_tlb_kernel_range(start, end);
 514 }
 515
 516 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 517                                  pgprot_t prot, struct page **ret_page,
 518                                  const void *caller, bool want_vaddr)
 519 {
 520         struct page *page;
 521         void *ptr = NULL;
 522         /*
 523          * __alloc_remap_buffer is only called when the device is
 524          * non-coherent
 525          */
 526         page = __dma_alloc_buffer(dev, size, gfp, NORMAL);
 527         if (!page)
 528                 return NULL;
 529         if (!want_vaddr)
 530                 goto out;
 531
 532         ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
 533         if (!ptr) {
 534                 __dma_free_buffer(page, size);
 535                 return NULL;
 536         }
 537
 538  out:
 539         *ret_page = page;
 540         return ptr;
 541 }
 542
 543 static void *__alloc_from_pool(size_t size, struct page **ret_page)
 544 {
 545         unsigned long val;
 546         void *ptr = NULL;
 547
 548         if (!atomic_pool) {
 549                 WARN(1, "coherent pool not initialised!\n");
 550                 return NULL;
 551         }
 552
 553         val = gen_pool_alloc(atomic_pool, size);
 554         if (val) {
 555                 phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
 556
 557                 *ret_page = phys_to_page(phys);
 558                 ptr = (void *)val;
 559         }
 560
 561         return ptr;
 562 }
 563
 564 static bool __in_atomic_pool(void *start, size_t size)
 565 {
 566         return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
 567 }
 568
 569 static int __free_from_pool(void *start, size_t size)
 570 {
 571         if (!__in_atomic_pool(start, size))
 572                 return 0;
 573
 574         gen_pool_free(atomic_pool, (unsigned long)start, size);
 575
 576         return 1;
 577 }
 578
 579 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 580                                      pgprot_t prot, struct page **ret_page,
 581                                      const void *caller, bool want_vaddr,
 582                                      int coherent_flag, gfp_t gfp)
 583 {
 584         unsigned long order = get_order(size);
 585         size_t count = size >> PAGE_SHIFT;
 586         struct page *page;
 587         void *ptr = NULL;
 588
 589         page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN);
 590         if (!page)
 591                 return NULL;
 592
 593         __dma_clear_buffer(page, size, coherent_flag);
 594
 595         if (!want_vaddr)
 596                 goto out;
 597
 598         if (PageHighMem(page)) {
 599                 ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
 600                 if (!ptr) {
 601                         dma_release_from_contiguous(dev, page, count);
 602                         return NULL;
 603                 }
 604         } else {
 605                 __dma_remap(page, size, prot);
 606                 ptr = page_address(page);
 607         }
 608
 609  out:
 610         *ret_page = page;
 611         return ptr;
 612 }
 613
 614 static void __free_from_contiguous(struct device *dev, struct page *page,
 615                                    void *cpu_addr, size_t size, bool want_vaddr)
 616 {
 617         if (want_vaddr) {
 618                 if (PageHighMem(page))
 619                         __dma_free_remap(cpu_addr, size);
 620                 else
 621                         __dma_remap(page, size, PAGE_KERNEL);
 622         }
 623         dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
 624 }
 625
 626 static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
 627 {
 628         prot = (attrs & DMA_ATTR_WRITE_COMBINE) ?
 629                         pgprot_writecombine(prot) :
 630                         pgprot_dmacoherent(prot);
 631         return prot;
 632 }
 633
 634 static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 635                                    struct page **ret_page)
 636 {
 637         struct page *page;
 638         /* __alloc_simple_buffer is only called when the device is coherent */
 639         page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
 640         if (!page)
 641                 return NULL;
 642
 643         *ret_page = page;
 644         return page_address(page);
 645 }
 646
 647 static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
 648                                     struct page **ret_page)
 649 {
 650         return __alloc_simple_buffer(args->dev, args->size, args->gfp,
 651                                      ret_page);
 652 }
 653
 654 static void simple_allocator_free(struct arm_dma_free_args *args)
 655 {
 656         __dma_free_buffer(args->page, args->size);
 657 }
 658
 659 static struct arm_dma_allocator simple_allocator = {
 660         .alloc = simple_allocator_alloc,
 661         .free = simple_allocator_free,
 662 };
 663
 664 static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
 665                                  struct page **ret_page)
 666 {
 667         return __alloc_from_contiguous(args->dev, args->size, args->prot,
 668                                        ret_page, args->caller,
 669                                        args->want_vaddr, args->coherent_flag,
 670                                        args->gfp);
 671 }
 672
 673 static void cma_allocator_free(struct arm_dma_free_args *args)
 674 {
 675         __free_from_contiguous(args->dev, args->page, args->cpu_addr,
 676                                args->size, args->want_vaddr);
 677 }
 678
 679 static struct arm_dma_allocator cma_allocator = {
 680         .alloc = cma_allocator_alloc,
 681         .free = cma_allocator_free,
 682 };
 683
 684 static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
 685                                   struct page **ret_page)
 686 {
 687         return __alloc_from_pool(args->size, ret_page);
 688 }
 689
 690 static void pool_allocator_free(struct arm_dma_free_args *args)
 691 {
 692         __free_from_pool(args->cpu_addr, args->size);
 693 }
 694
 695 static struct arm_dma_allocator pool_allocator = {
 696         .alloc = pool_allocator_alloc,
 697         .free = pool_allocator_free,
 698 };
 699
 700 static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
 701                                    struct page **ret_page)
 702 {
 703         return __alloc_remap_buffer(args->dev, args->size, args->gfp,
 704                                     args->prot, ret_page, args->caller,
 705                                     args->want_vaddr);
 706 }
 707
 708 static void remap_allocator_free(struct arm_dma_free_args *args)
 709 {
 710         if (args->want_vaddr)
 711                 __dma_free_remap(args->cpu_addr, args->size);
 712
 713         __dma_free_buffer(args->page, args->size);
 714 }
 715
 716 static struct arm_dma_allocator remap_allocator = {
 717         .alloc = remap_allocator_alloc,
 718         .free = remap_allocator_free,
 719 };
 720
 721 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 722                          gfp_t gfp, pgprot_t prot, bool is_coherent,
 723                          unsigned long attrs, const void *caller)
 724 {
 725         u64 mask = get_coherent_dma_mask(dev);
 726         struct page *page = NULL;
 727         void *addr;
 728         bool allowblock, cma;
 729         struct arm_dma_buffer *buf;
 730         struct arm_dma_alloc_args args = {
 731                 .dev = dev,
 732                 .size = PAGE_ALIGN(size),
 733                 .gfp = gfp,
 734                 .prot = prot,
 735                 .caller = caller,
 736                 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
 737                 .coherent_flag = is_coherent ? COHERENT : NORMAL,
 738         };
 739
 740 #ifdef CONFIG_DMA_API_DEBUG
 741         u64 limit = (mask + 1) & ~mask;
 742         if (limit && size >= limit) {
 743                 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
 744                         size, mask);
 745                 return NULL;
 746         }
 747 #endif
 748
 749         if (!mask)
 750                 return NULL;
 751
 752         buf = kzalloc(sizeof(*buf),
 753                       gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM));
 754         if (!buf)
 755                 return NULL;
 756
 757         if (mask < 0xffffffffULL)
 758                 gfp |= GFP_DMA;
 759
 760         /*
 761          * Following is a work-around (a.k.a. hack) to prevent pages
 762          * with __GFP_COMP being passed to split_page() which cannot
 763          * handle them.  The real problem is that this flag probably
 764          * should be 0 on ARM as it is not supported on this
 765          * platform; see CONFIG_HUGETLBFS.
 766          */
 767         gfp &= ~(__GFP_COMP);
 768         args.gfp = gfp;
 769
 770         *handle = DMA_MAPPING_ERROR;
 771         allowblock = gfpflags_allow_blocking(gfp);
 772         cma = allowblock ? dev_get_cma_area(dev) : false;
 773
 774         if (cma)
 775                 buf->allocator = &cma_allocator;
 776         else if (is_coherent)
 777                 buf->allocator = &simple_allocator;
 778         else if (allowblock)
 779                 buf->allocator = &remap_allocator;
 780         else
 781                 buf->allocator = &pool_allocator;
 782
 783         addr = buf->allocator->alloc(&args, &page);
 784
 785         if (page) {
 786                 unsigned long flags;
 787
 788                 *handle = pfn_to_dma(dev, page_to_pfn(page));
 789                 buf->virt = args.want_vaddr ? addr : page;
 790
 791                 spin_lock_irqsave(&arm_dma_bufs_lock, flags);
 792                 list_add(&buf->list, &arm_dma_bufs);
 793                 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
 794         } else {
 795                 kfree(buf);
 796         }
 797
 798         return args.want_vaddr ? addr : page;
 799 }
 800
 801 /*
 802  * Allocate DMA-coherent memory space and return both the kernel remapped
 803  * virtual and bus address for that space.
 804  */
 805 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 806                     gfp_t gfp, unsigned long attrs)
 807 {
 808         pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
 809
 810         return __dma_alloc(dev, size, handle, gfp, prot, false,
 811                            attrs, __builtin_return_address(0));
 812 }
 813
 814 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 815         dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 816 {
 817         return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
 818                            attrs, __builtin_return_address(0));
 819 }
 820
 821 static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 822                  void *cpu_addr, dma_addr_t dma_addr, size_t size,
 823                  unsigned long attrs)
 824 {
 825         int ret = -ENXIO;
 826         unsigned long nr_vma_pages = vma_pages(vma);
 827         unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 828         unsigned long pfn = dma_to_pfn(dev, dma_addr);
 829         unsigned long off = vma->vm_pgoff;
 830
 831         if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 832                 return ret;
 833
 834         if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
 835                 ret = remap_pfn_range(vma, vma->vm_start,
 836                                       pfn + off,
 837                                       vma->vm_end - vma->vm_start,
 838                                       vma->vm_page_prot);
 839         }
 840
 841         return ret;
 842 }
 843
 844 /*
 845  * Create userspace mapping for the DMA-coherent memory.
 846  */
 847 static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 848                  void *cpu_addr, dma_addr_t dma_addr, size_t size,
 849                  unsigned long attrs)
 850 {
 851         return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 852 }
 853
 854 int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 855                  void *cpu_addr, dma_addr_t dma_addr, size_t size,
 856                  unsigned long attrs)
 857 {
 858         vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 859         return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 860 }
 861
 862 /*
 863  * Free a buffer as defined by the above mapping.
 864  */
 865 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 866                            dma_addr_t handle, unsigned long attrs,
 867                            bool is_coherent)
 868 {
 869         struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
 870         struct arm_dma_buffer *buf;
 871         struct arm_dma_free_args args = {
 872                 .dev = dev,
 873                 .size = PAGE_ALIGN(size),
 874                 .cpu_addr = cpu_addr,
 875                 .page = page,
 876                 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
 877         };
 878
 879         buf = arm_dma_buffer_find(cpu_addr);
 880         if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
 881                 return;
 882
 883         buf->allocator->free(&args);
 884         kfree(buf);
 885 }
 886
 887 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 888                   dma_addr_t handle, unsigned long attrs)
 889 {
 890         __arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
 891 }
 892
 893 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
 894                                   dma_addr_t handle, unsigned long attrs)
 895 {
 896         __arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
 897 }
 898
 899 /*
 900  * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
 901  * that the intention is to allow exporting memory allocated via the
 902  * coherent DMA APIs through the dma_buf API, which only accepts a
 903  * scattertable.  This presents a couple of problems:
 904  * 1. Not all memory allocated via the coherent DMA APIs is backed by
 905  *    a struct page
 906  * 2. Passing coherent DMA memory into the streaming APIs is not allowed
 907  *    as we will try to flush the memory through a different alias to that
 908  *    actually being used (and the flushes are redundant.)
 909  */
 910 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 911                  void *cpu_addr, dma_addr_t handle, size_t size,
 912                  unsigned long attrs)
 913 {
 914         unsigned long pfn = dma_to_pfn(dev, handle);
 915         struct page *page;
 916         int ret;
 917
 918         /* If the PFN is not valid, we do not have a struct page */
 919         if (!pfn_valid(pfn))
 920                 return -ENXIO;
 921
 922         page = pfn_to_page(pfn);
 923
 924         ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 925         if (unlikely(ret))
 926                 return ret;
 927
 928         sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
 929         return 0;
 930 }
 931
 932 static void dma_cache_maint_page(struct page *page, unsigned long offset,
 933         size_t size, enum dma_data_direction dir,
 934         void (*op)(const void *, size_t, int))
 935 {
 936         unsigned long pfn;
 937         size_t left = size;
 938
 939         pfn = page_to_pfn(page) + offset / PAGE_SIZE;
 940         offset %= PAGE_SIZE;
 941
 942         /*
 943          * A single sg entry may refer to multiple physically contiguous
 944          * pages.  But we still need to process highmem pages individually.
 945          * If highmem is not configured then the bulk of this loop gets
 946          * optimized out.
 947          */
 948         do {
 949                 size_t len = left;
 950                 void *vaddr;
 951
 952                 page = pfn_to_page(pfn);
 953
 954                 if (PageHighMem(page)) {
 955                         if (len + offset > PAGE_SIZE)
 956                                 len = PAGE_SIZE - offset;
 957
 958                         if (cache_is_vipt_nonaliasing()) {
 959                                 vaddr = kmap_atomic(page);
 960                                 op(vaddr + offset, len, dir);
 961                                 kunmap_atomic(vaddr);
 962                         } else {
 963                                 vaddr = kmap_high_get(page);
 964                                 if (vaddr) {
 965                                         op(vaddr + offset, len, dir);
 966                                         kunmap_high(page);
 967                                 }
 968                         }
 969                 } else {
 970                         vaddr = page_address(page) + offset;
 971                         op(vaddr, len, dir);
 972                 }
 973                 offset = 0;
 974                 pfn++;
 975                 left -= len;
 976         } while (left);
 977 }
 978
 979 /*
 980  * Make an area consistent for devices.
 981  * Note: Drivers should NOT use this function directly, as it will break
 982  * platforms with CONFIG_DMABOUNCE.
 983  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
 984  */
 985 static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
 986         size_t size, enum dma_data_direction dir)
 987 {
 988         phys_addr_t paddr;
 989
 990         dma_cache_maint_page(page, off, size, dir, dmac_map_area);
 991
 992         paddr = page_to_phys(page) + off;
 993         if (dir == DMA_FROM_DEVICE) {
 994                 outer_inv_range(paddr, paddr + size);
 995         } else {
 996                 outer_clean_range(paddr, paddr + size);
 997         }
 998         /* FIXME: non-speculating: flush on bidirectional mappings? */
 999 }
1000
1001 static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
1002         size_t size, enum dma_data_direction dir)
1003 {
1004         phys_addr_t paddr = page_to_phys(page) + off;
1005
1006         /* FIXME: non-speculating: not required */
1007         /* in any case, don't bother invalidating if DMA to device */
1008         if (dir != DMA_TO_DEVICE) {
1009                 outer_inv_range(paddr, paddr + size);
1010
1011                 dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
1012         }
1013
1014         /*
1015          * Mark the D-cache clean for these pages to avoid extra flushing.
1016          */
1017         if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
1018                 unsigned long pfn;
1019                 size_t left = size;
1020
1021                 pfn = page_to_pfn(page) + off / PAGE_SIZE;
1022                 off %= PAGE_SIZE;
1023                 if (off) {
1024                         pfn++;
1025                         left -= PAGE_SIZE - off;
1026                 }
1027                 while (left >= PAGE_SIZE) {
1028                         page = pfn_to_page(pfn++);
1029                         set_bit(PG_dcache_clean, &page->flags);
1030                         left -= PAGE_SIZE;
1031                 }
1032         }
1033 }
1034
1035 /**
1036  * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA
1037  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1038  * @sg: list of buffers
1039  * @nents: number of buffers to map
1040  * @dir: DMA transfer direction
1041  *
1042  * Map a set of buffers described by scatterlist in streaming mode for DMA.
1043  * This is the scatter-gather version of the dma_map_single interface.
1044  * Here the scatter gather list elements are each tagged with the
1045  * appropriate dma address and length.  They are obtained via
1046  * sg_dma_{address,length}.
1047  *
1048  * Device ownership issues as mentioned for dma_map_single are the same
1049  * here.
1050  */
1051 int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
1052                 enum dma_data_direction dir, unsigned long attrs)
1053 {
1054         const struct dma_map_ops *ops = get_dma_ops(dev);
1055         struct scatterlist *s;
1056         int i, j;
1057
1058         for_each_sg(sg, s, nents, i) {
1059 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1060                 s->dma_length = s->length;
1061 #endif
1062                 s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
1063                                                 s->length, dir, attrs);
1064                 if (dma_mapping_error(dev, s->dma_address))
1065                         goto bad_mapping;
1066         }
1067         return nents;
1068
1069  bad_mapping:
1070         for_each_sg(sg, s, i, j)
1071                 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
1072         return 0;
1073 }
1074
1075 /**
1076  * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1077  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1078  * @sg: list of buffers
1079  * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
1080  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1081  *
1082  * Unmap a set of streaming mode DMA translations.  Again, CPU access
1083  * rules concerning calls here are the same as for dma_unmap_single().
1084  */
1085 void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
1086                 enum dma_data_direction dir, unsigned long attrs)
1087 {
1088         const struct dma_map_ops *ops = get_dma_ops(dev);
1089         struct scatterlist *s;
1090
1091         int i;
1092
1093         for_each_sg(sg, s, nents, i)
1094                 ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
1095 }
1096
1097 /**
1098  * arm_dma_sync_sg_for_cpu
1099  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1100  * @sg: list of buffers
1101  * @nents: number of buffers to map (returned from dma_map_sg)
1102  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1103  */
1104 void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
1105                         int nents, enum dma_data_direction dir)
1106 {
1107         const struct dma_map_ops *ops = get_dma_ops(dev);
1108         struct scatterlist *s;
1109         int i;
1110
1111         for_each_sg(sg, s, nents, i)
1112                 ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length,
1113                                          dir);
1114 }
1115
1116 /**
1117  * arm_dma_sync_sg_for_device
1118  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
1119  * @sg: list of buffers
1120  * @nents: number of buffers to map (returned from dma_map_sg)
1121  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1122  */
1123 void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
1124                         int nents, enum dma_data_direction dir)
1125 {
1126         const struct dma_map_ops *ops = get_dma_ops(dev);
1127         struct scatterlist *s;
1128         int i;
1129
1130         for_each_sg(sg, s, nents, i)
1131                 ops->sync_single_for_device(dev, sg_dma_address(s), s->length,
1132                                             dir);
1133 }
1134
1135 /*
1136  * Return whether the given device DMA address mask can be supported
1137  * properly.  For example, if your device can only drive the low 24-bits
1138  * during bus mastering, then you would pass 0x00ffffff as the mask
1139  * to this function.
1140  */
1141 int arm_dma_supported(struct device *dev, u64 mask)
1142 {
1143         return __dma_supported(dev, mask, false);
1144 }
1145
1146 static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
1147 {
1148         return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
1149 }
1150
1151 #ifdef CONFIG_ARM_DMA_USE_IOMMU
1152
1153 static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
1154 {
1155         int prot = 0;
1156
1157         if (attrs & DMA_ATTR_PRIVILEGED)
1158                 prot |= IOMMU_PRIV;
1159
1160         switch (dir) {
1161         case DMA_BIDIRECTIONAL:
1162                 return prot | IOMMU_READ | IOMMU_WRITE;
1163         case DMA_TO_DEVICE:
1164                 return prot | IOMMU_READ;
1165         case DMA_FROM_DEVICE:
1166                 return prot | IOMMU_WRITE;
1167         default:
1168                 return prot;
1169         }
1170 }
1171
1172 /* IOMMU */
1173
1174 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping);
1175
1176 static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
1177                                       size_t size)
1178 {
1179         unsigned int order = get_order(size);
1180         unsigned int align = 0;
1181         unsigned int count, start;
1182         size_t mapping_size = mapping->bits << PAGE_SHIFT;
1183         unsigned long flags;
1184         dma_addr_t iova;
1185         int i;
1186
1187         if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT)
1188                 order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT;
1189
1190         count = PAGE_ALIGN(size) >> PAGE_SHIFT;
1191         align = (1 << order) - 1;
1192
1193         spin_lock_irqsave(&mapping->lock, flags);
1194         for (i = 0; i < mapping->nr_bitmaps; i++) {
1195                 start = bitmap_find_next_zero_area(mapping->bitmaps[i],
1196                                 mapping->bits, 0, count, align);
1197
1198                 if (start > mapping->bits)
1199                         continue;
1200
1201                 bitmap_set(mapping->bitmaps[i], start, count);
1202                 break;
1203         }
1204
1205         /*
1206          * No unused range found. Try to extend the existing mapping
1207          * and perform a second attempt to reserve an IO virtual
1208          * address range of size bytes.
1209          */
1210         if (i == mapping->nr_bitmaps) {
1211                 if (extend_iommu_mapping(mapping)) {
1212                         spin_unlock_irqrestore(&mapping->lock, flags);
1213                         return DMA_MAPPING_ERROR;
1214                 }
1215
1216                 start = bitmap_find_next_zero_area(mapping->bitmaps[i],
1217                                 mapping->bits, 0, count, align);
1218
1219                 if (start > mapping->bits) {
1220                         spin_unlock_irqrestore(&mapping->lock, flags);
1221                         return DMA_MAPPING_ERROR;
1222                 }
1223
1224                 bitmap_set(mapping->bitmaps[i], start, count);
1225         }
1226         spin_unlock_irqrestore(&mapping->lock, flags);
1227
1228         iova = mapping->base + (mapping_size * i);
1229         iova += start << PAGE_SHIFT;
1230
1231         return iova;
1232 }
1233
1234 static inline void __free_iova(struct dma_iommu_mapping *mapping,
1235                                dma_addr_t addr, size_t size)
1236 {
1237         unsigned int start, count;
1238         size_t mapping_size = mapping->bits << PAGE_SHIFT;
1239         unsigned long flags;
1240         dma_addr_t bitmap_base;
1241         u32 bitmap_index;
1242
1243         if (!size)
1244                 return;
1245
1246         bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size;
1247         BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions);
1248
1249         bitmap_base = mapping->base + mapping_size * bitmap_index;
1250
1251         start = (addr - bitmap_base) >> PAGE_SHIFT;
1252
1253         if (addr + size > bitmap_base + mapping_size) {
1254                 /*
1255                  * The address range to be freed reaches into the iova
1256                  * range of the next bitmap. This should not happen as
1257                  * we don't allow this in __alloc_iova (at the
1258                  * moment).
1259                  */
1260                 BUG();
1261         } else
1262                 count = size >> PAGE_SHIFT;
1263
1264         spin_lock_irqsave(&mapping->lock, flags);
1265         bitmap_clear(mapping->bitmaps[bitmap_index], start, count);
1266         spin_unlock_irqrestore(&mapping->lock, flags);
1267 }
1268
/*
 * Allocation orders tried by __iommu_alloc_buffer(), largest first.
 * We'll try 2M, 1M, 64K, and finally 4K; array must end with 0!
 * (__iommu_alloc_buffer() keeps advancing its index on failure and only
 * gives up once the order read from here is 0.)
 */
static const int iommu_order_array[] = { 9, 8, 4, 0 };
1271
1272 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
1273                                           gfp_t gfp, unsigned long attrs,
1274                                           int coherent_flag)
1275 {
1276         struct page **pages;
1277         int count = size >> PAGE_SHIFT;
1278         int array_size = count * sizeof(struct page *);
1279         int i = 0;
1280         int order_idx = 0;
1281
1282         if (array_size <= PAGE_SIZE)
1283                 pages = kzalloc(array_size, GFP_KERNEL);
1284         else
1285                 pages = vzalloc(array_size);
1286         if (!pages)
1287                 return NULL;
1288
1289         if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
1290         {
1291                 unsigned long order = get_order(size);
1292                 struct page *page;
1293
1294                 page = dma_alloc_from_contiguous(dev, count, order,
1295                                                  gfp & __GFP_NOWARN);
1296                 if (!page)
1297                         goto error;
1298
1299                 __dma_clear_buffer(page, size, coherent_flag);
1300
1301                 for (i = 0; i < count; i++)
1302                         pages[i] = page + i;
1303
1304                 return pages;
1305         }
1306
1307         /* Go straight to 4K chunks if caller says it's OK. */
1308         if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
1309                 order_idx = ARRAY_SIZE(iommu_order_array) - 1;
1310
1311         /*
1312          * IOMMU can map any pages, so himem can also be used here
1313          */
1314         gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
1315
1316         while (count) {
1317                 int j, order;
1318
1319                 order = iommu_order_array[order_idx];
1320
1321                 /* Drop down when we get small */
1322                 if (__fls(count) < order) {
1323                         order_idx++;
1324                         continue;
1325                 }
1326
1327                 if (order) {
1328                         /* See if it's easy to allocate a high-order chunk */
1329                         pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
1330
1331                         /* Go down a notch at first sign of pressure */
1332                         if (!pages[i]) {
1333                                 order_idx++;
1334                                 continue;
1335                         }
1336                 } else {
1337                         pages[i] = alloc_pages(gfp, 0);
1338                         if (!pages[i])
1339                                 goto error;
1340                 }
1341
1342                 if (order) {
1343                         split_page(pages[i], order);
1344                         j = 1 << order;
1345                         while (--j)
1346                                 pages[i + j] = pages[i] + j;
1347                 }
1348
1349                 __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
1350                 i += 1 << order;
1351                 count -= 1 << order;
1352         }
1353
1354         return pages;
1355 error:
1356         while (i--)
1357                 if (pages[i])
1358                         __free_pages(pages[i], 0);
1359         kvfree(pages);
1360         return NULL;
1361 }
1362
1363 static int __iommu_free_buffer(struct device *dev, struct page **pages,
1364                                size_t size, unsigned long attrs)
1365 {
1366         int count = size >> PAGE_SHIFT;
1367         int i;
1368
1369         if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
1370                 dma_release_from_contiguous(dev, pages[0], count);
1371         } else {
1372                 for (i = 0; i < count; i++)
1373                         if (pages[i])
1374                                 __free_pages(pages[i], 0);
1375         }
1376
1377         kvfree(pages);
1378         return 0;
1379 }
1380
1381 /*
1382  * Create a CPU mapping for a specified pages
1383  */
1384 static void *
1385 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
1386                     const void *caller)
1387 {
1388         return dma_common_pages_remap(pages, size,
1389                         VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
1390 }
1391
1392 /*
1393  * Create a mapping in device IO address space for specified pages
1394  */
1395 static dma_addr_t
1396 __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
1397                        unsigned long attrs)
1398 {
1399         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1400         unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
1401         dma_addr_t dma_addr, iova;
1402         int i;
1403
1404         dma_addr = __alloc_iova(mapping, size);
1405         if (dma_addr == DMA_MAPPING_ERROR)
1406                 return dma_addr;
1407
1408         iova = dma_addr;
1409         for (i = 0; i < count; ) {
1410                 int ret;
1411
1412                 unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
1413                 phys_addr_t phys = page_to_phys(pages[i]);
1414                 unsigned int len, j;
1415
1416                 for (j = i + 1; j < count; j++, next_pfn++)
1417                         if (page_to_pfn(pages[j]) != next_pfn)
1418                                 break;
1419
1420                 len = (j - i) << PAGE_SHIFT;
1421                 ret = iommu_map(mapping->domain, iova, phys, len,
1422                                 __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
1423                 if (ret < 0)
1424                         goto fail;
1425                 iova += len;
1426                 i = j;
1427         }
1428         return dma_addr;
1429 fail:
1430         iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
1431         __free_iova(mapping, dma_addr, size);
1432         return DMA_MAPPING_ERROR;
1433 }
1434
1435 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
1436 {
1437         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1438
1439         /*
1440          * add optional in-page offset from iova to size and align
1441          * result to page size
1442          */
1443         size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
1444         iova &= PAGE_MASK;
1445
1446         iommu_unmap(mapping->domain, iova, size);
1447         __free_iova(mapping, iova, size);
1448         return 0;
1449 }
1450
1451 static struct page **__atomic_get_pages(void *addr)
1452 {
1453         struct page *page;
1454         phys_addr_t phys;
1455
1456         phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr);
1457         page = phys_to_page(phys);
1458
1459         return (struct page **)page;
1460 }
1461
1462 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
1463 {
1464         struct vm_struct *area;
1465
1466         if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
1467                 return __atomic_get_pages(cpu_addr);
1468
1469         if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
1470                 return cpu_addr;
1471
1472         area = find_vm_area(cpu_addr);
1473         if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
1474                 return area->pages;
1475         return NULL;
1476 }
1477
1478 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
1479                                   dma_addr_t *handle, int coherent_flag,
1480                                   unsigned long attrs)
1481 {
1482         struct page *page;
1483         void *addr;
1484
1485         if (coherent_flag  == COHERENT)
1486                 addr = __alloc_simple_buffer(dev, size, gfp, &page);
1487         else
1488                 addr = __alloc_from_pool(size, &page);
1489         if (!addr)
1490                 return NULL;
1491
1492         *handle = __iommu_create_mapping(dev, &page, size, attrs);
1493         if (*handle == DMA_MAPPING_ERROR)
1494                 goto err_mapping;
1495
1496         return addr;
1497
1498 err_mapping:
1499         __free_from_pool(addr, size);
1500         return NULL;
1501 }
1502
1503 static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
1504                         dma_addr_t handle, size_t size, int coherent_flag)
1505 {
1506         __iommu_remove_mapping(dev, handle, size);
1507         if (coherent_flag == COHERENT)
1508                 __dma_free_buffer(virt_to_page(cpu_addr), size);
1509         else
1510                 __free_from_pool(cpu_addr, size);
1511 }
1512
1513 static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
1514             dma_addr_t *handle, gfp_t gfp, unsigned long attrs,
1515             int coherent_flag)
1516 {
1517         pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
1518         struct page **pages;
1519         void *addr = NULL;
1520
1521         *handle = DMA_MAPPING_ERROR;
1522         size = PAGE_ALIGN(size);
1523
1524         if (coherent_flag  == COHERENT || !gfpflags_allow_blocking(gfp))
1525                 return __iommu_alloc_simple(dev, size, gfp, handle,
1526                                             coherent_flag, attrs);
1527
1528         /*
1529          * Following is a work-around (a.k.a. hack) to prevent pages
1530          * with __GFP_COMP being passed to split_page() which cannot
1531          * handle them.  The real problem is that this flag probably
1532          * should be 0 on ARM as it is not supported on this
1533          * platform; see CONFIG_HUGETLBFS.
1534          */
1535         gfp &= ~(__GFP_COMP);
1536
1537         pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
1538         if (!pages)
1539                 return NULL;
1540
1541         *handle = __iommu_create_mapping(dev, pages, size, attrs);
1542         if (*handle == DMA_MAPPING_ERROR)
1543                 goto err_buffer;
1544
1545         if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
1546                 return pages;
1547
1548         addr = __iommu_alloc_remap(pages, size, gfp, prot,
1549                                    __builtin_return_address(0));
1550         if (!addr)
1551                 goto err_mapping;
1552
1553         return addr;
1554
1555 err_mapping:
1556         __iommu_remove_mapping(dev, *handle, size);
1557 err_buffer:
1558         __iommu_free_buffer(dev, pages, size, attrs);
1559         return NULL;
1560 }
1561
1562 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
1563             dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1564 {
1565         return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL);
1566 }
1567
1568 static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size,
1569                     dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1570 {
1571         return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT);
1572 }
1573
1574 static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
1575                     void *cpu_addr, dma_addr_t dma_addr, size_t size,
1576                     unsigned long attrs)
1577 {
1578         unsigned long uaddr = vma->vm_start;
1579         unsigned long usize = vma->vm_end - vma->vm_start;
1580         struct page **pages = __iommu_get_pages(cpu_addr, attrs);
1581         unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
1582         unsigned long off = vma->vm_pgoff;
1583
1584         if (!pages)
1585                 return -ENXIO;
1586
1587         if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
1588                 return -ENXIO;
1589
1590         pages += off;
1591
1592         do {
1593                 int ret = vm_insert_page(vma, uaddr, *pages++);
1594                 if (ret) {
1595                         pr_err("Remapping memory failed: %d\n", ret);
1596                         return ret;
1597                 }
1598                 uaddr += PAGE_SIZE;
1599                 usize -= PAGE_SIZE;
1600         } while (usize > 0);
1601
1602         return 0;
1603 }
1604 static int arm_iommu_mmap_attrs(struct device *dev,
1605                 struct vm_area_struct *vma, void *cpu_addr,
1606                 dma_addr_t dma_addr, size_t size, unsigned long attrs)
1607 {
1608         vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
1609
1610         return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs);
1611 }
1612
1613 static int arm_coherent_iommu_mmap_attrs(struct device *dev,
1614                 struct vm_area_struct *vma, void *cpu_addr,
1615                 dma_addr_t dma_addr, size_t size, unsigned long attrs)
1616 {
1617         return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs);
1618 }
1619
1620 /*
1621  * free a page as defined by the above mapping.
1622  * Must not be called with IRQs disabled.
1623  */
1624 void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
1625         dma_addr_t handle, unsigned long attrs, int coherent_flag)
1626 {
1627         struct page **pages;
1628         size = PAGE_ALIGN(size);
1629
1630         if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) {
1631                 __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag);
1632                 return;
1633         }
1634
1635         pages = __iommu_get_pages(cpu_addr, attrs);
1636         if (!pages) {
1637                 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
1638                 return;
1639         }
1640
1641         if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
1642                 dma_common_free_remap(cpu_addr, size,
1643                         VM_ARM_DMA_CONSISTENT | VM_USERMAP);
1644         }
1645
1646         __iommu_remove_mapping(dev, handle, size);
1647         __iommu_free_buffer(dev, pages, size, attrs);
1648 }
1649
1650 void arm_iommu_free_attrs(struct device *dev, size_t size,
1651                     void *cpu_addr, dma_addr_t handle, unsigned long attrs)
1652 {
1653         __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
1654 }
1655
1656 void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
1657                     void *cpu_addr, dma_addr_t handle, unsigned long attrs)
1658 {
1659         __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT);
1660 }
1661
1662 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
1663                                  void *cpu_addr, dma_addr_t dma_addr,
1664                                  size_t size, unsigned long attrs)
1665 {
1666         unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
1667         struct page **pages = __iommu_get_pages(cpu_addr, attrs);
1668
1669         if (!pages)
1670                 return -ENXIO;
1671
1672         return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
1673                                          GFP_KERNEL);
1674 }
1675
1676 /*
1677  * Map a part of the scatter-gather list into contiguous io address space
1678  */
1679 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
1680                           size_t size, dma_addr_t *handle,
1681                           enum dma_data_direction dir, unsigned long attrs,
1682                           bool is_coherent)
1683 {
1684         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1685         dma_addr_t iova, iova_base;
1686         int ret = 0;
1687         unsigned int count;
1688         struct scatterlist *s;
1689         int prot;
1690
1691         size = PAGE_ALIGN(size);
1692         *handle = DMA_MAPPING_ERROR;
1693
1694         iova_base = iova = __alloc_iova(mapping, size);
1695         if (iova == DMA_MAPPING_ERROR)
1696                 return -ENOMEM;
1697
1698         for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
1699                 phys_addr_t phys = page_to_phys(sg_page(s));
1700                 unsigned int len = PAGE_ALIGN(s->offset + s->length);
1701
1702                 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
1703                         __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
1704
1705                 prot = __dma_info_to_prot(dir, attrs);
1706
1707                 ret = iommu_map(mapping->domain, iova, phys, len, prot);
1708                 if (ret < 0)
1709                         goto fail;
1710                 count += len >> PAGE_SHIFT;
1711                 iova += len;
1712         }
1713         *handle = iova_base;
1714
1715         return 0;
1716 fail:
1717         iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
1718         __free_iova(mapping, iova_base, size);
1719         return ret;
1720 }
1721
1722 static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
1723                      enum dma_data_direction dir, unsigned long attrs,
1724                      bool is_coherent)
1725 {
1726         struct scatterlist *s = sg, *dma = sg, *start = sg;
1727         int i, count = 0;
1728         unsigned int offset = s->offset;
1729         unsigned int size = s->offset + s->length;
1730         unsigned int max = dma_get_max_seg_size(dev);
1731
1732         for (i = 1; i < nents; i++) {
1733                 s = sg_next(s);
1734
1735                 s->dma_address = DMA_MAPPING_ERROR;
1736                 s->dma_length = 0;
1737
1738                 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
1739                         if (__map_sg_chunk(dev, start, size, &dma->dma_address,
1740                             dir, attrs, is_coherent) < 0)
1741                                 goto bad_mapping;
1742
1743                         dma->dma_address += offset;
1744                         dma->dma_length = size - offset;
1745
1746                         size = offset = s->offset;
1747                         start = s;
1748                         dma = sg_next(dma);
1749                         count += 1;
1750                 }
1751                 size += s->length;
1752         }
1753         if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
1754                 is_coherent) < 0)
1755                 goto bad_mapping;
1756
1757         dma->dma_address += offset;
1758         dma->dma_length = size - offset;
1759
1760         return count+1;
1761
1762 bad_mapping:
1763         for_each_sg(sg, s, count, i)
1764                 __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
1765         return 0;
1766 }
1767
1768 /**
1769  * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA
1770  * @dev: valid struct device pointer
1771  * @sg: list of buffers
1772  * @nents: number of buffers to map
1773  * @dir: DMA transfer direction
1774  *
1775  * Map a set of i/o coherent buffers described by scatterlist in streaming
1776  * mode for DMA. The scatter gather list elements are merged together (if
1777  * possible) and tagged with the appropriate dma address and length. They are
1778  * obtained via sg_dma_{address,length}.
1779  */
1780 int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
1781                 int nents, enum dma_data_direction dir, unsigned long attrs)
1782 {
1783         return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
1784 }
1785
1786 /**
1787  * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
1788  * @dev: valid struct device pointer
1789  * @sg: list of buffers
1790  * @nents: number of buffers to map
1791  * @dir: DMA transfer direction
1792  *
1793  * Map a set of buffers described by scatterlist in streaming mode for DMA.
1794  * The scatter gather list elements are merged together (if possible) and
1795  * tagged with the appropriate dma address and length. They are obtained via
1796  * sg_dma_{address,length}.
1797  */
1798 int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
1799                 int nents, enum dma_data_direction dir, unsigned long attrs)
1800 {
1801         return __iommu_map_sg(dev, sg, nents, dir, attrs, false);
1802 }
1803
1804 static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
1805                 int nents, enum dma_data_direction dir,
1806                 unsigned long attrs, bool is_coherent)
1807 {
1808         struct scatterlist *s;
1809         int i;
1810
1811         for_each_sg(sg, s, nents, i) {
1812                 if (sg_dma_len(s))
1813                         __iommu_remove_mapping(dev, sg_dma_address(s),
1814                                                sg_dma_len(s));
1815                 if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
1816                         __dma_page_dev_to_cpu(sg_page(s), s->offset,
1817                                               s->length, dir);
1818         }
1819 }
1820
1821 /**
1822  * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1823  * @dev: valid struct device pointer
1824  * @sg: list of buffers
1825  * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
1826  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1827  *
1828  * Unmap a set of streaming mode DMA translations.  Again, CPU access
1829  * rules concerning calls here are the same as for dma_unmap_single().
1830  */
1831 void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
1832                 int nents, enum dma_data_direction dir,
1833                 unsigned long attrs)
1834 {
1835         __iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
1836 }
1837
1838 /**
1839  * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
1840  * @dev: valid struct device pointer
1841  * @sg: list of buffers
1842  * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
1843  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1844  *
1845  * Unmap a set of streaming mode DMA translations.  Again, CPU access
1846  * rules concerning calls here are the same as for dma_unmap_single().
1847  */
1848 void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
1849                         enum dma_data_direction dir,
1850                         unsigned long attrs)
1851 {
1852         __iommu_unmap_sg(dev, sg, nents, dir, attrs, false);
1853 }
1854
1855 /**
1856  * arm_iommu_sync_sg_for_cpu
1857  * @dev: valid struct device pointer
1858  * @sg: list of buffers
1859  * @nents: number of buffers to map (returned from dma_map_sg)
1860  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1861  */
1862 void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
1863                         int nents, enum dma_data_direction dir)
1864 {
1865         struct scatterlist *s;
1866         int i;
1867
1868         for_each_sg(sg, s, nents, i)
1869                 __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
1870
1871 }
1872
1873 /**
1874  * arm_iommu_sync_sg_for_device
1875  * @dev: valid struct device pointer
1876  * @sg: list of buffers
1877  * @nents: number of buffers to map (returned from dma_map_sg)
1878  * @dir: DMA transfer direction (same as was passed to dma_map_sg)
1879  */
1880 void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
1881                         int nents, enum dma_data_direction dir)
1882 {
1883         struct scatterlist *s;
1884         int i;
1885
1886         for_each_sg(sg, s, nents, i)
1887                 __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
1888 }
1889
1890
1891 /**
1892  * arm_coherent_iommu_map_page
1893  * @dev: valid struct device pointer
1894  * @page: page that buffer resides in
1895  * @offset: offset into page for start of buffer
1896  * @size: size of buffer to map
1897  * @dir: DMA transfer direction
1898  *
1899  * Coherent IOMMU aware version of arm_dma_map_page()
1900  */
1901 static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page,
1902              unsigned long offset, size_t size, enum dma_data_direction dir,
1903              unsigned long attrs)
1904 {
1905         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1906         dma_addr_t dma_addr;
1907         int ret, prot, len = PAGE_ALIGN(size + offset);
1908
1909         dma_addr = __alloc_iova(mapping, len);
1910         if (dma_addr == DMA_MAPPING_ERROR)
1911                 return dma_addr;
1912
1913         prot = __dma_info_to_prot(dir, attrs);
1914
1915         ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
1916         if (ret < 0)
1917                 goto fail;
1918
1919         return dma_addr + offset;
1920 fail:
1921         __free_iova(mapping, dma_addr, len);
1922         return DMA_MAPPING_ERROR;
1923 }
1924
1925 /**
1926  * arm_iommu_map_page
1927  * @dev: valid struct device pointer
1928  * @page: page that buffer resides in
1929  * @offset: offset into page for start of buffer
1930  * @size: size of buffer to map
1931  * @dir: DMA transfer direction
1932  *
1933  * IOMMU aware version of arm_dma_map_page()
1934  */
1935 static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
1936              unsigned long offset, size_t size, enum dma_data_direction dir,
1937              unsigned long attrs)
1938 {
1939         if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
1940                 __dma_page_cpu_to_dev(page, offset, size, dir);
1941
1942         return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
1943 }
1944
1945 /**
1946  * arm_coherent_iommu_unmap_page
1947  * @dev: valid struct device pointer
1948  * @handle: DMA address of buffer
1949  * @size: size of buffer (same as passed to dma_map_page)
1950  * @dir: DMA transfer direction (same as passed to dma_map_page)
1951  *
1952  * Coherent IOMMU aware version of arm_dma_unmap_page()
1953  */
1954 static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
1955                 size_t size, enum dma_data_direction dir, unsigned long attrs)
1956 {
1957         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1958         dma_addr_t iova = handle & PAGE_MASK;
1959         int offset = handle & ~PAGE_MASK;
1960         int len = PAGE_ALIGN(size + offset);
1961
1962         if (!iova)
1963                 return;
1964
1965         iommu_unmap(mapping->domain, iova, len);
1966         __free_iova(mapping, iova, len);
1967 }
1968
1969 /**
1970  * arm_iommu_unmap_page
1971  * @dev: valid struct device pointer
1972  * @handle: DMA address of buffer
1973  * @size: size of buffer (same as passed to dma_map_page)
1974  * @dir: DMA transfer direction (same as passed to dma_map_page)
1975  *
1976  * IOMMU aware version of arm_dma_unmap_page()
1977  */
1978 static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
1979                 size_t size, enum dma_data_direction dir, unsigned long attrs)
1980 {
1981         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
1982         dma_addr_t iova = handle & PAGE_MASK;
1983         struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
1984         int offset = handle & ~PAGE_MASK;
1985         int len = PAGE_ALIGN(size + offset);
1986
1987         if (!iova)
1988                 return;
1989
1990         if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
1991                 __dma_page_dev_to_cpu(page, offset, size, dir);
1992
1993         iommu_unmap(mapping->domain, iova, len);
1994         __free_iova(mapping, iova, len);
1995 }
1996
1997 /**
1998  * arm_iommu_map_resource - map a device resource for DMA
1999  * @dev: valid struct device pointer
2000  * @phys_addr: physical address of resource
2001  * @size: size of resource to map
2002  * @dir: DMA transfer direction
2003  */
2004 static dma_addr_t arm_iommu_map_resource(struct device *dev,
2005                 phys_addr_t phys_addr, size_t size,
2006                 enum dma_data_direction dir, unsigned long attrs)
2007 {
2008         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
2009         dma_addr_t dma_addr;
2010         int ret, prot;
2011         phys_addr_t addr = phys_addr & PAGE_MASK;
2012         unsigned int offset = phys_addr & ~PAGE_MASK;
2013         size_t len = PAGE_ALIGN(size + offset);
2014
2015         dma_addr = __alloc_iova(mapping, len);
2016         if (dma_addr == DMA_MAPPING_ERROR)
2017                 return dma_addr;
2018
2019         prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
2020
2021         ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
2022         if (ret < 0)
2023                 goto fail;
2024
2025         return dma_addr + offset;
2026 fail:
2027         __free_iova(mapping, dma_addr, len);
2028         return DMA_MAPPING_ERROR;
2029 }
2030
2031 /**
2032  * arm_iommu_unmap_resource - unmap a device DMA resource
2033  * @dev: valid struct device pointer
2034  * @dma_handle: DMA address to resource
2035  * @size: size of resource to map
2036  * @dir: DMA transfer direction
2037  */
2038 static void arm_iommu_unmap_resource(struct device *dev, dma_addr_t dma_handle,
2039                 size_t size, enum dma_data_direction dir,
2040                 unsigned long attrs)
2041 {
2042         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
2043         dma_addr_t iova = dma_handle & PAGE_MASK;
2044         unsigned int offset = dma_handle & ~PAGE_MASK;
2045         size_t len = PAGE_ALIGN(size + offset);
2046
2047         if (!iova)
2048                 return;
2049
2050         iommu_unmap(mapping->domain, iova, len);
2051         __free_iova(mapping, iova, len);
2052 }
2053
2054 static void arm_iommu_sync_single_for_cpu(struct device *dev,
2055                 dma_addr_t handle, size_t size, enum dma_data_direction dir)
2056 {
2057         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
2058         dma_addr_t iova = handle & PAGE_MASK;
2059         struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
2060         unsigned int offset = handle & ~PAGE_MASK;
2061
2062         if (!iova)
2063                 return;
2064
2065         __dma_page_dev_to_cpu(page, offset, size, dir);
2066 }
2067
2068 static void arm_iommu_sync_single_for_device(struct device *dev,
2069                 dma_addr_t handle, size_t size, enum dma_data_direction dir)
2070 {
2071         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
2072         dma_addr_t iova = handle & PAGE_MASK;
2073         struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
2074         unsigned int offset = handle & ~PAGE_MASK;
2075
2076         if (!iova)
2077                 return;
2078
2079         __dma_page_cpu_to_dev(page, offset, size, dir);
2080 }
2081
2082 const struct dma_map_ops iommu_ops = {
2083         .alloc          = arm_iommu_alloc_attrs,
2084         .free           = arm_iommu_free_attrs,
2085         .mmap           = arm_iommu_mmap_attrs,
2086         .get_sgtable    = arm_iommu_get_sgtable,
2087
2088         .map_page               = arm_iommu_map_page,
2089         .unmap_page             = arm_iommu_unmap_page,
2090         .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
2091         .sync_single_for_device = arm_iommu_sync_single_for_device,
2092
2093         .map_sg                 = arm_iommu_map_sg,
2094         .unmap_sg               = arm_iommu_unmap_sg,
2095         .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
2096         .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
2097
2098         .map_resource           = arm_iommu_map_resource,
2099         .unmap_resource         = arm_iommu_unmap_resource,
2100
2101         .dma_supported          = arm_dma_supported,
2102 };
2103
2104 const struct dma_map_ops iommu_coherent_ops = {
2105         .alloc          = arm_coherent_iommu_alloc_attrs,
2106         .free           = arm_coherent_iommu_free_attrs,
2107         .mmap           = arm_coherent_iommu_mmap_attrs,
2108         .get_sgtable    = arm_iommu_get_sgtable,
2109
2110         .map_page       = arm_coherent_iommu_map_page,
2111         .unmap_page     = arm_coherent_iommu_unmap_page,
2112
2113         .map_sg         = arm_coherent_iommu_map_sg,
2114         .unmap_sg       = arm_coherent_iommu_unmap_sg,
2115
2116         .map_resource   = arm_iommu_map_resource,
2117         .unmap_resource = arm_iommu_unmap_resource,
2118
2119         .dma_supported          = arm_dma_supported,
2120 };
2121
2122 /**
2123  * arm_iommu_create_mapping
2124  * @bus: pointer to the bus holding the client device (for IOMMU calls)
2125  * @base: start address of the valid IO address space
2126  * @size: maximum size of the valid IO address space
2127  *
2128  * Creates a mapping structure which holds information about used/unused
2129  * IO address ranges, which is required to perform memory allocation and
2130  * mapping with IOMMU aware functions.
2131  *
2132  * The client device need to be attached to the mapping with
2133  * arm_iommu_attach_device function.
2134  */
2135 struct dma_iommu_mapping *
2136 arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
2137 {
2138         unsigned int bits = size >> PAGE_SHIFT;
2139         unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
2140         struct dma_iommu_mapping *mapping;
2141         int extensions = 1;
2142         int err = -ENOMEM;
2143
2144         /* currently only 32-bit DMA address space is supported */
2145         if (size > DMA_BIT_MASK(32) + 1)
2146                 return ERR_PTR(-ERANGE);
2147
2148         if (!bitmap_size)
2149                 return ERR_PTR(-EINVAL);
2150
2151         if (bitmap_size > PAGE_SIZE) {
2152                 extensions = bitmap_size / PAGE_SIZE;
2153                 bitmap_size = PAGE_SIZE;
2154         }
2155
2156         mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
2157         if (!mapping)
2158                 goto err;
2159
2160         mapping->bitmap_size = bitmap_size;
2161         mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *),
2162                                    GFP_KERNEL);
2163         if (!mapping->bitmaps)
2164                 goto err2;
2165
2166         mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL);
2167         if (!mapping->bitmaps[0])
2168                 goto err3;
2169
2170         mapping->nr_bitmaps = 1;
2171         mapping->extensions = extensions;
2172         mapping->base = base;
2173         mapping->bits = BITS_PER_BYTE * bitmap_size;
2174
2175         spin_lock_init(&mapping->lock);
2176
2177         mapping->domain = iommu_domain_alloc(bus);
2178         if (!mapping->domain)
2179                 goto err4;
2180
2181         kref_init(&mapping->kref);
2182         return mapping;
2183 err4:
2184         kfree(mapping->bitmaps[0]);
2185 err3:
2186         kfree(mapping->bitmaps);
2187 err2:
2188         kfree(mapping);
2189 err:
2190         return ERR_PTR(err);
2191 }
2192 EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);
2193
2194 static void release_iommu_mapping(struct kref *kref)
2195 {
2196         int i;
2197         struct dma_iommu_mapping *mapping =
2198                 container_of(kref, struct dma_iommu_mapping, kref);
2199
2200         iommu_domain_free(mapping->domain);
2201         for (i = 0; i < mapping->nr_bitmaps; i++)
2202                 kfree(mapping->bitmaps[i]);
2203         kfree(mapping->bitmaps);
2204         kfree(mapping);
2205 }
2206
2207 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
2208 {
2209         int next_bitmap;
2210
2211         if (mapping->nr_bitmaps >= mapping->extensions)
2212                 return -EINVAL;
2213
2214         next_bitmap = mapping->nr_bitmaps;
2215         mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size,
2216                                                 GFP_ATOMIC);
2217         if (!mapping->bitmaps[next_bitmap])
2218                 return -ENOMEM;
2219
2220         mapping->nr_bitmaps++;
2221
2222         return 0;
2223 }
2224
2225 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
2226 {
2227         if (mapping)
2228                 kref_put(&mapping->kref, release_iommu_mapping);
2229 }
2230 EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);
2231
2232 static int __arm_iommu_attach_device(struct device *dev,
2233                                      struct dma_iommu_mapping *mapping)
2234 {
2235         int err;
2236
2237         err = iommu_attach_device(mapping->domain, dev);
2238         if (err)
2239                 return err;
2240
2241         kref_get(&mapping->kref);
2242         to_dma_iommu_mapping(dev) = mapping;
2243
2244         pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
2245         return 0;
2246 }
2247
2248 /**
2249  * arm_iommu_attach_device
2250  * @dev: valid struct device pointer
2251  * @mapping: io address space mapping structure (returned from
2252  *      arm_iommu_create_mapping)
2253  *
2254  * Attaches specified io address space mapping to the provided device.
2255  * This replaces the dma operations (dma_map_ops pointer) with the
2256  * IOMMU aware version.
2257  *
2258  * More than one client might be attached to the same io address space
2259  * mapping.
2260  */
2261 int arm_iommu_attach_device(struct device *dev,
2262                             struct dma_iommu_mapping *mapping)
2263 {
2264         int err;
2265
2266         err = __arm_iommu_attach_device(dev, mapping);
2267         if (err)
2268                 return err;
2269
2270         set_dma_ops(dev, &iommu_ops);
2271         return 0;
2272 }
2273 EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
2274
2275 /**
2276  * arm_iommu_detach_device
2277  * @dev: valid struct device pointer
2278  *
2279  * Detaches the provided device from a previously attached map.
2280  * This voids the dma operations (dma_map_ops pointer)
2281  */
2282 void arm_iommu_detach_device(struct device *dev)
2283 {
2284         struct dma_iommu_mapping *mapping;
2285
2286         mapping = to_dma_iommu_mapping(dev);
2287         if (!mapping) {
2288                 dev_warn(dev, "Not attached\n");
2289                 return;
2290         }
2291
2292         iommu_detach_device(mapping->domain, dev);
2293         kref_put(&mapping->kref, release_iommu_mapping);
2294         to_dma_iommu_mapping(dev) = NULL;
2295         set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));
2296
2297         pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
2298 }
2299 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
2300
2301 static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
2302 {
2303         return coherent ? &iommu_coherent_ops : &iommu_ops;
2304 }
2305
2306 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
2307                                     const struct iommu_ops *iommu)
2308 {
2309         struct dma_iommu_mapping *mapping;
2310
2311         if (!iommu)
2312                 return false;
2313
2314         mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
2315         if (IS_ERR(mapping)) {
2316                 pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
2317                                 size, dev_name(dev));
2318                 return false;
2319         }
2320
2321         if (__arm_iommu_attach_device(dev, mapping)) {
2322                 pr_warn("Failed to attached device %s to IOMMU_mapping\n",
2323                                 dev_name(dev));
2324                 arm_iommu_release_mapping(mapping);
2325                 return false;
2326         }
2327
2328         return true;
2329 }
2330
2331 static void arm_teardown_iommu_dma_ops(struct device *dev)
2332 {
2333         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
2334
2335         if (!mapping)
2336                 return;
2337
2338         arm_iommu_detach_device(dev);
2339         arm_iommu_release_mapping(mapping);
2340 }
2341
2342 #else
2343
2344 static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
2345                                     const struct iommu_ops *iommu)
2346 {
2347         return false;
2348 }
2349
/* Stub for the #else branch: there is nothing to tear down. */
static void arm_teardown_iommu_dma_ops(struct device *dev)
{
}
2351
/* In the #else branch the IOMMU ops lookup is an alias for the plain one. */
#define arm_get_iommu_dma_map_ops	arm_get_dma_map_ops
2353
2354 #endif  /* CONFIG_ARM_DMA_USE_IOMMU */
2355
2356 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
2357                         const struct iommu_ops *iommu, bool coherent)
2358 {
2359         const struct dma_map_ops *dma_ops;
2360
2361         dev->archdata.dma_coherent = coherent;
2362
2363         /*
2364          * Don't override the dma_ops if they have already been set. Ideally
2365          * this should be the only location where dma_ops are set, remove this
2366          * check when all other callers of set_dma_ops will have disappeared.
2367          */
2368         if (dev->dma_ops)
2369                 return;
2370
2371         if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
2372                 dma_ops = arm_get_iommu_dma_map_ops(coherent);
2373         else
2374                 dma_ops = arm_get_dma_map_ops(coherent);
2375
2376         set_dma_ops(dev, dma_ops);
2377
2378 #ifdef CONFIG_XEN
2379         if (xen_initial_domain()) {
2380                 dev->archdata.dev_dma_ops = dev->dma_ops;
2381                 dev->dma_ops = xen_dma_ops;
2382         }
2383 #endif
2384         dev->archdata.dma_ops_setup = true;
2385 }
2386
2387 void arch_teardown_dma_ops(struct device *dev)
2388 {
2389         if (!dev->archdata.dma_ops_setup)
2390                 return;
2391
2392         arm_teardown_iommu_dma_ops(dev);
2393 }