arch/arc/mm/dma.c

   1 /*
   2  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License version 2 as
   6  * published by the Free Software Foundation.
   7  */
   8
   9 /*
  10  * DMA Coherent API Notes
  11  *
  12  * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
  13  * implemented by accessing it using a kernel virtual address, with
  14  * Cache bit off in the TLB entry.
  15  *
  16  * The default DMA address == Phy address which is 0x8000_0000 based.
  17  */
  18
  19 #include <linux/dma-noncoherent.h>
  20 #include <asm/cache.h>
  21 #include <asm/cacheflush.h>
  22
  23 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
  24                 gfp_t gfp, unsigned long attrs)
  25 {
  26         unsigned long order = get_order(size);
  27         struct page *page;
  28         phys_addr_t paddr;
  29         void *kvaddr;
  30         int need_coh = 1, need_kvaddr = 0;
  31
  32         page = alloc_pages(gfp, order);
  33         if (!page)
  34                 return NULL;
  35
  36         /*
  37          * IOC relies on all data (even coherent DMA data) being in cache
  38          * Thus allocate normal cached memory
  39          *
  40          * The gains with IOC are two pronged:
  41          *   -For streaming data, elides need for cache maintenance, saving
  42          *    cycles in flush code, and bus bandwidth as all the lines of a
  43          *    buffer need to be flushed out to memory
  44          *   -For coherent data, Read/Write to buffers terminate early in cache
  45          *   (vs. always going to memory - thus are faster)
  46          */
  47         if ((is_isa_arcv2() && ioc_enable) ||
  48             (attrs & DMA_ATTR_NON_CONSISTENT))
  49                 need_coh = 0;
  50
  51         /*
  52          * - A coherent buffer needs MMU mapping to enforce non-cachability
  53          * - A highmem page needs a virtual handle (hence MMU mapping)
  54          *   independent of cachability
  55          */
  56         if (PageHighMem(page) || need_coh)
  57                 need_kvaddr = 1;
  58
  59         /* This is linear addr (0x8000_0000 based) */
  60         paddr = page_to_phys(page);
  61
  62         *dma_handle = paddr;
  63
  64         /* This is kernel Virtual address (0x7000_0000 based) */
  65         if (need_kvaddr) {
  66                 kvaddr = ioremap_nocache(paddr, size);
  67                 if (kvaddr == NULL) {
  68                         __free_pages(page, order);
  69                         return NULL;
  70                 }
  71         } else {
  72                 kvaddr = (void *)(u32)paddr;
  73         }
  74
  75         /*
  76          * Evict any existing L1 and/or L2 lines for the backing page
  77          * in case it was used earlier as a normal "cached" page.
  78          * Yeah this bit us - STAR 9000898266
  79          *
  80          * Although core does call flush_cache_vmap(), it gets kvaddr hence
  81          * can't be used to efficiently flush L1 and/or L2 which need paddr
  82          * Currently flush_cache_vmap nukes the L1 cache completely which
  83          * will be optimized as a separate commit
  84          */
  85         if (need_coh)
  86                 dma_cache_wback_inv(paddr, size);
  87
  88         return kvaddr;
  89 }
  90
  91 void arch_dma_free(struct device *dev, size_t size, void *vaddr,
  92                 dma_addr_t dma_handle, unsigned long attrs)
  93 {
  94         phys_addr_t paddr = dma_handle;
  95         struct page *page = virt_to_page(paddr);
  96         int is_non_coh = 1;
  97
  98         is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
  99                         (is_isa_arcv2() && ioc_enable);
 100
 101         if (PageHighMem(page) || !is_non_coh)
 102                 iounmap((void __force __iomem *)vaddr);
 103
 104         __free_pages(page, get_order(size));
 105 }
 106
 107 int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 108                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 109                 unsigned long attrs)
 110 {
 111         unsigned long user_count = vma_pages(vma);
 112         unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 113         unsigned long pfn = __phys_to_pfn(dma_addr);
 114         unsigned long off = vma->vm_pgoff;
 115         int ret = -ENXIO;
 116
 117         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 118
 119         if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 120                 return ret;
 121
 122         if (off < count && user_count <= (count - off)) {
 123                 ret = remap_pfn_range(vma, vma->vm_start,
 124                                       pfn + off,
 125                                       user_count << PAGE_SHIFT,
 126                                       vma->vm_page_prot);
 127         }
 128
 129         return ret;
 130 }
 131
 132 /*
 133  * Cache operations depending on function and direction argument, inspired by
 134  * https://lkml.org/lkml/2018/5/18/979
 135  * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
 136  * dma-mapping: provide a generic dma-noncoherent implementation)"
 137  *
 138  *          |   map          ==  for_device     |   unmap     ==  for_cpu
 139  *          |----------------------------------------------------------------
 140  * TO_DEV   |   writeback        writeback      |   none          none
 141  * FROM_DEV |   invalidate       invalidate     |   invalidate*   invalidate*
 142  * BIDIR    |   writeback+inv    writeback+inv  |   invalidate    invalidate
 143  *
 144  *     [*] needed for CPU speculative prefetches
 145  *
 146  * NOTE: we don't check the validity of direction argument as it is done in
 147  * upper layer functions (in include/linux/dma-mapping.h)
 148  */
 149
 150 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 151                 size_t size, enum dma_data_direction dir)
 152 {
 153         switch (dir) {
 154         case DMA_TO_DEVICE:
 155                 dma_cache_wback(paddr, size);
 156                 break;
 157
 158         case DMA_FROM_DEVICE:
 159                 dma_cache_inv(paddr, size);
 160                 break;
 161
 162         case DMA_BIDIRECTIONAL:
 163                 dma_cache_wback_inv(paddr, size);
 164                 break;
 165
 166         default:
 167                 break;
 168         }
 169 }
 170
 171 void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 172                 size_t size, enum dma_data_direction dir)
 173 {
 174         switch (dir) {
 175         case DMA_TO_DEVICE:
 176                 break;
 177
 178         /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
 179         case DMA_FROM_DEVICE:
 180         case DMA_BIDIRECTIONAL:
 181                 dma_cache_inv(paddr, size);
 182                 break;
 183
 184         default:
 185                 break;
 186         }
 187 }