Merge tag 'armsoc-dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
[sfrench/cifs-2.6.git] / drivers / iommu / intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 /* IO virtual address start page frame number */
75 #define IOVA_START_PFN          (1)
76
77 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
78 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
79 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
80
81 /* page table handling */
82 #define LEVEL_STRIDE            (9)
83 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
84
85 /*
86  * This bitmap is used to advertise the page sizes our hardware support
87  * to the IOMMU core, which will then use this information to split
88  * physically contiguous memory regions it is mapping into page sizes
89  * that we support.
90  *
91  * Traditionally the IOMMU core just handed us the mappings directly,
92  * after making sure the size is an order of a 4KiB page and that the
93  * mapping has natural alignment.
94  *
95  * To retain this behavior, we currently advertise that we support
96  * all page sizes that are an order of 4KiB.
97  *
98  * If at some point we'd like to utilize the IOMMU core's new behavior,
99  * we could change this to advertise the real page sizes we support.
100  */
101 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
102
103 static inline int agaw_to_level(int agaw)
104 {
105         return agaw + 2;
106 }
107
108 static inline int agaw_to_width(int agaw)
109 {
110         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111 }
112
113 static inline int width_to_agaw(int width)
114 {
115         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116 }
117
118 static inline unsigned int level_to_offset_bits(int level)
119 {
120         return (level - 1) * LEVEL_STRIDE;
121 }
122
123 static inline int pfn_level_offset(unsigned long pfn, int level)
124 {
125         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126 }
127
128 static inline unsigned long level_mask(int level)
129 {
130         return -1UL << level_to_offset_bits(level);
131 }
132
133 static inline unsigned long level_size(int level)
134 {
135         return 1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long align_to_level(unsigned long pfn, int level)
139 {
140         return (pfn + level_size(level) - 1) & level_mask(level);
141 }
142
143 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144 {
145         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146 }
147
148 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149    are never going to work. */
150 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151 {
152         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153 }
154
155 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156 {
157         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159 static inline unsigned long page_to_dma_pfn(struct page *pg)
160 {
161         return mm_to_dma_pfn(page_to_pfn(pg));
162 }
163 static inline unsigned long virt_to_dma_pfn(void *p)
164 {
165         return page_to_dma_pfn(virt_to_page(p));
166 }
167
168 /* global iommu list, set NULL for ignored DMAR units */
169 static struct intel_iommu **g_iommus;
170
171 static void __init check_tylersburg_isoch(void);
172 static int rwbf_quirk;
173
174 /*
175  * set to 1 to panic kernel if can't successfully enable VT-d
176  * (used when kernel is launched w/ TXT)
177  */
178 static int force_on = 0;
179
180 /*
181  * 0: Present
182  * 1-11: Reserved
183  * 12-63: Context Ptr (12 - (haw-1))
184  * 64-127: Reserved
185  */
186 struct root_entry {
187         u64     val;
188         u64     rsvd1;
189 };
190 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191 static inline bool root_present(struct root_entry *root)
192 {
193         return (root->val & 1);
194 }
195 static inline void set_root_present(struct root_entry *root)
196 {
197         root->val |= 1;
198 }
199 static inline void set_root_value(struct root_entry *root, unsigned long value)
200 {
201         root->val &= ~VTD_PAGE_MASK;
202         root->val |= value & VTD_PAGE_MASK;
203 }
204
205 static inline struct context_entry *
206 get_context_addr_from_root(struct root_entry *root)
207 {
208         return (struct context_entry *)
209                 (root_present(root)?phys_to_virt(
210                 root->val & VTD_PAGE_MASK) :
211                 NULL);
212 }
213
214 /*
215  * low 64 bits:
216  * 0: present
217  * 1: fault processing disable
218  * 2-3: translation type
219  * 12-63: address space root
220  * high 64 bits:
221  * 0-2: address width
222  * 3-6: aval
223  * 8-23: domain id
224  */
225 struct context_entry {
226         u64 lo;
227         u64 hi;
228 };
229
230 static inline bool context_present(struct context_entry *context)
231 {
232         return (context->lo & 1);
233 }
234 static inline void context_set_present(struct context_entry *context)
235 {
236         context->lo |= 1;
237 }
238
239 static inline void context_set_fault_enable(struct context_entry *context)
240 {
241         context->lo &= (((u64)-1) << 2) | 1;
242 }
243
244 static inline void context_set_translation_type(struct context_entry *context,
245                                                 unsigned long value)
246 {
247         context->lo &= (((u64)-1) << 4) | 3;
248         context->lo |= (value & 3) << 2;
249 }
250
251 static inline void context_set_address_root(struct context_entry *context,
252                                             unsigned long value)
253 {
254         context->lo &= ~VTD_PAGE_MASK;
255         context->lo |= value & VTD_PAGE_MASK;
256 }
257
258 static inline void context_set_address_width(struct context_entry *context,
259                                              unsigned long value)
260 {
261         context->hi |= value & 7;
262 }
263
264 static inline void context_set_domain_id(struct context_entry *context,
265                                          unsigned long value)
266 {
267         context->hi |= (value & ((1 << 16) - 1)) << 8;
268 }
269
270 static inline void context_clear_entry(struct context_entry *context)
271 {
272         context->lo = 0;
273         context->hi = 0;
274 }
275
276 /*
277  * 0: readable
278  * 1: writable
279  * 2-6: reserved
280  * 7: super page
281  * 8-10: available
282  * 11: snoop behavior
283  * 12-63: Host physcial address
284  */
285 struct dma_pte {
286         u64 val;
287 };
288
289 static inline void dma_clear_pte(struct dma_pte *pte)
290 {
291         pte->val = 0;
292 }
293
294 static inline u64 dma_pte_addr(struct dma_pte *pte)
295 {
296 #ifdef CONFIG_64BIT
297         return pte->val & VTD_PAGE_MASK;
298 #else
299         /* Must have a full atomic 64-bit read */
300         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
301 #endif
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306         return (pte->val & 3) != 0;
307 }
308
309 static inline bool dma_pte_superpage(struct dma_pte *pte)
310 {
311         return (pte->val & DMA_PTE_LARGE_PAGE);
312 }
313
314 static inline int first_pte_in_page(struct dma_pte *pte)
315 {
316         return !((unsigned long)pte & ~VTD_PAGE_MASK);
317 }
318
319 /*
320  * This domain is a statically identity mapping domain.
321  *      1. This domain creats a static 1:1 mapping to all usable memory.
322  *      2. It maps to each iommu if successful.
323  *      3. Each iommu mapps to this domain if successful.
324  */
325 static struct dmar_domain *si_domain;
326 static int hw_pass_through = 1;
327
328 /* domain represents a virtual machine, more than one devices
329  * across iommus may be owned in one domain, e.g. kvm guest.
330  */
331 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
332
333 /* si_domain contains mulitple devices */
334 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
335
336 struct dmar_domain {
337         int     id;                     /* domain id */
338         int     nid;                    /* node id */
339         DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
340                                         /* bitmap of iommus this domain uses*/
341
342         struct list_head devices;       /* all devices' list */
343         struct iova_domain iovad;       /* iova's that belong to this domain */
344
345         struct dma_pte  *pgd;           /* virtual address */
346         int             gaw;            /* max guest address width */
347
348         /* adjusted guest address width, 0 is level 2 30-bit */
349         int             agaw;
350
351         int             flags;          /* flags to find out type of domain */
352
353         int             iommu_coherency;/* indicate coherency of iommu access */
354         int             iommu_snooping; /* indicate snooping control feature*/
355         int             iommu_count;    /* reference count of iommu */
356         int             iommu_superpage;/* Level of superpages supported:
357                                            0 == 4KiB (no superpages), 1 == 2MiB,
358                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
359         spinlock_t      iommu_lock;     /* protect iommu set in domain */
360         u64             max_addr;       /* maximum mapped address */
361
362         struct iommu_domain domain;     /* generic domain data structure for
363                                            iommu core */
364 };
365
366 /* PCI domain-device relationship */
367 struct device_domain_info {
368         struct list_head link;  /* link to domain siblings */
369         struct list_head global; /* link to global list */
370         u8 bus;                 /* PCI bus number */
371         u8 devfn;               /* PCI devfn number */
372         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
373         struct intel_iommu *iommu; /* IOMMU used by this device */
374         struct dmar_domain *domain; /* pointer to domain */
375 };
376
377 struct dmar_rmrr_unit {
378         struct list_head list;          /* list of rmrr units   */
379         struct acpi_dmar_header *hdr;   /* ACPI header          */
380         u64     base_address;           /* reserved base address*/
381         u64     end_address;            /* reserved end address */
382         struct dmar_dev_scope *devices; /* target devices */
383         int     devices_cnt;            /* target device count */
384 };
385
386 struct dmar_atsr_unit {
387         struct list_head list;          /* list of ATSR units */
388         struct acpi_dmar_header *hdr;   /* ACPI header */
389         struct dmar_dev_scope *devices; /* target devices */
390         int devices_cnt;                /* target device count */
391         u8 include_all:1;               /* include all ports */
392 };
393
394 static LIST_HEAD(dmar_atsr_units);
395 static LIST_HEAD(dmar_rmrr_units);
396
397 #define for_each_rmrr_units(rmrr) \
398         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
399
400 static void flush_unmaps_timeout(unsigned long data);
401
402 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
403
404 #define HIGH_WATER_MARK 250
405 struct deferred_flush_tables {
406         int next;
407         struct iova *iova[HIGH_WATER_MARK];
408         struct dmar_domain *domain[HIGH_WATER_MARK];
409         struct page *freelist[HIGH_WATER_MARK];
410 };
411
412 static struct deferred_flush_tables *deferred_flush;
413
414 /* bitmap for indexing intel_iommus */
415 static int g_num_of_iommus;
416
417 static DEFINE_SPINLOCK(async_umap_flush_lock);
418 static LIST_HEAD(unmaps_to_do);
419
420 static int timer_on;
421 static long list_size;
422
423 static void domain_exit(struct dmar_domain *domain);
424 static void domain_remove_dev_info(struct dmar_domain *domain);
425 static void domain_remove_one_dev_info(struct dmar_domain *domain,
426                                        struct device *dev);
427 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
428                                            struct device *dev);
429 static int domain_detach_iommu(struct dmar_domain *domain,
430                                struct intel_iommu *iommu);
431
432 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
433 int dmar_disabled = 0;
434 #else
435 int dmar_disabled = 1;
436 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
437
438 int intel_iommu_enabled = 0;
439 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
440
441 static int dmar_map_gfx = 1;
442 static int dmar_forcedac;
443 static int intel_iommu_strict;
444 static int intel_iommu_superpage = 1;
445
446 int intel_iommu_gfx_mapped;
447 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
448
449 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
450 static DEFINE_SPINLOCK(device_domain_lock);
451 static LIST_HEAD(device_domain_list);
452
453 static const struct iommu_ops intel_iommu_ops;
454
455 /* Convert generic 'struct iommu_domain to private struct dmar_domain */
456 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
457 {
458         return container_of(dom, struct dmar_domain, domain);
459 }
460
461 static int __init intel_iommu_setup(char *str)
462 {
463         if (!str)
464                 return -EINVAL;
465         while (*str) {
466                 if (!strncmp(str, "on", 2)) {
467                         dmar_disabled = 0;
468                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
469                 } else if (!strncmp(str, "off", 3)) {
470                         dmar_disabled = 1;
471                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
472                 } else if (!strncmp(str, "igfx_off", 8)) {
473                         dmar_map_gfx = 0;
474                         printk(KERN_INFO
475                                 "Intel-IOMMU: disable GFX device mapping\n");
476                 } else if (!strncmp(str, "forcedac", 8)) {
477                         printk(KERN_INFO
478                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
479                         dmar_forcedac = 1;
480                 } else if (!strncmp(str, "strict", 6)) {
481                         printk(KERN_INFO
482                                 "Intel-IOMMU: disable batched IOTLB flush\n");
483                         intel_iommu_strict = 1;
484                 } else if (!strncmp(str, "sp_off", 6)) {
485                         printk(KERN_INFO
486                                 "Intel-IOMMU: disable supported super page\n");
487                         intel_iommu_superpage = 0;
488                 }
489
490                 str += strcspn(str, ",");
491                 while (*str == ',')
492                         str++;
493         }
494         return 0;
495 }
496 __setup("intel_iommu=", intel_iommu_setup);
497
498 static struct kmem_cache *iommu_domain_cache;
499 static struct kmem_cache *iommu_devinfo_cache;
500
501 static inline void *alloc_pgtable_page(int node)
502 {
503         struct page *page;
504         void *vaddr = NULL;
505
506         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
507         if (page)
508                 vaddr = page_address(page);
509         return vaddr;
510 }
511
512 static inline void free_pgtable_page(void *vaddr)
513 {
514         free_page((unsigned long)vaddr);
515 }
516
517 static inline void *alloc_domain_mem(void)
518 {
519         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
520 }
521
522 static void free_domain_mem(void *vaddr)
523 {
524         kmem_cache_free(iommu_domain_cache, vaddr);
525 }
526
527 static inline void * alloc_devinfo_mem(void)
528 {
529         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
530 }
531
532 static inline void free_devinfo_mem(void *vaddr)
533 {
534         kmem_cache_free(iommu_devinfo_cache, vaddr);
535 }
536
537 static inline int domain_type_is_vm(struct dmar_domain *domain)
538 {
539         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
540 }
541
542 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
543 {
544         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
545                                 DOMAIN_FLAG_STATIC_IDENTITY);
546 }
547
548 static inline int domain_pfn_supported(struct dmar_domain *domain,
549                                        unsigned long pfn)
550 {
551         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
552
553         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
554 }
555
556 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
557 {
558         unsigned long sagaw;
559         int agaw = -1;
560
561         sagaw = cap_sagaw(iommu->cap);
562         for (agaw = width_to_agaw(max_gaw);
563              agaw >= 0; agaw--) {
564                 if (test_bit(agaw, &sagaw))
565                         break;
566         }
567
568         return agaw;
569 }
570
571 /*
572  * Calculate max SAGAW for each iommu.
573  */
574 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
575 {
576         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
577 }
578
579 /*
580  * calculate agaw for each iommu.
581  * "SAGAW" may be different across iommus, use a default agaw, and
582  * get a supported less agaw for iommus that don't support the default agaw.
583  */
584 int iommu_calculate_agaw(struct intel_iommu *iommu)
585 {
586         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
587 }
588
589 /* This functionin only returns single iommu in a domain */
590 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
591 {
592         int iommu_id;
593
594         /* si_domain and vm domain should not get here. */
595         BUG_ON(domain_type_is_vm_or_si(domain));
596         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
597         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
598                 return NULL;
599
600         return g_iommus[iommu_id];
601 }
602
603 static void domain_update_iommu_coherency(struct dmar_domain *domain)
604 {
605         struct dmar_drhd_unit *drhd;
606         struct intel_iommu *iommu;
607         bool found = false;
608         int i;
609
610         domain->iommu_coherency = 1;
611
612         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
613                 found = true;
614                 if (!ecap_coherent(g_iommus[i]->ecap)) {
615                         domain->iommu_coherency = 0;
616                         break;
617                 }
618         }
619         if (found)
620                 return;
621
622         /* No hardware attached; use lowest common denominator */
623         rcu_read_lock();
624         for_each_active_iommu(iommu, drhd) {
625                 if (!ecap_coherent(iommu->ecap)) {
626                         domain->iommu_coherency = 0;
627                         break;
628                 }
629         }
630         rcu_read_unlock();
631 }
632
633 static int domain_update_iommu_snooping(struct intel_iommu *skip)
634 {
635         struct dmar_drhd_unit *drhd;
636         struct intel_iommu *iommu;
637         int ret = 1;
638
639         rcu_read_lock();
640         for_each_active_iommu(iommu, drhd) {
641                 if (iommu != skip) {
642                         if (!ecap_sc_support(iommu->ecap)) {
643                                 ret = 0;
644                                 break;
645                         }
646                 }
647         }
648         rcu_read_unlock();
649
650         return ret;
651 }
652
653 static int domain_update_iommu_superpage(struct intel_iommu *skip)
654 {
655         struct dmar_drhd_unit *drhd;
656         struct intel_iommu *iommu;
657         int mask = 0xf;
658
659         if (!intel_iommu_superpage) {
660                 return 0;
661         }
662
663         /* set iommu_superpage to the smallest common denominator */
664         rcu_read_lock();
665         for_each_active_iommu(iommu, drhd) {
666                 if (iommu != skip) {
667                         mask &= cap_super_page_val(iommu->cap);
668                         if (!mask)
669                                 break;
670                 }
671         }
672         rcu_read_unlock();
673
674         return fls(mask);
675 }
676
677 /* Some capabilities may be different across iommus */
678 static void domain_update_iommu_cap(struct dmar_domain *domain)
679 {
680         domain_update_iommu_coherency(domain);
681         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
682         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
683 }
684
685 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
686 {
687         struct dmar_drhd_unit *drhd = NULL;
688         struct intel_iommu *iommu;
689         struct device *tmp;
690         struct pci_dev *ptmp, *pdev = NULL;
691         u16 segment = 0;
692         int i;
693
694         if (dev_is_pci(dev)) {
695                 pdev = to_pci_dev(dev);
696                 segment = pci_domain_nr(pdev->bus);
697         } else if (has_acpi_companion(dev))
698                 dev = &ACPI_COMPANION(dev)->dev;
699
700         rcu_read_lock();
701         for_each_active_iommu(iommu, drhd) {
702                 if (pdev && segment != drhd->segment)
703                         continue;
704
705                 for_each_active_dev_scope(drhd->devices,
706                                           drhd->devices_cnt, i, tmp) {
707                         if (tmp == dev) {
708                                 *bus = drhd->devices[i].bus;
709                                 *devfn = drhd->devices[i].devfn;
710                                 goto out;
711                         }
712
713                         if (!pdev || !dev_is_pci(tmp))
714                                 continue;
715
716                         ptmp = to_pci_dev(tmp);
717                         if (ptmp->subordinate &&
718                             ptmp->subordinate->number <= pdev->bus->number &&
719                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
720                                 goto got_pdev;
721                 }
722
723                 if (pdev && drhd->include_all) {
724                 got_pdev:
725                         *bus = pdev->bus->number;
726                         *devfn = pdev->devfn;
727                         goto out;
728                 }
729         }
730         iommu = NULL;
731  out:
732         rcu_read_unlock();
733
734         return iommu;
735 }
736
737 static void domain_flush_cache(struct dmar_domain *domain,
738                                void *addr, int size)
739 {
740         if (!domain->iommu_coherency)
741                 clflush_cache_range(addr, size);
742 }
743
744 /* Gets context entry for a given bus and devfn */
745 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
746                 u8 bus, u8 devfn)
747 {
748         struct root_entry *root;
749         struct context_entry *context;
750         unsigned long phy_addr;
751         unsigned long flags;
752
753         spin_lock_irqsave(&iommu->lock, flags);
754         root = &iommu->root_entry[bus];
755         context = get_context_addr_from_root(root);
756         if (!context) {
757                 context = (struct context_entry *)
758                                 alloc_pgtable_page(iommu->node);
759                 if (!context) {
760                         spin_unlock_irqrestore(&iommu->lock, flags);
761                         return NULL;
762                 }
763                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
764                 phy_addr = virt_to_phys((void *)context);
765                 set_root_value(root, phy_addr);
766                 set_root_present(root);
767                 __iommu_flush_cache(iommu, root, sizeof(*root));
768         }
769         spin_unlock_irqrestore(&iommu->lock, flags);
770         return &context[devfn];
771 }
772
773 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
774 {
775         struct root_entry *root;
776         struct context_entry *context;
777         int ret;
778         unsigned long flags;
779
780         spin_lock_irqsave(&iommu->lock, flags);
781         root = &iommu->root_entry[bus];
782         context = get_context_addr_from_root(root);
783         if (!context) {
784                 ret = 0;
785                 goto out;
786         }
787         ret = context_present(&context[devfn]);
788 out:
789         spin_unlock_irqrestore(&iommu->lock, flags);
790         return ret;
791 }
792
793 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
794 {
795         struct root_entry *root;
796         struct context_entry *context;
797         unsigned long flags;
798
799         spin_lock_irqsave(&iommu->lock, flags);
800         root = &iommu->root_entry[bus];
801         context = get_context_addr_from_root(root);
802         if (context) {
803                 context_clear_entry(&context[devfn]);
804                 __iommu_flush_cache(iommu, &context[devfn], \
805                         sizeof(*context));
806         }
807         spin_unlock_irqrestore(&iommu->lock, flags);
808 }
809
810 static void free_context_table(struct intel_iommu *iommu)
811 {
812         struct root_entry *root;
813         int i;
814         unsigned long flags;
815         struct context_entry *context;
816
817         spin_lock_irqsave(&iommu->lock, flags);
818         if (!iommu->root_entry) {
819                 goto out;
820         }
821         for (i = 0; i < ROOT_ENTRY_NR; i++) {
822                 root = &iommu->root_entry[i];
823                 context = get_context_addr_from_root(root);
824                 if (context)
825                         free_pgtable_page(context);
826         }
827         free_pgtable_page(iommu->root_entry);
828         iommu->root_entry = NULL;
829 out:
830         spin_unlock_irqrestore(&iommu->lock, flags);
831 }
832
833 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
834                                       unsigned long pfn, int *target_level)
835 {
836         struct dma_pte *parent, *pte = NULL;
837         int level = agaw_to_level(domain->agaw);
838         int offset;
839
840         BUG_ON(!domain->pgd);
841
842         if (!domain_pfn_supported(domain, pfn))
843                 /* Address beyond IOMMU's addressing capabilities. */
844                 return NULL;
845
846         parent = domain->pgd;
847
848         while (1) {
849                 void *tmp_page;
850
851                 offset = pfn_level_offset(pfn, level);
852                 pte = &parent[offset];
853                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
854                         break;
855                 if (level == *target_level)
856                         break;
857
858                 if (!dma_pte_present(pte)) {
859                         uint64_t pteval;
860
861                         tmp_page = alloc_pgtable_page(domain->nid);
862
863                         if (!tmp_page)
864                                 return NULL;
865
866                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
867                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
868                         if (cmpxchg64(&pte->val, 0ULL, pteval))
869                                 /* Someone else set it while we were thinking; use theirs. */
870                                 free_pgtable_page(tmp_page);
871                         else
872                                 domain_flush_cache(domain, pte, sizeof(*pte));
873                 }
874                 if (level == 1)
875                         break;
876
877                 parent = phys_to_virt(dma_pte_addr(pte));
878                 level--;
879         }
880
881         if (!*target_level)
882                 *target_level = level;
883
884         return pte;
885 }
886
887
888 /* return address's pte at specific level */
889 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
890                                          unsigned long pfn,
891                                          int level, int *large_page)
892 {
893         struct dma_pte *parent, *pte = NULL;
894         int total = agaw_to_level(domain->agaw);
895         int offset;
896
897         parent = domain->pgd;
898         while (level <= total) {
899                 offset = pfn_level_offset(pfn, total);
900                 pte = &parent[offset];
901                 if (level == total)
902                         return pte;
903
904                 if (!dma_pte_present(pte)) {
905                         *large_page = total;
906                         break;
907                 }
908
909                 if (dma_pte_superpage(pte)) {
910                         *large_page = total;
911                         return pte;
912                 }
913
914                 parent = phys_to_virt(dma_pte_addr(pte));
915                 total--;
916         }
917         return NULL;
918 }
919
920 /* clear last level pte, a tlb flush should be followed */
921 static void dma_pte_clear_range(struct dmar_domain *domain,
922                                 unsigned long start_pfn,
923                                 unsigned long last_pfn)
924 {
925         unsigned int large_page = 1;
926         struct dma_pte *first_pte, *pte;
927
928         BUG_ON(!domain_pfn_supported(domain, start_pfn));
929         BUG_ON(!domain_pfn_supported(domain, last_pfn));
930         BUG_ON(start_pfn > last_pfn);
931
932         /* we don't need lock here; nobody else touches the iova range */
933         do {
934                 large_page = 1;
935                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
936                 if (!pte) {
937                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
938                         continue;
939                 }
940                 do {
941                         dma_clear_pte(pte);
942                         start_pfn += lvl_to_nr_pages(large_page);
943                         pte++;
944                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
945
946                 domain_flush_cache(domain, first_pte,
947                                    (void *)pte - (void *)first_pte);
948
949         } while (start_pfn && start_pfn <= last_pfn);
950 }
951
952 static void dma_pte_free_level(struct dmar_domain *domain, int level,
953                                struct dma_pte *pte, unsigned long pfn,
954                                unsigned long start_pfn, unsigned long last_pfn)
955 {
956         pfn = max(start_pfn, pfn);
957         pte = &pte[pfn_level_offset(pfn, level)];
958
959         do {
960                 unsigned long level_pfn;
961                 struct dma_pte *level_pte;
962
963                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
964                         goto next;
965
966                 level_pfn = pfn & level_mask(level - 1);
967                 level_pte = phys_to_virt(dma_pte_addr(pte));
968
969                 if (level > 2)
970                         dma_pte_free_level(domain, level - 1, level_pte,
971                                            level_pfn, start_pfn, last_pfn);
972
973                 /* If range covers entire pagetable, free it */
974                 if (!(start_pfn > level_pfn ||
975                       last_pfn < level_pfn + level_size(level) - 1)) {
976                         dma_clear_pte(pte);
977                         domain_flush_cache(domain, pte, sizeof(*pte));
978                         free_pgtable_page(level_pte);
979                 }
980 next:
981                 pfn += level_size(level);
982         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
983 }
984
985 /* free page table pages. last level pte should already be cleared */
986 static void dma_pte_free_pagetable(struct dmar_domain *domain,
987                                    unsigned long start_pfn,
988                                    unsigned long last_pfn)
989 {
990         BUG_ON(!domain_pfn_supported(domain, start_pfn));
991         BUG_ON(!domain_pfn_supported(domain, last_pfn));
992         BUG_ON(start_pfn > last_pfn);
993
994         dma_pte_clear_range(domain, start_pfn, last_pfn);
995
996         /* We don't need lock here; nobody else touches the iova range */
997         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
998                            domain->pgd, 0, start_pfn, last_pfn);
999
1000         /* free pgd */
1001         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1002                 free_pgtable_page(domain->pgd);
1003                 domain->pgd = NULL;
1004         }
1005 }
1006
1007 /* When a page at a given level is being unlinked from its parent, we don't
1008    need to *modify* it at all. All we need to do is make a list of all the
1009    pages which can be freed just as soon as we've flushed the IOTLB and we
1010    know the hardware page-walk will no longer touch them.
1011    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1012    be freed. */
1013 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1014                                             int level, struct dma_pte *pte,
1015                                             struct page *freelist)
1016 {
1017         struct page *pg;
1018
1019         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1020         pg->freelist = freelist;
1021         freelist = pg;
1022
1023         if (level == 1)
1024                 return freelist;
1025
1026         pte = page_address(pg);
1027         do {
1028                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1029                         freelist = dma_pte_list_pagetables(domain, level - 1,
1030                                                            pte, freelist);
1031                 pte++;
1032         } while (!first_pte_in_page(pte));
1033
1034         return freelist;
1035 }
1036
1037 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1038                                         struct dma_pte *pte, unsigned long pfn,
1039                                         unsigned long start_pfn,
1040                                         unsigned long last_pfn,
1041                                         struct page *freelist)
1042 {
1043         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1044
1045         pfn = max(start_pfn, pfn);
1046         pte = &pte[pfn_level_offset(pfn, level)];
1047
1048         do {
1049                 unsigned long level_pfn;
1050
1051                 if (!dma_pte_present(pte))
1052                         goto next;
1053
1054                 level_pfn = pfn & level_mask(level);
1055
1056                 /* If range covers entire pagetable, free it */
1057                 if (start_pfn <= level_pfn &&
1058                     last_pfn >= level_pfn + level_size(level) - 1) {
1059                         /* These suborbinate page tables are going away entirely. Don't
1060                            bother to clear them; we're just going to *free* them. */
1061                         if (level > 1 && !dma_pte_superpage(pte))
1062                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1063
1064                         dma_clear_pte(pte);
1065                         if (!first_pte)
1066                                 first_pte = pte;
1067                         last_pte = pte;
1068                 } else if (level > 1) {
1069                         /* Recurse down into a level that isn't *entirely* obsolete */
1070                         freelist = dma_pte_clear_level(domain, level - 1,
1071                                                        phys_to_virt(dma_pte_addr(pte)),
1072                                                        level_pfn, start_pfn, last_pfn,
1073                                                        freelist);
1074                 }
1075 next:
1076                 pfn += level_size(level);
1077         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1078
1079         if (first_pte)
1080                 domain_flush_cache(domain, first_pte,
1081                                    (void *)++last_pte - (void *)first_pte);
1082
1083         return freelist;
1084 }
1085
1086 /* We can't just free the pages because the IOMMU may still be walking
1087    the page tables, and may have cached the intermediate levels. The
1088    pages can only be freed after the IOTLB flush has been done. */
1089 struct page *domain_unmap(struct dmar_domain *domain,
1090                           unsigned long start_pfn,
1091                           unsigned long last_pfn)
1092 {
1093         struct page *freelist = NULL;
1094
1095         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1096         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1097         BUG_ON(start_pfn > last_pfn);
1098
1099         /* we don't need lock here; nobody else touches the iova range */
1100         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1101                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1102
1103         /* free pgd */
1104         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1105                 struct page *pgd_page = virt_to_page(domain->pgd);
1106                 pgd_page->freelist = freelist;
1107                 freelist = pgd_page;
1108
1109                 domain->pgd = NULL;
1110         }
1111
1112         return freelist;
1113 }
1114
1115 void dma_free_pagelist(struct page *freelist)
1116 {
1117         struct page *pg;
1118
1119         while ((pg = freelist)) {
1120                 freelist = pg->freelist;
1121                 free_pgtable_page(page_address(pg));
1122         }
1123 }
1124
1125 /* iommu handling */
1126 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1127 {
1128         struct root_entry *root;
1129         unsigned long flags;
1130
1131         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1132         if (!root) {
1133                 pr_err("IOMMU: allocating root entry for %s failed\n",
1134                         iommu->name);
1135                 return -ENOMEM;
1136         }
1137
1138         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1139
1140         spin_lock_irqsave(&iommu->lock, flags);
1141         iommu->root_entry = root;
1142         spin_unlock_irqrestore(&iommu->lock, flags);
1143
1144         return 0;
1145 }
1146
1147 static void iommu_set_root_entry(struct intel_iommu *iommu)
1148 {
1149         void *addr;
1150         u32 sts;
1151         unsigned long flag;
1152
1153         addr = iommu->root_entry;
1154
1155         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1156         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1157
1158         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1159
1160         /* Make sure hardware complete it */
1161         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1162                       readl, (sts & DMA_GSTS_RTPS), sts);
1163
1164         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1165 }
1166
1167 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1168 {
1169         u32 val;
1170         unsigned long flag;
1171
1172         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1173                 return;
1174
1175         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1176         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1177
1178         /* Make sure hardware complete it */
1179         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1180                       readl, (!(val & DMA_GSTS_WBFS)), val);
1181
1182         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1183 }
1184
1185 /* return value determine if we need a write buffer flush */
1186 static void __iommu_flush_context(struct intel_iommu *iommu,
1187                                   u16 did, u16 source_id, u8 function_mask,
1188                                   u64 type)
1189 {
1190         u64 val = 0;
1191         unsigned long flag;
1192
1193         switch (type) {
1194         case DMA_CCMD_GLOBAL_INVL:
1195                 val = DMA_CCMD_GLOBAL_INVL;
1196                 break;
1197         case DMA_CCMD_DOMAIN_INVL:
1198                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1199                 break;
1200         case DMA_CCMD_DEVICE_INVL:
1201                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1202                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1203                 break;
1204         default:
1205                 BUG();
1206         }
1207         val |= DMA_CCMD_ICC;
1208
1209         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1210         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1211
1212         /* Make sure hardware complete it */
1213         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1214                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1215
1216         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1217 }
1218
1219 /* return value determine if we need a write buffer flush */
1220 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1221                                 u64 addr, unsigned int size_order, u64 type)
1222 {
1223         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1224         u64 val = 0, val_iva = 0;
1225         unsigned long flag;
1226
1227         switch (type) {
1228         case DMA_TLB_GLOBAL_FLUSH:
1229                 /* global flush doesn't need set IVA_REG */
1230                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1231                 break;
1232         case DMA_TLB_DSI_FLUSH:
1233                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1234                 break;
1235         case DMA_TLB_PSI_FLUSH:
1236                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1237                 /* IH bit is passed in as part of address */
1238                 val_iva = size_order | addr;
1239                 break;
1240         default:
1241                 BUG();
1242         }
1243         /* Note: set drain read/write */
1244 #if 0
1245         /*
1246          * This is probably to be super secure.. Looks like we can
1247          * ignore it without any impact.
1248          */
1249         if (cap_read_drain(iommu->cap))
1250                 val |= DMA_TLB_READ_DRAIN;
1251 #endif
1252         if (cap_write_drain(iommu->cap))
1253                 val |= DMA_TLB_WRITE_DRAIN;
1254
1255         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1256         /* Note: Only uses first TLB reg currently */
1257         if (val_iva)
1258                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1259         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1260
1261         /* Make sure hardware complete it */
1262         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1263                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1264
1265         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1266
1267         /* check IOTLB invalidation granularity */
1268         if (DMA_TLB_IAIG(val) == 0)
1269                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1270         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1271                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1272                         (unsigned long long)DMA_TLB_IIRG(type),
1273                         (unsigned long long)DMA_TLB_IAIG(val));
1274 }
1275
1276 static struct device_domain_info *
1277 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1278                          u8 bus, u8 devfn)
1279 {
1280         bool found = false;
1281         unsigned long flags;
1282         struct device_domain_info *info;
1283         struct pci_dev *pdev;
1284
1285         if (!ecap_dev_iotlb_support(iommu->ecap))
1286                 return NULL;
1287
1288         if (!iommu->qi)
1289                 return NULL;
1290
1291         spin_lock_irqsave(&device_domain_lock, flags);
1292         list_for_each_entry(info, &domain->devices, link)
1293                 if (info->iommu == iommu && info->bus == bus &&
1294                     info->devfn == devfn) {
1295                         found = true;
1296                         break;
1297                 }
1298         spin_unlock_irqrestore(&device_domain_lock, flags);
1299
1300         if (!found || !info->dev || !dev_is_pci(info->dev))
1301                 return NULL;
1302
1303         pdev = to_pci_dev(info->dev);
1304
1305         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1306                 return NULL;
1307
1308         if (!dmar_find_matched_atsr_unit(pdev))
1309                 return NULL;
1310
1311         return info;
1312 }
1313
1314 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1315 {
1316         if (!info || !dev_is_pci(info->dev))
1317                 return;
1318
1319         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1320 }
1321
1322 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1323 {
1324         if (!info->dev || !dev_is_pci(info->dev) ||
1325             !pci_ats_enabled(to_pci_dev(info->dev)))
1326                 return;
1327
1328         pci_disable_ats(to_pci_dev(info->dev));
1329 }
1330
1331 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1332                                   u64 addr, unsigned mask)
1333 {
1334         u16 sid, qdep;
1335         unsigned long flags;
1336         struct device_domain_info *info;
1337
1338         spin_lock_irqsave(&device_domain_lock, flags);
1339         list_for_each_entry(info, &domain->devices, link) {
1340                 struct pci_dev *pdev;
1341                 if (!info->dev || !dev_is_pci(info->dev))
1342                         continue;
1343
1344                 pdev = to_pci_dev(info->dev);
1345                 if (!pci_ats_enabled(pdev))
1346                         continue;
1347
1348                 sid = info->bus << 8 | info->devfn;
1349                 qdep = pci_ats_queue_depth(pdev);
1350                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1351         }
1352         spin_unlock_irqrestore(&device_domain_lock, flags);
1353 }
1354
1355 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1356                                   unsigned long pfn, unsigned int pages, int ih, int map)
1357 {
1358         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1359         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1360
1361         BUG_ON(pages == 0);
1362
1363         if (ih)
1364                 ih = 1 << 6;
1365         /*
1366          * Fallback to domain selective flush if no PSI support or the size is
1367          * too big.
1368          * PSI requires page size to be 2 ^ x, and the base address is naturally
1369          * aligned to the size
1370          */
1371         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1372                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1373                                                 DMA_TLB_DSI_FLUSH);
1374         else
1375                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1376                                                 DMA_TLB_PSI_FLUSH);
1377
1378         /*
1379          * In caching mode, changes of pages from non-present to present require
1380          * flush. However, device IOTLB doesn't need to be flushed in this case.
1381          */
1382         if (!cap_caching_mode(iommu->cap) || !map)
1383                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1384 }
1385
1386 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1387 {
1388         u32 pmen;
1389         unsigned long flags;
1390
1391         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1392         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1393         pmen &= ~DMA_PMEN_EPM;
1394         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1395
1396         /* wait for the protected region status bit to clear */
1397         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1398                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1399
1400         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1401 }
1402
1403 static void iommu_enable_translation(struct intel_iommu *iommu)
1404 {
1405         u32 sts;
1406         unsigned long flags;
1407
1408         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1409         iommu->gcmd |= DMA_GCMD_TE;
1410         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1411
1412         /* Make sure hardware complete it */
1413         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1414                       readl, (sts & DMA_GSTS_TES), sts);
1415
1416         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1417 }
1418
1419 static void iommu_disable_translation(struct intel_iommu *iommu)
1420 {
1421         u32 sts;
1422         unsigned long flag;
1423
1424         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1425         iommu->gcmd &= ~DMA_GCMD_TE;
1426         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1427
1428         /* Make sure hardware complete it */
1429         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1430                       readl, (!(sts & DMA_GSTS_TES)), sts);
1431
1432         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1433 }
1434
1435
1436 static int iommu_init_domains(struct intel_iommu *iommu)
1437 {
1438         unsigned long ndomains;
1439         unsigned long nlongs;
1440
1441         ndomains = cap_ndoms(iommu->cap);
1442         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1443                  iommu->seq_id, ndomains);
1444         nlongs = BITS_TO_LONGS(ndomains);
1445
1446         spin_lock_init(&iommu->lock);
1447
1448         /* TBD: there might be 64K domains,
1449          * consider other allocation for future chip
1450          */
1451         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1452         if (!iommu->domain_ids) {
1453                 pr_err("IOMMU%d: allocating domain id array failed\n",
1454                        iommu->seq_id);
1455                 return -ENOMEM;
1456         }
1457         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1458                         GFP_KERNEL);
1459         if (!iommu->domains) {
1460                 pr_err("IOMMU%d: allocating domain array failed\n",
1461                        iommu->seq_id);
1462                 kfree(iommu->domain_ids);
1463                 iommu->domain_ids = NULL;
1464                 return -ENOMEM;
1465         }
1466
1467         /*
1468          * if Caching mode is set, then invalid translations are tagged
1469          * with domainid 0. Hence we need to pre-allocate it.
1470          */
1471         if (cap_caching_mode(iommu->cap))
1472                 set_bit(0, iommu->domain_ids);
1473         return 0;
1474 }
1475
1476 static void disable_dmar_iommu(struct intel_iommu *iommu)
1477 {
1478         struct dmar_domain *domain;
1479         int i;
1480
1481         if ((iommu->domains) && (iommu->domain_ids)) {
1482                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1483                         /*
1484                          * Domain id 0 is reserved for invalid translation
1485                          * if hardware supports caching mode.
1486                          */
1487                         if (cap_caching_mode(iommu->cap) && i == 0)
1488                                 continue;
1489
1490                         domain = iommu->domains[i];
1491                         clear_bit(i, iommu->domain_ids);
1492                         if (domain_detach_iommu(domain, iommu) == 0 &&
1493                             !domain_type_is_vm(domain))
1494                                 domain_exit(domain);
1495                 }
1496         }
1497
1498         if (iommu->gcmd & DMA_GCMD_TE)
1499                 iommu_disable_translation(iommu);
1500 }
1501
1502 static void free_dmar_iommu(struct intel_iommu *iommu)
1503 {
1504         if ((iommu->domains) && (iommu->domain_ids)) {
1505                 kfree(iommu->domains);
1506                 kfree(iommu->domain_ids);
1507                 iommu->domains = NULL;
1508                 iommu->domain_ids = NULL;
1509         }
1510
1511         g_iommus[iommu->seq_id] = NULL;
1512
1513         /* free context mapping */
1514         free_context_table(iommu);
1515 }
1516
1517 static struct dmar_domain *alloc_domain(int flags)
1518 {
1519         /* domain id for virtual machine, it won't be set in context */
1520         static atomic_t vm_domid = ATOMIC_INIT(0);
1521         struct dmar_domain *domain;
1522
1523         domain = alloc_domain_mem();
1524         if (!domain)
1525                 return NULL;
1526
1527         memset(domain, 0, sizeof(*domain));
1528         domain->nid = -1;
1529         domain->flags = flags;
1530         spin_lock_init(&domain->iommu_lock);
1531         INIT_LIST_HEAD(&domain->devices);
1532         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1533                 domain->id = atomic_inc_return(&vm_domid);
1534
1535         return domain;
1536 }
1537
1538 static int __iommu_attach_domain(struct dmar_domain *domain,
1539                                  struct intel_iommu *iommu)
1540 {
1541         int num;
1542         unsigned long ndomains;
1543
1544         ndomains = cap_ndoms(iommu->cap);
1545         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1546         if (num < ndomains) {
1547                 set_bit(num, iommu->domain_ids);
1548                 iommu->domains[num] = domain;
1549         } else {
1550                 num = -ENOSPC;
1551         }
1552
1553         return num;
1554 }
1555
1556 static int iommu_attach_domain(struct dmar_domain *domain,
1557                                struct intel_iommu *iommu)
1558 {
1559         int num;
1560         unsigned long flags;
1561
1562         spin_lock_irqsave(&iommu->lock, flags);
1563         num = __iommu_attach_domain(domain, iommu);
1564         spin_unlock_irqrestore(&iommu->lock, flags);
1565         if (num < 0)
1566                 pr_err("IOMMU: no free domain ids\n");
1567
1568         return num;
1569 }
1570
1571 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1572                                   struct intel_iommu *iommu)
1573 {
1574         int num;
1575         unsigned long ndomains;
1576
1577         ndomains = cap_ndoms(iommu->cap);
1578         for_each_set_bit(num, iommu->domain_ids, ndomains)
1579                 if (iommu->domains[num] == domain)
1580                         return num;
1581
1582         return __iommu_attach_domain(domain, iommu);
1583 }
1584
1585 static void iommu_detach_domain(struct dmar_domain *domain,
1586                                 struct intel_iommu *iommu)
1587 {
1588         unsigned long flags;
1589         int num, ndomains;
1590
1591         spin_lock_irqsave(&iommu->lock, flags);
1592         if (domain_type_is_vm_or_si(domain)) {
1593                 ndomains = cap_ndoms(iommu->cap);
1594                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1595                         if (iommu->domains[num] == domain) {
1596                                 clear_bit(num, iommu->domain_ids);
1597                                 iommu->domains[num] = NULL;
1598                                 break;
1599                         }
1600                 }
1601         } else {
1602                 clear_bit(domain->id, iommu->domain_ids);
1603                 iommu->domains[domain->id] = NULL;
1604         }
1605         spin_unlock_irqrestore(&iommu->lock, flags);
1606 }
1607
1608 static void domain_attach_iommu(struct dmar_domain *domain,
1609                                struct intel_iommu *iommu)
1610 {
1611         unsigned long flags;
1612
1613         spin_lock_irqsave(&domain->iommu_lock, flags);
1614         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1615                 domain->iommu_count++;
1616                 if (domain->iommu_count == 1)
1617                         domain->nid = iommu->node;
1618                 domain_update_iommu_cap(domain);
1619         }
1620         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1621 }
1622
1623 static int domain_detach_iommu(struct dmar_domain *domain,
1624                                struct intel_iommu *iommu)
1625 {
1626         unsigned long flags;
1627         int count = INT_MAX;
1628
1629         spin_lock_irqsave(&domain->iommu_lock, flags);
1630         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1631                 count = --domain->iommu_count;
1632                 domain_update_iommu_cap(domain);
1633         }
1634         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1635
1636         return count;
1637 }
1638
1639 static struct iova_domain reserved_iova_list;
1640 static struct lock_class_key reserved_rbtree_key;
1641
1642 static int dmar_init_reserved_ranges(void)
1643 {
1644         struct pci_dev *pdev = NULL;
1645         struct iova *iova;
1646         int i;
1647
1648         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1649                         DMA_32BIT_PFN);
1650
1651         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1652                 &reserved_rbtree_key);
1653
1654         /* IOAPIC ranges shouldn't be accessed by DMA */
1655         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1656                 IOVA_PFN(IOAPIC_RANGE_END));
1657         if (!iova) {
1658                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1659                 return -ENODEV;
1660         }
1661
1662         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1663         for_each_pci_dev(pdev) {
1664                 struct resource *r;
1665
1666                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1667                         r = &pdev->resource[i];
1668                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1669                                 continue;
1670                         iova = reserve_iova(&reserved_iova_list,
1671                                             IOVA_PFN(r->start),
1672                                             IOVA_PFN(r->end));
1673                         if (!iova) {
1674                                 printk(KERN_ERR "Reserve iova failed\n");
1675                                 return -ENODEV;
1676                         }
1677                 }
1678         }
1679         return 0;
1680 }
1681
1682 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1683 {
1684         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1685 }
1686
1687 static inline int guestwidth_to_adjustwidth(int gaw)
1688 {
1689         int agaw;
1690         int r = (gaw - 12) % 9;
1691
1692         if (r == 0)
1693                 agaw = gaw;
1694         else
1695                 agaw = gaw + 9 - r;
1696         if (agaw > 64)
1697                 agaw = 64;
1698         return agaw;
1699 }
1700
1701 static int domain_init(struct dmar_domain *domain, int guest_width)
1702 {
1703         struct intel_iommu *iommu;
1704         int adjust_width, agaw;
1705         unsigned long sagaw;
1706
1707         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1708                         DMA_32BIT_PFN);
1709         domain_reserve_special_ranges(domain);
1710
1711         /* calculate AGAW */
1712         iommu = domain_get_iommu(domain);
1713         if (guest_width > cap_mgaw(iommu->cap))
1714                 guest_width = cap_mgaw(iommu->cap);
1715         domain->gaw = guest_width;
1716         adjust_width = guestwidth_to_adjustwidth(guest_width);
1717         agaw = width_to_agaw(adjust_width);
1718         sagaw = cap_sagaw(iommu->cap);
1719         if (!test_bit(agaw, &sagaw)) {
1720                 /* hardware doesn't support it, choose a bigger one */
1721                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1722                 agaw = find_next_bit(&sagaw, 5, agaw);
1723                 if (agaw >= 5)
1724                         return -ENODEV;
1725         }
1726         domain->agaw = agaw;
1727
1728         if (ecap_coherent(iommu->ecap))
1729                 domain->iommu_coherency = 1;
1730         else
1731                 domain->iommu_coherency = 0;
1732
1733         if (ecap_sc_support(iommu->ecap))
1734                 domain->iommu_snooping = 1;
1735         else
1736                 domain->iommu_snooping = 0;
1737
1738         if (intel_iommu_superpage)
1739                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1740         else
1741                 domain->iommu_superpage = 0;
1742
1743         domain->nid = iommu->node;
1744
1745         /* always allocate the top pgd */
1746         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1747         if (!domain->pgd)
1748                 return -ENOMEM;
1749         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1750         return 0;
1751 }
1752
1753 static void domain_exit(struct dmar_domain *domain)
1754 {
1755         struct page *freelist = NULL;
1756         int i;
1757
1758         /* Domain 0 is reserved, so dont process it */
1759         if (!domain)
1760                 return;
1761
1762         /* Flush any lazy unmaps that may reference this domain */
1763         if (!intel_iommu_strict)
1764                 flush_unmaps_timeout(0);
1765
1766         /* remove associated devices */
1767         domain_remove_dev_info(domain);
1768
1769         /* destroy iovas */
1770         put_iova_domain(&domain->iovad);
1771
1772         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1773
1774         /* clear attached or cached domains */
1775         rcu_read_lock();
1776         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus)
1777                 iommu_detach_domain(domain, g_iommus[i]);
1778         rcu_read_unlock();
1779
1780         dma_free_pagelist(freelist);
1781
1782         free_domain_mem(domain);
1783 }
1784
1785 static int domain_context_mapping_one(struct dmar_domain *domain,
1786                                       struct intel_iommu *iommu,
1787                                       u8 bus, u8 devfn, int translation)
1788 {
1789         struct context_entry *context;
1790         unsigned long flags;
1791         struct dma_pte *pgd;
1792         int id;
1793         int agaw;
1794         struct device_domain_info *info = NULL;
1795
1796         pr_debug("Set context mapping for %02x:%02x.%d\n",
1797                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1798
1799         BUG_ON(!domain->pgd);
1800         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1801                translation != CONTEXT_TT_MULTI_LEVEL);
1802
1803         context = device_to_context_entry(iommu, bus, devfn);
1804         if (!context)
1805                 return -ENOMEM;
1806         spin_lock_irqsave(&iommu->lock, flags);
1807         if (context_present(context)) {
1808                 spin_unlock_irqrestore(&iommu->lock, flags);
1809                 return 0;
1810         }
1811
1812         id = domain->id;
1813         pgd = domain->pgd;
1814
1815         if (domain_type_is_vm_or_si(domain)) {
1816                 if (domain_type_is_vm(domain)) {
1817                         id = iommu_attach_vm_domain(domain, iommu);
1818                         if (id < 0) {
1819                                 spin_unlock_irqrestore(&iommu->lock, flags);
1820                                 pr_err("IOMMU: no free domain ids\n");
1821                                 return -EFAULT;
1822                         }
1823                 }
1824
1825                 /* Skip top levels of page tables for
1826                  * iommu which has less agaw than default.
1827                  * Unnecessary for PT mode.
1828                  */
1829                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1830                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1831                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1832                                 if (!dma_pte_present(pgd)) {
1833                                         spin_unlock_irqrestore(&iommu->lock, flags);
1834                                         return -ENOMEM;
1835                                 }
1836                         }
1837                 }
1838         }
1839
1840         context_set_domain_id(context, id);
1841
1842         if (translation != CONTEXT_TT_PASS_THROUGH) {
1843                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1844                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1845                                      CONTEXT_TT_MULTI_LEVEL;
1846         }
1847         /*
1848          * In pass through mode, AW must be programmed to indicate the largest
1849          * AGAW value supported by hardware. And ASR is ignored by hardware.
1850          */
1851         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1852                 context_set_address_width(context, iommu->msagaw);
1853         else {
1854                 context_set_address_root(context, virt_to_phys(pgd));
1855                 context_set_address_width(context, iommu->agaw);
1856         }
1857
1858         context_set_translation_type(context, translation);
1859         context_set_fault_enable(context);
1860         context_set_present(context);
1861         domain_flush_cache(domain, context, sizeof(*context));
1862
1863         /*
1864          * It's a non-present to present mapping. If hardware doesn't cache
1865          * non-present entry we only need to flush the write-buffer. If the
1866          * _does_ cache non-present entries, then it does so in the special
1867          * domain #0, which we have to flush:
1868          */
1869         if (cap_caching_mode(iommu->cap)) {
1870                 iommu->flush.flush_context(iommu, 0,
1871                                            (((u16)bus) << 8) | devfn,
1872                                            DMA_CCMD_MASK_NOBIT,
1873                                            DMA_CCMD_DEVICE_INVL);
1874                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1875         } else {
1876                 iommu_flush_write_buffer(iommu);
1877         }
1878         iommu_enable_dev_iotlb(info);
1879         spin_unlock_irqrestore(&iommu->lock, flags);
1880
1881         domain_attach_iommu(domain, iommu);
1882
1883         return 0;
1884 }
1885
1886 struct domain_context_mapping_data {
1887         struct dmar_domain *domain;
1888         struct intel_iommu *iommu;
1889         int translation;
1890 };
1891
1892 static int domain_context_mapping_cb(struct pci_dev *pdev,
1893                                      u16 alias, void *opaque)
1894 {
1895         struct domain_context_mapping_data *data = opaque;
1896
1897         return domain_context_mapping_one(data->domain, data->iommu,
1898                                           PCI_BUS_NUM(alias), alias & 0xff,
1899                                           data->translation);
1900 }
1901
1902 static int
1903 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1904                        int translation)
1905 {
1906         struct intel_iommu *iommu;
1907         u8 bus, devfn;
1908         struct domain_context_mapping_data data;
1909
1910         iommu = device_to_iommu(dev, &bus, &devfn);
1911         if (!iommu)
1912                 return -ENODEV;
1913
1914         if (!dev_is_pci(dev))
1915                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1916                                                   translation);
1917
1918         data.domain = domain;
1919         data.iommu = iommu;
1920         data.translation = translation;
1921
1922         return pci_for_each_dma_alias(to_pci_dev(dev),
1923                                       &domain_context_mapping_cb, &data);
1924 }
1925
1926 static int domain_context_mapped_cb(struct pci_dev *pdev,
1927                                     u16 alias, void *opaque)
1928 {
1929         struct intel_iommu *iommu = opaque;
1930
1931         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1932 }
1933
1934 static int domain_context_mapped(struct device *dev)
1935 {
1936         struct intel_iommu *iommu;
1937         u8 bus, devfn;
1938
1939         iommu = device_to_iommu(dev, &bus, &devfn);
1940         if (!iommu)
1941                 return -ENODEV;
1942
1943         if (!dev_is_pci(dev))
1944                 return device_context_mapped(iommu, bus, devfn);
1945
1946         return !pci_for_each_dma_alias(to_pci_dev(dev),
1947                                        domain_context_mapped_cb, iommu);
1948 }
1949
1950 /* Returns a number of VTD pages, but aligned to MM page size */
1951 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1952                                             size_t size)
1953 {
1954         host_addr &= ~PAGE_MASK;
1955         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1956 }
1957
1958 /* Return largest possible superpage level for a given mapping */
1959 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1960                                           unsigned long iov_pfn,
1961                                           unsigned long phy_pfn,
1962                                           unsigned long pages)
1963 {
1964         int support, level = 1;
1965         unsigned long pfnmerge;
1966
1967         support = domain->iommu_superpage;
1968
1969         /* To use a large page, the virtual *and* physical addresses
1970            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1971            of them will mean we have to use smaller pages. So just
1972            merge them and check both at once. */
1973         pfnmerge = iov_pfn | phy_pfn;
1974
1975         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1976                 pages >>= VTD_STRIDE_SHIFT;
1977                 if (!pages)
1978                         break;
1979                 pfnmerge >>= VTD_STRIDE_SHIFT;
1980                 level++;
1981                 support--;
1982         }
1983         return level;
1984 }
1985
1986 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1987                             struct scatterlist *sg, unsigned long phys_pfn,
1988                             unsigned long nr_pages, int prot)
1989 {
1990         struct dma_pte *first_pte = NULL, *pte = NULL;
1991         phys_addr_t uninitialized_var(pteval);
1992         unsigned long sg_res = 0;
1993         unsigned int largepage_lvl = 0;
1994         unsigned long lvl_pages = 0;
1995
1996         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1997
1998         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1999                 return -EINVAL;
2000
2001         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2002
2003         if (!sg) {
2004                 sg_res = nr_pages;
2005                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2006         }
2007
2008         while (nr_pages > 0) {
2009                 uint64_t tmp;
2010
2011                 if (!sg_res) {
2012                         sg_res = aligned_nrpages(sg->offset, sg->length);
2013                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2014                         sg->dma_length = sg->length;
2015                         pteval = page_to_phys(sg_page(sg)) | prot;
2016                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2017                 }
2018
2019                 if (!pte) {
2020                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2021
2022                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2023                         if (!pte)
2024                                 return -ENOMEM;
2025                         /* It is large page*/
2026                         if (largepage_lvl > 1) {
2027                                 pteval |= DMA_PTE_LARGE_PAGE;
2028                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2029                                 /*
2030                                  * Ensure that old small page tables are
2031                                  * removed to make room for superpage,
2032                                  * if they exist.
2033                                  */
2034                                 dma_pte_free_pagetable(domain, iov_pfn,
2035                                                        iov_pfn + lvl_pages - 1);
2036                         } else {
2037                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2038                         }
2039
2040                 }
2041                 /* We don't need lock here, nobody else
2042                  * touches the iova range
2043                  */
2044                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2045                 if (tmp) {
2046                         static int dumps = 5;
2047                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2048                                iov_pfn, tmp, (unsigned long long)pteval);
2049                         if (dumps) {
2050                                 dumps--;
2051                                 debug_dma_dump_mappings(NULL);
2052                         }
2053                         WARN_ON(1);
2054                 }
2055
2056                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2057
2058                 BUG_ON(nr_pages < lvl_pages);
2059                 BUG_ON(sg_res < lvl_pages);
2060
2061                 nr_pages -= lvl_pages;
2062                 iov_pfn += lvl_pages;
2063                 phys_pfn += lvl_pages;
2064                 pteval += lvl_pages * VTD_PAGE_SIZE;
2065                 sg_res -= lvl_pages;
2066
2067                 /* If the next PTE would be the first in a new page, then we
2068                    need to flush the cache on the entries we've just written.
2069                    And then we'll need to recalculate 'pte', so clear it and
2070                    let it get set again in the if (!pte) block above.
2071
2072                    If we're done (!nr_pages) we need to flush the cache too.
2073
2074                    Also if we've been setting superpages, we may need to
2075                    recalculate 'pte' and switch back to smaller pages for the
2076                    end of the mapping, if the trailing size is not enough to
2077                    use another superpage (i.e. sg_res < lvl_pages). */
2078                 pte++;
2079                 if (!nr_pages || first_pte_in_page(pte) ||
2080                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2081                         domain_flush_cache(domain, first_pte,
2082                                            (void *)pte - (void *)first_pte);
2083                         pte = NULL;
2084                 }
2085
2086                 if (!sg_res && nr_pages)
2087                         sg = sg_next(sg);
2088         }
2089         return 0;
2090 }
2091
2092 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2093                                     struct scatterlist *sg, unsigned long nr_pages,
2094                                     int prot)
2095 {
2096         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2097 }
2098
2099 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2100                                      unsigned long phys_pfn, unsigned long nr_pages,
2101                                      int prot)
2102 {
2103         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2104 }
2105
2106 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2107 {
2108         if (!iommu)
2109                 return;
2110
2111         clear_context_table(iommu, bus, devfn);
2112         iommu->flush.flush_context(iommu, 0, 0, 0,
2113                                            DMA_CCMD_GLOBAL_INVL);
2114         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2115 }
2116
2117 static inline void unlink_domain_info(struct device_domain_info *info)
2118 {
2119         assert_spin_locked(&device_domain_lock);
2120         list_del(&info->link);
2121         list_del(&info->global);
2122         if (info->dev)
2123                 info->dev->archdata.iommu = NULL;
2124 }
2125
2126 static void domain_remove_dev_info(struct dmar_domain *domain)
2127 {
2128         struct device_domain_info *info, *tmp;
2129         unsigned long flags;
2130
2131         spin_lock_irqsave(&device_domain_lock, flags);
2132         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2133                 unlink_domain_info(info);
2134                 spin_unlock_irqrestore(&device_domain_lock, flags);
2135
2136                 iommu_disable_dev_iotlb(info);
2137                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2138
2139                 if (domain_type_is_vm(domain)) {
2140                         iommu_detach_dependent_devices(info->iommu, info->dev);
2141                         domain_detach_iommu(domain, info->iommu);
2142                 }
2143
2144                 free_devinfo_mem(info);
2145                 spin_lock_irqsave(&device_domain_lock, flags);
2146         }
2147         spin_unlock_irqrestore(&device_domain_lock, flags);
2148 }
2149
2150 /*
2151  * find_domain
2152  * Note: we use struct device->archdata.iommu stores the info
2153  */
2154 static struct dmar_domain *find_domain(struct device *dev)
2155 {
2156         struct device_domain_info *info;
2157
2158         /* No lock here, assumes no domain exit in normal case */
2159         info = dev->archdata.iommu;
2160         if (info)
2161                 return info->domain;
2162         return NULL;
2163 }
2164
2165 static inline struct device_domain_info *
2166 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2167 {
2168         struct device_domain_info *info;
2169
2170         list_for_each_entry(info, &device_domain_list, global)
2171                 if (info->iommu->segment == segment && info->bus == bus &&
2172                     info->devfn == devfn)
2173                         return info;
2174
2175         return NULL;
2176 }
2177
2178 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2179                                                 int bus, int devfn,
2180                                                 struct device *dev,
2181                                                 struct dmar_domain *domain)
2182 {
2183         struct dmar_domain *found = NULL;
2184         struct device_domain_info *info;
2185         unsigned long flags;
2186
2187         info = alloc_devinfo_mem();
2188         if (!info)
2189                 return NULL;
2190
2191         info->bus = bus;
2192         info->devfn = devfn;
2193         info->dev = dev;
2194         info->domain = domain;
2195         info->iommu = iommu;
2196
2197         spin_lock_irqsave(&device_domain_lock, flags);
2198         if (dev)
2199                 found = find_domain(dev);
2200         else {
2201                 struct device_domain_info *info2;
2202                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2203                 if (info2)
2204                         found = info2->domain;
2205         }
2206         if (found) {
2207                 spin_unlock_irqrestore(&device_domain_lock, flags);
2208                 free_devinfo_mem(info);
2209                 /* Caller must free the original domain */
2210                 return found;
2211         }
2212
2213         list_add(&info->link, &domain->devices);
2214         list_add(&info->global, &device_domain_list);
2215         if (dev)
2216                 dev->archdata.iommu = info;
2217         spin_unlock_irqrestore(&device_domain_lock, flags);
2218
2219         return domain;
2220 }
2221
2222 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2223 {
2224         *(u16 *)opaque = alias;
2225         return 0;
2226 }
2227
2228 /* domain is initialized */
2229 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2230 {
2231         struct dmar_domain *domain, *tmp;
2232         struct intel_iommu *iommu;
2233         struct device_domain_info *info;
2234         u16 dma_alias;
2235         unsigned long flags;
2236         u8 bus, devfn;
2237
2238         domain = find_domain(dev);
2239         if (domain)
2240                 return domain;
2241
2242         iommu = device_to_iommu(dev, &bus, &devfn);
2243         if (!iommu)
2244                 return NULL;
2245
2246         if (dev_is_pci(dev)) {
2247                 struct pci_dev *pdev = to_pci_dev(dev);
2248
2249                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2250
2251                 spin_lock_irqsave(&device_domain_lock, flags);
2252                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2253                                                       PCI_BUS_NUM(dma_alias),
2254                                                       dma_alias & 0xff);
2255                 if (info) {
2256                         iommu = info->iommu;
2257                         domain = info->domain;
2258                 }
2259                 spin_unlock_irqrestore(&device_domain_lock, flags);
2260
2261                 /* DMA alias already has a domain, uses it */
2262                 if (info)
2263                         goto found_domain;
2264         }
2265
2266         /* Allocate and initialize new domain for the device */
2267         domain = alloc_domain(0);
2268         if (!domain)
2269                 return NULL;
2270         domain->id = iommu_attach_domain(domain, iommu);
2271         if (domain->id < 0) {
2272                 free_domain_mem(domain);
2273                 return NULL;
2274         }
2275         domain_attach_iommu(domain, iommu);
2276         if (domain_init(domain, gaw)) {
2277                 domain_exit(domain);
2278                 return NULL;
2279         }
2280
2281         /* register PCI DMA alias device */
2282         if (dev_is_pci(dev)) {
2283                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2284                                            dma_alias & 0xff, NULL, domain);
2285
2286                 if (!tmp || tmp != domain) {
2287                         domain_exit(domain);
2288                         domain = tmp;
2289                 }
2290
2291                 if (!domain)
2292                         return NULL;
2293         }
2294
2295 found_domain:
2296         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2297
2298         if (!tmp || tmp != domain) {
2299                 domain_exit(domain);
2300                 domain = tmp;
2301         }
2302
2303         return domain;
2304 }
2305
2306 static int iommu_identity_mapping;
2307 #define IDENTMAP_ALL            1
2308 #define IDENTMAP_GFX            2
2309 #define IDENTMAP_AZALIA         4
2310
2311 static int iommu_domain_identity_map(struct dmar_domain *domain,
2312                                      unsigned long long start,
2313                                      unsigned long long end)
2314 {
2315         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2316         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2317
2318         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2319                           dma_to_mm_pfn(last_vpfn))) {
2320                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2321                 return -ENOMEM;
2322         }
2323
2324         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2325                  start, end, domain->id);
2326         /*
2327          * RMRR range might have overlap with physical memory range,
2328          * clear it first
2329          */
2330         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2331
2332         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2333                                   last_vpfn - first_vpfn + 1,
2334                                   DMA_PTE_READ|DMA_PTE_WRITE);
2335 }
2336
2337 static int iommu_prepare_identity_map(struct device *dev,
2338                                       unsigned long long start,
2339                                       unsigned long long end)
2340 {
2341         struct dmar_domain *domain;
2342         int ret;
2343
2344         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2345         if (!domain)
2346                 return -ENOMEM;
2347
2348         /* For _hardware_ passthrough, don't bother. But for software
2349            passthrough, we do it anyway -- it may indicate a memory
2350            range which is reserved in E820, so which didn't get set
2351            up to start with in si_domain */
2352         if (domain == si_domain && hw_pass_through) {
2353                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2354                        dev_name(dev), start, end);
2355                 return 0;
2356         }
2357
2358         printk(KERN_INFO
2359                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2360                dev_name(dev), start, end);
2361         
2362         if (end < start) {
2363                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2364                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2365                         dmi_get_system_info(DMI_BIOS_VENDOR),
2366                         dmi_get_system_info(DMI_BIOS_VERSION),
2367                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2368                 ret = -EIO;
2369                 goto error;
2370         }
2371
2372         if (end >> agaw_to_width(domain->agaw)) {
2373                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2374                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2375                      agaw_to_width(domain->agaw),
2376                      dmi_get_system_info(DMI_BIOS_VENDOR),
2377                      dmi_get_system_info(DMI_BIOS_VERSION),
2378                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2379                 ret = -EIO;
2380                 goto error;
2381         }
2382
2383         ret = iommu_domain_identity_map(domain, start, end);
2384         if (ret)
2385                 goto error;
2386
2387         /* context entry init */
2388         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2389         if (ret)
2390                 goto error;
2391
2392         return 0;
2393
2394  error:
2395         domain_exit(domain);
2396         return ret;
2397 }
2398
2399 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2400                                          struct device *dev)
2401 {
2402         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2403                 return 0;
2404         return iommu_prepare_identity_map(dev, rmrr->base_address,
2405                                           rmrr->end_address);
2406 }
2407
2408 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2409 static inline void iommu_prepare_isa(void)
2410 {
2411         struct pci_dev *pdev;
2412         int ret;
2413
2414         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2415         if (!pdev)
2416                 return;
2417
2418         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2419         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2420
2421         if (ret)
2422                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2423                        "floppy might not work\n");
2424
2425         pci_dev_put(pdev);
2426 }
2427 #else
2428 static inline void iommu_prepare_isa(void)
2429 {
2430         return;
2431 }
2432 #endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2433
2434 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2435
2436 static int __init si_domain_init(int hw)
2437 {
2438         struct dmar_drhd_unit *drhd;
2439         struct intel_iommu *iommu;
2440         int nid, ret = 0;
2441         bool first = true;
2442
2443         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2444         if (!si_domain)
2445                 return -EFAULT;
2446
2447         for_each_active_iommu(iommu, drhd) {
2448                 ret = iommu_attach_domain(si_domain, iommu);
2449                 if (ret < 0) {
2450                         domain_exit(si_domain);
2451                         return -EFAULT;
2452                 } else if (first) {
2453                         si_domain->id = ret;
2454                         first = false;
2455                 } else if (si_domain->id != ret) {
2456                         domain_exit(si_domain);
2457                         return -EFAULT;
2458                 }
2459                 domain_attach_iommu(si_domain, iommu);
2460         }
2461
2462         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2463                 domain_exit(si_domain);
2464                 return -EFAULT;
2465         }
2466
2467         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2468                  si_domain->id);
2469
2470         if (hw)
2471                 return 0;
2472
2473         for_each_online_node(nid) {
2474                 unsigned long start_pfn, end_pfn;
2475                 int i;
2476
2477                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2478                         ret = iommu_domain_identity_map(si_domain,
2479                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2480                         if (ret)
2481                                 return ret;
2482                 }
2483         }
2484
2485         return 0;
2486 }
2487
2488 static int identity_mapping(struct device *dev)
2489 {
2490         struct device_domain_info *info;
2491
2492         if (likely(!iommu_identity_mapping))
2493                 return 0;
2494
2495         info = dev->archdata.iommu;
2496         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2497                 return (info->domain == si_domain);
2498
2499         return 0;
2500 }
2501
2502 static int domain_add_dev_info(struct dmar_domain *domain,
2503                                struct device *dev, int translation)
2504 {
2505         struct dmar_domain *ndomain;
2506         struct intel_iommu *iommu;
2507         u8 bus, devfn;
2508         int ret;
2509
2510         iommu = device_to_iommu(dev, &bus, &devfn);
2511         if (!iommu)
2512                 return -ENODEV;
2513
2514         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2515         if (ndomain != domain)
2516                 return -EBUSY;
2517
2518         ret = domain_context_mapping(domain, dev, translation);
2519         if (ret) {
2520                 domain_remove_one_dev_info(domain, dev);
2521                 return ret;
2522         }
2523
2524         return 0;
2525 }
2526
2527 static bool device_has_rmrr(struct device *dev)
2528 {
2529         struct dmar_rmrr_unit *rmrr;
2530         struct device *tmp;
2531         int i;
2532
2533         rcu_read_lock();
2534         for_each_rmrr_units(rmrr) {
2535                 /*
2536                  * Return TRUE if this RMRR contains the device that
2537                  * is passed in.
2538                  */
2539                 for_each_active_dev_scope(rmrr->devices,
2540                                           rmrr->devices_cnt, i, tmp)
2541                         if (tmp == dev) {
2542                                 rcu_read_unlock();
2543                                 return true;
2544                         }
2545         }
2546         rcu_read_unlock();
2547         return false;
2548 }
2549
2550 /*
2551  * There are a couple cases where we need to restrict the functionality of
2552  * devices associated with RMRRs.  The first is when evaluating a device for
2553  * identity mapping because problems exist when devices are moved in and out
2554  * of domains and their respective RMRR information is lost.  This means that
2555  * a device with associated RMRRs will never be in a "passthrough" domain.
2556  * The second is use of the device through the IOMMU API.  This interface
2557  * expects to have full control of the IOVA space for the device.  We cannot
2558  * satisfy both the requirement that RMRR access is maintained and have an
2559  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2560  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2561  * We therefore prevent devices associated with an RMRR from participating in
2562  * the IOMMU API, which eliminates them from device assignment.
2563  *
2564  * In both cases we assume that PCI USB devices with RMRRs have them largely
2565  * for historical reasons and that the RMRR space is not actively used post
2566  * boot.  This exclusion may change if vendors begin to abuse it.
2567  */
2568 static bool device_is_rmrr_locked(struct device *dev)
2569 {
2570         if (!device_has_rmrr(dev))
2571                 return false;
2572
2573         if (dev_is_pci(dev)) {
2574                 struct pci_dev *pdev = to_pci_dev(dev);
2575
2576                 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2577                         return false;
2578         }
2579
2580         return true;
2581 }
2582
2583 static int iommu_should_identity_map(struct device *dev, int startup)
2584 {
2585
2586         if (dev_is_pci(dev)) {
2587                 struct pci_dev *pdev = to_pci_dev(dev);
2588
2589                 if (device_is_rmrr_locked(dev))
2590                         return 0;
2591
2592                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2593                         return 1;
2594
2595                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2596                         return 1;
2597
2598                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2599                         return 0;
2600
2601                 /*
2602                  * We want to start off with all devices in the 1:1 domain, and
2603                  * take them out later if we find they can't access all of memory.
2604                  *
2605                  * However, we can't do this for PCI devices behind bridges,
2606                  * because all PCI devices behind the same bridge will end up
2607                  * with the same source-id on their transactions.
2608                  *
2609                  * Practically speaking, we can't change things around for these
2610                  * devices at run-time, because we can't be sure there'll be no
2611                  * DMA transactions in flight for any of their siblings.
2612                  *
2613                  * So PCI devices (unless they're on the root bus) as well as
2614                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2615                  * the 1:1 domain, just in _case_ one of their siblings turns out
2616                  * not to be able to map all of memory.
2617                  */
2618                 if (!pci_is_pcie(pdev)) {
2619                         if (!pci_is_root_bus(pdev->bus))
2620                                 return 0;
2621                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2622                                 return 0;
2623                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2624                         return 0;
2625         } else {
2626                 if (device_has_rmrr(dev))
2627                         return 0;
2628         }
2629
2630         /*
2631          * At boot time, we don't yet know if devices will be 64-bit capable.
2632          * Assume that they will — if they turn out not to be, then we can
2633          * take them out of the 1:1 domain later.
2634          */
2635         if (!startup) {
2636                 /*
2637                  * If the device's dma_mask is less than the system's memory
2638                  * size then this is not a candidate for identity mapping.
2639                  */
2640                 u64 dma_mask = *dev->dma_mask;
2641
2642                 if (dev->coherent_dma_mask &&
2643                     dev->coherent_dma_mask < dma_mask)
2644                         dma_mask = dev->coherent_dma_mask;
2645
2646                 return dma_mask >= dma_get_required_mask(dev);
2647         }
2648
2649         return 1;
2650 }
2651
2652 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2653 {
2654         int ret;
2655
2656         if (!iommu_should_identity_map(dev, 1))
2657                 return 0;
2658
2659         ret = domain_add_dev_info(si_domain, dev,
2660                                   hw ? CONTEXT_TT_PASS_THROUGH :
2661                                        CONTEXT_TT_MULTI_LEVEL);
2662         if (!ret)
2663                 pr_info("IOMMU: %s identity mapping for device %s\n",
2664                         hw ? "hardware" : "software", dev_name(dev));
2665         else if (ret == -ENODEV)
2666                 /* device not associated with an iommu */
2667                 ret = 0;
2668
2669         return ret;
2670 }
2671
2672
2673 static int __init iommu_prepare_static_identity_mapping(int hw)
2674 {
2675         struct pci_dev *pdev = NULL;
2676         struct dmar_drhd_unit *drhd;
2677         struct intel_iommu *iommu;
2678         struct device *dev;
2679         int i;
2680         int ret = 0;
2681
2682         ret = si_domain_init(hw);
2683         if (ret)
2684                 return -EFAULT;
2685
2686         for_each_pci_dev(pdev) {
2687                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2688                 if (ret)
2689                         return ret;
2690         }
2691
2692         for_each_active_iommu(iommu, drhd)
2693                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2694                         struct acpi_device_physical_node *pn;
2695                         struct acpi_device *adev;
2696
2697                         if (dev->bus != &acpi_bus_type)
2698                                 continue;
2699                                 
2700                         adev= to_acpi_device(dev);
2701                         mutex_lock(&adev->physical_node_lock);
2702                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2703                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2704                                 if (ret)
2705                                         break;
2706                         }
2707                         mutex_unlock(&adev->physical_node_lock);
2708                         if (ret)
2709                                 return ret;
2710                 }
2711
2712         return 0;
2713 }
2714
2715 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2716 {
2717         /*
2718          * Start from the sane iommu hardware state.
2719          * If the queued invalidation is already initialized by us
2720          * (for example, while enabling interrupt-remapping) then
2721          * we got the things already rolling from a sane state.
2722          */
2723         if (!iommu->qi) {
2724                 /*
2725                  * Clear any previous faults.
2726                  */
2727                 dmar_fault(-1, iommu);
2728                 /*
2729                  * Disable queued invalidation if supported and already enabled
2730                  * before OS handover.
2731                  */
2732                 dmar_disable_qi(iommu);
2733         }
2734
2735         if (dmar_enable_qi(iommu)) {
2736                 /*
2737                  * Queued Invalidate not enabled, use Register Based Invalidate
2738                  */
2739                 iommu->flush.flush_context = __iommu_flush_context;
2740                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2741                 pr_info("IOMMU: %s using Register based invalidation\n",
2742                         iommu->name);
2743         } else {
2744                 iommu->flush.flush_context = qi_flush_context;
2745                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2746                 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2747         }
2748 }
2749
2750 static int __init init_dmars(void)
2751 {
2752         struct dmar_drhd_unit *drhd;
2753         struct dmar_rmrr_unit *rmrr;
2754         struct device *dev;
2755         struct intel_iommu *iommu;
2756         int i, ret;
2757
2758         /*
2759          * for each drhd
2760          *    allocate root
2761          *    initialize and program root entry to not present
2762          * endfor
2763          */
2764         for_each_drhd_unit(drhd) {
2765                 /*
2766                  * lock not needed as this is only incremented in the single
2767                  * threaded kernel __init code path all other access are read
2768                  * only
2769                  */
2770                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2771                         g_num_of_iommus++;
2772                         continue;
2773                 }
2774                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2775                           DMAR_UNITS_SUPPORTED);
2776         }
2777
2778         /* Preallocate enough resources for IOMMU hot-addition */
2779         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2780                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2781
2782         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2783                         GFP_KERNEL);
2784         if (!g_iommus) {
2785                 printk(KERN_ERR "Allocating global iommu array failed\n");
2786                 ret = -ENOMEM;
2787                 goto error;
2788         }
2789
2790         deferred_flush = kzalloc(g_num_of_iommus *
2791                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2792         if (!deferred_flush) {
2793                 ret = -ENOMEM;
2794                 goto free_g_iommus;
2795         }
2796
2797         for_each_active_iommu(iommu, drhd) {
2798                 g_iommus[iommu->seq_id] = iommu;
2799
2800                 ret = iommu_init_domains(iommu);
2801                 if (ret)
2802                         goto free_iommu;
2803
2804                 /*
2805                  * TBD:
2806                  * we could share the same root & context tables
2807                  * among all IOMMU's. Need to Split it later.
2808                  */
2809                 ret = iommu_alloc_root_entry(iommu);
2810                 if (ret)
2811                         goto free_iommu;
2812                 if (!ecap_pass_through(iommu->ecap))
2813                         hw_pass_through = 0;
2814         }
2815
2816         for_each_active_iommu(iommu, drhd)
2817                 intel_iommu_init_qi(iommu);
2818
2819         if (iommu_pass_through)
2820                 iommu_identity_mapping |= IDENTMAP_ALL;
2821
2822 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2823         iommu_identity_mapping |= IDENTMAP_GFX;
2824 #endif
2825
2826         check_tylersburg_isoch();
2827
2828         /*
2829          * If pass through is not set or not enabled, setup context entries for
2830          * identity mappings for rmrr, gfx, and isa and may fall back to static
2831          * identity mapping if iommu_identity_mapping is set.
2832          */
2833         if (iommu_identity_mapping) {
2834                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2835                 if (ret) {
2836                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2837                         goto free_iommu;
2838                 }
2839         }
2840         /*
2841          * For each rmrr
2842          *   for each dev attached to rmrr
2843          *   do
2844          *     locate drhd for dev, alloc domain for dev
2845          *     allocate free domain
2846          *     allocate page table entries for rmrr
2847          *     if context not allocated for bus
2848          *           allocate and init context
2849          *           set present in root table for this bus
2850          *     init context with domain, translation etc
2851          *    endfor
2852          * endfor
2853          */
2854         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2855         for_each_rmrr_units(rmrr) {
2856                 /* some BIOS lists non-exist devices in DMAR table. */
2857                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2858                                           i, dev) {
2859                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2860                         if (ret)
2861                                 printk(KERN_ERR
2862                                        "IOMMU: mapping reserved region failed\n");
2863                 }
2864         }
2865
2866         iommu_prepare_isa();
2867
2868         /*
2869          * for each drhd
2870          *   enable fault log
2871          *   global invalidate context cache
2872          *   global invalidate iotlb
2873          *   enable translation
2874          */
2875         for_each_iommu(iommu, drhd) {
2876                 if (drhd->ignored) {
2877                         /*
2878                          * we always have to disable PMRs or DMA may fail on
2879                          * this device
2880                          */
2881                         if (force_on)
2882                                 iommu_disable_protect_mem_regions(iommu);
2883                         continue;
2884                 }
2885
2886                 iommu_flush_write_buffer(iommu);
2887
2888                 ret = dmar_set_interrupt(iommu);
2889                 if (ret)
2890                         goto free_iommu;
2891
2892                 iommu_set_root_entry(iommu);
2893
2894                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2895                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2896                 iommu_enable_translation(iommu);
2897                 iommu_disable_protect_mem_regions(iommu);
2898         }
2899
2900         return 0;
2901
2902 free_iommu:
2903         for_each_active_iommu(iommu, drhd) {
2904                 disable_dmar_iommu(iommu);
2905                 free_dmar_iommu(iommu);
2906         }
2907         kfree(deferred_flush);
2908 free_g_iommus:
2909         kfree(g_iommus);
2910 error:
2911         return ret;
2912 }
2913
2914 /* This takes a number of _MM_ pages, not VTD pages */
2915 static struct iova *intel_alloc_iova(struct device *dev,
2916                                      struct dmar_domain *domain,
2917                                      unsigned long nrpages, uint64_t dma_mask)
2918 {
2919         struct iova *iova = NULL;
2920
2921         /* Restrict dma_mask to the width that the iommu can handle */
2922         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2923
2924         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2925                 /*
2926                  * First try to allocate an io virtual address in
2927                  * DMA_BIT_MASK(32) and if that fails then try allocating
2928                  * from higher range
2929                  */
2930                 iova = alloc_iova(&domain->iovad, nrpages,
2931                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2932                 if (iova)
2933                         return iova;
2934         }
2935         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2936         if (unlikely(!iova)) {
2937                 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2938                        nrpages, dev_name(dev));
2939                 return NULL;
2940         }
2941
2942         return iova;
2943 }
2944
2945 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2946 {
2947         struct dmar_domain *domain;
2948         int ret;
2949
2950         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2951         if (!domain) {
2952                 printk(KERN_ERR "Allocating domain for %s failed",
2953                        dev_name(dev));
2954                 return NULL;
2955         }
2956
2957         /* make sure context mapping is ok */
2958         if (unlikely(!domain_context_mapped(dev))) {
2959                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2960                 if (ret) {
2961                         printk(KERN_ERR "Domain context map for %s failed",
2962                                dev_name(dev));
2963                         return NULL;
2964                 }
2965         }
2966
2967         return domain;
2968 }
2969
2970 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2971 {
2972         struct device_domain_info *info;
2973
2974         /* No lock here, assumes no domain exit in normal case */
2975         info = dev->archdata.iommu;
2976         if (likely(info))
2977                 return info->domain;
2978
2979         return __get_valid_domain_for_dev(dev);
2980 }
2981
2982 static int iommu_dummy(struct device *dev)
2983 {
2984         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2985 }
2986
2987 /* Check if the dev needs to go through non-identity map and unmap process.*/
2988 static int iommu_no_mapping(struct device *dev)
2989 {
2990         int found;
2991
2992         if (iommu_dummy(dev))
2993                 return 1;
2994
2995         if (!iommu_identity_mapping)
2996                 return 0;
2997
2998         found = identity_mapping(dev);
2999         if (found) {
3000                 if (iommu_should_identity_map(dev, 0))
3001                         return 1;
3002                 else {
3003                         /*
3004                          * 32 bit DMA is removed from si_domain and fall back
3005                          * to non-identity mapping.
3006                          */
3007                         domain_remove_one_dev_info(si_domain, dev);
3008                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
3009                                dev_name(dev));
3010                         return 0;
3011                 }
3012         } else {
3013                 /*
3014                  * In case of a detached 64 bit DMA device from vm, the device
3015                  * is put into si_domain for identity mapping.
3016                  */
3017                 if (iommu_should_identity_map(dev, 0)) {
3018                         int ret;
3019                         ret = domain_add_dev_info(si_domain, dev,
3020                                                   hw_pass_through ?
3021                                                   CONTEXT_TT_PASS_THROUGH :
3022                                                   CONTEXT_TT_MULTI_LEVEL);
3023                         if (!ret) {
3024                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
3025                                        dev_name(dev));
3026                                 return 1;
3027                         }
3028                 }
3029         }
3030
3031         return 0;
3032 }
3033
3034 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3035                                      size_t size, int dir, u64 dma_mask)
3036 {
3037         struct dmar_domain *domain;
3038         phys_addr_t start_paddr;
3039         struct iova *iova;
3040         int prot = 0;
3041         int ret;
3042         struct intel_iommu *iommu;
3043         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3044
3045         BUG_ON(dir == DMA_NONE);
3046
3047         if (iommu_no_mapping(dev))
3048                 return paddr;
3049
3050         domain = get_valid_domain_for_dev(dev);
3051         if (!domain)
3052                 return 0;
3053
3054         iommu = domain_get_iommu(domain);
3055         size = aligned_nrpages(paddr, size);
3056
3057         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3058         if (!iova)
3059                 goto error;
3060
3061         /*
3062          * Check if DMAR supports zero-length reads on write only
3063          * mappings..
3064          */
3065         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3066                         !cap_zlr(iommu->cap))
3067                 prot |= DMA_PTE_READ;
3068         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3069                 prot |= DMA_PTE_WRITE;
3070         /*
3071          * paddr - (paddr + size) might be partial page, we should map the whole
3072          * page.  Note: if two part of one page are separately mapped, we
3073          * might have two guest_addr mapping to the same host paddr, but this
3074          * is not a big problem
3075          */
3076         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3077                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3078         if (ret)
3079                 goto error;
3080
3081         /* it's a non-present to present mapping. Only flush if caching mode */
3082         if (cap_caching_mode(iommu->cap))
3083                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3084         else
3085                 iommu_flush_write_buffer(iommu);
3086
3087         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3088         start_paddr += paddr & ~PAGE_MASK;
3089         return start_paddr;
3090
3091 error:
3092         if (iova)
3093                 __free_iova(&domain->iovad, iova);
3094         printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
3095                 dev_name(dev), size, (unsigned long long)paddr, dir);
3096         return 0;
3097 }
3098
3099 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3100                                  unsigned long offset, size_t size,
3101                                  enum dma_data_direction dir,
3102                                  struct dma_attrs *attrs)
3103 {
3104         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3105                                   dir, *dev->dma_mask);
3106 }
3107
3108 static void flush_unmaps(void)
3109 {
3110         int i, j;
3111
3112         timer_on = 0;
3113
3114         /* just flush them all */
3115         for (i = 0; i < g_num_of_iommus; i++) {
3116                 struct intel_iommu *iommu = g_iommus[i];
3117                 if (!iommu)
3118                         continue;
3119
3120                 if (!deferred_flush[i].next)
3121                         continue;
3122
3123                 /* In caching mode, global flushes turn emulation expensive */
3124                 if (!cap_caching_mode(iommu->cap))
3125                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3126                                          DMA_TLB_GLOBAL_FLUSH);
3127                 for (j = 0; j < deferred_flush[i].next; j++) {
3128                         unsigned long mask;
3129                         struct iova *iova = deferred_flush[i].iova[j];
3130                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3131
3132                         /* On real hardware multiple invalidations are expensive */
3133                         if (cap_caching_mode(iommu->cap))
3134                                 iommu_flush_iotlb_psi(iommu, domain->id,
3135                                         iova->pfn_lo, iova_size(iova),
3136                                         !deferred_flush[i].freelist[j], 0);
3137                         else {
3138                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3139                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3140                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3141                         }
3142                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3143                         if (deferred_flush[i].freelist[j])
3144                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3145                 }
3146                 deferred_flush[i].next = 0;
3147         }
3148
3149         list_size = 0;
3150 }
3151
3152 static void flush_unmaps_timeout(unsigned long data)
3153 {
3154         unsigned long flags;
3155
3156         spin_lock_irqsave(&async_umap_flush_lock, flags);
3157         flush_unmaps();
3158         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3159 }
3160
3161 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3162 {
3163         unsigned long flags;
3164         int next, iommu_id;
3165         struct intel_iommu *iommu;
3166
3167         spin_lock_irqsave(&async_umap_flush_lock, flags);
3168         if (list_size == HIGH_WATER_MARK)
3169                 flush_unmaps();
3170
3171         iommu = domain_get_iommu(dom);
3172         iommu_id = iommu->seq_id;
3173
3174         next = deferred_flush[iommu_id].next;
3175         deferred_flush[iommu_id].domain[next] = dom;
3176         deferred_flush[iommu_id].iova[next] = iova;
3177         deferred_flush[iommu_id].freelist[next] = freelist;
3178         deferred_flush[iommu_id].next++;
3179
3180         if (!timer_on) {
3181                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3182                 timer_on = 1;
3183         }
3184         list_size++;
3185         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3186 }
3187
3188 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3189 {
3190         struct dmar_domain *domain;
3191         unsigned long start_pfn, last_pfn;
3192         struct iova *iova;
3193         struct intel_iommu *iommu;
3194         struct page *freelist;
3195
3196         if (iommu_no_mapping(dev))
3197                 return;
3198
3199         domain = find_domain(dev);
3200         BUG_ON(!domain);
3201
3202         iommu = domain_get_iommu(domain);
3203
3204         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3205         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3206                       (unsigned long long)dev_addr))
3207                 return;
3208
3209         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3210         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3211
3212         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3213                  dev_name(dev), start_pfn, last_pfn);
3214
3215         freelist = domain_unmap(domain, start_pfn, last_pfn);
3216
3217         if (intel_iommu_strict) {
3218                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3219                                       last_pfn - start_pfn + 1, !freelist, 0);
3220                 /* free iova */
3221                 __free_iova(&domain->iovad, iova);
3222                 dma_free_pagelist(freelist);
3223         } else {
3224                 add_unmap(domain, iova, freelist);
3225                 /*
3226                  * queue up the release of the unmap to save the 1/6th of the
3227                  * cpu used up by the iotlb flush operation...
3228                  */
3229         }
3230 }
3231
3232 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3233                              size_t size, enum dma_data_direction dir,
3234                              struct dma_attrs *attrs)
3235 {
3236         intel_unmap(dev, dev_addr);
3237 }
3238
3239 static void *intel_alloc_coherent(struct device *dev, size_t size,
3240                                   dma_addr_t *dma_handle, gfp_t flags,
3241                                   struct dma_attrs *attrs)
3242 {
3243         struct page *page = NULL;
3244         int order;
3245
3246         size = PAGE_ALIGN(size);
3247         order = get_order(size);
3248
3249         if (!iommu_no_mapping(dev))
3250                 flags &= ~(GFP_DMA | GFP_DMA32);
3251         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3252                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3253                         flags |= GFP_DMA;
3254                 else
3255                         flags |= GFP_DMA32;
3256         }
3257
3258         if (flags & __GFP_WAIT) {
3259                 unsigned int count = size >> PAGE_SHIFT;
3260
3261                 page = dma_alloc_from_contiguous(dev, count, order);
3262                 if (page && iommu_no_mapping(dev) &&
3263                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3264                         dma_release_from_contiguous(dev, page, count);
3265                         page = NULL;
3266                 }
3267         }
3268
3269         if (!page)
3270                 page = alloc_pages(flags, order);
3271         if (!page)
3272                 return NULL;
3273         memset(page_address(page), 0, size);
3274
3275         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3276                                          DMA_BIDIRECTIONAL,
3277                                          dev->coherent_dma_mask);
3278         if (*dma_handle)
3279                 return page_address(page);
3280         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3281                 __free_pages(page, order);
3282
3283         return NULL;
3284 }
3285
3286 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3287                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3288 {
3289         int order;
3290         struct page *page = virt_to_page(vaddr);
3291
3292         size = PAGE_ALIGN(size);
3293         order = get_order(size);
3294
3295         intel_unmap(dev, dma_handle);
3296         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3297                 __free_pages(page, order);
3298 }
3299
3300 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3301                            int nelems, enum dma_data_direction dir,
3302                            struct dma_attrs *attrs)
3303 {
3304         intel_unmap(dev, sglist[0].dma_address);
3305 }
3306
3307 static int intel_nontranslate_map_sg(struct device *hddev,
3308         struct scatterlist *sglist, int nelems, int dir)
3309 {
3310         int i;
3311         struct scatterlist *sg;
3312
3313         for_each_sg(sglist, sg, nelems, i) {
3314                 BUG_ON(!sg_page(sg));
3315                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3316                 sg->dma_length = sg->length;
3317         }
3318         return nelems;
3319 }
3320
3321 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3322                         enum dma_data_direction dir, struct dma_attrs *attrs)
3323 {
3324         int i;
3325         struct dmar_domain *domain;
3326         size_t size = 0;
3327         int prot = 0;
3328         struct iova *iova = NULL;
3329         int ret;
3330         struct scatterlist *sg;
3331         unsigned long start_vpfn;
3332         struct intel_iommu *iommu;
3333
3334         BUG_ON(dir == DMA_NONE);
3335         if (iommu_no_mapping(dev))
3336                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3337
3338         domain = get_valid_domain_for_dev(dev);
3339         if (!domain)
3340                 return 0;
3341
3342         iommu = domain_get_iommu(domain);
3343
3344         for_each_sg(sglist, sg, nelems, i)
3345                 size += aligned_nrpages(sg->offset, sg->length);
3346
3347         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3348                                 *dev->dma_mask);
3349         if (!iova) {
3350                 sglist->dma_length = 0;
3351                 return 0;
3352         }
3353
3354         /*
3355          * Check if DMAR supports zero-length reads on write only
3356          * mappings..
3357          */
3358         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3359                         !cap_zlr(iommu->cap))
3360                 prot |= DMA_PTE_READ;
3361         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3362                 prot |= DMA_PTE_WRITE;
3363
3364         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3365
3366         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3367         if (unlikely(ret)) {
3368                 dma_pte_free_pagetable(domain, start_vpfn,
3369                                        start_vpfn + size - 1);
3370                 __free_iova(&domain->iovad, iova);
3371                 return 0;
3372         }
3373
3374         /* it's a non-present to present mapping. Only flush if caching mode */
3375         if (cap_caching_mode(iommu->cap))
3376                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3377         else
3378                 iommu_flush_write_buffer(iommu);
3379
3380         return nelems;
3381 }
3382
3383 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3384 {
3385         return !dma_addr;
3386 }
3387
3388 struct dma_map_ops intel_dma_ops = {
3389         .alloc = intel_alloc_coherent,
3390         .free = intel_free_coherent,
3391         .map_sg = intel_map_sg,
3392         .unmap_sg = intel_unmap_sg,
3393         .map_page = intel_map_page,
3394         .unmap_page = intel_unmap_page,
3395         .mapping_error = intel_mapping_error,
3396 };
3397
3398 static inline int iommu_domain_cache_init(void)
3399 {
3400         int ret = 0;
3401
3402         iommu_domain_cache = kmem_cache_create("iommu_domain",
3403                                          sizeof(struct dmar_domain),
3404                                          0,
3405                                          SLAB_HWCACHE_ALIGN,
3406
3407                                          NULL);
3408         if (!iommu_domain_cache) {
3409                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3410                 ret = -ENOMEM;
3411         }
3412
3413         return ret;
3414 }
3415
3416 static inline int iommu_devinfo_cache_init(void)
3417 {
3418         int ret = 0;
3419
3420         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3421                                          sizeof(struct device_domain_info),
3422                                          0,
3423                                          SLAB_HWCACHE_ALIGN,
3424                                          NULL);
3425         if (!iommu_devinfo_cache) {
3426                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3427                 ret = -ENOMEM;
3428         }
3429
3430         return ret;
3431 }
3432
3433 static int __init iommu_init_mempool(void)
3434 {
3435         int ret;
3436         ret = iommu_iova_cache_init();
3437         if (ret)
3438                 return ret;
3439
3440         ret = iommu_domain_cache_init();
3441         if (ret)
3442                 goto domain_error;
3443
3444         ret = iommu_devinfo_cache_init();
3445         if (!ret)
3446                 return ret;
3447
3448         kmem_cache_destroy(iommu_domain_cache);
3449 domain_error:
3450         iommu_iova_cache_destroy();
3451
3452         return -ENOMEM;
3453 }
3454
3455 static void __init iommu_exit_mempool(void)
3456 {
3457         kmem_cache_destroy(iommu_devinfo_cache);
3458         kmem_cache_destroy(iommu_domain_cache);
3459         iommu_iova_cache_destroy();
3460 }
3461
3462 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3463 {
3464         struct dmar_drhd_unit *drhd;
3465         u32 vtbar;
3466         int rc;
3467
3468         /* We know that this device on this chipset has its own IOMMU.
3469          * If we find it under a different IOMMU, then the BIOS is lying
3470          * to us. Hope that the IOMMU for this device is actually
3471          * disabled, and it needs no translation...
3472          */
3473         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3474         if (rc) {
3475                 /* "can't" happen */
3476                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3477                 return;
3478         }
3479         vtbar &= 0xffff0000;
3480
3481         /* we know that the this iommu should be at offset 0xa000 from vtbar */
3482         drhd = dmar_find_matched_drhd_unit(pdev);
3483         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3484                             TAINT_FIRMWARE_WORKAROUND,
3485                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3486                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3487 }
3488 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3489
3490 static void __init init_no_remapping_devices(void)
3491 {
3492         struct dmar_drhd_unit *drhd;
3493         struct device *dev;
3494         int i;
3495
3496         for_each_drhd_unit(drhd) {
3497                 if (!drhd->include_all) {
3498                         for_each_active_dev_scope(drhd->devices,
3499                                                   drhd->devices_cnt, i, dev)
3500                                 break;
3501                         /* ignore DMAR unit if no devices exist */
3502                         if (i == drhd->devices_cnt)
3503                                 drhd->ignored = 1;
3504                 }
3505         }
3506
3507         for_each_active_drhd_unit(drhd) {
3508                 if (drhd->include_all)
3509                         continue;
3510
3511                 for_each_active_dev_scope(drhd->devices,
3512                                           drhd->devices_cnt, i, dev)
3513                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3514                                 break;
3515                 if (i < drhd->devices_cnt)
3516                         continue;
3517
3518                 /* This IOMMU has *only* gfx devices. Either bypass it or
3519                    set the gfx_mapped flag, as appropriate */
3520                 if (dmar_map_gfx) {
3521                         intel_iommu_gfx_mapped = 1;
3522                 } else {
3523                         drhd->ignored = 1;
3524                         for_each_active_dev_scope(drhd->devices,
3525                                                   drhd->devices_cnt, i, dev)
3526                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3527                 }
3528         }
3529 }
3530
3531 #ifdef CONFIG_SUSPEND
3532 static int init_iommu_hw(void)
3533 {
3534         struct dmar_drhd_unit *drhd;
3535         struct intel_iommu *iommu = NULL;
3536
3537         for_each_active_iommu(iommu, drhd)
3538                 if (iommu->qi)
3539                         dmar_reenable_qi(iommu);
3540
3541         for_each_iommu(iommu, drhd) {
3542                 if (drhd->ignored) {
3543                         /*
3544                          * we always have to disable PMRs or DMA may fail on
3545                          * this device
3546                          */
3547                         if (force_on)
3548                                 iommu_disable_protect_mem_regions(iommu);
3549                         continue;
3550                 }
3551         
3552                 iommu_flush_write_buffer(iommu);
3553
3554                 iommu_set_root_entry(iommu);
3555
3556                 iommu->flush.flush_context(iommu, 0, 0, 0,
3557                                            DMA_CCMD_GLOBAL_INVL);
3558                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3559                 iommu_enable_translation(iommu);
3560                 iommu_disable_protect_mem_regions(iommu);
3561         }
3562
3563         return 0;
3564 }
3565
3566 static void iommu_flush_all(void)
3567 {
3568         struct dmar_drhd_unit *drhd;
3569         struct intel_iommu *iommu;
3570
3571         for_each_active_iommu(iommu, drhd) {
3572                 iommu->flush.flush_context(iommu, 0, 0, 0,
3573                                            DMA_CCMD_GLOBAL_INVL);
3574                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3575                                          DMA_TLB_GLOBAL_FLUSH);
3576         }
3577 }
3578
3579 static int iommu_suspend(void)
3580 {
3581         struct dmar_drhd_unit *drhd;
3582         struct intel_iommu *iommu = NULL;
3583         unsigned long flag;
3584
3585         for_each_active_iommu(iommu, drhd) {
3586                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3587                                                  GFP_ATOMIC);
3588                 if (!iommu->iommu_state)
3589                         goto nomem;
3590         }
3591
3592         iommu_flush_all();
3593
3594         for_each_active_iommu(iommu, drhd) {
3595                 iommu_disable_translation(iommu);
3596
3597                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3598
3599                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3600                         readl(iommu->reg + DMAR_FECTL_REG);
3601                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3602                         readl(iommu->reg + DMAR_FEDATA_REG);
3603                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3604                         readl(iommu->reg + DMAR_FEADDR_REG);
3605                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3606                         readl(iommu->reg + DMAR_FEUADDR_REG);
3607
3608                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3609         }
3610         return 0;
3611
3612 nomem:
3613         for_each_active_iommu(iommu, drhd)
3614                 kfree(iommu->iommu_state);
3615
3616         return -ENOMEM;
3617 }
3618
3619 static void iommu_resume(void)
3620 {
3621         struct dmar_drhd_unit *drhd;
3622         struct intel_iommu *iommu = NULL;
3623         unsigned long flag;
3624
3625         if (init_iommu_hw()) {
3626                 if (force_on)
3627                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3628                 else
3629                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3630                 return;
3631         }
3632
3633         for_each_active_iommu(iommu, drhd) {
3634
3635                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3636
3637                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3638                         iommu->reg + DMAR_FECTL_REG);
3639                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3640                         iommu->reg + DMAR_FEDATA_REG);
3641                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3642                         iommu->reg + DMAR_FEADDR_REG);
3643                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3644                         iommu->reg + DMAR_FEUADDR_REG);
3645
3646                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3647         }
3648
3649         for_each_active_iommu(iommu, drhd)
3650                 kfree(iommu->iommu_state);
3651 }
3652
3653 static struct syscore_ops iommu_syscore_ops = {
3654         .resume         = iommu_resume,
3655         .suspend        = iommu_suspend,
3656 };
3657
3658 static void __init init_iommu_pm_ops(void)
3659 {
3660         register_syscore_ops(&iommu_syscore_ops);
3661 }
3662
3663 #else
3664 static inline void init_iommu_pm_ops(void) {}
3665 #endif  /* CONFIG_PM */
3666
3667
3668 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3669 {
3670         struct acpi_dmar_reserved_memory *rmrr;
3671         struct dmar_rmrr_unit *rmrru;
3672
3673         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3674         if (!rmrru)
3675                 return -ENOMEM;
3676
3677         rmrru->hdr = header;
3678         rmrr = (struct acpi_dmar_reserved_memory *)header;
3679         rmrru->base_address = rmrr->base_address;
3680         rmrru->end_address = rmrr->end_address;
3681         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3682                                 ((void *)rmrr) + rmrr->header.length,
3683                                 &rmrru->devices_cnt);
3684         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3685                 kfree(rmrru);
3686                 return -ENOMEM;
3687         }
3688
3689         list_add(&rmrru->list, &dmar_rmrr_units);
3690
3691         return 0;
3692 }
3693
3694 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3695 {
3696         struct dmar_atsr_unit *atsru;
3697         struct acpi_dmar_atsr *tmp;
3698
3699         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3700                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3701                 if (atsr->segment != tmp->segment)
3702                         continue;
3703                 if (atsr->header.length != tmp->header.length)
3704                         continue;
3705                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3706                         return atsru;
3707         }
3708
3709         return NULL;
3710 }
3711
3712 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3713 {
3714         struct acpi_dmar_atsr *atsr;
3715         struct dmar_atsr_unit *atsru;
3716
3717         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3718                 return 0;
3719
3720         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3721         atsru = dmar_find_atsr(atsr);
3722         if (atsru)
3723                 return 0;
3724
3725         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3726         if (!atsru)
3727                 return -ENOMEM;
3728
3729         /*
3730          * If memory is allocated from slab by ACPI _DSM method, we need to
3731          * copy the memory content because the memory buffer will be freed
3732          * on return.
3733          */
3734         atsru->hdr = (void *)(atsru + 1);
3735         memcpy(atsru->hdr, hdr, hdr->length);
3736         atsru->include_all = atsr->flags & 0x1;
3737         if (!atsru->include_all) {
3738                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3739                                 (void *)atsr + atsr->header.length,
3740                                 &atsru->devices_cnt);
3741                 if (atsru->devices_cnt && atsru->devices == NULL) {
3742                         kfree(atsru);
3743                         return -ENOMEM;
3744                 }
3745         }
3746
3747         list_add_rcu(&atsru->list, &dmar_atsr_units);
3748
3749         return 0;
3750 }
3751
3752 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3753 {
3754         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3755         kfree(atsru);
3756 }
3757
3758 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3759 {
3760         struct acpi_dmar_atsr *atsr;
3761         struct dmar_atsr_unit *atsru;
3762
3763         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3764         atsru = dmar_find_atsr(atsr);
3765         if (atsru) {
3766                 list_del_rcu(&atsru->list);
3767                 synchronize_rcu();
3768                 intel_iommu_free_atsr(atsru);
3769         }
3770
3771         return 0;
3772 }
3773
3774 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3775 {
3776         int i;
3777         struct device *dev;
3778         struct acpi_dmar_atsr *atsr;
3779         struct dmar_atsr_unit *atsru;
3780
3781         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3782         atsru = dmar_find_atsr(atsr);
3783         if (!atsru)
3784                 return 0;
3785
3786         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3787                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3788                                           i, dev)
3789                         return -EBUSY;
3790
3791         return 0;
3792 }
3793
3794 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3795 {
3796         int sp, ret = 0;
3797         struct intel_iommu *iommu = dmaru->iommu;
3798
3799         if (g_iommus[iommu->seq_id])
3800                 return 0;
3801
3802         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3803                 pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3804                         iommu->name);
3805                 return -ENXIO;
3806         }
3807         if (!ecap_sc_support(iommu->ecap) &&
3808             domain_update_iommu_snooping(iommu)) {
3809                 pr_warn("IOMMU: %s doesn't support snooping.\n",
3810                         iommu->name);
3811                 return -ENXIO;
3812         }
3813         sp = domain_update_iommu_superpage(iommu) - 1;
3814         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3815                 pr_warn("IOMMU: %s doesn't support large page.\n",
3816                         iommu->name);
3817                 return -ENXIO;
3818         }
3819
3820         /*
3821          * Disable translation if already enabled prior to OS handover.
3822          */
3823         if (iommu->gcmd & DMA_GCMD_TE)
3824                 iommu_disable_translation(iommu);
3825
3826         g_iommus[iommu->seq_id] = iommu;
3827         ret = iommu_init_domains(iommu);
3828         if (ret == 0)
3829                 ret = iommu_alloc_root_entry(iommu);
3830         if (ret)
3831                 goto out;
3832
3833         if (dmaru->ignored) {
3834                 /*
3835                  * we always have to disable PMRs or DMA may fail on this device
3836                  */
3837                 if (force_on)
3838                         iommu_disable_protect_mem_regions(iommu);
3839                 return 0;
3840         }
3841
3842         intel_iommu_init_qi(iommu);
3843         iommu_flush_write_buffer(iommu);
3844         ret = dmar_set_interrupt(iommu);
3845         if (ret)
3846                 goto disable_iommu;
3847
3848         iommu_set_root_entry(iommu);
3849         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3850         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3851         iommu_enable_translation(iommu);
3852
3853         if (si_domain) {
3854                 ret = iommu_attach_domain(si_domain, iommu);
3855                 if (ret < 0 || si_domain->id != ret)
3856                         goto disable_iommu;
3857                 domain_attach_iommu(si_domain, iommu);
3858         }
3859
3860         iommu_disable_protect_mem_regions(iommu);
3861         return 0;
3862
3863 disable_iommu:
3864         disable_dmar_iommu(iommu);
3865 out:
3866         free_dmar_iommu(iommu);
3867         return ret;
3868 }
3869
3870 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3871 {
3872         int ret = 0;
3873         struct intel_iommu *iommu = dmaru->iommu;
3874
3875         if (!intel_iommu_enabled)
3876                 return 0;
3877         if (iommu == NULL)
3878                 return -EINVAL;
3879
3880         if (insert) {
3881                 ret = intel_iommu_add(dmaru);
3882         } else {
3883                 disable_dmar_iommu(iommu);
3884                 free_dmar_iommu(iommu);
3885         }
3886
3887         return ret;
3888 }
3889
3890 static void intel_iommu_free_dmars(void)
3891 {
3892         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3893         struct dmar_atsr_unit *atsru, *atsr_n;
3894
3895         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3896                 list_del(&rmrru->list);
3897                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3898                 kfree(rmrru);
3899         }
3900
3901         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3902                 list_del(&atsru->list);
3903                 intel_iommu_free_atsr(atsru);
3904         }
3905 }
3906
3907 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3908 {
3909         int i, ret = 1;
3910         struct pci_bus *bus;
3911         struct pci_dev *bridge = NULL;
3912         struct device *tmp;
3913         struct acpi_dmar_atsr *atsr;
3914         struct dmar_atsr_unit *atsru;
3915
3916         dev = pci_physfn(dev);
3917         for (bus = dev->bus; bus; bus = bus->parent) {
3918                 bridge = bus->self;
3919                 if (!bridge || !pci_is_pcie(bridge) ||
3920                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3921                         return 0;
3922                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3923                         break;
3924         }
3925         if (!bridge)
3926                 return 0;
3927
3928         rcu_read_lock();
3929         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3930                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3931                 if (atsr->segment != pci_domain_nr(dev->bus))
3932                         continue;
3933
3934                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3935                         if (tmp == &bridge->dev)
3936                                 goto out;
3937
3938                 if (atsru->include_all)
3939                         goto out;
3940         }
3941         ret = 0;
3942 out:
3943         rcu_read_unlock();
3944
3945         return ret;
3946 }
3947
3948 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3949 {
3950         int ret = 0;
3951         struct dmar_rmrr_unit *rmrru;
3952         struct dmar_atsr_unit *atsru;
3953         struct acpi_dmar_atsr *atsr;
3954         struct acpi_dmar_reserved_memory *rmrr;
3955
3956         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3957                 return 0;
3958
3959         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3960                 rmrr = container_of(rmrru->hdr,
3961                                     struct acpi_dmar_reserved_memory, header);
3962                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3963                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3964                                 ((void *)rmrr) + rmrr->header.length,
3965                                 rmrr->segment, rmrru->devices,
3966                                 rmrru->devices_cnt);
3967                         if(ret < 0)
3968                                 return ret;
3969                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3970                         dmar_remove_dev_scope(info, rmrr->segment,
3971                                 rmrru->devices, rmrru->devices_cnt);
3972                 }
3973         }
3974
3975         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3976                 if (atsru->include_all)
3977                         continue;
3978
3979                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3980                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3981                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3982                                         (void *)atsr + atsr->header.length,
3983                                         atsr->segment, atsru->devices,
3984                                         atsru->devices_cnt);
3985                         if (ret > 0)
3986                                 break;
3987                         else if(ret < 0)
3988                                 return ret;
3989                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3990                         if (dmar_remove_dev_scope(info, atsr->segment,
3991                                         atsru->devices, atsru->devices_cnt))
3992                                 break;
3993                 }
3994         }
3995
3996         return 0;
3997 }
3998
3999 /*
4000  * Here we only respond to action of unbound device from driver.
4001  *
4002  * Added device is not attached to its DMAR domain here yet. That will happen
4003  * when mapping the device to iova.
4004  */
4005 static int device_notifier(struct notifier_block *nb,
4006                                   unsigned long action, void *data)
4007 {
4008         struct device *dev = data;
4009         struct dmar_domain *domain;
4010
4011         if (iommu_dummy(dev))
4012                 return 0;
4013
4014         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4015                 return 0;
4016
4017         domain = find_domain(dev);
4018         if (!domain)
4019                 return 0;
4020
4021         down_read(&dmar_global_lock);
4022         domain_remove_one_dev_info(domain, dev);
4023         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4024                 domain_exit(domain);
4025         up_read(&dmar_global_lock);
4026
4027         return 0;
4028 }
4029
4030 static struct notifier_block device_nb = {
4031         .notifier_call = device_notifier,
4032 };
4033
4034 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4035                                        unsigned long val, void *v)
4036 {
4037         struct memory_notify *mhp = v;
4038         unsigned long long start, end;
4039         unsigned long start_vpfn, last_vpfn;
4040
4041         switch (val) {
4042         case MEM_GOING_ONLINE:
4043                 start = mhp->start_pfn << PAGE_SHIFT;
4044                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4045                 if (iommu_domain_identity_map(si_domain, start, end)) {
4046                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4047                                 start, end);
4048                         return NOTIFY_BAD;
4049                 }
4050                 break;
4051
4052         case MEM_OFFLINE:
4053         case MEM_CANCEL_ONLINE:
4054                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4055                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4056                 while (start_vpfn <= last_vpfn) {
4057                         struct iova *iova;
4058                         struct dmar_drhd_unit *drhd;
4059                         struct intel_iommu *iommu;
4060                         struct page *freelist;
4061
4062                         iova = find_iova(&si_domain->iovad, start_vpfn);
4063                         if (iova == NULL) {
4064                                 pr_debug("dmar: failed get IOVA for PFN %lx\n",
4065                                          start_vpfn);
4066                                 break;
4067                         }
4068
4069                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4070                                                      start_vpfn, last_vpfn);
4071                         if (iova == NULL) {
4072                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4073                                         start_vpfn, last_vpfn);
4074                                 return NOTIFY_BAD;
4075                         }
4076
4077                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4078                                                iova->pfn_hi);
4079
4080                         rcu_read_lock();
4081                         for_each_active_iommu(iommu, drhd)
4082                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
4083                                         iova->pfn_lo, iova_size(iova),
4084                                         !freelist, 0);
4085                         rcu_read_unlock();
4086                         dma_free_pagelist(freelist);
4087
4088                         start_vpfn = iova->pfn_hi + 1;
4089                         free_iova_mem(iova);
4090                 }
4091                 break;
4092         }
4093
4094         return NOTIFY_OK;
4095 }
4096
4097 static struct notifier_block intel_iommu_memory_nb = {
4098         .notifier_call = intel_iommu_memory_notifier,
4099         .priority = 0
4100 };
4101
4102
4103 static ssize_t intel_iommu_show_version(struct device *dev,
4104                                         struct device_attribute *attr,
4105                                         char *buf)
4106 {
4107         struct intel_iommu *iommu = dev_get_drvdata(dev);
4108         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4109         return sprintf(buf, "%d:%d\n",
4110                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4111 }
4112 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4113
4114 static ssize_t intel_iommu_show_address(struct device *dev,
4115                                         struct device_attribute *attr,
4116                                         char *buf)
4117 {
4118         struct intel_iommu *iommu = dev_get_drvdata(dev);
4119         return sprintf(buf, "%llx\n", iommu->reg_phys);
4120 }
4121 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4122
4123 static ssize_t intel_iommu_show_cap(struct device *dev,
4124                                     struct device_attribute *attr,
4125                                     char *buf)
4126 {
4127         struct intel_iommu *iommu = dev_get_drvdata(dev);
4128         return sprintf(buf, "%llx\n", iommu->cap);
4129 }
4130 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4131
4132 static ssize_t intel_iommu_show_ecap(struct device *dev,
4133                                     struct device_attribute *attr,
4134                                     char *buf)
4135 {
4136         struct intel_iommu *iommu = dev_get_drvdata(dev);
4137         return sprintf(buf, "%llx\n", iommu->ecap);
4138 }
4139 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4140
4141 static struct attribute *intel_iommu_attrs[] = {
4142         &dev_attr_version.attr,
4143         &dev_attr_address.attr,
4144         &dev_attr_cap.attr,
4145         &dev_attr_ecap.attr,
4146         NULL,
4147 };
4148
4149 static struct attribute_group intel_iommu_group = {
4150         .name = "intel-iommu",
4151         .attrs = intel_iommu_attrs,
4152 };
4153
4154 const struct attribute_group *intel_iommu_groups[] = {
4155         &intel_iommu_group,
4156         NULL,
4157 };
4158
4159 int __init intel_iommu_init(void)
4160 {
4161         int ret = -ENODEV;
4162         struct dmar_drhd_unit *drhd;
4163         struct intel_iommu *iommu;
4164
4165         /* VT-d is required for a TXT/tboot launch, so enforce that */
4166         force_on = tboot_force_iommu();
4167
4168         if (iommu_init_mempool()) {
4169                 if (force_on)
4170                         panic("tboot: Failed to initialize iommu memory\n");
4171                 return -ENOMEM;
4172         }
4173
4174         down_write(&dmar_global_lock);
4175         if (dmar_table_init()) {
4176                 if (force_on)
4177                         panic("tboot: Failed to initialize DMAR table\n");
4178                 goto out_free_dmar;
4179         }
4180
4181         /*
4182          * Disable translation if already enabled prior to OS handover.
4183          */
4184         for_each_active_iommu(iommu, drhd)
4185                 if (iommu->gcmd & DMA_GCMD_TE)
4186                         iommu_disable_translation(iommu);
4187
4188         if (dmar_dev_scope_init() < 0) {
4189                 if (force_on)
4190                         panic("tboot: Failed to initialize DMAR device scope\n");
4191                 goto out_free_dmar;
4192         }
4193
4194         if (no_iommu || dmar_disabled)
4195                 goto out_free_dmar;
4196
4197         if (list_empty(&dmar_rmrr_units))
4198                 printk(KERN_INFO "DMAR: No RMRR found\n");
4199
4200         if (list_empty(&dmar_atsr_units))
4201                 printk(KERN_INFO "DMAR: No ATSR found\n");
4202
4203         if (dmar_init_reserved_ranges()) {
4204                 if (force_on)
4205                         panic("tboot: Failed to reserve iommu ranges\n");
4206                 goto out_free_reserved_range;
4207         }
4208
4209         init_no_remapping_devices();
4210
4211         ret = init_dmars();
4212         if (ret) {
4213                 if (force_on)
4214                         panic("tboot: Failed to initialize DMARs\n");
4215                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4216                 goto out_free_reserved_range;
4217         }
4218         up_write(&dmar_global_lock);
4219         printk(KERN_INFO
4220         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4221
4222         init_timer(&unmap_timer);
4223 #ifdef CONFIG_SWIOTLB
4224         swiotlb = 0;
4225 #endif
4226         dma_ops = &intel_dma_ops;
4227
4228         init_iommu_pm_ops();
4229
4230         for_each_active_iommu(iommu, drhd)
4231                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4232                                                        intel_iommu_groups,
4233                                                        iommu->name);
4234
4235         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4236         bus_register_notifier(&pci_bus_type, &device_nb);
4237         if (si_domain && !hw_pass_through)
4238                 register_memory_notifier(&intel_iommu_memory_nb);
4239
4240         intel_iommu_enabled = 1;
4241
4242         return 0;
4243
4244 out_free_reserved_range:
4245         put_iova_domain(&reserved_iova_list);
4246 out_free_dmar:
4247         intel_iommu_free_dmars();
4248         up_write(&dmar_global_lock);
4249         iommu_exit_mempool();
4250         return ret;
4251 }
4252
4253 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4254 {
4255         struct intel_iommu *iommu = opaque;
4256
4257         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4258         return 0;
4259 }
4260
4261 /*
4262  * NB - intel-iommu lacks any sort of reference counting for the users of
4263  * dependent devices.  If multiple endpoints have intersecting dependent
4264  * devices, unbinding the driver from any one of them will possibly leave
4265  * the others unable to operate.
4266  */
4267 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4268                                            struct device *dev)
4269 {
4270         if (!iommu || !dev || !dev_is_pci(dev))
4271                 return;
4272
4273         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4274 }
4275
4276 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4277                                        struct device *dev)
4278 {
4279         struct device_domain_info *info, *tmp;
4280         struct intel_iommu *iommu;
4281         unsigned long flags;
4282         bool found = false;
4283         u8 bus, devfn;
4284
4285         iommu = device_to_iommu(dev, &bus, &devfn);
4286         if (!iommu)
4287                 return;
4288
4289         spin_lock_irqsave(&device_domain_lock, flags);
4290         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4291                 if (info->iommu == iommu && info->bus == bus &&
4292                     info->devfn == devfn) {
4293                         unlink_domain_info(info);
4294                         spin_unlock_irqrestore(&device_domain_lock, flags);
4295
4296                         iommu_disable_dev_iotlb(info);
4297                         iommu_detach_dev(iommu, info->bus, info->devfn);
4298                         iommu_detach_dependent_devices(iommu, dev);
4299                         free_devinfo_mem(info);
4300
4301                         spin_lock_irqsave(&device_domain_lock, flags);
4302
4303                         if (found)
4304                                 break;
4305                         else
4306                                 continue;
4307                 }
4308
4309                 /* if there is no other devices under the same iommu
4310                  * owned by this domain, clear this iommu in iommu_bmp
4311                  * update iommu count and coherency
4312                  */
4313                 if (info->iommu == iommu)
4314                         found = true;
4315         }
4316
4317         spin_unlock_irqrestore(&device_domain_lock, flags);
4318
4319         if (found == 0) {
4320                 domain_detach_iommu(domain, iommu);
4321                 if (!domain_type_is_vm_or_si(domain))
4322                         iommu_detach_domain(domain, iommu);
4323         }
4324 }
4325
4326 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4327 {
4328         int adjust_width;
4329
4330         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4331                         DMA_32BIT_PFN);
4332         domain_reserve_special_ranges(domain);
4333
4334         /* calculate AGAW */
4335         domain->gaw = guest_width;
4336         adjust_width = guestwidth_to_adjustwidth(guest_width);
4337         domain->agaw = width_to_agaw(adjust_width);
4338
4339         domain->iommu_coherency = 0;
4340         domain->iommu_snooping = 0;
4341         domain->iommu_superpage = 0;
4342         domain->max_addr = 0;
4343
4344         /* always allocate the top pgd */
4345         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4346         if (!domain->pgd)
4347                 return -ENOMEM;
4348         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4349         return 0;
4350 }
4351
4352 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4353 {
4354         struct dmar_domain *dmar_domain;
4355         struct iommu_domain *domain;
4356
4357         if (type != IOMMU_DOMAIN_UNMANAGED)
4358                 return NULL;
4359
4360         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4361         if (!dmar_domain) {
4362                 printk(KERN_ERR
4363                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4364                 return NULL;
4365         }
4366         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4367                 printk(KERN_ERR
4368                         "intel_iommu_domain_init() failed\n");
4369                 domain_exit(dmar_domain);
4370                 return NULL;
4371         }
4372         domain_update_iommu_cap(dmar_domain);
4373
4374         domain = &dmar_domain->domain;
4375         domain->geometry.aperture_start = 0;
4376         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4377         domain->geometry.force_aperture = true;
4378
4379         return domain;
4380 }
4381
4382 static void intel_iommu_domain_free(struct iommu_domain *domain)
4383 {
4384         domain_exit(to_dmar_domain(domain));
4385 }
4386
4387 static int intel_iommu_attach_device(struct iommu_domain *domain,
4388                                      struct device *dev)
4389 {
4390         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4391         struct intel_iommu *iommu;
4392         int addr_width;
4393         u8 bus, devfn;
4394
4395         if (device_is_rmrr_locked(dev)) {
4396                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4397                 return -EPERM;
4398         }
4399
4400         /* normally dev is not mapped */
4401         if (unlikely(domain_context_mapped(dev))) {
4402                 struct dmar_domain *old_domain;
4403
4404                 old_domain = find_domain(dev);
4405                 if (old_domain) {
4406                         if (domain_type_is_vm_or_si(dmar_domain))
4407                                 domain_remove_one_dev_info(old_domain, dev);
4408                         else
4409                                 domain_remove_dev_info(old_domain);
4410
4411                         if (!domain_type_is_vm_or_si(old_domain) &&
4412                              list_empty(&old_domain->devices))
4413                                 domain_exit(old_domain);
4414                 }
4415         }
4416
4417         iommu = device_to_iommu(dev, &bus, &devfn);
4418         if (!iommu)
4419                 return -ENODEV;
4420
4421         /* check if this iommu agaw is sufficient for max mapped address */
4422         addr_width = agaw_to_width(iommu->agaw);
4423         if (addr_width > cap_mgaw(iommu->cap))
4424                 addr_width = cap_mgaw(iommu->cap);
4425
4426         if (dmar_domain->max_addr > (1LL << addr_width)) {
4427                 printk(KERN_ERR "%s: iommu width (%d) is not "
4428                        "sufficient for the mapped address (%llx)\n",
4429                        __func__, addr_width, dmar_domain->max_addr);
4430                 return -EFAULT;
4431         }
4432         dmar_domain->gaw = addr_width;
4433
4434         /*
4435          * Knock out extra levels of page tables if necessary
4436          */
4437         while (iommu->agaw < dmar_domain->agaw) {
4438                 struct dma_pte *pte;
4439
4440                 pte = dmar_domain->pgd;
4441                 if (dma_pte_present(pte)) {
4442                         dmar_domain->pgd = (struct dma_pte *)
4443                                 phys_to_virt(dma_pte_addr(pte));
4444                         free_pgtable_page(pte);
4445                 }
4446                 dmar_domain->agaw--;
4447         }
4448
4449         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4450 }
4451
4452 static void intel_iommu_detach_device(struct iommu_domain *domain,
4453                                       struct device *dev)
4454 {
4455         domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4456 }
4457
4458 static int intel_iommu_map(struct iommu_domain *domain,
4459                            unsigned long iova, phys_addr_t hpa,
4460                            size_t size, int iommu_prot)
4461 {
4462         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4463         u64 max_addr;
4464         int prot = 0;
4465         int ret;
4466
4467         if (iommu_prot & IOMMU_READ)
4468                 prot |= DMA_PTE_READ;
4469         if (iommu_prot & IOMMU_WRITE)
4470                 prot |= DMA_PTE_WRITE;
4471         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4472                 prot |= DMA_PTE_SNP;
4473
4474         max_addr = iova + size;
4475         if (dmar_domain->max_addr < max_addr) {
4476                 u64 end;
4477
4478                 /* check if minimum agaw is sufficient for mapped address */
4479                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4480                 if (end < max_addr) {
4481                         printk(KERN_ERR "%s: iommu width (%d) is not "
4482                                "sufficient for the mapped address (%llx)\n",
4483                                __func__, dmar_domain->gaw, max_addr);
4484                         return -EFAULT;
4485                 }
4486                 dmar_domain->max_addr = max_addr;
4487         }
4488         /* Round up size to next multiple of PAGE_SIZE, if it and
4489            the low bits of hpa would take us onto the next page */
4490         size = aligned_nrpages(hpa, size);
4491         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4492                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4493         return ret;
4494 }
4495
4496 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4497                                 unsigned long iova, size_t size)
4498 {
4499         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4500         struct page *freelist = NULL;
4501         struct intel_iommu *iommu;
4502         unsigned long start_pfn, last_pfn;
4503         unsigned int npages;
4504         int iommu_id, num, ndomains, level = 0;
4505
4506         /* Cope with horrid API which requires us to unmap more than the
4507            size argument if it happens to be a large-page mapping. */
4508         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4509                 BUG();
4510
4511         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4512                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4513
4514         start_pfn = iova >> VTD_PAGE_SHIFT;
4515         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4516
4517         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4518
4519         npages = last_pfn - start_pfn + 1;
4520
4521         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4522                iommu = g_iommus[iommu_id];
4523
4524                /*
4525                 * find bit position of dmar_domain
4526                 */
4527                ndomains = cap_ndoms(iommu->cap);
4528                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4529                        if (iommu->domains[num] == dmar_domain)
4530                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4531                                                      npages, !freelist, 0);
4532                }
4533
4534         }
4535
4536         dma_free_pagelist(freelist);
4537
4538         if (dmar_domain->max_addr == iova + size)
4539                 dmar_domain->max_addr = iova;
4540
4541         return size;
4542 }
4543
4544 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4545                                             dma_addr_t iova)
4546 {
4547         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4548         struct dma_pte *pte;
4549         int level = 0;
4550         u64 phys = 0;
4551
4552         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4553         if (pte)
4554                 phys = dma_pte_addr(pte);
4555
4556         return phys;
4557 }
4558
4559 static bool intel_iommu_capable(enum iommu_cap cap)
4560 {
4561         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4562                 return domain_update_iommu_snooping(NULL) == 1;
4563         if (cap == IOMMU_CAP_INTR_REMAP)
4564                 return irq_remapping_enabled == 1;
4565
4566         return false;
4567 }
4568
4569 static int intel_iommu_add_device(struct device *dev)
4570 {
4571         struct intel_iommu *iommu;
4572         struct iommu_group *group;
4573         u8 bus, devfn;
4574
4575         iommu = device_to_iommu(dev, &bus, &devfn);
4576         if (!iommu)
4577                 return -ENODEV;
4578
4579         iommu_device_link(iommu->iommu_dev, dev);
4580
4581         group = iommu_group_get_for_dev(dev);
4582
4583         if (IS_ERR(group))
4584                 return PTR_ERR(group);
4585
4586         iommu_group_put(group);
4587         return 0;
4588 }
4589
4590 static void intel_iommu_remove_device(struct device *dev)
4591 {
4592         struct intel_iommu *iommu;
4593         u8 bus, devfn;
4594
4595         iommu = device_to_iommu(dev, &bus, &devfn);
4596         if (!iommu)
4597                 return;
4598
4599         iommu_group_remove_device(dev);
4600
4601         iommu_device_unlink(iommu->iommu_dev, dev);
4602 }
4603
4604 static const struct iommu_ops intel_iommu_ops = {
4605         .capable        = intel_iommu_capable,
4606         .domain_alloc   = intel_iommu_domain_alloc,
4607         .domain_free    = intel_iommu_domain_free,
4608         .attach_dev     = intel_iommu_attach_device,
4609         .detach_dev     = intel_iommu_detach_device,
4610         .map            = intel_iommu_map,
4611         .unmap          = intel_iommu_unmap,
4612         .map_sg         = default_iommu_map_sg,
4613         .iova_to_phys   = intel_iommu_iova_to_phys,
4614         .add_device     = intel_iommu_add_device,
4615         .remove_device  = intel_iommu_remove_device,
4616         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4617 };
4618
4619 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4620 {
4621         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4622         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4623         dmar_map_gfx = 0;
4624 }
4625
4626 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4627 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4628 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4629 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4630 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4631 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4632 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4633
4634 static void quirk_iommu_rwbf(struct pci_dev *dev)
4635 {
4636         /*
4637          * Mobile 4 Series Chipset neglects to set RWBF capability,
4638          * but needs it. Same seems to hold for the desktop versions.
4639          */
4640         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4641         rwbf_quirk = 1;
4642 }
4643
4644 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4645 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4646 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4647 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4648 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4649 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4650 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4651
4652 #define GGC 0x52
4653 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4654 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4655 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4656 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4657 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4658 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4659 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4660 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4661
4662 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4663 {
4664         unsigned short ggc;
4665
4666         if (pci_read_config_word(dev, GGC, &ggc))
4667                 return;
4668
4669         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4670                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4671                 dmar_map_gfx = 0;
4672         } else if (dmar_map_gfx) {
4673                 /* we have to ensure the gfx device is idle before we flush */
4674                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4675                 intel_iommu_strict = 1;
4676        }
4677 }
4678 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4679 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4680 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4681 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4682
4683 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4684    ISOCH DMAR unit for the Azalia sound device, but not give it any
4685    TLB entries, which causes it to deadlock. Check for that.  We do
4686    this in a function called from init_dmars(), instead of in a PCI
4687    quirk, because we don't want to print the obnoxious "BIOS broken"
4688    message if VT-d is actually disabled.
4689 */
4690 static void __init check_tylersburg_isoch(void)
4691 {
4692         struct pci_dev *pdev;
4693         uint32_t vtisochctrl;
4694
4695         /* If there's no Azalia in the system anyway, forget it. */
4696         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4697         if (!pdev)
4698                 return;
4699         pci_dev_put(pdev);
4700
4701         /* System Management Registers. Might be hidden, in which case
4702            we can't do the sanity check. But that's OK, because the
4703            known-broken BIOSes _don't_ actually hide it, so far. */
4704         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4705         if (!pdev)
4706                 return;
4707
4708         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4709                 pci_dev_put(pdev);
4710                 return;
4711         }
4712
4713         pci_dev_put(pdev);
4714
4715         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4716         if (vtisochctrl & 1)
4717                 return;
4718
4719         /* Drop all bits other than the number of TLB entries */
4720         vtisochctrl &= 0x1c;
4721
4722         /* If we have the recommended number of TLB entries (16), fine. */
4723         if (vtisochctrl == 0x10)
4724                 return;
4725
4726         /* Zero TLB entries? You get to ride the short bus to school. */
4727         if (!vtisochctrl) {
4728                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4729                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4730                      dmi_get_system_info(DMI_BIOS_VENDOR),
4731                      dmi_get_system_info(DMI_BIOS_VERSION),
4732                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4733                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4734                 return;
4735         }
4736         
4737         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4738                vtisochctrl);
4739 }