1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
54 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
55 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
56
57 #define IOAPIC_RANGE_START      (0xfee00000)
58 #define IOAPIC_RANGE_END        (0xfeefffff)
59 #define IOVA_START_ADDR         (0x1000)
60
61 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
62
63 #define MAX_AGAW_WIDTH 64
64 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
65
66 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
67 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
68
69 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
70    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
71 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
72                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
73 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
74
75 /* IO virtual address start page frame number */
76 #define IOVA_START_PFN          (1)
77
78 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
79 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
80 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
81
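/*
 * Worked example (assuming 4KiB kernel pages): IOVA_PFN() simply drops the
 * page offset, so DMA_32BIT_PFN is DMA_BIT_MASK(32) >> 12 == 0xfffff, the
 * last page frame a 32-bit DMA address can reach.
 */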
82 /* page table handling */
83 #define LEVEL_STRIDE            (9)
84 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
85
86 /*
87  * This bitmap is used to advertise the page sizes our hardware supports
88  * to the IOMMU core, which will then use this information to split
89  * physically contiguous memory regions it is mapping into page sizes
90  * that we support.
91  *
92  * Traditionally the IOMMU core just handed us the mappings directly,
93  * after making sure the size is a power-of-two multiple of 4KiB and that the
94  * mapping has natural alignment.
95  *
96  * To retain this behavior, we currently advertise that we support
97  * all page sizes that are an order of 4KiB.
98  *
99  * If at some point we'd like to utilize the IOMMU core's new behavior,
100  * we could change this to advertise the real page sizes we support.
101  */
102 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
103
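/*
 * Illustrative sketch (hypothetical helper, not used by the driver): the
 * IOMMU core interprets bit k of the bitmap above as "a 2^k byte page size
 * is supported", so ~0xFFFUL advertises every power-of-two size of at
 * least 4KiB.
 */
static inline bool intel_iommu_example_pgsize_ok(unsigned long size)
{
	/* a mapping size is advertised iff it is a power of two >= 4KiB */
	return is_power_of_2(size) && (size & INTEL_IOMMU_PGSIZES);
}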
104 static inline int agaw_to_level(int agaw)
105 {
106         return agaw + 2;
107 }
108
109 static inline int agaw_to_width(int agaw)
110 {
111         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
112 }
113
114 static inline int width_to_agaw(int width)
115 {
116         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
117 }
118
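/*
 * Worked example: DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so
 * width_to_agaw(48) == DIV_ROUND_UP(48 - 30, 9) == 2 and
 * agaw_to_level(2) == 4, i.e. a four-level page table.  A 39-bit width
 * gives agaw 1 and a three-level table; each additional level adds
 * LEVEL_STRIDE (9) bits of address space.
 */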
119 static inline unsigned int level_to_offset_bits(int level)
120 {
121         return (level - 1) * LEVEL_STRIDE;
122 }
123
124 static inline int pfn_level_offset(unsigned long pfn, int level)
125 {
126         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
127 }
128
129 static inline unsigned long level_mask(int level)
130 {
131         return -1UL << level_to_offset_bits(level);
132 }
133
134 static inline unsigned long level_size(int level)
135 {
136         return 1UL << level_to_offset_bits(level);
137 }
138
139 static inline unsigned long align_to_level(unsigned long pfn, int level)
140 {
141         return (pfn + level_size(level) - 1) & level_mask(level);
142 }
143
144 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
145 {
146         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
147 }
148
149 /* VT-d pages must never be _larger_ than MM pages. Otherwise things
150    are never going to work. */
151 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
152 {
153         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
154 }
155
156 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
157 {
158         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
159 }
160 static inline unsigned long page_to_dma_pfn(struct page *pg)
161 {
162         return mm_to_dma_pfn(page_to_pfn(pg));
163 }
164 static inline unsigned long virt_to_dma_pfn(void *p)
165 {
166         return page_to_dma_pfn(virt_to_page(p));
167 }
168
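/*
 * With 4KiB kernel pages PAGE_SHIFT == VTD_PAGE_SHIFT and both conversions
 * are identities.  On a 64KiB-page kernel each MM page spans 16 VT-d
 * pages, so e.g. mm_to_dma_pfn(3) == 48.
 */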
169 /* global iommu list, set NULL for ignored DMAR units */
170 static struct intel_iommu **g_iommus;
171
172 static void __init check_tylersburg_isoch(void);
173 static int rwbf_quirk;
174
175 /*
176  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
177  * (used when the kernel is launched with TXT)
178  */
179 static int force_on = 0;
180
181 /*
182  * 0: Present
183  * 1-11: Reserved
184  * 12-63: Context Ptr (12 - (haw-1))
185  * 64-127: Reserved
186  */
187 struct root_entry {
188         u64     lo;
189         u64     hi;
190 };
191 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
192
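/*
 * With a 4KiB VTD_PAGE_SIZE and 16-byte root entries, ROOT_ENTRY_NR is
 * 256: one root entry per PCI bus number.
 */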
193
194 /*
195  * low 64 bits:
196  * 0: present
197  * 1: fault processing disable
198  * 2-3: translation type
199  * 12-63: address space root
200  * high 64 bits:
201  * 0-2: address width
202  * 3-6: aval
203  * 8-23: domain id
204  */
205 struct context_entry {
206         u64 lo;
207         u64 hi;
208 };
209
210 static inline bool context_present(struct context_entry *context)
211 {
212         return (context->lo & 1);
213 }
214 static inline void context_set_present(struct context_entry *context)
215 {
216         context->lo |= 1;
217 }
218
219 static inline void context_set_fault_enable(struct context_entry *context)
220 {
221         context->lo &= (((u64)-1) << 2) | 1;
222 }
223
224 static inline void context_set_translation_type(struct context_entry *context,
225                                                 unsigned long value)
226 {
227         context->lo &= (((u64)-1) << 4) | 3;
228         context->lo |= (value & 3) << 2;
229 }
230
231 static inline void context_set_address_root(struct context_entry *context,
232                                             unsigned long value)
233 {
234         context->lo &= ~VTD_PAGE_MASK;
235         context->lo |= value & VTD_PAGE_MASK;
236 }
237
238 static inline void context_set_address_width(struct context_entry *context,
239                                              unsigned long value)
240 {
241         context->hi |= value & 7;
242 }
243
244 static inline void context_set_domain_id(struct context_entry *context,
245                                          unsigned long value)
246 {
247         context->hi |= (value & ((1 << 16) - 1)) << 8;
248 }
249
250 static inline void context_clear_entry(struct context_entry *context)
251 {
252         context->lo = 0;
253         context->hi = 0;
254 }
255
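/*
 * Illustrative sketch (hypothetical helper, not part of the driver) of how
 * the accessors above compose a context entry.  'pgd_phys' stands for the
 * physical address of a domain's page-table root and 'agaw' for its
 * adjusted guest address width.
 */
static inline void example_fill_context(struct context_entry *context,
					unsigned long pgd_phys, int agaw,
					u16 did)
{
	context_clear_entry(context);
	context_set_domain_id(context, did);
	context_set_address_width(context, agaw);
	context_set_address_root(context, pgd_phys);
	context_set_translation_type(context, 0); /* 0: translate via page table */
	context_set_fault_enable(context);
	context_set_present(context);		  /* mark valid last */
}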
256 /*
257  * 0: readable
258  * 1: writable
259  * 2-6: reserved
260  * 7: super page
261  * 8-10: available
262  * 11: snoop behavior
263  * 12-63: Host physical address
264  */
265 struct dma_pte {
266         u64 val;
267 };
268
269 static inline void dma_clear_pte(struct dma_pte *pte)
270 {
271         pte->val = 0;
272 }
273
274 static inline u64 dma_pte_addr(struct dma_pte *pte)
275 {
276 #ifdef CONFIG_64BIT
277         return pte->val & VTD_PAGE_MASK;
278 #else
279         /* Must have a full atomic 64-bit read */
280         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
281 #endif
282 }
283
284 static inline bool dma_pte_present(struct dma_pte *pte)
285 {
286         return (pte->val & 3) != 0;
287 }
288
289 static inline bool dma_pte_superpage(struct dma_pte *pte)
290 {
291         return (pte->val & DMA_PTE_LARGE_PAGE);
292 }
293
294 static inline int first_pte_in_page(struct dma_pte *pte)
295 {
296         return !((unsigned long)pte & ~VTD_PAGE_MASK);
297 }
298
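/*
 * A page-table page holds VTD_PAGE_SIZE / sizeof(struct dma_pte) == 512
 * entries, so first_pte_in_page() is true exactly when 'pte' sits on a
 * 4KiB boundary, i.e. at slot 0 of its table.
 */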
299 /*
300  * This domain is a static identity mapping domain.
301  *      1. This domain creates a static 1:1 mapping of all usable memory.
302  *      2. It maps to each iommu if successful.
303  *      3. Each iommu maps to this domain if successful.
304  */
305 static struct dmar_domain *si_domain;
306 static int hw_pass_through = 1;
307
308 /* domain represents a virtual machine; more than one device
309  * across iommus may be owned by one domain, e.g. a kvm guest.
310  */
311 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
312
313 /* si_domain contains multiple devices */
314 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
315
316 struct dmar_domain {
317         int     id;                     /* domain id */
318         int     nid;                    /* node id */
319         DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
320                                         /* bitmap of iommus this domain uses*/
321
322         struct list_head devices;       /* all devices' list */
323         struct iova_domain iovad;       /* iova's that belong to this domain */
324
325         struct dma_pte  *pgd;           /* virtual address */
326         int             gaw;            /* max guest address width */
327
328         /* adjusted guest address width, 0 is level 2 30-bit */
329         int             agaw;
330
331         int             flags;          /* flags to find out type of domain */
332
333         int             iommu_coherency;/* indicate coherency of iommu access */
334         int             iommu_snooping; /* indicate snooping control feature*/
335         int             iommu_count;    /* reference count of iommu */
336         int             iommu_superpage;/* Level of superpages supported:
337                                            0 == 4KiB (no superpages), 1 == 2MiB,
338                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
339         spinlock_t      iommu_lock;     /* protect iommu set in domain */
340         u64             max_addr;       /* maximum mapped address */
341
342         struct iommu_domain domain;     /* generic domain data structure for
343                                            iommu core */
344 };
345
346 /* PCI domain-device relationship */
347 struct device_domain_info {
348         struct list_head link;  /* link to domain siblings */
349         struct list_head global; /* link to global list */
350         u8 bus;                 /* PCI bus number */
351         u8 devfn;               /* PCI devfn number */
352         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
353         struct intel_iommu *iommu; /* IOMMU used by this device */
354         struct dmar_domain *domain; /* pointer to domain */
355 };
356
357 struct dmar_rmrr_unit {
358         struct list_head list;          /* list of rmrr units   */
359         struct acpi_dmar_header *hdr;   /* ACPI header          */
360         u64     base_address;           /* reserved base address*/
361         u64     end_address;            /* reserved end address */
362         struct dmar_dev_scope *devices; /* target devices */
363         int     devices_cnt;            /* target device count */
364 };
365
366 struct dmar_atsr_unit {
367         struct list_head list;          /* list of ATSR units */
368         struct acpi_dmar_header *hdr;   /* ACPI header */
369         struct dmar_dev_scope *devices; /* target devices */
370         int devices_cnt;                /* target device count */
371         u8 include_all:1;               /* include all ports */
372 };
373
374 static LIST_HEAD(dmar_atsr_units);
375 static LIST_HEAD(dmar_rmrr_units);
376
377 #define for_each_rmrr_units(rmrr) \
378         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
379
380 static void flush_unmaps_timeout(unsigned long data);
381
382 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
383
384 #define HIGH_WATER_MARK 250
385 struct deferred_flush_tables {
386         int next;
387         struct iova *iova[HIGH_WATER_MARK];
388         struct dmar_domain *domain[HIGH_WATER_MARK];
389         struct page *freelist[HIGH_WATER_MARK];
390 };
391
392 static struct deferred_flush_tables *deferred_flush;
393
394 /* bitmap for indexing intel_iommus */
395 static int g_num_of_iommus;
396
397 static DEFINE_SPINLOCK(async_umap_flush_lock);
398 static LIST_HEAD(unmaps_to_do);
399
400 static int timer_on;
401 static long list_size;
402
403 static void domain_exit(struct dmar_domain *domain);
404 static void domain_remove_dev_info(struct dmar_domain *domain);
405 static void domain_remove_one_dev_info(struct dmar_domain *domain,
406                                        struct device *dev);
407 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
408                                            struct device *dev);
409 static int domain_detach_iommu(struct dmar_domain *domain,
410                                struct intel_iommu *iommu);
411
412 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
413 int dmar_disabled = 0;
414 #else
415 int dmar_disabled = 1;
416 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
417
418 int intel_iommu_enabled = 0;
419 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
420
421 static int dmar_map_gfx = 1;
422 static int dmar_forcedac;
423 static int intel_iommu_strict;
424 static int intel_iommu_superpage = 1;
425
426 int intel_iommu_gfx_mapped;
427 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
428
429 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
430 static DEFINE_SPINLOCK(device_domain_lock);
431 static LIST_HEAD(device_domain_list);
432
433 static const struct iommu_ops intel_iommu_ops;
434
435 /* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
436 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
437 {
438         return container_of(dom, struct dmar_domain, domain);
439 }
440
441 static int __init intel_iommu_setup(char *str)
442 {
443         if (!str)
444                 return -EINVAL;
445         while (*str) {
446                 if (!strncmp(str, "on", 2)) {
447                         dmar_disabled = 0;
448                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
449                 } else if (!strncmp(str, "off", 3)) {
450                         dmar_disabled = 1;
451                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
452                 } else if (!strncmp(str, "igfx_off", 8)) {
453                         dmar_map_gfx = 0;
454                         printk(KERN_INFO
455                                 "Intel-IOMMU: disable GFX device mapping\n");
456                 } else if (!strncmp(str, "forcedac", 8)) {
457                         printk(KERN_INFO
458                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
459                         dmar_forcedac = 1;
460                 } else if (!strncmp(str, "strict", 6)) {
461                         printk(KERN_INFO
462                                 "Intel-IOMMU: disable batched IOTLB flush\n");
463                         intel_iommu_strict = 1;
464                 } else if (!strncmp(str, "sp_off", 6)) {
465                         printk(KERN_INFO
466                                 "Intel-IOMMU: disable supported super page\n");
467                         intel_iommu_superpage = 0;
468                 }
469
470                 str += strcspn(str, ",");
471                 while (*str == ',')
472                         str++;
473         }
474         return 0;
475 }
476 __setup("intel_iommu=", intel_iommu_setup);
477
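/*
 * Usage example: options can be combined on the kernel command line,
 * separated by commas, e.g.
 *
 *	intel_iommu=on,strict,sp_off
 *
 * which enables the IOMMU, disables batched IOTLB flushing and disables
 * superpage use.
 */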
478 static struct kmem_cache *iommu_domain_cache;
479 static struct kmem_cache *iommu_devinfo_cache;
480
481 static inline void *alloc_pgtable_page(int node)
482 {
483         struct page *page;
484         void *vaddr = NULL;
485
486         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
487         if (page)
488                 vaddr = page_address(page);
489         return vaddr;
490 }
491
492 static inline void free_pgtable_page(void *vaddr)
493 {
494         free_page((unsigned long)vaddr);
495 }
496
497 static inline void *alloc_domain_mem(void)
498 {
499         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
500 }
501
502 static void free_domain_mem(void *vaddr)
503 {
504         kmem_cache_free(iommu_domain_cache, vaddr);
505 }
506
507 static inline void *alloc_devinfo_mem(void)
508 {
509         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
510 }
511
512 static inline void free_devinfo_mem(void *vaddr)
513 {
514         kmem_cache_free(iommu_devinfo_cache, vaddr);
515 }
516
517 static inline int domain_type_is_vm(struct dmar_domain *domain)
518 {
519         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
520 }
521
522 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
523 {
524         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
525                                 DOMAIN_FLAG_STATIC_IDENTITY);
526 }
527
528 static inline int domain_pfn_supported(struct dmar_domain *domain,
529                                        unsigned long pfn)
530 {
531         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
532
533         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
534 }
535
536 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
537 {
538         unsigned long sagaw;
539         int agaw = -1;
540
541         sagaw = cap_sagaw(iommu->cap);
542         for (agaw = width_to_agaw(max_gaw);
543              agaw >= 0; agaw--) {
544                 if (test_bit(agaw, &sagaw))
545                         break;
546         }
547
548         return agaw;
549 }
550
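/*
 * Worked example: cap_sagaw() is a bitmap of the AGAW values the unit
 * supports.  With sagaw == 0x4 (4-level tables only), a request for
 * DEFAULT_DOMAIN_ADDRESS_WIDTH (48 bits, agaw 2) finds bit 2 set and
 * returns 2; if no bit at or below the requested agaw is set, the loop
 * falls through and -1 is returned.
 */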
551 /*
552  * Calculate max SAGAW for each iommu.
553  */
554 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
555 {
556         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
557 }
558
559 /*
560  * Calculate agaw for each iommu.
561  * "SAGAW" may be different across iommus, so use a default agaw and fall
562  * back to a smaller supported agaw for iommus that don't support the default.
563  */
564 int iommu_calculate_agaw(struct intel_iommu *iommu)
565 {
566         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
567 }
568
569 /* This function only returns a single iommu in a domain */
570 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
571 {
572         int iommu_id;
573
574         /* si_domain and vm domain should not get here. */
575         BUG_ON(domain_type_is_vm_or_si(domain));
576         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
577         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
578                 return NULL;
579
580         return g_iommus[iommu_id];
581 }
582
583 static void domain_update_iommu_coherency(struct dmar_domain *domain)
584 {
585         struct dmar_drhd_unit *drhd;
586         struct intel_iommu *iommu;
587         bool found = false;
588         int i;
589
590         domain->iommu_coherency = 1;
591
592         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
593                 found = true;
594                 if (!ecap_coherent(g_iommus[i]->ecap)) {
595                         domain->iommu_coherency = 0;
596                         break;
597                 }
598         }
599         if (found)
600                 return;
601
602         /* No hardware attached; use lowest common denominator */
603         rcu_read_lock();
604         for_each_active_iommu(iommu, drhd) {
605                 if (!ecap_coherent(iommu->ecap)) {
606                         domain->iommu_coherency = 0;
607                         break;
608                 }
609         }
610         rcu_read_unlock();
611 }
612
613 static int domain_update_iommu_snooping(struct intel_iommu *skip)
614 {
615         struct dmar_drhd_unit *drhd;
616         struct intel_iommu *iommu;
617         int ret = 1;
618
619         rcu_read_lock();
620         for_each_active_iommu(iommu, drhd) {
621                 if (iommu != skip) {
622                         if (!ecap_sc_support(iommu->ecap)) {
623                                 ret = 0;
624                                 break;
625                         }
626                 }
627         }
628         rcu_read_unlock();
629
630         return ret;
631 }
632
633 static int domain_update_iommu_superpage(struct intel_iommu *skip)
634 {
635         struct dmar_drhd_unit *drhd;
636         struct intel_iommu *iommu;
637         int mask = 0xf;
638
639         if (!intel_iommu_superpage) {
640                 return 0;
641         }
642
643         /* set iommu_superpage to the smallest common denominator */
644         rcu_read_lock();
645         for_each_active_iommu(iommu, drhd) {
646                 if (iommu != skip) {
647                         mask &= cap_super_page_val(iommu->cap);
648                         if (!mask)
649                                 break;
650                 }
651         }
652         rcu_read_unlock();
653
654         return fls(mask);
655 }
656
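/*
 * Worked example: cap_super_page_val() is a small bitmap where bit 0
 * means 2MiB and bit 1 means 1GiB pages.  If every unit reports 0x1 the
 * mask stays 0x1 and fls() returns 1 (2MiB superpages only); if any unit
 * reports 0, the result is 0 and superpages are not used, matching the
 * iommu_superpage levels described in struct dmar_domain.
 */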
657 /* Some capabilities may be different across iommus */
658 static void domain_update_iommu_cap(struct dmar_domain *domain)
659 {
660         domain_update_iommu_coherency(domain);
661         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
662         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
663 }
664
665 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
666                                                        u8 bus, u8 devfn, int alloc)
667 {
668         struct root_entry *root = &iommu->root_entry[bus];
669         struct context_entry *context;
670         u64 *entry;
671
672         entry = &root->lo;
673         if (ecap_ecs(iommu->ecap)) {
674                 if (devfn >= 0x80) {
675                         devfn -= 0x80;
676                         entry = &root->hi;
677                 }
678                 devfn *= 2;
679         }
680         if (*entry & 1)
681                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
682         else {
683                 unsigned long phy_addr;
684                 if (!alloc)
685                         return NULL;
686
687                 context = alloc_pgtable_page(iommu->node);
688                 if (!context)
689                         return NULL;
690
691                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
692                 phy_addr = virt_to_phys((void *)context);
693                 *entry = phy_addr | 1;
694                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
695         }
696         return &context[devfn];
697 }
698
699 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
700 {
701         struct dmar_drhd_unit *drhd = NULL;
702         struct intel_iommu *iommu;
703         struct device *tmp;
704         struct pci_dev *ptmp, *pdev = NULL;
705         u16 segment = 0;
706         int i;
707
708         if (dev_is_pci(dev)) {
709                 pdev = to_pci_dev(dev);
710                 segment = pci_domain_nr(pdev->bus);
711         } else if (has_acpi_companion(dev))
712                 dev = &ACPI_COMPANION(dev)->dev;
713
714         rcu_read_lock();
715         for_each_active_iommu(iommu, drhd) {
716                 if (pdev && segment != drhd->segment)
717                         continue;
718
719                 for_each_active_dev_scope(drhd->devices,
720                                           drhd->devices_cnt, i, tmp) {
721                         if (tmp == dev) {
722                                 *bus = drhd->devices[i].bus;
723                                 *devfn = drhd->devices[i].devfn;
724                                 goto out;
725                         }
726
727                         if (!pdev || !dev_is_pci(tmp))
728                                 continue;
729
730                         ptmp = to_pci_dev(tmp);
731                         if (ptmp->subordinate &&
732                             ptmp->subordinate->number <= pdev->bus->number &&
733                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
734                                 goto got_pdev;
735                 }
736
737                 if (pdev && drhd->include_all) {
738                 got_pdev:
739                         *bus = pdev->bus->number;
740                         *devfn = pdev->devfn;
741                         goto out;
742                 }
743         }
744         iommu = NULL;
745  out:
746         rcu_read_unlock();
747
748         return iommu;
749 }
750
751 static void domain_flush_cache(struct dmar_domain *domain,
752                                void *addr, int size)
753 {
754         if (!domain->iommu_coherency)
755                 clflush_cache_range(addr, size);
756 }
757
758 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
759 {
760         struct context_entry *context;
761         int ret = 0;
762         unsigned long flags;
763
764         spin_lock_irqsave(&iommu->lock, flags);
765         context = iommu_context_addr(iommu, bus, devfn, 0);
766         if (context)
767                 ret = context_present(context);
768         spin_unlock_irqrestore(&iommu->lock, flags);
769         return ret;
770 }
771
772 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
773 {
774         struct context_entry *context;
775         unsigned long flags;
776
777         spin_lock_irqsave(&iommu->lock, flags);
778         context = iommu_context_addr(iommu, bus, devfn, 0);
779         if (context) {
780                 context_clear_entry(context);
781                 __iommu_flush_cache(iommu, context, sizeof(*context));
782         }
783         spin_unlock_irqrestore(&iommu->lock, flags);
784 }
785
786 static void free_context_table(struct intel_iommu *iommu)
787 {
788         int i;
789         unsigned long flags;
790         struct context_entry *context;
791
792         spin_lock_irqsave(&iommu->lock, flags);
793         if (!iommu->root_entry) {
794                 goto out;
795         }
796         for (i = 0; i < ROOT_ENTRY_NR; i++) {
797                 context = iommu_context_addr(iommu, i, 0, 0);
798                 if (context)
799                         free_pgtable_page(context);
800
801                 if (!ecap_ecs(iommu->ecap))
802                         continue;
803
804                 context = iommu_context_addr(iommu, i, 0x80, 0);
805                 if (context)
806                         free_pgtable_page(context);
807
808         }
809         free_pgtable_page(iommu->root_entry);
810         iommu->root_entry = NULL;
811 out:
812         spin_unlock_irqrestore(&iommu->lock, flags);
813 }
814
815 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
816                                       unsigned long pfn, int *target_level)
817 {
818         struct dma_pte *parent, *pte = NULL;
819         int level = agaw_to_level(domain->agaw);
820         int offset;
821
822         BUG_ON(!domain->pgd);
823
824         if (!domain_pfn_supported(domain, pfn))
825                 /* Address beyond IOMMU's addressing capabilities. */
826                 return NULL;
827
828         parent = domain->pgd;
829
830         while (1) {
831                 void *tmp_page;
832
833                 offset = pfn_level_offset(pfn, level);
834                 pte = &parent[offset];
835                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
836                         break;
837                 if (level == *target_level)
838                         break;
839
840                 if (!dma_pte_present(pte)) {
841                         uint64_t pteval;
842
843                         tmp_page = alloc_pgtable_page(domain->nid);
844
845                         if (!tmp_page)
846                                 return NULL;
847
848                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
849                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
850                         if (cmpxchg64(&pte->val, 0ULL, pteval))
851                                 /* Someone else set it while we were thinking; use theirs. */
852                                 free_pgtable_page(tmp_page);
853                         else
854                                 domain_flush_cache(domain, pte, sizeof(*pte));
855                 }
856                 if (level == 1)
857                         break;
858
859                 parent = phys_to_virt(dma_pte_addr(pte));
860                 level--;
861         }
862
863         if (!*target_level)
864                 *target_level = level;
865
866         return pte;
867 }
868
869
870 /* return address's pte at specific level */
871 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
872                                          unsigned long pfn,
873                                          int level, int *large_page)
874 {
875         struct dma_pte *parent, *pte = NULL;
876         int total = agaw_to_level(domain->agaw);
877         int offset;
878
879         parent = domain->pgd;
880         while (level <= total) {
881                 offset = pfn_level_offset(pfn, total);
882                 pte = &parent[offset];
883                 if (level == total)
884                         return pte;
885
886                 if (!dma_pte_present(pte)) {
887                         *large_page = total;
888                         break;
889                 }
890
891                 if (dma_pte_superpage(pte)) {
892                         *large_page = total;
893                         return pte;
894                 }
895
896                 parent = phys_to_virt(dma_pte_addr(pte));
897                 total--;
898         }
899         return NULL;
900 }
901
902 /* clear last level pte; a tlb flush should follow */
903 static void dma_pte_clear_range(struct dmar_domain *domain,
904                                 unsigned long start_pfn,
905                                 unsigned long last_pfn)
906 {
907         unsigned int large_page = 1;
908         struct dma_pte *first_pte, *pte;
909
910         BUG_ON(!domain_pfn_supported(domain, start_pfn));
911         BUG_ON(!domain_pfn_supported(domain, last_pfn));
912         BUG_ON(start_pfn > last_pfn);
913
914         /* we don't need a lock here; nobody else touches the iova range */
915         do {
916                 large_page = 1;
917                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
918                 if (!pte) {
919                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
920                         continue;
921                 }
922                 do {
923                         dma_clear_pte(pte);
924                         start_pfn += lvl_to_nr_pages(large_page);
925                         pte++;
926                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
927
928                 domain_flush_cache(domain, first_pte,
929                                    (void *)pte - (void *)first_pte);
930
931         } while (start_pfn && start_pfn <= last_pfn);
932 }
933
934 static void dma_pte_free_level(struct dmar_domain *domain, int level,
935                                struct dma_pte *pte, unsigned long pfn,
936                                unsigned long start_pfn, unsigned long last_pfn)
937 {
938         pfn = max(start_pfn, pfn);
939         pte = &pte[pfn_level_offset(pfn, level)];
940
941         do {
942                 unsigned long level_pfn;
943                 struct dma_pte *level_pte;
944
945                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
946                         goto next;
947
948                 level_pfn = pfn & level_mask(level - 1);
949                 level_pte = phys_to_virt(dma_pte_addr(pte));
950
951                 if (level > 2)
952                         dma_pte_free_level(domain, level - 1, level_pte,
953                                            level_pfn, start_pfn, last_pfn);
954
955                 /* If range covers entire pagetable, free it */
956                 if (!(start_pfn > level_pfn ||
957                       last_pfn < level_pfn + level_size(level) - 1)) {
958                         dma_clear_pte(pte);
959                         domain_flush_cache(domain, pte, sizeof(*pte));
960                         free_pgtable_page(level_pte);
961                 }
962 next:
963                 pfn += level_size(level);
964         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
965 }
966
967 /* free page table pages. last level pte should already be cleared */
968 static void dma_pte_free_pagetable(struct dmar_domain *domain,
969                                    unsigned long start_pfn,
970                                    unsigned long last_pfn)
971 {
972         BUG_ON(!domain_pfn_supported(domain, start_pfn));
973         BUG_ON(!domain_pfn_supported(domain, last_pfn));
974         BUG_ON(start_pfn > last_pfn);
975
976         dma_pte_clear_range(domain, start_pfn, last_pfn);
977
978         /* We don't need a lock here; nobody else touches the iova range */
979         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
980                            domain->pgd, 0, start_pfn, last_pfn);
981
982         /* free pgd */
983         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
984                 free_pgtable_page(domain->pgd);
985                 domain->pgd = NULL;
986         }
987 }
988
989 /* When a page at a given level is being unlinked from its parent, we don't
990    need to *modify* it at all. All we need to do is make a list of all the
991    pages which can be freed just as soon as we've flushed the IOTLB and we
992    know the hardware page-walk will no longer touch them.
993    The 'pte' argument is the *parent* PTE, pointing to the page that is to
994    be freed. */
995 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
996                                             int level, struct dma_pte *pte,
997                                             struct page *freelist)
998 {
999         struct page *pg;
1000
1001         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1002         pg->freelist = freelist;
1003         freelist = pg;
1004
1005         if (level == 1)
1006                 return freelist;
1007
1008         pte = page_address(pg);
1009         do {
1010                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1011                         freelist = dma_pte_list_pagetables(domain, level - 1,
1012                                                            pte, freelist);
1013                 pte++;
1014         } while (!first_pte_in_page(pte));
1015
1016         return freelist;
1017 }
1018
1019 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1020                                         struct dma_pte *pte, unsigned long pfn,
1021                                         unsigned long start_pfn,
1022                                         unsigned long last_pfn,
1023                                         struct page *freelist)
1024 {
1025         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1026
1027         pfn = max(start_pfn, pfn);
1028         pte = &pte[pfn_level_offset(pfn, level)];
1029
1030         do {
1031                 unsigned long level_pfn;
1032
1033                 if (!dma_pte_present(pte))
1034                         goto next;
1035
1036                 level_pfn = pfn & level_mask(level);
1037
1038                 /* If range covers entire pagetable, free it */
1039                 if (start_pfn <= level_pfn &&
1040                     last_pfn >= level_pfn + level_size(level) - 1) {
1041                            /* These subordinate page tables are going away entirely. Don't
1042                            bother to clear them; we're just going to *free* them. */
1043                         if (level > 1 && !dma_pte_superpage(pte))
1044                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1045
1046                         dma_clear_pte(pte);
1047                         if (!first_pte)
1048                                 first_pte = pte;
1049                         last_pte = pte;
1050                 } else if (level > 1) {
1051                         /* Recurse down into a level that isn't *entirely* obsolete */
1052                         freelist = dma_pte_clear_level(domain, level - 1,
1053                                                        phys_to_virt(dma_pte_addr(pte)),
1054                                                        level_pfn, start_pfn, last_pfn,
1055                                                        freelist);
1056                 }
1057 next:
1058                 pfn += level_size(level);
1059         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1060
1061         if (first_pte)
1062                 domain_flush_cache(domain, first_pte,
1063                                    (void *)++last_pte - (void *)first_pte);
1064
1065         return freelist;
1066 }
1067
1068 /* We can't just free the pages because the IOMMU may still be walking
1069    the page tables, and may have cached the intermediate levels. The
1070    pages can only be freed after the IOTLB flush has been done. */
1071 struct page *domain_unmap(struct dmar_domain *domain,
1072                           unsigned long start_pfn,
1073                           unsigned long last_pfn)
1074 {
1075         struct page *freelist = NULL;
1076
1077         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1078         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1079         BUG_ON(start_pfn > last_pfn);
1080
1081         /* we don't need a lock here; nobody else touches the iova range */
1082         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1083                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1084
1085         /* free pgd */
1086         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1087                 struct page *pgd_page = virt_to_page(domain->pgd);
1088                 pgd_page->freelist = freelist;
1089                 freelist = pgd_page;
1090
1091                 domain->pgd = NULL;
1092         }
1093
1094         return freelist;
1095 }
1096
1097 void dma_free_pagelist(struct page *freelist)
1098 {
1099         struct page *pg;
1100
1101         while ((pg = freelist)) {
1102                 freelist = pg->freelist;
1103                 free_pgtable_page(page_address(pg));
1104         }
1105 }
1106
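/*
 * The intended calling pattern is
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	... flush the IOTLB for that range ...
 *	dma_free_pagelist(freelist);
 *
 * so that no page-table page is reused while the hardware may still hold
 * a cached reference to it.
 */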
1107 /* iommu handling */
1108 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1109 {
1110         struct root_entry *root;
1111         unsigned long flags;
1112
1113         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1114         if (!root) {
1115                 pr_err("IOMMU: allocating root entry for %s failed\n",
1116                         iommu->name);
1117                 return -ENOMEM;
1118         }
1119
1120         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1121
1122         spin_lock_irqsave(&iommu->lock, flags);
1123         iommu->root_entry = root;
1124         spin_unlock_irqrestore(&iommu->lock, flags);
1125
1126         return 0;
1127 }
1128
1129 static void iommu_set_root_entry(struct intel_iommu *iommu)
1130 {
1131         u64 addr;
1132         u32 sts;
1133         unsigned long flag;
1134
1135         addr = virt_to_phys(iommu->root_entry);
1136         if (ecap_ecs(iommu->ecap))
1137                 addr |= DMA_RTADDR_RTT;
1138
1139         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1140         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1141
1142         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1143
1144         /* Make sure hardware completes it */
1145         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1146                       readl, (sts & DMA_GSTS_RTPS), sts);
1147
1148         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1149 }
1150
1151 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1152 {
1153         u32 val;
1154         unsigned long flag;
1155
1156         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1157                 return;
1158
1159         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1161
1162         /* Make sure hardware completes it */
1163         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1164                       readl, (!(val & DMA_GSTS_WBFS)), val);
1165
1166         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1167 }
1168
1169 /* return value determines if we need a write buffer flush */
1170 static void __iommu_flush_context(struct intel_iommu *iommu,
1171                                   u16 did, u16 source_id, u8 function_mask,
1172                                   u64 type)
1173 {
1174         u64 val = 0;
1175         unsigned long flag;
1176
1177         switch (type) {
1178         case DMA_CCMD_GLOBAL_INVL:
1179                 val = DMA_CCMD_GLOBAL_INVL;
1180                 break;
1181         case DMA_CCMD_DOMAIN_INVL:
1182                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1183                 break;
1184         case DMA_CCMD_DEVICE_INVL:
1185                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1186                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1187                 break;
1188         default:
1189                 BUG();
1190         }
1191         val |= DMA_CCMD_ICC;
1192
1193         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1194         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1195
1196         /* Make sure hardware completes it */
1197         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1198                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1199
1200         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1201 }
1202
1203 /* return value determines if we need a write buffer flush */
1204 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1205                                 u64 addr, unsigned int size_order, u64 type)
1206 {
1207         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1208         u64 val = 0, val_iva = 0;
1209         unsigned long flag;
1210
1211         switch (type) {
1212         case DMA_TLB_GLOBAL_FLUSH:
1213                 /* global flush doesn't need to set IVA_REG */
1214                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1215                 break;
1216         case DMA_TLB_DSI_FLUSH:
1217                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1218                 break;
1219         case DMA_TLB_PSI_FLUSH:
1220                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1221                 /* IH bit is passed in as part of address */
1222                 val_iva = size_order | addr;
1223                 break;
1224         default:
1225                 BUG();
1226         }
1227         /* Note: set drain read/write */
1228 #if 0
1229         /*
1230          * This is probably just to be extra safe.  It looks like we can
1231          * ignore it without any impact.
1232          */
1233         if (cap_read_drain(iommu->cap))
1234                 val |= DMA_TLB_READ_DRAIN;
1235 #endif
1236         if (cap_write_drain(iommu->cap))
1237                 val |= DMA_TLB_WRITE_DRAIN;
1238
1239         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1240         /* Note: Only uses first TLB reg currently */
1241         if (val_iva)
1242                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1243         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1244
1245         /* Make sure hardware completes it */
1246         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1247                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1248
1249         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1250
1251         /* check IOTLB invalidation granularity */
1252         if (DMA_TLB_IAIG(val) == 0)
1253                 printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
1254         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1255                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1256                         (unsigned long long)DMA_TLB_IIRG(type),
1257                         (unsigned long long)DMA_TLB_IAIG(val));
1258 }
1259
1260 static struct device_domain_info *
1261 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1262                          u8 bus, u8 devfn)
1263 {
1264         bool found = false;
1265         unsigned long flags;
1266         struct device_domain_info *info;
1267         struct pci_dev *pdev;
1268
1269         if (!ecap_dev_iotlb_support(iommu->ecap))
1270                 return NULL;
1271
1272         if (!iommu->qi)
1273                 return NULL;
1274
1275         spin_lock_irqsave(&device_domain_lock, flags);
1276         list_for_each_entry(info, &domain->devices, link)
1277                 if (info->iommu == iommu && info->bus == bus &&
1278                     info->devfn == devfn) {
1279                         found = true;
1280                         break;
1281                 }
1282         spin_unlock_irqrestore(&device_domain_lock, flags);
1283
1284         if (!found || !info->dev || !dev_is_pci(info->dev))
1285                 return NULL;
1286
1287         pdev = to_pci_dev(info->dev);
1288
1289         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1290                 return NULL;
1291
1292         if (!dmar_find_matched_atsr_unit(pdev))
1293                 return NULL;
1294
1295         return info;
1296 }
1297
1298 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1299 {
1300         if (!info || !dev_is_pci(info->dev))
1301                 return;
1302
1303         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1304 }
1305
1306 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1307 {
1308         if (!info->dev || !dev_is_pci(info->dev) ||
1309             !pci_ats_enabled(to_pci_dev(info->dev)))
1310                 return;
1311
1312         pci_disable_ats(to_pci_dev(info->dev));
1313 }
1314
1315 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1316                                   u64 addr, unsigned mask)
1317 {
1318         u16 sid, qdep;
1319         unsigned long flags;
1320         struct device_domain_info *info;
1321
1322         spin_lock_irqsave(&device_domain_lock, flags);
1323         list_for_each_entry(info, &domain->devices, link) {
1324                 struct pci_dev *pdev;
1325                 if (!info->dev || !dev_is_pci(info->dev))
1326                         continue;
1327
1328                 pdev = to_pci_dev(info->dev);
1329                 if (!pci_ats_enabled(pdev))
1330                         continue;
1331
1332                 sid = info->bus << 8 | info->devfn;
1333                 qdep = pci_ats_queue_depth(pdev);
1334                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1335         }
1336         spin_unlock_irqrestore(&device_domain_lock, flags);
1337 }
1338
1339 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1340                                   unsigned long pfn, unsigned int pages, int ih, int map)
1341 {
1342         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1343         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1344
1345         BUG_ON(pages == 0);
1346
1347         if (ih)
1348                 ih = 1 << 6;
1349         /*
1350          * Fall back to a domain-selective flush if there is no PSI support or the
1351          * size is too big.
1352          * PSI requires the page size to be 2 ^ x and the base address to be
1353          * naturally aligned to that size.
1354          */
1355         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1356                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1357                                                 DMA_TLB_DSI_FLUSH);
1358         else
1359                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1360                                                 DMA_TLB_PSI_FLUSH);
1361
1362         /*
1363          * In caching mode, changes of pages from non-present to present require
1364          * flush. However, device IOTLB doesn't need to be flushed in this case.
1365          */
1366         if (!cap_caching_mode(iommu->cap) || !map)
1367                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1368 }
1369
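/*
 * Illustrative sketch (hypothetical helper, not part of the driver) of
 * tearing down an IOVA range with the primitives above.  For instance,
 * unmapping 512 pages gives mask == ilog2(__roundup_pow_of_two(512)) == 9,
 * i.e. one 2MiB-aligned page-selective flush, after which the page-table
 * pages can safely be returned.
 */
static inline void example_teardown_range(struct intel_iommu *iommu,
					  struct dmar_domain *domain,
					  unsigned long start_pfn,
					  unsigned long last_pfn)
{
	struct page *freelist;

	/* unlink the page tables but keep the pages on a free list */
	freelist = domain_unmap(domain, start_pfn, last_pfn);
	/* hardware may still walk the old tables until this flush completes */
	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
			      last_pfn - start_pfn + 1, 0, 0);
	/* only now can the pages really be freed */
	dma_free_pagelist(freelist);
}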
1370 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1371 {
1372         u32 pmen;
1373         unsigned long flags;
1374
1375         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1376         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1377         pmen &= ~DMA_PMEN_EPM;
1378         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1379
1380         /* wait for the protected region status bit to clear */
1381         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1382                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1383
1384         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1385 }
1386
1387 static void iommu_enable_translation(struct intel_iommu *iommu)
1388 {
1389         u32 sts;
1390         unsigned long flags;
1391
1392         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1393         iommu->gcmd |= DMA_GCMD_TE;
1394         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1395
1396         /* Make sure hardware completes it */
1397         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1398                       readl, (sts & DMA_GSTS_TES), sts);
1399
1400         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1401 }
1402
1403 static void iommu_disable_translation(struct intel_iommu *iommu)
1404 {
1405         u32 sts;
1406         unsigned long flag;
1407
1408         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1409         iommu->gcmd &= ~DMA_GCMD_TE;
1410         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1411
1412         /* Make sure hardware completes it */
1413         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1414                       readl, (!(sts & DMA_GSTS_TES)), sts);
1415
1416         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1417 }
1418
1419
1420 static int iommu_init_domains(struct intel_iommu *iommu)
1421 {
1422         unsigned long ndomains;
1423         unsigned long nlongs;
1424
1425         ndomains = cap_ndoms(iommu->cap);
1426         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1427                  iommu->seq_id, ndomains);
1428         nlongs = BITS_TO_LONGS(ndomains);
1429
1430         spin_lock_init(&iommu->lock);
1431
1432         /* TBD: there might be 64K domains,
1433          * consider other allocation for future chip
1434          */
1435         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1436         if (!iommu->domain_ids) {
1437                 pr_err("IOMMU%d: allocating domain id array failed\n",
1438                        iommu->seq_id);
1439                 return -ENOMEM;
1440         }
1441         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1442                         GFP_KERNEL);
1443         if (!iommu->domains) {
1444                 pr_err("IOMMU%d: allocating domain array failed\n",
1445                        iommu->seq_id);
1446                 kfree(iommu->domain_ids);
1447                 iommu->domain_ids = NULL;
1448                 return -ENOMEM;
1449         }
1450
1451         /*
1452          * if Caching mode is set, then invalid translations are tagged
1453          * with domain id 0. Hence we need to pre-allocate it.
1454          */
1455         if (cap_caching_mode(iommu->cap))
1456                 set_bit(0, iommu->domain_ids);
1457         return 0;
1458 }
1459
1460 static void disable_dmar_iommu(struct intel_iommu *iommu)
1461 {
1462         struct dmar_domain *domain;
1463         int i;
1464
1465         if ((iommu->domains) && (iommu->domain_ids)) {
1466                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1467                         /*
1468                          * Domain id 0 is reserved for invalid translation
1469                          * if hardware supports caching mode.
1470                          */
1471                         if (cap_caching_mode(iommu->cap) && i == 0)
1472                                 continue;
1473
1474                         domain = iommu->domains[i];
1475                         clear_bit(i, iommu->domain_ids);
1476                         if (domain_detach_iommu(domain, iommu) == 0 &&
1477                             !domain_type_is_vm(domain))
1478                                 domain_exit(domain);
1479                 }
1480         }
1481
1482         if (iommu->gcmd & DMA_GCMD_TE)
1483                 iommu_disable_translation(iommu);
1484 }
1485
1486 static void free_dmar_iommu(struct intel_iommu *iommu)
1487 {
1488         if ((iommu->domains) && (iommu->domain_ids)) {
1489                 kfree(iommu->domains);
1490                 kfree(iommu->domain_ids);
1491                 iommu->domains = NULL;
1492                 iommu->domain_ids = NULL;
1493         }
1494
1495         g_iommus[iommu->seq_id] = NULL;
1496
1497         /* free context mapping */
1498         free_context_table(iommu);
1499 }
1500
1501 static struct dmar_domain *alloc_domain(int flags)
1502 {
1503         /* domain id for virtual machine, it won't be set in context */
1504         static atomic_t vm_domid = ATOMIC_INIT(0);
1505         struct dmar_domain *domain;
1506
1507         domain = alloc_domain_mem();
1508         if (!domain)
1509                 return NULL;
1510
1511         memset(domain, 0, sizeof(*domain));
1512         domain->nid = -1;
1513         domain->flags = flags;
1514         spin_lock_init(&domain->iommu_lock);
1515         INIT_LIST_HEAD(&domain->devices);
1516         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1517                 domain->id = atomic_inc_return(&vm_domid);
1518
1519         return domain;
1520 }
1521
1522 static int __iommu_attach_domain(struct dmar_domain *domain,
1523                                  struct intel_iommu *iommu)
1524 {
1525         int num;
1526         unsigned long ndomains;
1527
1528         ndomains = cap_ndoms(iommu->cap);
1529         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1530         if (num < ndomains) {
1531                 set_bit(num, iommu->domain_ids);
1532                 iommu->domains[num] = domain;
1533         } else {
1534                 num = -ENOSPC;
1535         }
1536
1537         return num;
1538 }
1539
1540 static int iommu_attach_domain(struct dmar_domain *domain,
1541                                struct intel_iommu *iommu)
1542 {
1543         int num;
1544         unsigned long flags;
1545
1546         spin_lock_irqsave(&iommu->lock, flags);
1547         num = __iommu_attach_domain(domain, iommu);
1548         spin_unlock_irqrestore(&iommu->lock, flags);
1549         if (num < 0)
1550                 pr_err("IOMMU: no free domain ids\n");
1551
1552         return num;
1553 }
1554
1555 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1556                                   struct intel_iommu *iommu)
1557 {
1558         int num;
1559         unsigned long ndomains;
1560
1561         ndomains = cap_ndoms(iommu->cap);
1562         for_each_set_bit(num, iommu->domain_ids, ndomains)
1563                 if (iommu->domains[num] == domain)
1564                         return num;
1565
1566         return __iommu_attach_domain(domain, iommu);
1567 }
1568
1569 static void iommu_detach_domain(struct dmar_domain *domain,
1570                                 struct intel_iommu *iommu)
1571 {
1572         unsigned long flags;
1573         int num, ndomains;
1574
1575         spin_lock_irqsave(&iommu->lock, flags);
1576         if (domain_type_is_vm_or_si(domain)) {
1577                 ndomains = cap_ndoms(iommu->cap);
1578                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1579                         if (iommu->domains[num] == domain) {
1580                                 clear_bit(num, iommu->domain_ids);
1581                                 iommu->domains[num] = NULL;
1582                                 break;
1583                         }
1584                 }
1585         } else {
1586                 clear_bit(domain->id, iommu->domain_ids);
1587                 iommu->domains[domain->id] = NULL;
1588         }
1589         spin_unlock_irqrestore(&iommu->lock, flags);
1590 }
1591
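     /*
      * domain_attach_iommu()/domain_detach_iommu() keep a per-domain
      * bitmap and count of the iommus the domain is currently attached
      * to. domain_detach_iommu() returns the remaining count so callers
      * (e.g. disable_dmar_iommu()) can tell when the last reference to a
      * non-VM domain goes away and the domain can be destroyed.
      */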
1592 static void domain_attach_iommu(struct dmar_domain *domain,
1593                                struct intel_iommu *iommu)
1594 {
1595         unsigned long flags;
1596
1597         spin_lock_irqsave(&domain->iommu_lock, flags);
1598         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1599                 domain->iommu_count++;
1600                 if (domain->iommu_count == 1)
1601                         domain->nid = iommu->node;
1602                 domain_update_iommu_cap(domain);
1603         }
1604         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1605 }
1606
1607 static int domain_detach_iommu(struct dmar_domain *domain,
1608                                struct intel_iommu *iommu)
1609 {
1610         unsigned long flags;
1611         int count = INT_MAX;
1612
1613         spin_lock_irqsave(&domain->iommu_lock, flags);
1614         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1615                 count = --domain->iommu_count;
1616                 domain_update_iommu_cap(domain);
1617         }
1618         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1619
1620         return count;
1621 }
1622
1623 static struct iova_domain reserved_iova_list;
1624 static struct lock_class_key reserved_rbtree_key;
1625
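     /*
      * Build the global list of IOVA ranges that must never be handed out
      * for DMA: the IOAPIC MMIO window and every PCI memory BAR. These
      * are later copied into each new domain by
      * domain_reserve_special_ranges().
      */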
1626 static int dmar_init_reserved_ranges(void)
1627 {
1628         struct pci_dev *pdev = NULL;
1629         struct iova *iova;
1630         int i;
1631
1632         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1633                         DMA_32BIT_PFN);
1634
1635         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1636                 &reserved_rbtree_key);
1637
1638         /* IOAPIC ranges shouldn't be accessed by DMA */
1639         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1640                 IOVA_PFN(IOAPIC_RANGE_END));
1641         if (!iova) {
1642                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1643                 return -ENODEV;
1644         }
1645
1646         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1647         for_each_pci_dev(pdev) {
1648                 struct resource *r;
1649
1650                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1651                         r = &pdev->resource[i];
1652                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1653                                 continue;
1654                         iova = reserve_iova(&reserved_iova_list,
1655                                             IOVA_PFN(r->start),
1656                                             IOVA_PFN(r->end));
1657                         if (!iova) {
1658                                 printk(KERN_ERR "Reserve iova failed\n");
1659                                 return -ENODEV;
1660                         }
1661                 }
1662         }
1663         return 0;
1664 }
1665
1666 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1667 {
1668         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1669 }
1670
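     /*
      * Round a guest address width up to the next adjusted width the page
      * tables can represent: a 12-bit page offset plus a whole number of
      * 9-bit levels, capped at 64. E.g. gaw = 48 stays 48 (12 + 4 * 9),
      * while gaw = 40 is rounded up to 48.
      */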
1671 static inline int guestwidth_to_adjustwidth(int gaw)
1672 {
1673         int agaw;
1674         int r = (gaw - 12) % 9;
1675
1676         if (r == 0)
1677                 agaw = gaw;
1678         else
1679                 agaw = gaw + 9 - r;
1680         if (agaw > 64)
1681                 agaw = 64;
1682         return agaw;
1683 }
1684
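     /*
      * One-time setup for a freshly attached domain: reserve the special
      * IOVA ranges, clamp the guest address width to cap_mgaw(), pick an
      * AGAW the hardware supports (cap_sagaw), record the coherency,
      * snooping and superpage capabilities, and allocate the top-level
      * page directory.
      */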
1685 static int domain_init(struct dmar_domain *domain, int guest_width)
1686 {
1687         struct intel_iommu *iommu;
1688         int adjust_width, agaw;
1689         unsigned long sagaw;
1690
1691         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1692                         DMA_32BIT_PFN);
1693         domain_reserve_special_ranges(domain);
1694
1695         /* calculate AGAW */
1696         iommu = domain_get_iommu(domain);
1697         if (guest_width > cap_mgaw(iommu->cap))
1698                 guest_width = cap_mgaw(iommu->cap);
1699         domain->gaw = guest_width;
1700         adjust_width = guestwidth_to_adjustwidth(guest_width);
1701         agaw = width_to_agaw(adjust_width);
1702         sagaw = cap_sagaw(iommu->cap);
1703         if (!test_bit(agaw, &sagaw)) {
1704                 /* hardware doesn't support it, choose a bigger one */
1705                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1706                 agaw = find_next_bit(&sagaw, 5, agaw);
1707                 if (agaw >= 5)
1708                         return -ENODEV;
1709         }
1710         domain->agaw = agaw;
1711
1712         if (ecap_coherent(iommu->ecap))
1713                 domain->iommu_coherency = 1;
1714         else
1715                 domain->iommu_coherency = 0;
1716
1717         if (ecap_sc_support(iommu->ecap))
1718                 domain->iommu_snooping = 1;
1719         else
1720                 domain->iommu_snooping = 0;
1721
1722         if (intel_iommu_superpage)
1723                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1724         else
1725                 domain->iommu_superpage = 0;
1726
1727         domain->nid = iommu->node;
1728
1729         /* always allocate the top pgd */
1730         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1731         if (!domain->pgd)
1732                 return -ENOMEM;
1733         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1734         return 0;
1735 }
1736
1737 static void domain_exit(struct dmar_domain *domain)
1738 {
1739         struct page *freelist = NULL;
1740         int i;
1741
1742         /* Domain 0 is reserved, so don't process it */
1743         if (!domain)
1744                 return;
1745
1746         /* Flush any lazy unmaps that may reference this domain */
1747         if (!intel_iommu_strict)
1748                 flush_unmaps_timeout(0);
1749
1750         /* remove associated devices */
1751         domain_remove_dev_info(domain);
1752
1753         /* destroy iovas */
1754         put_iova_domain(&domain->iovad);
1755
1756         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1757
1758         /* clear attached or cached domains */
1759         rcu_read_lock();
1760         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus)
1761                 iommu_detach_domain(domain, g_iommus[i]);
1762         rcu_read_unlock();
1763
1764         dma_free_pagelist(freelist);
1765
1766         free_domain_mem(domain);
1767 }
1768
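     /*
      * Program the context entry for (bus, devfn) on @iommu so that DMA
      * from the device is translated through @domain's page tables, or
      * passed through untranslated for CONTEXT_TT_PASS_THROUGH. An entry
      * that is already present is left untouched and 0 is returned.
      */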
1769 static int domain_context_mapping_one(struct dmar_domain *domain,
1770                                       struct intel_iommu *iommu,
1771                                       u8 bus, u8 devfn, int translation)
1772 {
1773         struct context_entry *context;
1774         unsigned long flags;
1775         struct dma_pte *pgd;
1776         int id;
1777         int agaw;
1778         struct device_domain_info *info = NULL;
1779
1780         pr_debug("Set context mapping for %02x:%02x.%d\n",
1781                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1782
1783         BUG_ON(!domain->pgd);
1784         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1785                translation != CONTEXT_TT_MULTI_LEVEL);
1786
1787         spin_lock_irqsave(&iommu->lock, flags);
1788         context = iommu_context_addr(iommu, bus, devfn, 1);
1789         spin_unlock_irqrestore(&iommu->lock, flags);
1790         if (!context)
1791                 return -ENOMEM;
1792         spin_lock_irqsave(&iommu->lock, flags);
1793         if (context_present(context)) {
1794                 spin_unlock_irqrestore(&iommu->lock, flags);
1795                 return 0;
1796         }
1797
1798         id = domain->id;
1799         pgd = domain->pgd;
1800
1801         if (domain_type_is_vm_or_si(domain)) {
1802                 if (domain_type_is_vm(domain)) {
1803                         id = iommu_attach_vm_domain(domain, iommu);
1804                         if (id < 0) {
1805                                 spin_unlock_irqrestore(&iommu->lock, flags);
1806                                 pr_err("IOMMU: no free domain ids\n");
1807                                 return -EFAULT;
1808                         }
1809                 }
1810
1811                 /* Skip top levels of page tables for an iommu
1812                  * which has a smaller agaw than the default.
1813                  * Unnecessary for pass-through (PT) mode.
1814                  */
1815                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1816                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1817                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1818                                 if (!dma_pte_present(pgd)) {
1819                                         spin_unlock_irqrestore(&iommu->lock, flags);
1820                                         return -ENOMEM;
1821                                 }
1822                         }
1823                 }
1824         }
1825
1826         context_set_domain_id(context, id);
1827
1828         if (translation != CONTEXT_TT_PASS_THROUGH) {
1829                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1830                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1831                                      CONTEXT_TT_MULTI_LEVEL;
1832         }
1833         /*
1834          * In pass-through mode, AW must be programmed to indicate the largest
1835          * AGAW value supported by hardware, and the ASR is ignored by hardware.
1836          */
1837         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1838                 context_set_address_width(context, iommu->msagaw);
1839         else {
1840                 context_set_address_root(context, virt_to_phys(pgd));
1841                 context_set_address_width(context, iommu->agaw);
1842         }
1843
1844         context_set_translation_type(context, translation);
1845         context_set_fault_enable(context);
1846         context_set_present(context);
1847         domain_flush_cache(domain, context, sizeof(*context));
1848
1849         /*
1850          * It's a non-present to present mapping. If hardware doesn't cache
1851          * non-present entries we only need to flush the write-buffer. If it
1852          * _does_ cache non-present entries, then it does so in the special
1853          * domain #0, which we have to flush:
1854          */
1855         if (cap_caching_mode(iommu->cap)) {
1856                 iommu->flush.flush_context(iommu, 0,
1857                                            (((u16)bus) << 8) | devfn,
1858                                            DMA_CCMD_MASK_NOBIT,
1859                                            DMA_CCMD_DEVICE_INVL);
1860                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1861         } else {
1862                 iommu_flush_write_buffer(iommu);
1863         }
1864         iommu_enable_dev_iotlb(info);
1865         spin_unlock_irqrestore(&iommu->lock, flags);
1866
1867         domain_attach_iommu(domain, iommu);
1868
1869         return 0;
1870 }
1871
1872 struct domain_context_mapping_data {
1873         struct dmar_domain *domain;
1874         struct intel_iommu *iommu;
1875         int translation;
1876 };
1877
1878 static int domain_context_mapping_cb(struct pci_dev *pdev,
1879                                      u16 alias, void *opaque)
1880 {
1881         struct domain_context_mapping_data *data = opaque;
1882
1883         return domain_context_mapping_one(data->domain, data->iommu,
1884                                           PCI_BUS_NUM(alias), alias & 0xff,
1885                                           data->translation);
1886 }
1887
1888 static int
1889 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1890                        int translation)
1891 {
1892         struct intel_iommu *iommu;
1893         u8 bus, devfn;
1894         struct domain_context_mapping_data data;
1895
1896         iommu = device_to_iommu(dev, &bus, &devfn);
1897         if (!iommu)
1898                 return -ENODEV;
1899
1900         if (!dev_is_pci(dev))
1901                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1902                                                   translation);
1903
1904         data.domain = domain;
1905         data.iommu = iommu;
1906         data.translation = translation;
1907
1908         return pci_for_each_dma_alias(to_pci_dev(dev),
1909                                       &domain_context_mapping_cb, &data);
1910 }
1911
1912 static int domain_context_mapped_cb(struct pci_dev *pdev,
1913                                     u16 alias, void *opaque)
1914 {
1915         struct intel_iommu *iommu = opaque;
1916
1917         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1918 }
1919
1920 static int domain_context_mapped(struct device *dev)
1921 {
1922         struct intel_iommu *iommu;
1923         u8 bus, devfn;
1924
1925         iommu = device_to_iommu(dev, &bus, &devfn);
1926         if (!iommu)
1927                 return -ENODEV;
1928
1929         if (!dev_is_pci(dev))
1930                 return device_context_mapped(iommu, bus, devfn);
1931
1932         return !pci_for_each_dma_alias(to_pci_dev(dev),
1933                                        domain_context_mapped_cb, iommu);
1934 }
1935
1936 /* Returns a number of VTD pages, but aligned to MM page size */
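     /*
      * E.g. with 4KiB pages, host_addr = 0x1ff0 and size = 0x20 touches
      * two MM pages, so this returns 2.
      */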
1937 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1938                                             size_t size)
1939 {
1940         host_addr &= ~PAGE_MASK;
1941         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1942 }
1943
1944 /* Return largest possible superpage level for a given mapping */
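     /*
      * Level 1 is a 4KiB PTE, level 2 a 2MiB superpage, level 3 a 1GiB
      * superpage, and so on. Each extra level requires iov_pfn and
      * phy_pfn to be aligned to another 9-bit stride and the remaining
      * mapping to cover at least one superpage of that size.
      */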
1945 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1946                                           unsigned long iov_pfn,
1947                                           unsigned long phy_pfn,
1948                                           unsigned long pages)
1949 {
1950         int support, level = 1;
1951         unsigned long pfnmerge;
1952
1953         support = domain->iommu_superpage;
1954
1955         /* To use a large page, the virtual *and* physical addresses
1956            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1957            of them will mean we have to use smaller pages. So just
1958            merge them and check both at once. */
1959         pfnmerge = iov_pfn | phy_pfn;
1960
1961         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1962                 pages >>= VTD_STRIDE_SHIFT;
1963                 if (!pages)
1964                         break;
1965                 pfnmerge >>= VTD_STRIDE_SHIFT;
1966                 level++;
1967                 support--;
1968         }
1969         return level;
1970 }
1971
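     /*
      * Fill in the PTEs for [iov_pfn, iov_pfn + nr_pages). The source is
      * either a scatterlist or, when sg is NULL, a contiguous range
      * starting at phys_pfn. Superpage PTEs are used whenever
      * hardware_largepage_caps() says alignment and length allow it, and
      * the CPU cache is flushed as each page-table page of PTEs is
      * completed.
      */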
1972 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1973                             struct scatterlist *sg, unsigned long phys_pfn,
1974                             unsigned long nr_pages, int prot)
1975 {
1976         struct dma_pte *first_pte = NULL, *pte = NULL;
1977         phys_addr_t uninitialized_var(pteval);
1978         unsigned long sg_res = 0;
1979         unsigned int largepage_lvl = 0;
1980         unsigned long lvl_pages = 0;
1981
1982         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1983
1984         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1985                 return -EINVAL;
1986
1987         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1988
1989         if (!sg) {
1990                 sg_res = nr_pages;
1991                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1992         }
1993
1994         while (nr_pages > 0) {
1995                 uint64_t tmp;
1996
1997                 if (!sg_res) {
1998                         sg_res = aligned_nrpages(sg->offset, sg->length);
1999                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2000                         sg->dma_length = sg->length;
2001                         pteval = page_to_phys(sg_page(sg)) | prot;
2002                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2003                 }
2004
2005                 if (!pte) {
2006                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2007
2008                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2009                         if (!pte)
2010                                 return -ENOMEM;
2011                         /* It is a large page */
2012                         if (largepage_lvl > 1) {
2013                                 pteval |= DMA_PTE_LARGE_PAGE;
2014                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2015                                 /*
2016                                  * Ensure that old small page tables are
2017                                  * removed to make room for superpage,
2018                                  * if they exist.
2019                                  */
2020                                 dma_pte_free_pagetable(domain, iov_pfn,
2021                                                        iov_pfn + lvl_pages - 1);
2022                         } else {
2023                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2024                         }
2025
2026                 }
2027                 /* We don't need a lock here; nobody else
2028                  * touches this iova range.
2029                  */
2030                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2031                 if (tmp) {
2032                         static int dumps = 5;
2033                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2034                                iov_pfn, tmp, (unsigned long long)pteval);
2035                         if (dumps) {
2036                                 dumps--;
2037                                 debug_dma_dump_mappings(NULL);
2038                         }
2039                         WARN_ON(1);
2040                 }
2041
2042                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2043
2044                 BUG_ON(nr_pages < lvl_pages);
2045                 BUG_ON(sg_res < lvl_pages);
2046
2047                 nr_pages -= lvl_pages;
2048                 iov_pfn += lvl_pages;
2049                 phys_pfn += lvl_pages;
2050                 pteval += lvl_pages * VTD_PAGE_SIZE;
2051                 sg_res -= lvl_pages;
2052
2053                 /* If the next PTE would be the first in a new page, then we
2054                    need to flush the cache on the entries we've just written.
2055                    And then we'll need to recalculate 'pte', so clear it and
2056                    let it get set again in the if (!pte) block above.
2057
2058                    If we're done (!nr_pages) we need to flush the cache too.
2059
2060                    Also if we've been setting superpages, we may need to
2061                    recalculate 'pte' and switch back to smaller pages for the
2062                    end of the mapping, if the trailing size is not enough to
2063                    use another superpage (i.e. sg_res < lvl_pages). */
2064                 pte++;
2065                 if (!nr_pages || first_pte_in_page(pte) ||
2066                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2067                         domain_flush_cache(domain, first_pte,
2068                                            (void *)pte - (void *)first_pte);
2069                         pte = NULL;
2070                 }
2071
2072                 if (!sg_res && nr_pages)
2073                         sg = sg_next(sg);
2074         }
2075         return 0;
2076 }
2077
2078 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2079                                     struct scatterlist *sg, unsigned long nr_pages,
2080                                     int prot)
2081 {
2082         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2083 }
2084
2085 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2086                                      unsigned long phys_pfn, unsigned long nr_pages,
2087                                      int prot)
2088 {
2089         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2090 }
2091
2092 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2093 {
2094         if (!iommu)
2095                 return;
2096
2097         clear_context_table(iommu, bus, devfn);
2098         iommu->flush.flush_context(iommu, 0, 0, 0,
2099                                            DMA_CCMD_GLOBAL_INVL);
2100         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2101 }
2102
2103 static inline void unlink_domain_info(struct device_domain_info *info)
2104 {
2105         assert_spin_locked(&device_domain_lock);
2106         list_del(&info->link);
2107         list_del(&info->global);
2108         if (info->dev)
2109                 info->dev->archdata.iommu = NULL;
2110 }
2111
2112 static void domain_remove_dev_info(struct dmar_domain *domain)
2113 {
2114         struct device_domain_info *info, *tmp;
2115         unsigned long flags;
2116
2117         spin_lock_irqsave(&device_domain_lock, flags);
2118         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2119                 unlink_domain_info(info);
2120                 spin_unlock_irqrestore(&device_domain_lock, flags);
2121
2122                 iommu_disable_dev_iotlb(info);
2123                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2124
2125                 if (domain_type_is_vm(domain)) {
2126                         iommu_detach_dependent_devices(info->iommu, info->dev);
2127                         domain_detach_iommu(domain, info->iommu);
2128                 }
2129
2130                 free_devinfo_mem(info);
2131                 spin_lock_irqsave(&device_domain_lock, flags);
2132         }
2133         spin_unlock_irqrestore(&device_domain_lock, flags);
2134 }
2135
2136 /*
2137  * find_domain
2138  * Note: we use struct device->archdata.iommu to store the info
2139  */
2140 static struct dmar_domain *find_domain(struct device *dev)
2141 {
2142         struct device_domain_info *info;
2143
2144         /* No lock here, assumes no domain exit in normal case */
2145         info = dev->archdata.iommu;
2146         if (info)
2147                 return info->domain;
2148         return NULL;
2149 }
2150
2151 static inline struct device_domain_info *
2152 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2153 {
2154         struct device_domain_info *info;
2155
2156         list_for_each_entry(info, &device_domain_list, global)
2157                 if (info->iommu->segment == segment && info->bus == bus &&
2158                     info->devfn == devfn)
2159                         return info;
2160
2161         return NULL;
2162 }
2163
2164 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2165                                                 int bus, int devfn,
2166                                                 struct device *dev,
2167                                                 struct dmar_domain *domain)
2168 {
2169         struct dmar_domain *found = NULL;
2170         struct device_domain_info *info;
2171         unsigned long flags;
2172
2173         info = alloc_devinfo_mem();
2174         if (!info)
2175                 return NULL;
2176
2177         info->bus = bus;
2178         info->devfn = devfn;
2179         info->dev = dev;
2180         info->domain = domain;
2181         info->iommu = iommu;
2182
2183         spin_lock_irqsave(&device_domain_lock, flags);
2184         if (dev)
2185                 found = find_domain(dev);
2186         else {
2187                 struct device_domain_info *info2;
2188                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2189                 if (info2)
2190                         found = info2->domain;
2191         }
2192         if (found) {
2193                 spin_unlock_irqrestore(&device_domain_lock, flags);
2194                 free_devinfo_mem(info);
2195                 /* Caller must free the original domain */
2196                 return found;
2197         }
2198
2199         list_add(&info->link, &domain->devices);
2200         list_add(&info->global, &device_domain_list);
2201         if (dev)
2202                 dev->archdata.iommu = info;
2203         spin_unlock_irqrestore(&device_domain_lock, flags);
2204
2205         return domain;
2206 }
2207
2208 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2209 {
2210         *(u16 *)opaque = alias;
2211         return 0;
2212 }
2213
2214 /* domain is initialized */
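     /*
      * Find or create the dmar_domain for @dev. PCI devices share a
      * domain with their topmost DMA alias (e.g. the requester ID of a
      * PCIe-to-PCI bridge), so every function behind the same alias ends
      * up using the same translation tables.
      */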
2215 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2216 {
2217         struct dmar_domain *domain, *tmp;
2218         struct intel_iommu *iommu;
2219         struct device_domain_info *info;
2220         u16 dma_alias;
2221         unsigned long flags;
2222         u8 bus, devfn;
2223
2224         domain = find_domain(dev);
2225         if (domain)
2226                 return domain;
2227
2228         iommu = device_to_iommu(dev, &bus, &devfn);
2229         if (!iommu)
2230                 return NULL;
2231
2232         if (dev_is_pci(dev)) {
2233                 struct pci_dev *pdev = to_pci_dev(dev);
2234
2235                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2236
2237                 spin_lock_irqsave(&device_domain_lock, flags);
2238                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2239                                                       PCI_BUS_NUM(dma_alias),
2240                                                       dma_alias & 0xff);
2241                 if (info) {
2242                         iommu = info->iommu;
2243                         domain = info->domain;
2244                 }
2245                 spin_unlock_irqrestore(&device_domain_lock, flags);
2246
2247                 /* DMA alias already has a domain, use it */
2248                 if (info)
2249                         goto found_domain;
2250         }
2251
2252         /* Allocate and initialize new domain for the device */
2253         domain = alloc_domain(0);
2254         if (!domain)
2255                 return NULL;
2256         domain->id = iommu_attach_domain(domain, iommu);
2257         if (domain->id < 0) {
2258                 free_domain_mem(domain);
2259                 return NULL;
2260         }
2261         domain_attach_iommu(domain, iommu);
2262         if (domain_init(domain, gaw)) {
2263                 domain_exit(domain);
2264                 return NULL;
2265         }
2266
2267         /* register PCI DMA alias device */
2268         if (dev_is_pci(dev)) {
2269                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2270                                            dma_alias & 0xff, NULL, domain);
2271
2272                 if (!tmp || tmp != domain) {
2273                         domain_exit(domain);
2274                         domain = tmp;
2275                 }
2276
2277                 if (!domain)
2278                         return NULL;
2279         }
2280
2281 found_domain:
2282         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2283
2284         if (!tmp || tmp != domain) {
2285                 domain_exit(domain);
2286                 domain = tmp;
2287         }
2288
2289         return domain;
2290 }
2291
2292 static int iommu_identity_mapping;
2293 #define IDENTMAP_ALL            1
2294 #define IDENTMAP_GFX            2
2295 #define IDENTMAP_AZALIA         4
2296
2297 static int iommu_domain_identity_map(struct dmar_domain *domain,
2298                                      unsigned long long start,
2299                                      unsigned long long end)
2300 {
2301         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2302         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2303
2304         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2305                           dma_to_mm_pfn(last_vpfn))) {
2306                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2307                 return -ENOMEM;
2308         }
2309
2310         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2311                  start, end, domain->id);
2312         /*
2313          * The RMRR range might overlap with the physical memory range;
2314          * clear it first.
2315          */
2316         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2317
2318         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2319                                   last_vpfn - first_vpfn + 1,
2320                                   DMA_PTE_READ|DMA_PTE_WRITE);
2321 }
2322
2323 static int iommu_prepare_identity_map(struct device *dev,
2324                                       unsigned long long start,
2325                                       unsigned long long end)
2326 {
2327         struct dmar_domain *domain;
2328         int ret;
2329
2330         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2331         if (!domain)
2332                 return -ENOMEM;
2333
2334         /* For _hardware_ passthrough, don't bother. But for software
2335            passthrough, we do it anyway -- it may indicate a memory
2336            range which is reserved in E820 and so didn't get set
2337            up to start with in si_domain */
2338         if (domain == si_domain && hw_pass_through) {
2339                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2340                        dev_name(dev), start, end);
2341                 return 0;
2342         }
2343
2344         printk(KERN_INFO
2345                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2346                dev_name(dev), start, end);
2347
2348         if (end < start) {
2349                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2350                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2351                         dmi_get_system_info(DMI_BIOS_VENDOR),
2352                         dmi_get_system_info(DMI_BIOS_VERSION),
2353                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2354                 ret = -EIO;
2355                 goto error;
2356         }
2357
2358         if (end >> agaw_to_width(domain->agaw)) {
2359                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2360                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2361                      agaw_to_width(domain->agaw),
2362                      dmi_get_system_info(DMI_BIOS_VENDOR),
2363                      dmi_get_system_info(DMI_BIOS_VERSION),
2364                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2365                 ret = -EIO;
2366                 goto error;
2367         }
2368
2369         ret = iommu_domain_identity_map(domain, start, end);
2370         if (ret)
2371                 goto error;
2372
2373         /* context entry init */
2374         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2375         if (ret)
2376                 goto error;
2377
2378         return 0;
2379
2380  error:
2381         domain_exit(domain);
2382         return ret;
2383 }
2384
2385 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2386                                          struct device *dev)
2387 {
2388         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2389                 return 0;
2390         return iommu_prepare_identity_map(dev, rmrr->base_address,
2391                                           rmrr->end_address);
2392 }
2393
2394 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2395 static inline void iommu_prepare_isa(void)
2396 {
2397         struct pci_dev *pdev;
2398         int ret;
2399
2400         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2401         if (!pdev)
2402                 return;
2403
2404         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2405         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2406
2407         if (ret)
2408                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2409                        "floppy might not work\n");
2410
2411         pci_dev_put(pdev);
2412 }
2413 #else
2414 static inline void iommu_prepare_isa(void)
2415 {
2416         return;
2417 }
2418 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2419
2420 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2421
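     /*
      * si_domain is the single static identity (1:1) domain shared by all
      * identity-mapped devices. With hardware pass-through its page
      * tables are never consulted; otherwise every online memory range is
      * mapped 1:1 into it here.
      */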
2422 static int __init si_domain_init(int hw)
2423 {
2424         struct dmar_drhd_unit *drhd;
2425         struct intel_iommu *iommu;
2426         int nid, ret = 0;
2427         bool first = true;
2428
2429         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2430         if (!si_domain)
2431                 return -EFAULT;
2432
2433         for_each_active_iommu(iommu, drhd) {
2434                 ret = iommu_attach_domain(si_domain, iommu);
2435                 if (ret < 0) {
2436                         domain_exit(si_domain);
2437                         return -EFAULT;
2438                 } else if (first) {
2439                         si_domain->id = ret;
2440                         first = false;
2441                 } else if (si_domain->id != ret) {
2442                         domain_exit(si_domain);
2443                         return -EFAULT;
2444                 }
2445                 domain_attach_iommu(si_domain, iommu);
2446         }
2447
2448         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2449                 domain_exit(si_domain);
2450                 return -EFAULT;
2451         }
2452
2453         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2454                  si_domain->id);
2455
2456         if (hw)
2457                 return 0;
2458
2459         for_each_online_node(nid) {
2460                 unsigned long start_pfn, end_pfn;
2461                 int i;
2462
2463                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2464                         ret = iommu_domain_identity_map(si_domain,
2465                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2466                         if (ret)
2467                                 return ret;
2468                 }
2469         }
2470
2471         return 0;
2472 }
2473
2474 static int identity_mapping(struct device *dev)
2475 {
2476         struct device_domain_info *info;
2477
2478         if (likely(!iommu_identity_mapping))
2479                 return 0;
2480
2481         info = dev->archdata.iommu;
2482         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2483                 return (info->domain == si_domain);
2484
2485         return 0;
2486 }
2487
2488 static int domain_add_dev_info(struct dmar_domain *domain,
2489                                struct device *dev, int translation)
2490 {
2491         struct dmar_domain *ndomain;
2492         struct intel_iommu *iommu;
2493         u8 bus, devfn;
2494         int ret;
2495
2496         iommu = device_to_iommu(dev, &bus, &devfn);
2497         if (!iommu)
2498                 return -ENODEV;
2499
2500         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2501         if (ndomain != domain)
2502                 return -EBUSY;
2503
2504         ret = domain_context_mapping(domain, dev, translation);
2505         if (ret) {
2506                 domain_remove_one_dev_info(domain, dev);
2507                 return ret;
2508         }
2509
2510         return 0;
2511 }
2512
2513 static bool device_has_rmrr(struct device *dev)
2514 {
2515         struct dmar_rmrr_unit *rmrr;
2516         struct device *tmp;
2517         int i;
2518
2519         rcu_read_lock();
2520         for_each_rmrr_units(rmrr) {
2521                 /*
2522                  * Return TRUE if this RMRR contains the device that
2523                  * is passed in.
2524                  */
2525                 for_each_active_dev_scope(rmrr->devices,
2526                                           rmrr->devices_cnt, i, tmp)
2527                         if (tmp == dev) {
2528                                 rcu_read_unlock();
2529                                 return true;
2530                         }
2531         }
2532         rcu_read_unlock();
2533         return false;
2534 }
2535
2536 /*
2537  * There are a couple cases where we need to restrict the functionality of
2538  * devices associated with RMRRs.  The first is when evaluating a device for
2539  * identity mapping because problems exist when devices are moved in and out
2540  * of domains and their respective RMRR information is lost.  This means that
2541  * a device with associated RMRRs will never be in a "passthrough" domain.
2542  * The second is use of the device through the IOMMU API.  This interface
2543  * expects to have full control of the IOVA space for the device.  We cannot
2544  * satisfy both the requirement that RMRR access is maintained and have an
2545  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2546  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2547  * We therefore prevent devices associated with an RMRR from participating in
2548  * the IOMMU API, which eliminates them from device assignment.
2549  *
2550  * In both cases we assume that PCI USB devices with RMRRs have them largely
2551  * for historical reasons and that the RMRR space is not actively used post
2552  * boot.  This exclusion may change if vendors begin to abuse it.
2553  *
2554  * The same exception is made for graphics devices, with the requirement that
2555  * any use of the RMRR regions will be torn down before assigning the device
2556  * to a guest.
2557  */
2558 static bool device_is_rmrr_locked(struct device *dev)
2559 {
2560         if (!device_has_rmrr(dev))
2561                 return false;
2562
2563         if (dev_is_pci(dev)) {
2564                 struct pci_dev *pdev = to_pci_dev(dev);
2565
2566                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2567                         return false;
2568         }
2569
2570         return true;
2571 }
2572
2573 static int iommu_should_identity_map(struct device *dev, int startup)
2574 {
2575
2576         if (dev_is_pci(dev)) {
2577                 struct pci_dev *pdev = to_pci_dev(dev);
2578
2579                 if (device_is_rmrr_locked(dev))
2580                         return 0;
2581
2582                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2583                         return 1;
2584
2585                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2586                         return 1;
2587
2588                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2589                         return 0;
2590
2591                 /*
2592                  * We want to start off with all devices in the 1:1 domain, and
2593                  * take them out later if we find they can't access all of memory.
2594                  *
2595                  * However, we can't do this for PCI devices behind bridges,
2596                  * because all PCI devices behind the same bridge will end up
2597                  * with the same source-id on their transactions.
2598                  *
2599                  * Practically speaking, we can't change things around for these
2600                  * devices at run-time, because we can't be sure there'll be no
2601                  * DMA transactions in flight for any of their siblings.
2602                  *
2603                  * So PCI devices (unless they're on the root bus) as well as
2604                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2605                  * the 1:1 domain, just in _case_ one of their siblings turns out
2606                  * not to be able to map all of memory.
2607                  */
2608                 if (!pci_is_pcie(pdev)) {
2609                         if (!pci_is_root_bus(pdev->bus))
2610                                 return 0;
2611                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2612                                 return 0;
2613                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2614                         return 0;
2615         } else {
2616                 if (device_has_rmrr(dev))
2617                         return 0;
2618         }
2619
2620         /*
2621          * At boot time, we don't yet know if devices will be 64-bit capable.
2622          * Assume that they will; if they turn out not to be, then we can
2623          * take them out of the 1:1 domain later.
2624          */
2625         if (!startup) {
2626                 /*
2627                  * If the device's dma_mask is less than the system's memory
2628                  * size then this is not a candidate for identity mapping.
2629                  */
2630                 u64 dma_mask = *dev->dma_mask;
2631
2632                 if (dev->coherent_dma_mask &&
2633                     dev->coherent_dma_mask < dma_mask)
2634                         dma_mask = dev->coherent_dma_mask;
2635
2636                 return dma_mask >= dma_get_required_mask(dev);
2637         }
2638
2639         return 1;
2640 }
2641
2642 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2643 {
2644         int ret;
2645
2646         if (!iommu_should_identity_map(dev, 1))
2647                 return 0;
2648
2649         ret = domain_add_dev_info(si_domain, dev,
2650                                   hw ? CONTEXT_TT_PASS_THROUGH :
2651                                        CONTEXT_TT_MULTI_LEVEL);
2652         if (!ret)
2653                 pr_info("IOMMU: %s identity mapping for device %s\n",
2654                         hw ? "hardware" : "software", dev_name(dev));
2655         else if (ret == -ENODEV)
2656                 /* device not associated with an iommu */
2657                 ret = 0;
2658
2659         return ret;
2660 }
2661
2662
2663 static int __init iommu_prepare_static_identity_mapping(int hw)
2664 {
2665         struct pci_dev *pdev = NULL;
2666         struct dmar_drhd_unit *drhd;
2667         struct intel_iommu *iommu;
2668         struct device *dev;
2669         int i;
2670         int ret = 0;
2671
2672         ret = si_domain_init(hw);
2673         if (ret)
2674                 return -EFAULT;
2675
2676         for_each_pci_dev(pdev) {
2677                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2678                 if (ret)
2679                         return ret;
2680         }
2681
2682         for_each_active_iommu(iommu, drhd)
2683                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2684                         struct acpi_device_physical_node *pn;
2685                         struct acpi_device *adev;
2686
2687                         if (dev->bus != &acpi_bus_type)
2688                                 continue;
2689
2690                         adev = to_acpi_device(dev);
2691                         mutex_lock(&adev->physical_node_lock);
2692                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2693                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2694                                 if (ret)
2695                                         break;
2696                         }
2697                         mutex_unlock(&adev->physical_node_lock);
2698                         if (ret)
2699                                 return ret;
2700                 }
2701
2702         return 0;
2703 }
2704
2705 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2706 {
2707         /*
2708          * Start from a sane iommu hardware state.
2709          * If queued invalidation was already initialized by us
2710          * (for example, while enabling interrupt remapping) then
2711          * things are already rolling from a sane state.
2712          */
2713         if (!iommu->qi) {
2714                 /*
2715                  * Clear any previous faults.
2716                  */
2717                 dmar_fault(-1, iommu);
2718                 /*
2719                  * Disable queued invalidation if supported and already enabled
2720                  * before OS handover.
2721                  */
2722                 dmar_disable_qi(iommu);
2723         }
2724
2725         if (dmar_enable_qi(iommu)) {
2726                 /*
2727                  * Queued Invalidate not enabled, use Register Based Invalidate
2728                  */
2729                 iommu->flush.flush_context = __iommu_flush_context;
2730                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2731                 pr_info("IOMMU: %s using Register based invalidation\n",
2732                         iommu->name);
2733         } else {
2734                 iommu->flush.flush_context = qi_flush_context;
2735                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2736                 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2737         }
2738 }
2739
2740 static int __init init_dmars(void)
2741 {
2742         struct dmar_drhd_unit *drhd;
2743         struct dmar_rmrr_unit *rmrr;
2744         struct device *dev;
2745         struct intel_iommu *iommu;
2746         int i, ret;
2747
2748         /*
2749          * for each drhd
2750          *    allocate root
2751          *    initialize and program root entry to not present
2752          * endfor
2753          */
2754         for_each_drhd_unit(drhd) {
2755                 /*
2756                  * lock not needed as this is only incremented in the single
2757                  * threaded kernel __init code path; all other accesses are
2758                  * read-only
2759                  */
2760                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2761                         g_num_of_iommus++;
2762                         continue;
2763                 }
2764                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2765                           DMAR_UNITS_SUPPORTED);
2766         }
2767
2768         /* Preallocate enough resources for IOMMU hot-addition */
2769         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2770                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2771
2772         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2773                         GFP_KERNEL);
2774         if (!g_iommus) {
2775                 printk(KERN_ERR "Allocating global iommu array failed\n");
2776                 ret = -ENOMEM;
2777                 goto error;
2778         }
2779
2780         deferred_flush = kzalloc(g_num_of_iommus *
2781                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2782         if (!deferred_flush) {
2783                 ret = -ENOMEM;
2784                 goto free_g_iommus;
2785         }
2786
2787         for_each_active_iommu(iommu, drhd) {
2788                 g_iommus[iommu->seq_id] = iommu;
2789
2790                 ret = iommu_init_domains(iommu);
2791                 if (ret)
2792                         goto free_iommu;
2793
2794                 /*
2795                  * TBD:
2796                  * we could share the same root & context tables
2797                  * among all IOMMUs. Need to split it later.
2798                  */
2799                 ret = iommu_alloc_root_entry(iommu);
2800                 if (ret)
2801                         goto free_iommu;
2802                 if (!ecap_pass_through(iommu->ecap))
2803                         hw_pass_through = 0;
2804         }
2805
2806         for_each_active_iommu(iommu, drhd)
2807                 intel_iommu_init_qi(iommu);
2808
2809         if (iommu_pass_through)
2810                 iommu_identity_mapping |= IDENTMAP_ALL;
2811
2812 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2813         iommu_identity_mapping |= IDENTMAP_GFX;
2814 #endif
2815
2816         check_tylersburg_isoch();
2817
2818         /*
2819          * If pass-through is not set or not enabled, set up context entries
2820          * for identity mappings for RMRR, GFX and ISA, and fall back to static
2821          * identity mapping if iommu_identity_mapping is set.
2822          */
2823         if (iommu_identity_mapping) {
2824                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2825                 if (ret) {
2826                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2827                         goto free_iommu;
2828                 }
2829         }
2830         /*
2831          * For each rmrr
2832          *   for each dev attached to rmrr
2833          *   do
2834          *     locate drhd for dev, alloc domain for dev
2835          *     allocate free domain
2836          *     allocate page table entries for rmrr
2837          *     if context not allocated for bus
2838          *           allocate and init context
2839          *           set present in root table for this bus
2840          *     init context with domain, translation etc
2841          *    endfor
2842          * endfor
2843          */
2844         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2845         for_each_rmrr_units(rmrr) {
2846                 /* some BIOSes list non-existent devices in the DMAR table */
2847                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2848                                           i, dev) {
2849                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2850                         if (ret)
2851                                 printk(KERN_ERR
2852                                        "IOMMU: mapping reserved region failed\n");
2853                 }
2854         }
2855
2856         iommu_prepare_isa();
2857
2858         /*
2859          * for each drhd
2860          *   enable fault log
2861          *   global invalidate context cache
2862          *   global invalidate iotlb
2863          *   enable translation
2864          */
2865         for_each_iommu(iommu, drhd) {
2866                 if (drhd->ignored) {
2867                         /*
2868                          * we always have to disable PMRs or DMA may fail on
2869                          * this device
2870                          */
2871                         if (force_on)
2872                                 iommu_disable_protect_mem_regions(iommu);
2873                         continue;
2874                 }
2875
2876                 iommu_flush_write_buffer(iommu);
2877
2878                 ret = dmar_set_interrupt(iommu);
2879                 if (ret)
2880                         goto free_iommu;
2881
2882                 iommu_set_root_entry(iommu);
2883
2884                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2885                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2886                 iommu_enable_translation(iommu);
2887                 iommu_disable_protect_mem_regions(iommu);
2888         }
2889
2890         return 0;
2891
2892 free_iommu:
2893         for_each_active_iommu(iommu, drhd) {
2894                 disable_dmar_iommu(iommu);
2895                 free_dmar_iommu(iommu);
2896         }
2897         kfree(deferred_flush);
2898 free_g_iommus:
2899         kfree(g_iommus);
2900 error:
2901         return ret;
2902 }
2903
2904 /* This takes a number of _MM_ pages, not VTD pages */
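     /*
      * Unless dmar_forcedac is set, try to allocate the IOVA below 4GiB
      * first and only fall back to the device's full dma_mask (clamped to
      * the domain's address width) when the 32-bit space is exhausted.
      */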
2905 static struct iova *intel_alloc_iova(struct device *dev,
2906                                      struct dmar_domain *domain,
2907                                      unsigned long nrpages, uint64_t dma_mask)
2908 {
2909         struct iova *iova = NULL;
2910
2911         /* Restrict dma_mask to the width that the iommu can handle */
2912         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2913
2914         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2915                 /*
2916                  * First try to allocate an io virtual address in
2917                  * DMA_BIT_MASK(32) and if that fails then try allocating
2918                  * from higher range
2919                  */
2920                 iova = alloc_iova(&domain->iovad, nrpages,
2921                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2922                 if (iova)
2923                         return iova;
2924         }
2925         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2926         if (unlikely(!iova)) {
2927                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2928                        nrpages, dev_name(dev));
2929                 return NULL;
2930         }
2931
2932         return iova;
2933 }
2934
2935 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2936 {
2937         struct dmar_domain *domain;
2938         int ret;
2939
2940         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2941         if (!domain) {
2942                 printk(KERN_ERR "Allocating domain for %s failed\n",
2943                        dev_name(dev));
2944                 return NULL;
2945         }
2946
2947         /* make sure context mapping is ok */
2948         if (unlikely(!domain_context_mapped(dev))) {
2949                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2950                 if (ret) {
2951                         printk(KERN_ERR "Domain context map for %s failed\n",
2952                                dev_name(dev));
2953                         return NULL;
2954                 }
2955         }
2956
2957         return domain;
2958 }
2959
2960 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2961 {
2962         struct device_domain_info *info;
2963
2964         /* No lock here, assumes no domain exit in normal case */
2965         info = dev->archdata.iommu;
2966         if (likely(info))
2967                 return info->domain;
2968
2969         return __get_valid_domain_for_dev(dev);
2970 }
2971
2972 static int iommu_dummy(struct device *dev)
2973 {
2974         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2975 }
2976
2977 /* Check if the dev needs to go through non-identity map and unmap process. */
2978 static int iommu_no_mapping(struct device *dev)
2979 {
2980         int found;
2981
2982         if (iommu_dummy(dev))
2983                 return 1;
2984
2985         if (!iommu_identity_mapping)
2986                 return 0;
2987
2988         found = identity_mapping(dev);
2989         if (found) {
2990                 if (iommu_should_identity_map(dev, 0))
2991                         return 1;
2992                 else {
2993                         /*
2994                          * A device doing only 32 bit DMA is removed from
2995                          * si_domain and falls back to non-identity mapping.
2996                          */
2997                         domain_remove_one_dev_info(si_domain, dev);
2998                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2999                                dev_name(dev));
3000                         return 0;
3001                 }
3002         } else {
3003                 /*
3004                  * If a 64 bit DMA device has been detached from a VM, the
3005                  * device is put into si_domain for identity mapping.
3006                  */
3007                 if (iommu_should_identity_map(dev, 0)) {
3008                         int ret;
3009                         ret = domain_add_dev_info(si_domain, dev,
3010                                                   hw_pass_through ?
3011                                                   CONTEXT_TT_PASS_THROUGH :
3012                                                   CONTEXT_TT_MULTI_LEVEL);
3013                         if (!ret) {
3014                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
3015                                        dev_name(dev));
3016                                 return 1;
3017                         }
3018                 }
3019         }
3020
3021         return 0;
3022 }
3023
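/*
 * Map a single physically contiguous buffer for DMA: allocate an IOVA
 * range sized for the request, install the page-table entries with the
 * protection bits derived from the DMA direction, and flush the IOTLB
 * (or write buffer) as required by caching mode.
 */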
3024 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3025                                      size_t size, int dir, u64 dma_mask)
3026 {
3027         struct dmar_domain *domain;
3028         phys_addr_t start_paddr;
3029         struct iova *iova;
3030         int prot = 0;
3031         int ret;
3032         struct intel_iommu *iommu;
3033         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3034
3035         BUG_ON(dir == DMA_NONE);
3036
3037         if (iommu_no_mapping(dev))
3038                 return paddr;
3039
3040         domain = get_valid_domain_for_dev(dev);
3041         if (!domain)
3042                 return 0;
3043
3044         iommu = domain_get_iommu(domain);
3045         size = aligned_nrpages(paddr, size);
3046
3047         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3048         if (!iova)
3049                 goto error;
3050
3051         /*
3052          * Check if DMAR supports zero-length reads on write only
3053          * mappings.
3054          */
3055         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3056                         !cap_zlr(iommu->cap))
3057                 prot |= DMA_PTE_READ;
3058         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3059                 prot |= DMA_PTE_WRITE;
3060         /*
3061          * The range paddr .. paddr + size may cover only part of a page, but
3062          * we should map the whole page.  Note: if two parts of one page are
3063          * mapped separately, we might end up with two guest addresses mapping
3064          * to the same host paddr, but this is not a big problem.
3065          */
3066         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3067                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3068         if (ret)
3069                 goto error;
3070
3071         /* it's a non-present to present mapping. Only flush if caching mode */
3072         if (cap_caching_mode(iommu->cap))
3073                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3074         else
3075                 iommu_flush_write_buffer(iommu);
3076
3077         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3078         start_paddr += paddr & ~PAGE_MASK;
3079         return start_paddr;
3080
3081 error:
3082         if (iova)
3083                 __free_iova(&domain->iovad, iova);
3084         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3085                 dev_name(dev), size, (unsigned long long)paddr, dir);
3086         return 0;
3087 }
3088
3089 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3090                                  unsigned long offset, size_t size,
3091                                  enum dma_data_direction dir,
3092                                  struct dma_attrs *attrs)
3093 {
3094         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3095                                   dir, *dev->dma_mask);
3096 }
3097
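/*
 * Drain the per-IOMMU deferred-unmap queues: flush the IOTLB for every
 * queued IOVA range, then release the IOVAs and any freed page-table
 * pages.  Called with async_umap_flush_lock held.
 */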
3098 static void flush_unmaps(void)
3099 {
3100         int i, j;
3101
3102         timer_on = 0;
3103
3104         /* just flush them all */
3105         for (i = 0; i < g_num_of_iommus; i++) {
3106                 struct intel_iommu *iommu = g_iommus[i];
3107                 if (!iommu)
3108                         continue;
3109
3110                 if (!deferred_flush[i].next)
3111                         continue;
3112
3113                 /* In caching mode, global flushes make emulation expensive */
3114                 if (!cap_caching_mode(iommu->cap))
3115                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3116                                          DMA_TLB_GLOBAL_FLUSH);
3117                 for (j = 0; j < deferred_flush[i].next; j++) {
3118                         unsigned long mask;
3119                         struct iova *iova = deferred_flush[i].iova[j];
3120                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3121
3122                         /* On real hardware multiple invalidations are expensive */
3123                         if (cap_caching_mode(iommu->cap))
3124                                 iommu_flush_iotlb_psi(iommu, domain->id,
3125                                         iova->pfn_lo, iova_size(iova),
3126                                         !deferred_flush[i].freelist[j], 0);
3127                         else {
3128                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3129                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3130                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3131                         }
3132                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3133                         if (deferred_flush[i].freelist[j])
3134                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3135                 }
3136                 deferred_flush[i].next = 0;
3137         }
3138
3139         list_size = 0;
3140 }
3141
3142 static void flush_unmaps_timeout(unsigned long data)
3143 {
3144         unsigned long flags;
3145
3146         spin_lock_irqsave(&async_umap_flush_lock, flags);
3147         flush_unmaps();
3148         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3149 }
3150
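/*
 * Queue an IOVA range (and its freed page-table pages) for deferred
 * release, arming a 10ms timer so the batch gets flushed even when the
 * high-water mark is not reached.
 */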
3151 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3152 {
3153         unsigned long flags;
3154         int next, iommu_id;
3155         struct intel_iommu *iommu;
3156
3157         spin_lock_irqsave(&async_umap_flush_lock, flags);
3158         if (list_size == HIGH_WATER_MARK)
3159                 flush_unmaps();
3160
3161         iommu = domain_get_iommu(dom);
3162         iommu_id = iommu->seq_id;
3163
3164         next = deferred_flush[iommu_id].next;
3165         deferred_flush[iommu_id].domain[next] = dom;
3166         deferred_flush[iommu_id].iova[next] = iova;
3167         deferred_flush[iommu_id].freelist[next] = freelist;
3168         deferred_flush[iommu_id].next++;
3169
3170         if (!timer_on) {
3171                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3172                 timer_on = 1;
3173         }
3174         list_size++;
3175         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3176 }
3177
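/*
 * Tear down the DMA mapping at dev_addr: unmap the page-table range and
 * either flush the IOTLB synchronously (intel_iommu_strict) or queue the
 * IOVA for batched, deferred release.
 */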
3178 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3179 {
3180         struct dmar_domain *domain;
3181         unsigned long start_pfn, last_pfn;
3182         struct iova *iova;
3183         struct intel_iommu *iommu;
3184         struct page *freelist;
3185
3186         if (iommu_no_mapping(dev))
3187                 return;
3188
3189         domain = find_domain(dev);
3190         BUG_ON(!domain);
3191
3192         iommu = domain_get_iommu(domain);
3193
3194         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3195         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3196                       (unsigned long long)dev_addr))
3197                 return;
3198
3199         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3200         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3201
3202         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3203                  dev_name(dev), start_pfn, last_pfn);
3204
3205         freelist = domain_unmap(domain, start_pfn, last_pfn);
3206
3207         if (intel_iommu_strict) {
3208                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3209                                       last_pfn - start_pfn + 1, !freelist, 0);
3210                 /* free iova */
3211                 __free_iova(&domain->iovad, iova);
3212                 dma_free_pagelist(freelist);
3213         } else {
3214                 add_unmap(domain, iova, freelist);
3215                 /*
3216                  * Queue up the release of the unmap to save the roughly 1/6th
3217                  * of the CPU time otherwise spent on the iotlb flush operation.
3218                  */
3219         }
3220 }
3221
3222 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3223                              size_t size, enum dma_data_direction dir,
3224                              struct dma_attrs *attrs)
3225 {
3226         intel_unmap(dev, dev_addr);
3227 }
3228
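/*
 * Allocate a zeroed, page-aligned coherent buffer (preferring CMA when
 * sleeping is allowed) and map it bidirectionally through the IOMMU,
 * unless the device bypasses translation entirely.
 */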
3229 static void *intel_alloc_coherent(struct device *dev, size_t size,
3230                                   dma_addr_t *dma_handle, gfp_t flags,
3231                                   struct dma_attrs *attrs)
3232 {
3233         struct page *page = NULL;
3234         int order;
3235
3236         size = PAGE_ALIGN(size);
3237         order = get_order(size);
3238
3239         if (!iommu_no_mapping(dev))
3240                 flags &= ~(GFP_DMA | GFP_DMA32);
3241         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3242                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3243                         flags |= GFP_DMA;
3244                 else
3245                         flags |= GFP_DMA32;
3246         }
3247
3248         if (flags & __GFP_WAIT) {
3249                 unsigned int count = size >> PAGE_SHIFT;
3250
3251                 page = dma_alloc_from_contiguous(dev, count, order);
3252                 if (page && iommu_no_mapping(dev) &&
3253                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3254                         dma_release_from_contiguous(dev, page, count);
3255                         page = NULL;
3256                 }
3257         }
3258
3259         if (!page)
3260                 page = alloc_pages(flags, order);
3261         if (!page)
3262                 return NULL;
3263         memset(page_address(page), 0, size);
3264
3265         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3266                                          DMA_BIDIRECTIONAL,
3267                                          dev->coherent_dma_mask);
3268         if (*dma_handle)
3269                 return page_address(page);
3270         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3271                 __free_pages(page, order);
3272
3273         return NULL;
3274 }
3275
3276 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3277                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3278 {
3279         int order;
3280         struct page *page = virt_to_page(vaddr);
3281
3282         size = PAGE_ALIGN(size);
3283         order = get_order(size);
3284
3285         intel_unmap(dev, dma_handle);
3286         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3287                 __free_pages(page, order);
3288 }
3289
3290 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3291                            int nelems, enum dma_data_direction dir,
3292                            struct dma_attrs *attrs)
3293 {
3294         intel_unmap(dev, sglist[0].dma_address);
3295 }
3296
3297 static int intel_nontranslate_map_sg(struct device *hddev,
3298         struct scatterlist *sglist, int nelems, int dir)
3299 {
3300         int i;
3301         struct scatterlist *sg;
3302
3303         for_each_sg(sglist, sg, nelems, i) {
3304                 BUG_ON(!sg_page(sg));
3305                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3306                 sg->dma_length = sg->length;
3307         }
3308         return nelems;
3309 }
3310
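/*
 * Map a scatterlist through the IOMMU: allocate one IOVA range covering
 * the whole list, build the page-table entries for each segment, and
 * flush as dictated by caching mode.  Falls back to a 1:1 physical
 * mapping when the device bypasses translation.
 */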
3311 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3312                         enum dma_data_direction dir, struct dma_attrs *attrs)
3313 {
3314         int i;
3315         struct dmar_domain *domain;
3316         size_t size = 0;
3317         int prot = 0;
3318         struct iova *iova = NULL;
3319         int ret;
3320         struct scatterlist *sg;
3321         unsigned long start_vpfn;
3322         struct intel_iommu *iommu;
3323
3324         BUG_ON(dir == DMA_NONE);
3325         if (iommu_no_mapping(dev))
3326                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3327
3328         domain = get_valid_domain_for_dev(dev);
3329         if (!domain)
3330                 return 0;
3331
3332         iommu = domain_get_iommu(domain);
3333
3334         for_each_sg(sglist, sg, nelems, i)
3335                 size += aligned_nrpages(sg->offset, sg->length);
3336
3337         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3338                                 *dev->dma_mask);
3339         if (!iova) {
3340                 sglist->dma_length = 0;
3341                 return 0;
3342         }
3343
3344         /*
3345          * Check if DMAR supports zero-length reads on write only
3346          * mappings.
3347          */
3348         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3349                         !cap_zlr(iommu->cap))
3350                 prot |= DMA_PTE_READ;
3351         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3352                 prot |= DMA_PTE_WRITE;
3353
3354         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3355
3356         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3357         if (unlikely(ret)) {
3358                 dma_pte_free_pagetable(domain, start_vpfn,
3359                                        start_vpfn + size - 1);
3360                 __free_iova(&domain->iovad, iova);
3361                 return 0;
3362         }
3363
3364         /* it's a non-present to present mapping. Only flush if caching mode */
3365         if (cap_caching_mode(iommu->cap))
3366                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3367         else
3368                 iommu_flush_write_buffer(iommu);
3369
3370         return nelems;
3371 }
3372
3373 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3374 {
3375         return !dma_addr;
3376 }
3377
3378 struct dma_map_ops intel_dma_ops = {
3379         .alloc = intel_alloc_coherent,
3380         .free = intel_free_coherent,
3381         .map_sg = intel_map_sg,
3382         .unmap_sg = intel_unmap_sg,
3383         .map_page = intel_map_page,
3384         .unmap_page = intel_unmap_page,
3385         .mapping_error = intel_mapping_error,
3386 };
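
/*
 * These ops are not called directly by drivers; they are reached through
 * the generic DMA API once dma_ops is pointed at intel_dma_ops in
 * intel_iommu_init().  Illustrative sketch only (my_dev and my_page are
 * hypothetical):
 *
 *	dma_addr_t handle = dma_map_page(my_dev, my_page, 0, PAGE_SIZE,
 *					 DMA_TO_DEVICE);
 *	if (dma_mapping_error(my_dev, handle))
 *		return -EIO;
 *	...
 *	dma_unmap_page(my_dev, handle, PAGE_SIZE, DMA_TO_DEVICE);
 */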
3387
3388 static inline int iommu_domain_cache_init(void)
3389 {
3390         int ret = 0;
3391
3392         iommu_domain_cache = kmem_cache_create("iommu_domain",
3393                                          sizeof(struct dmar_domain),
3394                                          0,
3395                                          SLAB_HWCACHE_ALIGN,
3397                                          NULL);
3398         if (!iommu_domain_cache) {
3399                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3400                 ret = -ENOMEM;
3401         }
3402
3403         return ret;
3404 }
3405
3406 static inline int iommu_devinfo_cache_init(void)
3407 {
3408         int ret = 0;
3409
3410         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3411                                          sizeof(struct device_domain_info),
3412                                          0,
3413                                          SLAB_HWCACHE_ALIGN,
3414                                          NULL);
3415         if (!iommu_devinfo_cache) {
3416                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3417                 ret = -ENOMEM;
3418         }
3419
3420         return ret;
3421 }
3422
3423 static int __init iommu_init_mempool(void)
3424 {
3425         int ret;
3426         ret = iommu_iova_cache_init();
3427         if (ret)
3428                 return ret;
3429
3430         ret = iommu_domain_cache_init();
3431         if (ret)
3432                 goto domain_error;
3433
3434         ret = iommu_devinfo_cache_init();
3435         if (!ret)
3436                 return ret;
3437
3438         kmem_cache_destroy(iommu_domain_cache);
3439 domain_error:
3440         iommu_iova_cache_destroy();
3441
3442         return -ENOMEM;
3443 }
3444
3445 static void __init iommu_exit_mempool(void)
3446 {
3447         kmem_cache_destroy(iommu_devinfo_cache);
3448         kmem_cache_destroy(iommu_domain_cache);
3449         iommu_iova_cache_destroy();
3450 }
3451
3452 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3453 {
3454         struct dmar_drhd_unit *drhd;
3455         u32 vtbar;
3456         int rc;
3457
3458         /* We know that this device on this chipset has its own IOMMU.
3459          * If we find it under a different IOMMU, then the BIOS is lying
3460          * to us. Hope that the IOMMU for this device is actually
3461          * disabled, and it needs no translation...
3462          */
3463         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3464         if (rc) {
3465                 /* "can't" happen */
3466                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3467                 return;
3468         }
3469         vtbar &= 0xffff0000;
3470
3471         /* we know that this iommu should be at offset 0xa000 from vtbar */
3472         drhd = dmar_find_matched_drhd_unit(pdev);
3473         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3474                             TAINT_FIRMWARE_WORKAROUND,
3475                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3476                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3477 }
3478 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3479
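/*
 * Decide which DRHD units can be ignored: units whose device scope is
 * empty, and graphics-only units when dmar_map_gfx is clear (in which
 * case their devices are marked with DUMMY_DEVICE_DOMAIN_INFO so they
 * bypass translation).
 */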
3480 static void __init init_no_remapping_devices(void)
3481 {
3482         struct dmar_drhd_unit *drhd;
3483         struct device *dev;
3484         int i;
3485
3486         for_each_drhd_unit(drhd) {
3487                 if (!drhd->include_all) {
3488                         for_each_active_dev_scope(drhd->devices,
3489                                                   drhd->devices_cnt, i, dev)
3490                                 break;
3491                         /* ignore DMAR unit if no devices exist */
3492                         if (i == drhd->devices_cnt)
3493                                 drhd->ignored = 1;
3494                 }
3495         }
3496
3497         for_each_active_drhd_unit(drhd) {
3498                 if (drhd->include_all)
3499                         continue;
3500
3501                 for_each_active_dev_scope(drhd->devices,
3502                                           drhd->devices_cnt, i, dev)
3503                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3504                                 break;
3505                 if (i < drhd->devices_cnt)
3506                         continue;
3507
3508                 /* This IOMMU has *only* gfx devices. Either bypass it or
3509                    set the gfx_mapped flag, as appropriate */
3510                 if (dmar_map_gfx) {
3511                         intel_iommu_gfx_mapped = 1;
3512                 } else {
3513                         drhd->ignored = 1;
3514                         for_each_active_dev_scope(drhd->devices,
3515                                                   drhd->devices_cnt, i, dev)
3516                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3517                 }
3518         }
3519 }
3520
3521 #ifdef CONFIG_SUSPEND
3522 static int init_iommu_hw(void)
3523 {
3524         struct dmar_drhd_unit *drhd;
3525         struct intel_iommu *iommu = NULL;
3526
3527         for_each_active_iommu(iommu, drhd)
3528                 if (iommu->qi)
3529                         dmar_reenable_qi(iommu);
3530
3531         for_each_iommu(iommu, drhd) {
3532                 if (drhd->ignored) {
3533                         /*
3534                          * we always have to disable PMRs or DMA may fail on
3535                          * this device
3536                          */
3537                         if (force_on)
3538                                 iommu_disable_protect_mem_regions(iommu);
3539                         continue;
3540                 }
3541
3542                 iommu_flush_write_buffer(iommu);
3543
3544                 iommu_set_root_entry(iommu);
3545
3546                 iommu->flush.flush_context(iommu, 0, 0, 0,
3547                                            DMA_CCMD_GLOBAL_INVL);
3548                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3549                 iommu_enable_translation(iommu);
3550                 iommu_disable_protect_mem_regions(iommu);
3551         }
3552
3553         return 0;
3554 }
3555
3556 static void iommu_flush_all(void)
3557 {
3558         struct dmar_drhd_unit *drhd;
3559         struct intel_iommu *iommu;
3560
3561         for_each_active_iommu(iommu, drhd) {
3562                 iommu->flush.flush_context(iommu, 0, 0, 0,
3563                                            DMA_CCMD_GLOBAL_INVL);
3564                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3565                                          DMA_TLB_GLOBAL_FLUSH);
3566         }
3567 }
3568
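/*
 * System suspend: flush all context/IOTLB caches, disable translation
 * and save the fault-event registers of every active IOMMU so they can
 * be restored by iommu_resume().
 */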
3569 static int iommu_suspend(void)
3570 {
3571         struct dmar_drhd_unit *drhd;
3572         struct intel_iommu *iommu = NULL;
3573         unsigned long flag;
3574
3575         for_each_active_iommu(iommu, drhd) {
3576                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3577                                                  GFP_ATOMIC);
3578                 if (!iommu->iommu_state)
3579                         goto nomem;
3580         }
3581
3582         iommu_flush_all();
3583
3584         for_each_active_iommu(iommu, drhd) {
3585                 iommu_disable_translation(iommu);
3586
3587                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3588
3589                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3590                         readl(iommu->reg + DMAR_FECTL_REG);
3591                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3592                         readl(iommu->reg + DMAR_FEDATA_REG);
3593                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3594                         readl(iommu->reg + DMAR_FEADDR_REG);
3595                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3596                         readl(iommu->reg + DMAR_FEUADDR_REG);
3597
3598                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3599         }
3600         return 0;
3601
3602 nomem:
3603         for_each_active_iommu(iommu, drhd)
3604                 kfree(iommu->iommu_state);
3605
3606         return -ENOMEM;
3607 }
3608
3609 static void iommu_resume(void)
3610 {
3611         struct dmar_drhd_unit *drhd;
3612         struct intel_iommu *iommu = NULL;
3613         unsigned long flag;
3614
3615         if (init_iommu_hw()) {
3616                 if (force_on)
3617                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3618                 else
3619                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3620                 return;
3621         }
3622
3623         for_each_active_iommu(iommu, drhd) {
3624
3625                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3626
3627                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3628                         iommu->reg + DMAR_FECTL_REG);
3629                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3630                         iommu->reg + DMAR_FEDATA_REG);
3631                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3632                         iommu->reg + DMAR_FEADDR_REG);
3633                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3634                         iommu->reg + DMAR_FEUADDR_REG);
3635
3636                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3637         }
3638
3639         for_each_active_iommu(iommu, drhd)
3640                 kfree(iommu->iommu_state);
3641 }
3642
3643 static struct syscore_ops iommu_syscore_ops = {
3644         .resume         = iommu_resume,
3645         .suspend        = iommu_suspend,
3646 };
3647
3648 static void __init init_iommu_pm_ops(void)
3649 {
3650         register_syscore_ops(&iommu_syscore_ops);
3651 }
3652
3653 #else
3654 static inline void init_iommu_pm_ops(void) {}
3655 #endif  /* CONFIG_SUSPEND */
3656
3657
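/*
 * Record a Reserved Memory Region Reporting (RMRR) structure from the
 * DMAR table, together with its device scope, so the reserved range can
 * later be identity mapped for the listed devices.
 */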
3658 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3659 {
3660         struct acpi_dmar_reserved_memory *rmrr;
3661         struct dmar_rmrr_unit *rmrru;
3662
3663         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3664         if (!rmrru)
3665                 return -ENOMEM;
3666
3667         rmrru->hdr = header;
3668         rmrr = (struct acpi_dmar_reserved_memory *)header;
3669         rmrru->base_address = rmrr->base_address;
3670         rmrru->end_address = rmrr->end_address;
3671         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3672                                 ((void *)rmrr) + rmrr->header.length,
3673                                 &rmrru->devices_cnt);
3674         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3675                 kfree(rmrru);
3676                 return -ENOMEM;
3677         }
3678
3679         list_add(&rmrru->list, &dmar_rmrr_units);
3680
3681         return 0;
3682 }
3683
3684 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3685 {
3686         struct dmar_atsr_unit *atsru;
3687         struct acpi_dmar_atsr *tmp;
3688
3689         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3690                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3691                 if (atsr->segment != tmp->segment)
3692                         continue;
3693                 if (atsr->header.length != tmp->header.length)
3694                         continue;
3695                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3696                         return atsru;
3697         }
3698
3699         return NULL;
3700 }
3701
3702 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3703 {
3704         struct acpi_dmar_atsr *atsr;
3705         struct dmar_atsr_unit *atsru;
3706
3707         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3708                 return 0;
3709
3710         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3711         atsru = dmar_find_atsr(atsr);
3712         if (atsru)
3713                 return 0;
3714
3715         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3716         if (!atsru)
3717                 return -ENOMEM;
3718
3719         /*
3720          * If memory is allocated from slab by ACPI _DSM method, we need to
3721          * copy the memory content because the memory buffer will be freed
3722          * on return.
3723          */
3724         atsru->hdr = (void *)(atsru + 1);
3725         memcpy(atsru->hdr, hdr, hdr->length);
3726         atsru->include_all = atsr->flags & 0x1;
3727         if (!atsru->include_all) {
3728                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3729                                 (void *)atsr + atsr->header.length,
3730                                 &atsru->devices_cnt);
3731                 if (atsru->devices_cnt && atsru->devices == NULL) {
3732                         kfree(atsru);
3733                         return -ENOMEM;
3734                 }
3735         }
3736
3737         list_add_rcu(&atsru->list, &dmar_atsr_units);
3738
3739         return 0;
3740 }
3741
3742 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3743 {
3744         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3745         kfree(atsru);
3746 }
3747
3748 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3749 {
3750         struct acpi_dmar_atsr *atsr;
3751         struct dmar_atsr_unit *atsru;
3752
3753         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3754         atsru = dmar_find_atsr(atsr);
3755         if (atsru) {
3756                 list_del_rcu(&atsru->list);
3757                 synchronize_rcu();
3758                 intel_iommu_free_atsr(atsru);
3759         }
3760
3761         return 0;
3762 }
3763
3764 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3765 {
3766         int i;
3767         struct device *dev;
3768         struct acpi_dmar_atsr *atsr;
3769         struct dmar_atsr_unit *atsru;
3770
3771         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3772         atsru = dmar_find_atsr(atsr);
3773         if (!atsru)
3774                 return 0;
3775
3776         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3777                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3778                                           i, dev)
3779                         return -EBUSY;
3780
3781         return 0;
3782 }
3783
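/*
 * Bring up a hot-added DMAR unit: verify that its capabilities match the
 * features already in use (pass-through, snooping, superpages), allocate
 * its domain bookkeeping and root entry, then enable queued invalidation,
 * interrupts and translation, attaching si_domain if one exists.
 */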
3784 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3785 {
3786         int sp, ret = 0;
3787         struct intel_iommu *iommu = dmaru->iommu;
3788
3789         if (g_iommus[iommu->seq_id])
3790                 return 0;
3791
3792         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3793                 pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3794                         iommu->name);
3795                 return -ENXIO;
3796         }
3797         if (!ecap_sc_support(iommu->ecap) &&
3798             domain_update_iommu_snooping(iommu)) {
3799                 pr_warn("IOMMU: %s doesn't support snooping.\n",
3800                         iommu->name);
3801                 return -ENXIO;
3802         }
3803         sp = domain_update_iommu_superpage(iommu) - 1;
3804         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3805                 pr_warn("IOMMU: %s doesn't support large page.\n",
3806                         iommu->name);
3807                 return -ENXIO;
3808         }
3809
3810         /*
3811          * Disable translation if already enabled prior to OS handover.
3812          */
3813         if (iommu->gcmd & DMA_GCMD_TE)
3814                 iommu_disable_translation(iommu);
3815
3816         g_iommus[iommu->seq_id] = iommu;
3817         ret = iommu_init_domains(iommu);
3818         if (ret == 0)
3819                 ret = iommu_alloc_root_entry(iommu);
3820         if (ret)
3821                 goto out;
3822
3823         if (dmaru->ignored) {
3824                 /*
3825                  * we always have to disable PMRs or DMA may fail on this device
3826                  */
3827                 if (force_on)
3828                         iommu_disable_protect_mem_regions(iommu);
3829                 return 0;
3830         }
3831
3832         intel_iommu_init_qi(iommu);
3833         iommu_flush_write_buffer(iommu);
3834         ret = dmar_set_interrupt(iommu);
3835         if (ret)
3836                 goto disable_iommu;
3837
3838         iommu_set_root_entry(iommu);
3839         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3840         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3841         iommu_enable_translation(iommu);
3842
3843         if (si_domain) {
3844                 ret = iommu_attach_domain(si_domain, iommu);
3845                 if (ret < 0 || si_domain->id != ret)
3846                         goto disable_iommu;
3847                 domain_attach_iommu(si_domain, iommu);
3848         }
3849
3850         iommu_disable_protect_mem_regions(iommu);
3851         return 0;
3852
3853 disable_iommu:
3854         disable_dmar_iommu(iommu);
3855 out:
3856         free_dmar_iommu(iommu);
3857         return ret;
3858 }
3859
3860 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3861 {
3862         int ret = 0;
3863         struct intel_iommu *iommu = dmaru->iommu;
3864
3865         if (!intel_iommu_enabled)
3866                 return 0;
3867         if (iommu == NULL)
3868                 return -EINVAL;
3869
3870         if (insert) {
3871                 ret = intel_iommu_add(dmaru);
3872         } else {
3873                 disable_dmar_iommu(iommu);
3874                 free_dmar_iommu(iommu);
3875         }
3876
3877         return ret;
3878 }
3879
3880 static void intel_iommu_free_dmars(void)
3881 {
3882         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3883         struct dmar_atsr_unit *atsru, *atsr_n;
3884
3885         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3886                 list_del(&rmrru->list);
3887                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3888                 kfree(rmrru);
3889         }
3890
3891         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3892                 list_del(&atsru->list);
3893                 intel_iommu_free_atsr(atsru);
3894         }
3895 }
3896
3897 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3898 {
3899         int i, ret = 1;
3900         struct pci_bus *bus;
3901         struct pci_dev *bridge = NULL;
3902         struct device *tmp;
3903         struct acpi_dmar_atsr *atsr;
3904         struct dmar_atsr_unit *atsru;
3905
3906         dev = pci_physfn(dev);
3907         for (bus = dev->bus; bus; bus = bus->parent) {
3908                 bridge = bus->self;
3909                 if (!bridge || !pci_is_pcie(bridge) ||
3910                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3911                         return 0;
3912                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3913                         break;
3914         }
3915         if (!bridge)
3916                 return 0;
3917
3918         rcu_read_lock();
3919         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3920                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3921                 if (atsr->segment != pci_domain_nr(dev->bus))
3922                         continue;
3923
3924                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3925                         if (tmp == &bridge->dev)
3926                                 goto out;
3927
3928                 if (atsru->include_all)
3929                         goto out;
3930         }
3931         ret = 0;
3932 out:
3933         rcu_read_unlock();
3934
3935         return ret;
3936 }
3937
3938 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3939 {
3940         int ret = 0;
3941         struct dmar_rmrr_unit *rmrru;
3942         struct dmar_atsr_unit *atsru;
3943         struct acpi_dmar_atsr *atsr;
3944         struct acpi_dmar_reserved_memory *rmrr;
3945
3946         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3947                 return 0;
3948
3949         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3950                 rmrr = container_of(rmrru->hdr,
3951                                     struct acpi_dmar_reserved_memory, header);
3952                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3953                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3954                                 ((void *)rmrr) + rmrr->header.length,
3955                                 rmrr->segment, rmrru->devices,
3956                                 rmrru->devices_cnt);
3957                         if (ret < 0)
3958                                 return ret;
3959                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3960                         dmar_remove_dev_scope(info, rmrr->segment,
3961                                 rmrru->devices, rmrru->devices_cnt);
3962                 }
3963         }
3964
3965         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3966                 if (atsru->include_all)
3967                         continue;
3968
3969                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3970                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3971                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3972                                         (void *)atsr + atsr->header.length,
3973                                         atsr->segment, atsru->devices,
3974                                         atsru->devices_cnt);
3975                         if (ret > 0)
3976                                 break;
3977                         else if (ret < 0)
3978                                 return ret;
3979                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3980                         if (dmar_remove_dev_scope(info, atsr->segment,
3981                                         atsru->devices, atsru->devices_cnt))
3982                                 break;
3983                 }
3984         }
3985
3986         return 0;
3987 }
3988
3989 /*
3990  * Here we only respond to a device being unbound from its driver.
3991  *
3992  * A newly added device is not attached to its DMAR domain here yet; that
3993  * will happen when the device is first mapped to an iova.
3994  */
3995 static int device_notifier(struct notifier_block *nb,
3996                                   unsigned long action, void *data)
3997 {
3998         struct device *dev = data;
3999         struct dmar_domain *domain;
4000
4001         if (iommu_dummy(dev))
4002                 return 0;
4003
4004         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4005                 return 0;
4006
4007         domain = find_domain(dev);
4008         if (!domain)
4009                 return 0;
4010
4011         down_read(&dmar_global_lock);
4012         domain_remove_one_dev_info(domain, dev);
4013         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4014                 domain_exit(domain);
4015         up_read(&dmar_global_lock);
4016
4017         return 0;
4018 }
4019
4020 static struct notifier_block device_nb = {
4021         .notifier_call = device_notifier,
4022 };
4023
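/*
 * Memory hotplug notifier: extend the si_domain identity map when memory
 * goes online, and unmap/flush the corresponding IOVA ranges when it is
 * taken offline again.
 */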
4024 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4025                                        unsigned long val, void *v)
4026 {
4027         struct memory_notify *mhp = v;
4028         unsigned long long start, end;
4029         unsigned long start_vpfn, last_vpfn;
4030
4031         switch (val) {
4032         case MEM_GOING_ONLINE:
4033                 start = mhp->start_pfn << PAGE_SHIFT;
4034                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4035                 if (iommu_domain_identity_map(si_domain, start, end)) {
4036                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4037                                 start, end);
4038                         return NOTIFY_BAD;
4039                 }
4040                 break;
4041
4042         case MEM_OFFLINE:
4043         case MEM_CANCEL_ONLINE:
4044                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4045                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4046                 while (start_vpfn <= last_vpfn) {
4047                         struct iova *iova;
4048                         struct dmar_drhd_unit *drhd;
4049                         struct intel_iommu *iommu;
4050                         struct page *freelist;
4051
4052                         iova = find_iova(&si_domain->iovad, start_vpfn);
4053                         if (iova == NULL) {
4054                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
4055                                          start_vpfn);
4056                                 break;
4057                         }
4058
4059                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4060                                                      start_vpfn, last_vpfn);
4061                         if (iova == NULL) {
4062                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4063                                         start_vpfn, last_vpfn);
4064                                 return NOTIFY_BAD;
4065                         }
4066
4067                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4068                                                iova->pfn_hi);
4069
4070                         rcu_read_lock();
4071                         for_each_active_iommu(iommu, drhd)
4072                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
4073                                         iova->pfn_lo, iova_size(iova),
4074                                         !freelist, 0);
4075                         rcu_read_unlock();
4076                         dma_free_pagelist(freelist);
4077
4078                         start_vpfn = iova->pfn_hi + 1;
4079                         free_iova_mem(iova);
4080                 }
4081                 break;
4082         }
4083
4084         return NOTIFY_OK;
4085 }
4086
4087 static struct notifier_block intel_iommu_memory_nb = {
4088         .notifier_call = intel_iommu_memory_notifier,
4089         .priority = 0
4090 };
4091
4092
4093 static ssize_t intel_iommu_show_version(struct device *dev,
4094                                         struct device_attribute *attr,
4095                                         char *buf)
4096 {
4097         struct intel_iommu *iommu = dev_get_drvdata(dev);
4098         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4099         return sprintf(buf, "%d:%d\n",
4100                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4101 }
4102 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4103
4104 static ssize_t intel_iommu_show_address(struct device *dev,
4105                                         struct device_attribute *attr,
4106                                         char *buf)
4107 {
4108         struct intel_iommu *iommu = dev_get_drvdata(dev);
4109         return sprintf(buf, "%llx\n", iommu->reg_phys);
4110 }
4111 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4112
4113 static ssize_t intel_iommu_show_cap(struct device *dev,
4114                                     struct device_attribute *attr,
4115                                     char *buf)
4116 {
4117         struct intel_iommu *iommu = dev_get_drvdata(dev);
4118         return sprintf(buf, "%llx\n", iommu->cap);
4119 }
4120 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4121
4122 static ssize_t intel_iommu_show_ecap(struct device *dev,
4123                                     struct device_attribute *attr,
4124                                     char *buf)
4125 {
4126         struct intel_iommu *iommu = dev_get_drvdata(dev);
4127         return sprintf(buf, "%llx\n", iommu->ecap);
4128 }
4129 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4130
4131 static struct attribute *intel_iommu_attrs[] = {
4132         &dev_attr_version.attr,
4133         &dev_attr_address.attr,
4134         &dev_attr_cap.attr,
4135         &dev_attr_ecap.attr,
4136         NULL,
4137 };
4138
4139 static struct attribute_group intel_iommu_group = {
4140         .name = "intel-iommu",
4141         .attrs = intel_iommu_attrs,
4142 };
4143
4144 const struct attribute_group *intel_iommu_groups[] = {
4145         &intel_iommu_group,
4146         NULL,
4147 };
4148
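/*
 * Main VT-d initialisation: parse the DMAR table, set up every IOMMU via
 * init_dmars(), install intel_dma_ops as the DMA API backend and register
 * the bus/memory notifiers and per-IOMMU sysfs devices.
 */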
4149 int __init intel_iommu_init(void)
4150 {
4151         int ret = -ENODEV;
4152         struct dmar_drhd_unit *drhd;
4153         struct intel_iommu *iommu;
4154
4155         /* VT-d is required for a TXT/tboot launch, so enforce that */
4156         force_on = tboot_force_iommu();
4157
4158         if (iommu_init_mempool()) {
4159                 if (force_on)
4160                         panic("tboot: Failed to initialize iommu memory\n");
4161                 return -ENOMEM;
4162         }
4163
4164         down_write(&dmar_global_lock);
4165         if (dmar_table_init()) {
4166                 if (force_on)
4167                         panic("tboot: Failed to initialize DMAR table\n");
4168                 goto out_free_dmar;
4169         }
4170
4171         /*
4172          * Disable translation if already enabled prior to OS handover.
4173          */
4174         for_each_active_iommu(iommu, drhd)
4175                 if (iommu->gcmd & DMA_GCMD_TE)
4176                         iommu_disable_translation(iommu);
4177
4178         if (dmar_dev_scope_init() < 0) {
4179                 if (force_on)
4180                         panic("tboot: Failed to initialize DMAR device scope\n");
4181                 goto out_free_dmar;
4182         }
4183
4184         if (no_iommu || dmar_disabled)
4185                 goto out_free_dmar;
4186
4187         if (list_empty(&dmar_rmrr_units))
4188                 printk(KERN_INFO "DMAR: No RMRR found\n");
4189
4190         if (list_empty(&dmar_atsr_units))
4191                 printk(KERN_INFO "DMAR: No ATSR found\n");
4192
4193         if (dmar_init_reserved_ranges()) {
4194                 if (force_on)
4195                         panic("tboot: Failed to reserve iommu ranges\n");
4196                 goto out_free_reserved_range;
4197         }
4198
4199         init_no_remapping_devices();
4200
4201         ret = init_dmars();
4202         if (ret) {
4203                 if (force_on)
4204                         panic("tboot: Failed to initialize DMARs\n");
4205                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4206                 goto out_free_reserved_range;
4207         }
4208         up_write(&dmar_global_lock);
4209         printk(KERN_INFO
4210         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4211
4212         init_timer(&unmap_timer);
4213 #ifdef CONFIG_SWIOTLB
4214         swiotlb = 0;
4215 #endif
4216         dma_ops = &intel_dma_ops;
4217
4218         init_iommu_pm_ops();
4219
4220         for_each_active_iommu(iommu, drhd)
4221                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4222                                                        intel_iommu_groups,
4223                                                        iommu->name);
4224
4225         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4226         bus_register_notifier(&pci_bus_type, &device_nb);
4227         if (si_domain && !hw_pass_through)
4228                 register_memory_notifier(&intel_iommu_memory_nb);
4229
4230         intel_iommu_enabled = 1;
4231
4232         return 0;
4233
4234 out_free_reserved_range:
4235         put_iova_domain(&reserved_iova_list);
4236 out_free_dmar:
4237         intel_iommu_free_dmars();
4238         up_write(&dmar_global_lock);
4239         iommu_exit_mempool();
4240         return ret;
4241 }
4242
4243 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4244 {
4245         struct intel_iommu *iommu = opaque;
4246
4247         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4248         return 0;
4249 }
4250
4251 /*
4252  * NB - intel-iommu lacks any sort of reference counting for the users of
4253  * dependent devices.  If multiple endpoints have intersecting dependent
4254  * devices, unbinding the driver from any one of them will possibly leave
4255  * the others unable to operate.
4256  */
4257 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4258                                            struct device *dev)
4259 {
4260         if (!iommu || !dev || !dev_is_pci(dev))
4261                 return;
4262
4263         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4264 }
4265
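/*
 * Detach one device from a domain: remove its device_domain_info, tear
 * down its context entry and dependent-device contexts, and release the
 * IOMMU from the domain when no other device on that IOMMU still uses it.
 */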
4266 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4267                                        struct device *dev)
4268 {
4269         struct device_domain_info *info, *tmp;
4270         struct intel_iommu *iommu;
4271         unsigned long flags;
4272         bool found = false;
4273         u8 bus, devfn;
4274
4275         iommu = device_to_iommu(dev, &bus, &devfn);
4276         if (!iommu)
4277                 return;
4278
4279         spin_lock_irqsave(&device_domain_lock, flags);
4280         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4281                 if (info->iommu == iommu && info->bus == bus &&
4282                     info->devfn == devfn) {
4283                         unlink_domain_info(info);
4284                         spin_unlock_irqrestore(&device_domain_lock, flags);
4285
4286                         iommu_disable_dev_iotlb(info);
4287                         iommu_detach_dev(iommu, info->bus, info->devfn);
4288                         iommu_detach_dependent_devices(iommu, dev);
4289                         free_devinfo_mem(info);
4290
4291                         spin_lock_irqsave(&device_domain_lock, flags);
4292
4293                         if (found)
4294                                 break;
4295                         else
4296                                 continue;
4297                 }
4298
4299                 /* If there are no other devices under the same iommu
4300                  * owned by this domain, clear this iommu from iommu_bmp and
4301                  * update the iommu count and coherency.
4302                  */
4303                 if (info->iommu == iommu)
4304                         found = true;
4305         }
4306
4307         spin_unlock_irqrestore(&device_domain_lock, flags);
4308
4309         if (!found) {
4310                 domain_detach_iommu(domain, iommu);
4311                 if (!domain_type_is_vm_or_si(domain))
4312                         iommu_detach_domain(domain, iommu);
4313         }
4314 }
4315
4316 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4317 {
4318         int adjust_width;
4319
4320         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4321                         DMA_32BIT_PFN);
4322         domain_reserve_special_ranges(domain);
4323
4324         /* calculate AGAW */
4325         domain->gaw = guest_width;
4326         adjust_width = guestwidth_to_adjustwidth(guest_width);
4327         domain->agaw = width_to_agaw(adjust_width);
4328
4329         domain->iommu_coherency = 0;
4330         domain->iommu_snooping = 0;
4331         domain->iommu_superpage = 0;
4332         domain->max_addr = 0;
4333
4334         /* always allocate the top pgd */
4335         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4336         if (!domain->pgd)
4337                 return -ENOMEM;
4338         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4339         return 0;
4340 }
4341
4342 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4343 {
4344         struct dmar_domain *dmar_domain;
4345         struct iommu_domain *domain;
4346
4347         if (type != IOMMU_DOMAIN_UNMANAGED)
4348                 return NULL;
4349
4350         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4351         if (!dmar_domain) {
4352                 printk(KERN_ERR
4353                         "intel_iommu_domain_alloc: dmar_domain == NULL\n");
4354                 return NULL;
4355         }
4356         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4357                 printk(KERN_ERR
4358                         "intel_iommu_domain_alloc: md_domain_init() failed\n");
4359                 domain_exit(dmar_domain);
4360                 return NULL;
4361         }
4362         domain_update_iommu_cap(dmar_domain);
4363
4364         domain = &dmar_domain->domain;
4365         domain->geometry.aperture_start = 0;
4366         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4367         domain->geometry.force_aperture = true;
4368
4369         return domain;
4370 }
4371
4372 static void intel_iommu_domain_free(struct iommu_domain *domain)
4373 {
4374         domain_exit(to_dmar_domain(domain));
4375 }
4376
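/*
 * IOMMU-API attach: move the device out of any previous domain, check
 * that the domain's address width fits this IOMMU, trim surplus
 * page-table levels and finally add the device with multi-level
 * translation.
 */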
4377 static int intel_iommu_attach_device(struct iommu_domain *domain,
4378                                      struct device *dev)
4379 {
4380         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4381         struct intel_iommu *iommu;
4382         int addr_width;
4383         u8 bus, devfn;
4384
4385         if (device_is_rmrr_locked(dev)) {
4386                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4387                 return -EPERM;
4388         }
4389
4390         /* normally dev is not mapped */
4391         if (unlikely(domain_context_mapped(dev))) {
4392                 struct dmar_domain *old_domain;
4393
4394                 old_domain = find_domain(dev);
4395                 if (old_domain) {
4396                         if (domain_type_is_vm_or_si(dmar_domain))
4397                                 domain_remove_one_dev_info(old_domain, dev);
4398                         else
4399                                 domain_remove_dev_info(old_domain);
4400
4401                         if (!domain_type_is_vm_or_si(old_domain) &&
4402                              list_empty(&old_domain->devices))
4403                                 domain_exit(old_domain);
4404                 }
4405         }
4406
4407         iommu = device_to_iommu(dev, &bus, &devfn);
4408         if (!iommu)
4409                 return -ENODEV;
4410
4411         /* check if this iommu agaw is sufficient for max mapped address */
4412         addr_width = agaw_to_width(iommu->agaw);
4413         if (addr_width > cap_mgaw(iommu->cap))
4414                 addr_width = cap_mgaw(iommu->cap);
4415
4416         if (dmar_domain->max_addr > (1LL << addr_width)) {
4417                 printk(KERN_ERR "%s: iommu width (%d) is not "
4418                        "sufficient for the mapped address (%llx)\n",
4419                        __func__, addr_width, dmar_domain->max_addr);
4420                 return -EFAULT;
4421         }
4422         dmar_domain->gaw = addr_width;
4423
4424         /*
4425          * Knock out extra levels of page tables if necessary
4426          */
4427         while (iommu->agaw < dmar_domain->agaw) {
4428                 struct dma_pte *pte;
4429
4430                 pte = dmar_domain->pgd;
4431                 if (dma_pte_present(pte)) {
4432                         dmar_domain->pgd = (struct dma_pte *)
4433                                 phys_to_virt(dma_pte_addr(pte));
4434                         free_pgtable_page(pte);
4435                 }
4436                 dmar_domain->agaw--;
4437         }
4438
4439         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4440 }
4441
4442 static void intel_iommu_detach_device(struct iommu_domain *domain,
4443                                       struct device *dev)
4444 {
4445         domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4446 }
4447
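/*
 * IOMMU-API map: translate IOMMU_READ/WRITE/CACHE into VT-d PTE bits,
 * grow the domain's max_addr (bounded by its address width) and install
 * the mapping page by page.
 */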
4448 static int intel_iommu_map(struct iommu_domain *domain,
4449                            unsigned long iova, phys_addr_t hpa,
4450                            size_t size, int iommu_prot)
4451 {
4452         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4453         u64 max_addr;
4454         int prot = 0;
4455         int ret;
4456
4457         if (iommu_prot & IOMMU_READ)
4458                 prot |= DMA_PTE_READ;
4459         if (iommu_prot & IOMMU_WRITE)
4460                 prot |= DMA_PTE_WRITE;
4461         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4462                 prot |= DMA_PTE_SNP;
4463
4464         max_addr = iova + size;
4465         if (dmar_domain->max_addr < max_addr) {
4466                 u64 end;
4467
4468                 /* check if minimum agaw is sufficient for mapped address */
4469                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4470                 if (end < max_addr) {
4471                         printk(KERN_ERR
4472                                "%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4473                                __func__, dmar_domain->gaw, max_addr);
4474                         return -EFAULT;
4475                 }
4476                 dmar_domain->max_addr = max_addr;
4477         }
4478         /* Round up size to the next multiple of VTD_PAGE_SIZE, if it and
4479            the low bits of hpa would take us onto the next page */
4480         size = aligned_nrpages(hpa, size);
4481         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4482                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4483         return ret;
4484 }
4485
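/*
 * Unmap at least @size bytes at @iova: clear the page tables, flush the
 * IOTLB on every IOMMU this domain is attached to, then free the page
 * table pages that were unlinked.  The size actually unmapped is returned,
 * and may exceed @size for superpage mappings.
 */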
4486 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4487                                 unsigned long iova, size_t size)
4488 {
4489         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4490         struct page *freelist = NULL;
4491         struct intel_iommu *iommu;
4492         unsigned long start_pfn, last_pfn;
4493         unsigned int npages;
4494         int iommu_id, num, ndomains, level = 0;
4495
4496         /* Cope with horrid API which requires us to unmap more than the
4497            size argument if it happens to be a large-page mapping. */
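        /* e.g. a request to unmap 4KiB that lies inside a 2MiB superpage is
           widened to the whole 2MiB, and that larger size is returned. */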
4498         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4499                 BUG();
4500
4501         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4502                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4503
4504         start_pfn = iova >> VTD_PAGE_SHIFT;
4505         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4506
4507         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4508
4509         npages = last_pfn - start_pfn + 1;
4510
4511         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4512                 iommu = g_iommus[iommu_id];
4513
4514                 /*
4515                  * find bit position of dmar_domain
4516                  */
4517                 ndomains = cap_ndoms(iommu->cap);
4518                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4519                         if (iommu->domains[num] == dmar_domain)
4520                                 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4521                                                       npages, !freelist, 0);
4522                 }
4523
4524         }
4525
4526         dma_free_pagelist(freelist);
4527
4528         if (dmar_domain->max_addr == iova + size)
4529                 dmar_domain->max_addr = iova;
4530
4531         return size;
4532 }
4533
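/*
 * Translate an IOVA to the physical address stored in the leaf PTE
 * (page aligned; the offset within the page is not added back), or
 * return 0 if nothing is mapped there.
 */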
4534 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4535                                             dma_addr_t iova)
4536 {
4537         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4538         struct dma_pte *pte;
4539         int level = 0;
4540         u64 phys = 0;
4541
4542         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4543         if (pte)
4544                 phys = dma_pte_addr(pte);
4545
4546         return phys;
4547 }
4548
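/* Report capabilities that do not depend on a particular domain. */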
4549 static bool intel_iommu_capable(enum iommu_cap cap)
4550 {
4551         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4552                 return domain_update_iommu_snooping(NULL) == 1;
4553         if (cap == IOMMU_CAP_INTR_REMAP)
4554                 return irq_remapping_enabled == 1;
4555
4556         return false;
4557 }
4558
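/*
 * Called by the IOMMU core when a device appears on the bus: create the
 * sysfs link to the device's IOMMU and put the device into an IOMMU
 * group, allocating one if necessary.
 */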
4559 static int intel_iommu_add_device(struct device *dev)
4560 {
4561         struct intel_iommu *iommu;
4562         struct iommu_group *group;
4563         u8 bus, devfn;
4564
4565         iommu = device_to_iommu(dev, &bus, &devfn);
4566         if (!iommu)
4567                 return -ENODEV;
4568
4569         iommu_device_link(iommu->iommu_dev, dev);
4570
4571         group = iommu_group_get_for_dev(dev);
4572
4573         if (IS_ERR(group))
4574                 return PTR_ERR(group);
4575
4576         iommu_group_put(group);
4577         return 0;
4578 }
4579
4580 static void intel_iommu_remove_device(struct device *dev)
4581 {
4582         struct intel_iommu *iommu;
4583         u8 bus, devfn;
4584
4585         iommu = device_to_iommu(dev, &bus, &devfn);
4586         if (!iommu)
4587                 return;
4588
4589         iommu_group_remove_device(dev);
4590
4591         iommu_device_unlink(iommu->iommu_dev, dev);
4592 }
4593
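/*
 * The callback table handed to the generic IOMMU core; it is registered
 * for the PCI bus with bus_set_iommu(&pci_bus_type, &intel_iommu_ops).
 */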
4594 static const struct iommu_ops intel_iommu_ops = {
4595         .capable        = intel_iommu_capable,
4596         .domain_alloc   = intel_iommu_domain_alloc,
4597         .domain_free    = intel_iommu_domain_free,
4598         .attach_dev     = intel_iommu_attach_device,
4599         .detach_dev     = intel_iommu_detach_device,
4600         .map            = intel_iommu_map,
4601         .unmap          = intel_iommu_unmap,
4602         .map_sg         = default_iommu_map_sg,
4603         .iova_to_phys   = intel_iommu_iova_to_phys,
4604         .add_device     = intel_iommu_add_device,
4605         .remove_device  = intel_iommu_remove_device,
4606         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4607 };
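
/*
 * Illustrative sketch only (not code from this driver): consumers such as
 * VFIO reach the callbacks above through the generic IOMMU API rather than
 * calling them directly, roughly along the lines of:
 *
 *      struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *      if (dom && !iommu_attach_device(dom, dev))
 *              iommu_map(dom, iova, phys, SZ_4K,
 *                        IOMMU_READ | IOMMU_WRITE);
 *
 * dev, iova and phys are placeholders; error handling and the matching
 * iommu_detach_device()/iommu_domain_free() cleanup are omitted.
 */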
4608
4609 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4610 {
4611         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4612         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4613         dmar_map_gfx = 0;
4614 }
4615
4616 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4617 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4618 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4619 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4620 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4621 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4622 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4623
4624 static void quirk_iommu_rwbf(struct pci_dev *dev)
4625 {
4626         /*
4627          * Mobile 4 Series Chipset neglects to set RWBF capability,
4628          * but needs it. Same seems to hold for the desktop versions.
4629          */
4630         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4631         rwbf_quirk = 1;
4632 }
4633
4634 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4635 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4636 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4637 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4638 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4639 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4640 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4641
4642 #define GGC 0x52
4643 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4644 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4645 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4646 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4647 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4648 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4649 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4650 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4651
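/*
 * On Calpella/Ironlake systems, if the BIOS did not allocate a
 * VT-d-enabled GTT (see the GGC field values above), the IGD has no
 * shadow GTT to use, so disable the IOMMU for graphics.  Otherwise fall
 * back to strict IOTLB flushing, because the GPU must be idle before a
 * flush completes.
 */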
4652 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4653 {
4654         unsigned short ggc;
4655
4656         if (pci_read_config_word(dev, GGC, &ggc))
4657                 return;
4658
4659         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4660                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4661                 dmar_map_gfx = 0;
4662         } else if (dmar_map_gfx) {
4663                 /* we have to ensure the gfx device is idle before we flush */
4664                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4665                 intel_iommu_strict = 1;
4666         }
4667 }
4668 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4669 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4670 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4671 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4672
4673 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4674    ISOCH DMAR unit for the Azalia sound device, but not give it any
4675    TLB entries, which causes it to deadlock. Check for that.  We do
4676    this in a function called from init_dmars(), instead of in a PCI
4677    quirk, because we don't want to print the obnoxious "BIOS broken"
4678    message if VT-d is actually disabled.
4679 */
4680 static void __init check_tylersburg_isoch(void)
4681 {
4682         struct pci_dev *pdev;
4683         uint32_t vtisochctrl;
4684
4685         /* If there's no Azalia in the system anyway, forget it. */
4686         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4687         if (!pdev)
4688                 return;
4689         pci_dev_put(pdev);
4690
4691         /* System Management Registers. Might be hidden, in which case
4692            we can't do the sanity check. But that's OK, because the
4693            known-broken BIOSes _don't_ actually hide it, so far. */
4694         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4695         if (!pdev)
4696                 return;
4697
4698         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4699                 pci_dev_put(pdev);
4700                 return;
4701         }
4702
4703         pci_dev_put(pdev);
4704
4705         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4706         if (vtisochctrl & 1)
4707                 return;
4708
4709         /* Drop all bits other than the number of TLB entries */
4710         vtisochctrl &= 0x1c;
4711
4712         /* If we have the recommended number of TLB entries (16), fine. */
4713         if (vtisochctrl == 0x10)
4714                 return;
4715
4716         /* Zero TLB entries? You get to ride the short bus to school. */
4717         if (!vtisochctrl) {
4718                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4719                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4720                      dmi_get_system_info(DMI_BIOS_VENDOR),
4721                      dmi_get_system_info(DMI_BIOS_VERSION),
4722                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4723                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4724                 return;
4725         }
4726
4727         printk(KERN_WARNING "DMAR: Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4728                vtisochctrl);
4729 }