drivers/iommu/intel/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to the OS by the BIOS via the DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 #include <trace/events/intel_iommu.h>
35
36 #include "../irq_remapping.h"
37
38 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
39 struct dmar_res_callback {
40         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
41         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
42         bool                    ignore_unhandled;
43         bool                    print_entry;
44 };
45
46 /*
47  * Assumptions:
48  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
49  *    before the IO devices managed by that unit.
50  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
51  *    after the IO devices managed by that unit.
52  * 3) Hotplug events are rare.
53  *
54  * Locking rules for DMA and interrupt remapping related global data structures:
55  * 1) Use dmar_global_lock in process context
56  * 2) Use RCU in interrupt context
57  */
58 DECLARE_RWSEM(dmar_global_lock);
59 LIST_HEAD(dmar_drhd_units);
60
61 struct acpi_table_header * __initdata dmar_tbl;
62 static int dmar_dev_scope_status = 1;
63 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
64
65 static int alloc_iommu(struct dmar_drhd_unit *drhd);
66 static void free_iommu(struct intel_iommu *iommu);
67
68 extern const struct iommu_ops intel_iommu_ops;
69
70 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
71 {
72         /*
73          * Add INCLUDE_ALL units at the tail, so a scan of the list will
74          * find them at the very end.
75          */
76         if (drhd->include_all)
77                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
78         else
79                 list_add_rcu(&drhd->list, &dmar_drhd_units);
80 }
81
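/*
 * Count the device scope entries of interest (ACPI namespace devices,
 * PCI endpoints and bridges) between @start and @end, and allocate a
 * matching array of struct dmar_dev_scope. Returns NULL if there are
 * no such entries or if the allocation fails.
 */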
82 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
83 {
84         struct acpi_dmar_device_scope *scope;
85
86         *cnt = 0;
87         while (start < end) {
88                 scope = start;
89                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
91                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
92                         (*cnt)++;
93                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
94                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
95                         pr_warn("Unsupported device scope\n");
96                 }
97                 start += scope->length;
98         }
99         if (*cnt == 0)
100                 return NULL;
101
102         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
103 }
104
105 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
106 {
107         int i;
108         struct device *tmp_dev;
109
110         if (*devices && *cnt) {
111                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
112                         put_device(tmp_dev);
113                 kfree(*devices);
114         }
115
116         *devices = NULL;
117         *cnt = 0;
118 }
119
120 /* Optimize out kzalloc()/kfree() for normal cases */
121 static char dmar_pci_notify_info_buf[64];
122
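/*
 * Build a dmar_pci_notify_info for @dev. For BUS_NOTIFY_ADD_DEVICE the
 * PCI path from the root bus down to the device is recorded so that it
 * can later be matched against DMAR device scope entries. A small static
 * buffer is used when it is large enough; otherwise the info is kzalloc'ed.
 */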
123 static struct dmar_pci_notify_info *
124 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
125 {
126         int level = 0;
127         size_t size;
128         struct pci_dev *tmp;
129         struct dmar_pci_notify_info *info;
130
131         BUG_ON(dev->is_virtfn);
132
133         /*
134          * Ignore devices that have a domain number higher than what can
135          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
136          */
137         if (pci_domain_nr(dev->bus) > U16_MAX)
138                 return NULL;
139
140         /* Only generate path[] for device addition event */
141         if (event == BUS_NOTIFY_ADD_DEVICE)
142                 for (tmp = dev; tmp; tmp = tmp->bus->self)
143                         level++;
144
145         size = struct_size(info, path, level);
146         if (size <= sizeof(dmar_pci_notify_info_buf)) {
147                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
148         } else {
149                 info = kzalloc(size, GFP_KERNEL);
150                 if (!info) {
151                         pr_warn("Out of memory when allocating notify_info "
152                                 "for %s.\n", pci_name(dev));
153                         if (dmar_dev_scope_status == 0)
154                                 dmar_dev_scope_status = -ENOMEM;
155                         return NULL;
156                 }
157         }
158
159         info->event = event;
160         info->dev = dev;
161         info->seg = pci_domain_nr(dev->bus);
162         info->level = level;
163         if (event == BUS_NOTIFY_ADD_DEVICE) {
164                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
165                         level--;
166                         info->path[level].bus = tmp->bus->number;
167                         info->path[level].device = PCI_SLOT(tmp->devfn);
168                         info->path[level].function = PCI_FUNC(tmp->devfn);
169                         if (pci_is_root_bus(tmp->bus))
170                                 info->bus = tmp->bus->number;
171                 }
172         }
173
174         return info;
175 }
176
177 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
178 {
179         if ((void *)info != dmar_pci_notify_info_buf)
180                 kfree(info);
181 }
182
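/*
 * Check whether the PCI path recorded in @info matches the DMAR device
 * scope path described by @bus and @path[0..count-1]. As a workaround for
 * broken RMRR entries, a single-entry path is also accepted when it
 * matches the last hop of the device's recorded path.
 */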
183 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
184                                 struct acpi_dmar_pci_path *path, int count)
185 {
186         int i;
187
188         if (info->bus != bus)
189                 goto fallback;
190         if (info->level != count)
191                 goto fallback;
192
193         for (i = 0; i < count; i++) {
194                 if (path[i].device != info->path[i].device ||
195                     path[i].function != info->path[i].function)
196                         goto fallback;
197         }
198
199         return true;
200
201 fallback:
202
203         if (count != 1)
204                 return false;
205
206         i = info->level - 1;
207         if (bus              == info->path[i].bus &&
208             path[0].device   == info->path[i].device &&
209             path[0].function == info->path[i].function) {
210                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
211                         bus, path[0].device, path[0].function);
212                 return true;
213         }
214
215         return false;
216 }
217
218 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
219 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
220                           void *start, void*end, u16 segment,
221                           struct dmar_dev_scope *devices,
222                           int devices_cnt)
223 {
224         int i, level;
225         struct device *tmp, *dev = &info->dev->dev;
226         struct acpi_dmar_device_scope *scope;
227         struct acpi_dmar_pci_path *path;
228
229         if (segment != info->seg)
230                 return 0;
231
232         for (; start < end; start += scope->length) {
233                 scope = start;
234                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
235                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
236                         continue;
237
238                 path = (struct acpi_dmar_pci_path *)(scope + 1);
239                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
240                 if (!dmar_match_pci_path(info, scope->bus, path, level))
241                         continue;
242
243                 /*
244                  * We expect devices with endpoint scope to have normal PCI
245                  * headers, and devices with bridge scope to have bridge PCI
246                  * headers.  However PCI NTB devices may be listed in the
247                  * DMAR table with bridge scope, even though they have a
248                  * normal PCI header.  NTB devices are identified by class
249                  * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
250                  * for this special case.
251                  */
252                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
253                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
254                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
255                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
256                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
257                         pr_warn("Device scope type does not match for %s\n",
258                                 pci_name(info->dev));
259                         return -EINVAL;
260                 }
261
262                 for_each_dev_scope(devices, devices_cnt, i, tmp)
263                         if (tmp == NULL) {
264                                 devices[i].bus = info->dev->bus->number;
265                                 devices[i].devfn = info->dev->devfn;
266                                 rcu_assign_pointer(devices[i].dev,
267                                                    get_device(dev));
268                                 return 1;
269                         }
270                 BUG_ON(i >= devices_cnt);
271         }
272
273         return 0;
274 }
275
276 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
277                           struct dmar_dev_scope *devices, int count)
278 {
279         int index;
280         struct device *tmp;
281
282         if (info->seg != segment)
283                 return 0;
284
285         for_each_active_dev_scope(devices, count, index, tmp)
286                 if (tmp == &info->dev->dev) {
287                         RCU_INIT_POINTER(devices[index].dev, NULL);
288                         synchronize_rcu();
289                         put_device(tmp);
290                         return 1;
291                 }
292
293         return 0;
294 }
295
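/*
 * Walk the DRHD units (skipping INCLUDE_ALL ones) and record the new
 * device in the first unit whose device scope covers it, then notify the
 * DMA remapping and interrupt remapping code about the new device.
 */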
296 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
297 {
298         int ret = 0;
299         struct dmar_drhd_unit *dmaru;
300         struct acpi_dmar_hardware_unit *drhd;
301
302         for_each_drhd_unit(dmaru) {
303                 if (dmaru->include_all)
304                         continue;
305
306                 drhd = container_of(dmaru->hdr,
307                                     struct acpi_dmar_hardware_unit, header);
308                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
309                                 ((void *)drhd) + drhd->header.length,
310                                 dmaru->segment,
311                                 dmaru->devices, dmaru->devices_cnt);
312                 if (ret)
313                         break;
314         }
315         if (ret >= 0)
316                 ret = dmar_iommu_notify_scope_dev(info);
317         if (ret < 0 && dmar_dev_scope_status == 0)
318                 dmar_dev_scope_status = ret;
319
320         if (ret >= 0)
321                 intel_irq_remap_add_device(info);
322
323         return ret;
324 }
325
326 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
327 {
328         struct dmar_drhd_unit *dmaru;
329
330         for_each_drhd_unit(dmaru)
331                 if (dmar_remove_dev_scope(info, dmaru->segment,
332                         dmaru->devices, dmaru->devices_cnt))
333                         break;
334         dmar_iommu_notify_scope_dev(info);
335 }
336
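/* Make a VF use the same MSI irq domain as its physical function (PF). */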
337 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
338 {
339         struct pci_dev *physfn = pci_physfn(pdev);
340
341         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
342 }
343
344 static int dmar_pci_bus_notifier(struct notifier_block *nb,
345                                  unsigned long action, void *data)
346 {
347         struct pci_dev *pdev = to_pci_dev(data);
348         struct dmar_pci_notify_info *info;
349
350         /* Only care about add/remove events for physical functions.
351          * For VFs we actually do the lookup based on the corresponding
352          * PF in device_to_iommu() anyway. */
353         if (pdev->is_virtfn) {
354                 /*
355                  * Ensure that the VF device inherits the irq domain of the
356                  * PF device. Ideally the device would inherit the domain
357                  * from the bus, but DMAR can have multiple units per bus
358                  * which makes this impossible. The VF 'bus' could inherit
359                  * from the PF device, but that's yet another x86'sism to
360                  * inflict on everybody else.
361                  */
362                 if (action == BUS_NOTIFY_ADD_DEVICE)
363                         vf_inherit_msi_domain(pdev);
364                 return NOTIFY_DONE;
365         }
366
367         if (action != BUS_NOTIFY_ADD_DEVICE &&
368             action != BUS_NOTIFY_REMOVED_DEVICE)
369                 return NOTIFY_DONE;
370
371         info = dmar_alloc_pci_notify_info(pdev, action);
372         if (!info)
373                 return NOTIFY_DONE;
374
375         down_write(&dmar_global_lock);
376         if (action == BUS_NOTIFY_ADD_DEVICE)
377                 dmar_pci_bus_add_dev(info);
378         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
379                 dmar_pci_bus_del_dev(info);
380         up_write(&dmar_global_lock);
381
382         dmar_free_pci_notify_info(info);
383
384         return NOTIFY_OK;
385 }
386
387 static struct notifier_block dmar_pci_bus_nb = {
388         .notifier_call = dmar_pci_bus_notifier,
389         .priority = INT_MIN,
390 };
391
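/*
 * Find an already-registered DRHD unit with the same segment and
 * register base address as @drhd, if any.
 */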
392 static struct dmar_drhd_unit *
393 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
394 {
395         struct dmar_drhd_unit *dmaru;
396
397         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
398                                 dmar_rcu_check())
399                 if (dmaru->segment == drhd->segment &&
400                     dmaru->reg_base_addr == drhd->address)
401                         return dmaru;
402
403         return NULL;
404 }
405
406 /*
407  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
408  * structure which uniquely represents one DMA remapping hardware unit
409  * present in the platform
410  */
411 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
412 {
413         struct acpi_dmar_hardware_unit *drhd;
414         struct dmar_drhd_unit *dmaru;
415         int ret;
416
417         drhd = (struct acpi_dmar_hardware_unit *)header;
418         dmaru = dmar_find_dmaru(drhd);
419         if (dmaru)
420                 goto out;
421
422         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
423         if (!dmaru)
424                 return -ENOMEM;
425
426         /*
427          * If header is allocated from slab by ACPI _DSM method, we need to
428          * copy the content because the memory buffer will be freed on return.
429          */
430         dmaru->hdr = (void *)(dmaru + 1);
431         memcpy(dmaru->hdr, header, header->length);
432         dmaru->reg_base_addr = drhd->address;
433         dmaru->segment = drhd->segment;
434         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
435         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
436                                               ((void *)drhd) + drhd->header.length,
437                                               &dmaru->devices_cnt);
438         if (dmaru->devices_cnt && dmaru->devices == NULL) {
439                 kfree(dmaru);
440                 return -ENOMEM;
441         }
442
443         ret = alloc_iommu(dmaru);
444         if (ret) {
445                 dmar_free_dev_scope(&dmaru->devices,
446                                     &dmaru->devices_cnt);
447                 kfree(dmaru);
448                 return ret;
449         }
450         dmar_register_drhd_unit(dmaru);
451
452 out:
453         if (arg)
454                 (*(int *)arg)++;
455
456         return 0;
457 }
458
459 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
460 {
461         if (dmaru->devices && dmaru->devices_cnt)
462                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
463         if (dmaru->iommu)
464                 free_iommu(dmaru->iommu);
465         kfree(dmaru);
466 }
467
468 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
469                                       void *arg)
470 {
471         struct acpi_dmar_andd *andd = (void *)header;
472
473         /* Check for NUL termination within the designated length */
474         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
475                 pr_warn(FW_BUG
476                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
477                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
478                            dmi_get_system_info(DMI_BIOS_VENDOR),
479                            dmi_get_system_info(DMI_BIOS_VERSION),
480                            dmi_get_system_info(DMI_PRODUCT_VERSION));
481                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
482                 return -EINVAL;
483         }
484         pr_info("ANDD device: %x name: %s\n", andd->device_number,
485                 andd->device_name);
486
487         return 0;
488 }
489
490 #ifdef CONFIG_ACPI_NUMA
491 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
492 {
493         struct acpi_dmar_rhsa *rhsa;
494         struct dmar_drhd_unit *drhd;
495
496         rhsa = (struct acpi_dmar_rhsa *)header;
497         for_each_drhd_unit(drhd) {
498                 if (drhd->reg_base_addr == rhsa->base_address) {
499                         int node = pxm_to_node(rhsa->proximity_domain);
500
501                         if (!node_online(node))
502                                 node = NUMA_NO_NODE;
503                         drhd->iommu->node = node;
504                         return 0;
505                 }
506         }
507         pr_warn(FW_BUG
508                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
509                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
510                 rhsa->base_address,
511                 dmi_get_system_info(DMI_BIOS_VENDOR),
512                 dmi_get_system_info(DMI_BIOS_VERSION),
513                 dmi_get_system_info(DMI_PRODUCT_VERSION));
514         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
515
516         return 0;
517 }
518 #else
519 #define dmar_parse_one_rhsa             dmar_res_noop
520 #endif
521
522 static void
523 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
524 {
525         struct acpi_dmar_hardware_unit *drhd;
526         struct acpi_dmar_reserved_memory *rmrr;
527         struct acpi_dmar_atsr *atsr;
528         struct acpi_dmar_rhsa *rhsa;
529         struct acpi_dmar_satc *satc;
530
531         switch (header->type) {
532         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
533                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
534                                     header);
535                 pr_info("DRHD base: %#016Lx flags: %#x\n",
536                         (unsigned long long)drhd->address, drhd->flags);
537                 break;
538         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
539                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
540                                     header);
541                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
542                         (unsigned long long)rmrr->base_address,
543                         (unsigned long long)rmrr->end_address);
544                 break;
545         case ACPI_DMAR_TYPE_ROOT_ATS:
546                 atsr = container_of(header, struct acpi_dmar_atsr, header);
547                 pr_info("ATSR flags: %#x\n", atsr->flags);
548                 break;
549         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
550                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
551                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
552                        (unsigned long long)rhsa->base_address,
553                        rhsa->proximity_domain);
554                 break;
555         case ACPI_DMAR_TYPE_NAMESPACE:
556                 /* We don't print this here because we need to sanity-check
557                    it first. So print it in dmar_parse_one_andd() instead. */
558                 break;
559         case ACPI_DMAR_TYPE_SATC:
560                 satc = container_of(header, struct acpi_dmar_satc, header);
561                 pr_info("SATC flags: 0x%x\n", satc->flags);
562                 break;
563         }
564 }
565
566 /**
567  * dmar_table_detect - checks to see if the platform supports DMAR devices
568  */
569 static int __init dmar_table_detect(void)
570 {
571         acpi_status status = AE_OK;
572
573         /* If we can find the DMAR table, then there are DMAR devices. */
574         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
575
576         if (ACPI_SUCCESS(status) && !dmar_tbl) {
577                 pr_warn("Unable to map DMAR\n");
578                 status = AE_NOT_FOUND;
579         }
580
581         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
582 }
583
584 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
585                                        size_t len, struct dmar_res_callback *cb)
586 {
587         struct acpi_dmar_header *iter, *next;
588         struct acpi_dmar_header *end = ((void *)start) + len;
589
590         for (iter = start; iter < end; iter = next) {
591                 next = (void *)iter + iter->length;
592                 if (iter->length == 0) {
593                         /* Avoid looping forever on bad ACPI tables */
594                         pr_debug(FW_BUG "Invalid 0-length structure\n");
595                         break;
596                 } else if (next > end) {
597                         /* Avoid passing table end */
598                         pr_warn(FW_BUG "Record passes table end\n");
599                         return -EINVAL;
600                 }
601
602                 if (cb->print_entry)
603                         dmar_table_print_dmar_entry(iter);
604
605                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
606                         /* continue for forward compatibility */
607                         pr_debug("Unknown DMAR structure type %d\n",
608                                  iter->type);
609                 } else if (cb->cb[iter->type]) {
610                         int ret;
611
612                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
613                         if (ret)
614                                 return ret;
615                 } else if (!cb->ignore_unhandled) {
616                         pr_warn("No handler for DMAR structure type %d\n",
617                                 iter->type);
618                         return -EINVAL;
619                 }
620         }
621
622         return 0;
623 }
624
625 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
626                                        struct dmar_res_callback *cb)
627 {
628         return dmar_walk_remapping_entries((void *)(dmar + 1),
629                         dmar->header.length - sizeof(*dmar), cb);
630 }
631
632 /**
633  * parse_dmar_table - parses the DMA reporting table
634  */
635 static int __init
636 parse_dmar_table(void)
637 {
638         struct acpi_table_dmar *dmar;
639         int drhd_count = 0;
640         int ret;
641         struct dmar_res_callback cb = {
642                 .print_entry = true,
643                 .ignore_unhandled = true,
644                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
645                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
646                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
647                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
648                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
649                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
650                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
651         };
652
653         /*
654          * Do the detection again; the earlier dmar_tbl mapping could have
655          * been done with a fixed (early) mapping.
656          */
657         dmar_table_detect();
658
659         /*
660          * ACPI tables may not be DMA protected by tboot, so use the DMAR
661          * copy that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
662          */
663         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
664
665         dmar = (struct acpi_table_dmar *)dmar_tbl;
666         if (!dmar)
667                 return -ENODEV;
668
669         if (dmar->width < PAGE_SHIFT - 1) {
670                 pr_warn("Invalid DMAR haw\n");
671                 return -EINVAL;
672         }
673
674         pr_info("Host address width %d\n", dmar->width + 1);
675         ret = dmar_walk_dmar_table(dmar, &cb);
676         if (ret == 0 && drhd_count == 0)
677                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
678
679         return ret;
680 }
681
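/*
 * Return 1 if @dev or any of its PCI parent bridges appears in the
 * @devices scope array, 0 otherwise.
 */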
682 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
683                                  int cnt, struct pci_dev *dev)
684 {
685         int index;
686         struct device *tmp;
687
688         while (dev) {
689                 for_each_active_dev_scope(devices, cnt, index, tmp)
690                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
691                                 return 1;
692
693                 /* Check our parent */
694                 dev = dev->bus->self;
695         }
696
697         return 0;
698 }
699
700 struct dmar_drhd_unit *
701 dmar_find_matched_drhd_unit(struct pci_dev *dev)
702 {
703         struct dmar_drhd_unit *dmaru;
704         struct acpi_dmar_hardware_unit *drhd;
705
706         dev = pci_physfn(dev);
707
708         rcu_read_lock();
709         for_each_drhd_unit(dmaru) {
710                 drhd = container_of(dmaru->hdr,
711                                     struct acpi_dmar_hardware_unit,
712                                     header);
713
714                 if (dmaru->include_all &&
715                     drhd->segment == pci_domain_nr(dev->bus))
716                         goto out;
717
718                 if (dmar_pci_device_match(dmaru->devices,
719                                           dmaru->devices_cnt, dev))
720                         goto out;
721         }
722         dmaru = NULL;
723 out:
724         rcu_read_unlock();
725
726         return dmaru;
727 }
728
729 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
730                                               struct acpi_device *adev)
731 {
732         struct dmar_drhd_unit *dmaru;
733         struct acpi_dmar_hardware_unit *drhd;
734         struct acpi_dmar_device_scope *scope;
735         struct device *tmp;
736         int i;
737         struct acpi_dmar_pci_path *path;
738
739         for_each_drhd_unit(dmaru) {
740                 drhd = container_of(dmaru->hdr,
741                                     struct acpi_dmar_hardware_unit,
742                                     header);
743
744                 for (scope = (void *)(drhd + 1);
745                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
746                      scope = ((void *)scope) + scope->length) {
747                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
748                                 continue;
749                         if (scope->enumeration_id != device_number)
750                                 continue;
751
752                         path = (void *)(scope + 1);
753                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
754                                 dev_name(&adev->dev), dmaru->reg_base_addr,
755                                 scope->bus, path->device, path->function);
756                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
757                                 if (tmp == NULL) {
758                                         dmaru->devices[i].bus = scope->bus;
759                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
760                                                                             path->function);
761                                         rcu_assign_pointer(dmaru->devices[i].dev,
762                                                            get_device(&adev->dev));
763                                         return;
764                                 }
765                         BUG_ON(i >= dmaru->devices_cnt);
766                 }
767         }
768         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
769                 device_number, dev_name(&adev->dev));
770 }
771
772 static int __init dmar_acpi_dev_scope_init(void)
773 {
774         struct acpi_dmar_andd *andd;
775
776         if (dmar_tbl == NULL)
777                 return -ENODEV;
778
779         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
780              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
781              andd = ((void *)andd) + andd->header.length) {
782                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
783                         acpi_handle h;
784                         struct acpi_device *adev;
785
786                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
787                                                           andd->device_name,
788                                                           &h))) {
789                                 pr_err("Failed to find handle for ACPI object %s\n",
790                                        andd->device_name);
791                                 continue;
792                         }
793                         if (acpi_bus_get_device(h, &adev)) {
794                                 pr_err("Failed to get device for ACPI object %s\n",
795                                        andd->device_name);
796                                 continue;
797                         }
798                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
799                 }
800         }
801         return 0;
802 }
803
804 int __init dmar_dev_scope_init(void)
805 {
806         struct pci_dev *dev = NULL;
807         struct dmar_pci_notify_info *info;
808
809         if (dmar_dev_scope_status != 1)
810                 return dmar_dev_scope_status;
811
812         if (list_empty(&dmar_drhd_units)) {
813                 dmar_dev_scope_status = -ENODEV;
814         } else {
815                 dmar_dev_scope_status = 0;
816
817                 dmar_acpi_dev_scope_init();
818
819                 for_each_pci_dev(dev) {
820                         if (dev->is_virtfn)
821                                 continue;
822
823                         info = dmar_alloc_pci_notify_info(dev,
824                                         BUS_NOTIFY_ADD_DEVICE);
825                         if (!info) {
826                                 return dmar_dev_scope_status;
827                         } else {
828                                 dmar_pci_bus_add_dev(info);
829                                 dmar_free_pci_notify_info(info);
830                         }
831                 }
832         }
833
834         return dmar_dev_scope_status;
835 }
836
837 void __init dmar_register_bus_notifier(void)
838 {
839         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
840 }
841
842
843 int __init dmar_table_init(void)
844 {
845         static int dmar_table_initialized;
846         int ret;
847
848         if (dmar_table_initialized == 0) {
849                 ret = parse_dmar_table();
850                 if (ret < 0) {
851                         if (ret != -ENODEV)
852                                 pr_info("Parse DMAR table failure.\n");
853                 } else  if (list_empty(&dmar_drhd_units)) {
854                         pr_info("No DMAR devices found\n");
855                         ret = -ENODEV;
856                 }
857
858                 if (ret < 0)
859                         dmar_table_initialized = ret;
860                 else
861                         dmar_table_initialized = 1;
862         }
863
864         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
865 }
866
867 static void warn_invalid_dmar(u64 addr, const char *message)
868 {
869         pr_warn_once(FW_BUG
870                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
871                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
872                 addr, message,
873                 dmi_get_system_info(DMI_BIOS_VENDOR),
874                 dmi_get_system_info(DMI_BIOS_VERSION),
875                 dmi_get_system_info(DMI_PRODUCT_VERSION));
876         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
877 }
878
879 static int __ref
880 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
881 {
882         struct acpi_dmar_hardware_unit *drhd;
883         void __iomem *addr;
884         u64 cap, ecap;
885
886         drhd = (void *)entry;
887         if (!drhd->address) {
888                 warn_invalid_dmar(0, "");
889                 return -EINVAL;
890         }
891
892         if (arg)
893                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
894         else
895                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
896         if (!addr) {
897                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
898                 return -EINVAL;
899         }
900
901         cap = dmar_readq(addr + DMAR_CAP_REG);
902         ecap = dmar_readq(addr + DMAR_ECAP_REG);
903
904         if (arg)
905                 iounmap(addr);
906         else
907                 early_iounmap(addr, VTD_PAGE_SIZE);
908
909         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
910                 warn_invalid_dmar(drhd->address, " returns all ones");
911                 return -EINVAL;
912         }
913
914         return 0;
915 }
916
917 int __init detect_intel_iommu(void)
918 {
919         int ret;
920         struct dmar_res_callback validate_drhd_cb = {
921                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
922                 .ignore_unhandled = true,
923         };
924
925         down_write(&dmar_global_lock);
926         ret = dmar_table_detect();
927         if (!ret)
928                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
929                                            &validate_drhd_cb);
930         if (!ret && !no_iommu && !iommu_detected &&
931             (!dmar_disabled || dmar_platform_optin())) {
932                 iommu_detected = 1;
933                 /* Make sure ACS will be enabled */
934                 pci_request_acs();
935         }
936
937 #ifdef CONFIG_X86
938         if (!ret) {
939                 x86_init.iommu.iommu_init = intel_iommu_init;
940                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
941         }
942
943 #endif
944
945         if (dmar_tbl) {
946                 acpi_put_table(dmar_tbl);
947                 dmar_tbl = NULL;
948         }
949         up_write(&dmar_global_lock);
950
951         return ret ? ret : 1;
952 }
953
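/* Undo map_iommu(): unmap the register window and release the memory region. */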
954 static void unmap_iommu(struct intel_iommu *iommu)
955 {
956         iounmap(iommu->reg);
957         release_mem_region(iommu->reg_phys, iommu->reg_size);
958 }
959
960 /**
961  * map_iommu: map the iommu's registers
962  * @iommu: the iommu to map
963  * @phys_addr: the physical address of the base register
964  *
965  * Memory map the iommu's registers.  Start with a single page, and
966  * possibly expand the mapping if that turns out to be insufficient.
967  */
968 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
969 {
970         int map_size, err=0;
971
972         iommu->reg_phys = phys_addr;
973         iommu->reg_size = VTD_PAGE_SIZE;
974
975         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
976                 pr_err("Can't reserve memory\n");
977                 err = -EBUSY;
978                 goto out;
979         }
980
981         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
982         if (!iommu->reg) {
983                 pr_err("Can't map the region\n");
984                 err = -ENOMEM;
985                 goto release;
986         }
987
988         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
989         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
990
991         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
992                 err = -EINVAL;
993                 warn_invalid_dmar(phys_addr, " returns all ones");
994                 goto unmap;
995         }
996         if (ecap_vcs(iommu->ecap))
997                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
998
999         /* the registers might be more than one page */
1000         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1001                          cap_max_fault_reg_offset(iommu->cap));
1002         map_size = VTD_PAGE_ALIGN(map_size);
1003         if (map_size > iommu->reg_size) {
1004                 iounmap(iommu->reg);
1005                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1006                 iommu->reg_size = map_size;
1007                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1008                                         iommu->name)) {
1009                         pr_err("Can't reserve memory\n");
1010                         err = -EBUSY;
1011                         goto out;
1012                 }
1013                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1014                 if (!iommu->reg) {
1015                         pr_err("Can't map the region\n");
1016                         err = -ENOMEM;
1017                         goto release;
1018                 }
1019         }
1020         err = 0;
1021         goto out;
1022
1023 unmap:
1024         iounmap(iommu->reg);
1025 release:
1026         release_mem_region(iommu->reg_phys, iommu->reg_size);
1027 out:
1028         return err;
1029 }
1030
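/* Allocate a unique sequence id for @iommu and derive its "dmarN" name from it. */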
1031 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1032 {
1033         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1034                                             DMAR_UNITS_SUPPORTED);
1035         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1036                 iommu->seq_id = -1;
1037         } else {
1038                 set_bit(iommu->seq_id, dmar_seq_ids);
1039                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1040         }
1041
1042         return iommu->seq_id;
1043 }
1044
1045 static void dmar_free_seq_id(struct intel_iommu *iommu)
1046 {
1047         if (iommu->seq_id >= 0) {
1048                 clear_bit(iommu->seq_id, dmar_seq_ids);
1049                 iommu->seq_id = -1;
1050         }
1051 }
1052
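/*
 * Allocate and initialize the intel_iommu structure for one DRHD unit:
 * map its registers, read its capabilities, compute the supported address
 * widths, reflect the current hardware status in gcmd and, for hotplugged
 * units, register the IOMMU with sysfs and the IOMMU core.
 */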
1053 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1054 {
1055         struct intel_iommu *iommu;
1056         u32 ver, sts;
1057         int agaw = -1;
1058         int msagaw = -1;
1059         int err;
1060
1061         if (!drhd->reg_base_addr) {
1062                 warn_invalid_dmar(0, "");
1063                 return -EINVAL;
1064         }
1065
1066         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1067         if (!iommu)
1068                 return -ENOMEM;
1069
1070         if (dmar_alloc_seq_id(iommu) < 0) {
1071                 pr_err("Failed to allocate seq_id\n");
1072                 err = -ENOSPC;
1073                 goto error;
1074         }
1075
1076         err = map_iommu(iommu, drhd->reg_base_addr);
1077         if (err) {
1078                 pr_err("Failed to map %s\n", iommu->name);
1079                 goto error_free_seq_id;
1080         }
1081
1082         err = -EINVAL;
1083         if (cap_sagaw(iommu->cap) == 0) {
1084                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1085                         iommu->name);
1086                 drhd->ignored = 1;
1087         }
1088
1089         if (!drhd->ignored) {
1090                 agaw = iommu_calculate_agaw(iommu);
1091                 if (agaw < 0) {
1092                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1093                                iommu->seq_id);
1094                         drhd->ignored = 1;
1095                 }
1096         }
1097         if (!drhd->ignored) {
1098                 msagaw = iommu_calculate_max_sagaw(iommu);
1099                 if (msagaw < 0) {
1100                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1101                                iommu->seq_id);
1102                         drhd->ignored = 1;
1103                         agaw = -1;
1104                 }
1105         }
1106         iommu->agaw = agaw;
1107         iommu->msagaw = msagaw;
1108         iommu->segment = drhd->segment;
1109
1110         iommu->node = NUMA_NO_NODE;
1111
1112         ver = readl(iommu->reg + DMAR_VER_REG);
1113         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1114                 iommu->name,
1115                 (unsigned long long)drhd->reg_base_addr,
1116                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1117                 (unsigned long long)iommu->cap,
1118                 (unsigned long long)iommu->ecap);
1119
1120         /* Reflect status in gcmd */
1121         sts = readl(iommu->reg + DMAR_GSTS_REG);
1122         if (sts & DMA_GSTS_IRES)
1123                 iommu->gcmd |= DMA_GCMD_IRE;
1124         if (sts & DMA_GSTS_TES)
1125                 iommu->gcmd |= DMA_GCMD_TE;
1126         if (sts & DMA_GSTS_QIES)
1127                 iommu->gcmd |= DMA_GCMD_QIE;
1128
1129         raw_spin_lock_init(&iommu->register_lock);
1130
1131         /*
1132          * This is only for hotplug; at boot time intel_iommu_enabled won't
1133          * be set yet. When intel_iommu_init() runs, it registers the units
1134          * present at boot time, then sets intel_iommu_enabled.
1135          */
1136         if (intel_iommu_enabled && !drhd->ignored) {
1137                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1138                                              intel_iommu_groups,
1139                                              "%s", iommu->name);
1140                 if (err)
1141                         goto err_unmap;
1142
1143                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1144                 if (err)
1145                         goto err_sysfs;
1146         }
1147
1148         drhd->iommu = iommu;
1149         iommu->drhd = drhd;
1150
1151         return 0;
1152
1153 err_sysfs:
1154         iommu_device_sysfs_remove(&iommu->iommu);
1155 err_unmap:
1156         unmap_iommu(iommu);
1157 error_free_seq_id:
1158         dmar_free_seq_id(iommu);
1159 error:
1160         kfree(iommu);
1161         return err;
1162 }
1163
1164 static void free_iommu(struct intel_iommu *iommu)
1165 {
1166         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1167                 iommu_device_unregister(&iommu->iommu);
1168                 iommu_device_sysfs_remove(&iommu->iommu);
1169         }
1170
1171         if (iommu->irq) {
1172                 if (iommu->pr_irq) {
1173                         free_irq(iommu->pr_irq, iommu);
1174                         dmar_free_hwirq(iommu->pr_irq);
1175                         iommu->pr_irq = 0;
1176                 }
1177                 free_irq(iommu->irq, iommu);
1178                 dmar_free_hwirq(iommu->irq);
1179                 iommu->irq = 0;
1180         }
1181
1182         if (iommu->qi) {
1183                 free_page((unsigned long)iommu->qi->desc);
1184                 kfree(iommu->qi->desc_status);
1185                 kfree(iommu->qi);
1186         }
1187
1188         if (iommu->reg)
1189                 unmap_iommu(iommu);
1190
1191         dmar_free_seq_id(iommu);
1192         kfree(iommu);
1193 }
1194
1195 /*
1196  * Reclaim all the submitted descriptors that have completed their work.
1197  */
1198 static inline void reclaim_free_desc(struct q_inval *qi)
1199 {
1200         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1201                qi->desc_status[qi->free_tail] == QI_ABORT) {
1202                 qi->desc_status[qi->free_tail] = QI_FREE;
1203                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1204                 qi->free_cnt++;
1205         }
1206 }
1207
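/* Human-readable name of a queued invalidation descriptor type, used in fault dumps. */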
1208 static const char *qi_type_string(u8 type)
1209 {
1210         switch (type) {
1211         case QI_CC_TYPE:
1212                 return "Context-cache Invalidation";
1213         case QI_IOTLB_TYPE:
1214                 return "IOTLB Invalidation";
1215         case QI_DIOTLB_TYPE:
1216                 return "Device-TLB Invalidation";
1217         case QI_IEC_TYPE:
1218                 return "Interrupt Entry Cache Invalidation";
1219         case QI_IWD_TYPE:
1220                 return "Invalidation Wait";
1221         case QI_EIOTLB_TYPE:
1222                 return "PASID-based IOTLB Invalidation";
1223         case QI_PC_TYPE:
1224                 return "PASID-cache Invalidation";
1225         case QI_DEIOTLB_TYPE:
1226                 return "PASID-based Device-TLB Invalidation";
1227         case QI_PGRP_RESP_TYPE:
1228                 return "Page Group Response";
1229         default:
1230                 return "UNKNOWN";
1231         }
1232 }
1233
1234 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1235 {
1236         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1237         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1238         struct qi_desc *desc = iommu->qi->desc + head;
1239
1240         if (fault & DMA_FSTS_IQE)
1241                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1242                        DMAR_IQER_REG_IQEI(iqe_err));
1243         if (fault & DMA_FSTS_ITE)
1244                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1245                        DMAR_IQER_REG_ITESID(iqe_err));
1246         if (fault & DMA_FSTS_ICE)
1247                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1248                        DMAR_IQER_REG_ICESID(iqe_err));
1249
1250         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1251                qi_type_string(desc->qw0 & 0xf),
1252                (unsigned long long)desc->qw0,
1253                (unsigned long long)desc->qw1);
1254
1255         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1256         head <<= qi_shift(iommu);
1257         desc = iommu->qi->desc + head;
1258
1259         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1260                qi_type_string(desc->qw0 & 0xf),
1261                (unsigned long long)desc->qw0,
1262                (unsigned long long)desc->qw1);
1263 }
1264
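/*
 * Check the fault status register and handle Invalidation Queue Errors,
 * Invalidation Time-out Errors and Invalidation Completion Errors.
 * Returns -EAGAIN if the wait descriptor was aborted and the submission
 * should be retried, -EINVAL on an Invalidation Queue Error, 0 otherwise.
 */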
1265 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1266 {
1267         u32 fault;
1268         int head, tail;
1269         struct q_inval *qi = iommu->qi;
1270         int shift = qi_shift(iommu);
1271
1272         if (qi->desc_status[wait_index] == QI_ABORT)
1273                 return -EAGAIN;
1274
1275         fault = readl(iommu->reg + DMAR_FSTS_REG);
1276         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1277                 qi_dump_fault(iommu, fault);
1278
1279         /*
1280          * If IQE happens, the head points to the descriptor associated
1281          * with the error. No new descriptors are fetched until the IQE
1282          * is cleared.
1283          */
1284         if (fault & DMA_FSTS_IQE) {
1285                 head = readl(iommu->reg + DMAR_IQH_REG);
1286                 if ((head >> shift) == index) {
1287                         struct qi_desc *desc = qi->desc + head;
1288
1289                         /*
1290                          * desc->qw2 and desc->qw3 are either reserved or
1291                          * used by software as private data. We won't print
1292                          * out these two qw's for security reasons.
1293                          */
1294                         memcpy(desc, qi->desc + (wait_index << shift),
1295                                1 << shift);
1296                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1297                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1298                         return -EINVAL;
1299                 }
1300         }
1301
1302         /*
1303          * If ITE happens, all pending wait_desc commands are aborted.
1304          * No new descriptors are fetched until the ITE is cleared.
1305          */
1306         if (fault & DMA_FSTS_ITE) {
1307                 head = readl(iommu->reg + DMAR_IQH_REG);
1308                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1309                 head |= 1;
1310                 tail = readl(iommu->reg + DMAR_IQT_REG);
1311                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1312
1313                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1314                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1315
1316                 do {
1317                         if (qi->desc_status[head] == QI_IN_USE)
1318                                 qi->desc_status[head] = QI_ABORT;
1319                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1320                 } while (head != tail);
1321
1322                 if (qi->desc_status[wait_index] == QI_ABORT)
1323                         return -EAGAIN;
1324         }
1325
1326         if (fault & DMA_FSTS_ICE) {
1327                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1328                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1329         }
1330
1331         return 0;
1332 }
1333
1334 /*
1335  * Function to submit invalidation descriptors of all types to the queued
1336  * invalidation interface (QI). Multiple descriptors can be submitted at a
1337  * time; a wait descriptor is appended to each submission to ensure that the
1338  * hardware has completed the invalidation before returning. Wait descriptors
1339  * can be part of the submission, but they will not be polled for completion.
1340  */
1341 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1342                    unsigned int count, unsigned long options)
1343 {
1344         struct q_inval *qi = iommu->qi;
1345         struct qi_desc wait_desc;
1346         int wait_index, index;
1347         unsigned long flags;
1348         int offset, shift;
1349         int rc, i;
1350
1351         if (!qi)
1352                 return 0;
1353
1354 restart:
1355         rc = 0;
1356
1357         raw_spin_lock_irqsave(&qi->q_lock, flags);
1358         /*
1359          * Check if we have enough empty slots in the queue to submit;
1360          * the calculation is based on:
1361          * # of desc + 1 wait desc + 1 space between head and tail
1362          */
1363         while (qi->free_cnt < count + 2) {
1364                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1365                 cpu_relax();
1366                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1367         }
1368
1369         index = qi->free_head;
1370         wait_index = (index + count) % QI_LENGTH;
1371         shift = qi_shift(iommu);
1372
1373         for (i = 0; i < count; i++) {
1374                 offset = ((index + i) % QI_LENGTH) << shift;
1375                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1376                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1377                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1378                                 desc[i].qw2, desc[i].qw3);
1379         }
1380         qi->desc_status[wait_index] = QI_IN_USE;
1381
1382         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1383                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1384         if (options & QI_OPT_WAIT_DRAIN)
1385                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1386         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1387         wait_desc.qw2 = 0;
1388         wait_desc.qw3 = 0;
1389
1390         offset = wait_index << shift;
1391         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1392
1393         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1394         qi->free_cnt -= count + 1;
1395
1396         /*
1397          * update the HW tail register indicating the presence of
1398          * new descriptors.
1399          */
1400         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1401
1402         while (qi->desc_status[wait_index] != QI_DONE) {
1403                 /*
1404                  * We will leave the interrupts disabled, to prevent the interrupt
1405                  * context from queueing another cmd while a cmd is already submitted
1406                  * and waiting for completion on this cpu. This is to avoid
1407                  * a deadlock where the interrupt context can wait indefinitely
1408                  * for free slots in the queue.
1409                  */
1410                 rc = qi_check_fault(iommu, index, wait_index);
1411                 if (rc)
1412                         break;
1413
1414                 raw_spin_unlock(&qi->q_lock);
1415                 cpu_relax();
1416                 raw_spin_lock(&qi->q_lock);
1417         }
1418
1419         for (i = 0; i < count; i++)
1420                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1421
1422         reclaim_free_desc(qi);
1423         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1424
1425         if (rc == -EAGAIN)
1426                 goto restart;
1427
1428         return rc;
1429 }
1430
1431 /*
1432  * Flush the global interrupt entry cache.
1433  */
1434 void qi_global_iec(struct intel_iommu *iommu)
1435 {
1436         struct qi_desc desc;
1437
1438         desc.qw0 = QI_IEC_TYPE;
1439         desc.qw1 = 0;
1440         desc.qw2 = 0;
1441         desc.qw3 = 0;
1442
1443         /* should never fail */
1444         qi_submit_sync(iommu, &desc, 1, 0);
1445 }
1446
1447 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1448                       u64 type)
1449 {
1450         struct qi_desc desc;
1451
1452         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1453                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1454         desc.qw1 = 0;
1455         desc.qw2 = 0;
1456         desc.qw3 = 0;
1457
1458         qi_submit_sync(iommu, &desc, 1, 0);
1459 }
1460
1461 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1462                     unsigned int size_order, u64 type)
1463 {
1464         u8 dw = 0, dr = 0;
1465
1466         struct qi_desc desc;
1467         int ih = 0;
1468
1469         if (cap_write_drain(iommu->cap))
1470                 dw = 1;
1471
1472         if (cap_read_drain(iommu->cap))
1473                 dr = 1;
1474
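        /*
         * Per the VT-d spec, QI_IOTLB_AM() carries an address mask: the
         * invalidation covers 2^size_order 4KB pages naturally aligned to that
         * size (e.g. size_order == 9 covers a 2MB-aligned 2MB range), and
         * leaving ih == 0 asks hardware to invalidate paging-structure-cache
         * entries for the range as well.
         */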
1475         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1476                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1477         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1478                 | QI_IOTLB_AM(size_order);
1479         desc.qw2 = 0;
1480         desc.qw3 = 0;
1481
1482         qi_submit_sync(iommu, &desc, 1, 0);
1483 }
1484
1485 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1486                         u16 qdep, u64 addr, unsigned mask)
1487 {
1488         struct qi_desc desc;
1489
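        /*
         * Worked example for the size encoding below (assuming addr is
         * size-aligned): mask == 2 ORs in (1 << 13) - 1, setting bit 12 and
         * leaving bit 13 clear, so the least significant zero bit of the
         * address field (bit 13) together with the SIZE bit encodes a 16KB
         * (4-page) invalidation range.
         */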
1490         if (mask) {
1491                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1492                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1493         } else
1494                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1495
1496         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1497                 qdep = 0;
1498
1499         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1500                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1501         desc.qw2 = 0;
1502         desc.qw3 = 0;
1503
1504         qi_submit_sync(iommu, &desc, 1, 0);
1505 }
1506
1507 /* PASID-based IOTLB invalidation */
1508 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1509                      unsigned long npages, bool ih)
1510 {
1511         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1512
1513         /*
1514          * npages == -1 requests a PASID-selective invalidation; a positive
1515          * value requests a Page-selective-within-PASID invalidation.
1516          * 0 is not a valid input.
1517          */
1518         if (WARN_ON(!npages)) {
1519                 pr_err("Invalid input npages = %ld\n", npages);
1520                 return;
1521         }
1522
1523         if (npages == -1) {
1524                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1525                                 QI_EIOTLB_DID(did) |
1526                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1527                                 QI_EIOTLB_TYPE;
1528                 desc.qw1 = 0;
1529         } else {
1530                 int mask = ilog2(__roundup_pow_of_two(npages));
1531                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1532
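                /*
                 * Example: npages == 3 is rounded up to 4 pages, giving
                 * mask == 2 (the AM field) and a 16KB alignment requirement;
                 * an unaligned addr trips the warning below and is rounded
                 * down to that boundary.
                 */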
1533                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1534                         addr = ALIGN_DOWN(addr, align);
1535
1536                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1537                                 QI_EIOTLB_DID(did) |
1538                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1539                                 QI_EIOTLB_TYPE;
1540                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1541                                 QI_EIOTLB_IH(ih) |
1542                                 QI_EIOTLB_AM(mask);
1543         }
1544
1545         qi_submit_sync(iommu, &desc, 1, 0);
1546 }
1547
1548 /* PASID-based device IOTLB Invalidate */
1549 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1550                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1551 {
1552         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1553         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1554
1555         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1556                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1557                 QI_DEV_IOTLB_PFSID(pfsid);
1558
1559         /*
1560          * If the S bit is 0, only a single page is flushed. If the S bit
1561          * is set, the position of the least significant zero bit in the
1562          * address field encodes the invalidation range (VT-d spec 6.5.2.6),
1563          * e.g. a zero at bit 12 indicates 8KB, at bit 13 indicates 16KB.
1564          * size_order == 0 means a single 4KB page (PAGE_SIZE).
1565          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1566          * ECAP.
1567          */
1568         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1569                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1570                                     addr, size_order);
1571
1572         /* Take page address */
1573         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1574
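        /*
         * Worked example for the block below: size_order == 2 (16KB) sets
         * address bits 13:12, then clears bit 13 (mask == 1 << 13), leaving
         * bit 12 set and bit 13 as the least significant zero bit, which
         * together with the S bit encodes a 16KB invalidation range.
         */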
1575         if (size_order) {
1576                 /*
1577                  * Any 0 bits in the address below size_order would otherwise
1578                  * become the least significant zero bit; set them to 1 so the
1579                  * encoded range is not smaller than desired.
1580                  */
1581                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1582                                         VTD_PAGE_SHIFT);
1583                 /* Clear size_order bit to indicate size */
1584                 desc.qw1 &= ~mask;
1585                 /* Set the S bit to indicate flushing more than 1 page */
1586                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1587         }
1588
1589         qi_submit_sync(iommu, &desc, 1, 0);
1590 }
1591
1592 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1593                           u64 granu, u32 pasid)
1594 {
1595         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1596
1597         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1598                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1599         qi_submit_sync(iommu, &desc, 1, 0);
1600 }
1601
1602 /*
1603  * Disable Queued Invalidation interface.
1604  */
1605 void dmar_disable_qi(struct intel_iommu *iommu)
1606 {
1607         unsigned long flags;
1608         u32 sts;
1609         cycles_t start_time = get_cycles();
1610
1611         if (!ecap_qis(iommu->ecap))
1612                 return;
1613
1614         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1615
1616         sts = readl(iommu->reg + DMAR_GSTS_REG);
1617         if (!(sts & DMA_GSTS_QIES))
1618                 goto end;
1619
1620         /*
1621          * Give the hardware a chance to complete the pending invalidation requests.
1622          */
1623         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1624                 readl(iommu->reg + DMAR_IQH_REG)) &&
1625                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1626                 cpu_relax();
1627
1628         iommu->gcmd &= ~DMA_GCMD_QIE;
1629         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1630
1631         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1632                       !(sts & DMA_GSTS_QIES), sts);
1633 end:
1634         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1635 }
1636
1637 /*
1638  * Enable queued invalidation.
1639  */
1640 static void __dmar_enable_qi(struct intel_iommu *iommu)
1641 {
1642         u32 sts;
1643         unsigned long flags;
1644         struct q_inval *qi = iommu->qi;
1645         u64 val = virt_to_phys(qi->desc);
1646
1647         qi->free_head = qi->free_tail = 0;
1648         qi->free_cnt = QI_LENGTH;
1649
1650         /*
1651          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1652          * is present.
1653          */
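        /*
         * Per the VT-d spec, bit 11 of IQA_REG is DW (descriptor width,
         * 1 = 256-bit descriptors) and bits 2:0 are QS (queue size of
         * 2^QS 4KB pages); QS == 1 gives an 8KB queue, which at 32 bytes
         * per descriptor still holds QI_LENGTH (256) entries.
         */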
1654         if (ecap_smts(iommu->ecap))
1655                 val |= (1 << 11) | 1;
1656
1657         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1658
1659         /* write zero to the tail reg */
1660         writel(0, iommu->reg + DMAR_IQT_REG);
1661
1662         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1663
1664         iommu->gcmd |= DMA_GCMD_QIE;
1665         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1666
1667         /* Make sure hardware complete it */
1668         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1669
1670         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1671 }
1672
1673 /*
1674  * Enable Queued Invalidation interface. This is a must to support
1675  * interrupt-remapping. Also used by DMA-remapping, which replaces
1676  * register based IOTLB invalidation.
1677  */
1678 int dmar_enable_qi(struct intel_iommu *iommu)
1679 {
1680         struct q_inval *qi;
1681         struct page *desc_page;
1682
1683         if (!ecap_qis(iommu->ecap))
1684                 return -ENOENT;
1685
1686         /*
1687          * Queued invalidation is already set up and enabled.
1688          */
1689         if (iommu->qi)
1690                 return 0;
1691
1692         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1693         if (!iommu->qi)
1694                 return -ENOMEM;
1695
1696         qi = iommu->qi;
1697
1698         /*
1699          * Need two pages to accommodate 256 descriptors of 256 bits each
1700          * if the remapping hardware supports scalable mode translation.
1701          */
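        /*
         * That is, an order-0 (4KB) page fits 256 16-byte legacy descriptors,
         * while an order-1 (8KB) allocation is needed for 256 32-byte
         * scalable-mode descriptors, hence the !!ecap_smts() page order.
         */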
1702         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1703                                      !!ecap_smts(iommu->ecap));
1704         if (!desc_page) {
1705                 kfree(qi);
1706                 iommu->qi = NULL;
1707                 return -ENOMEM;
1708         }
1709
1710         qi->desc = page_address(desc_page);
1711
1712         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1713         if (!qi->desc_status) {
1714                 free_page((unsigned long) qi->desc);
1715                 kfree(qi);
1716                 iommu->qi = NULL;
1717                 return -ENOMEM;
1718         }
1719
1720         raw_spin_lock_init(&qi->q_lock);
1721
1722         __dmar_enable_qi(iommu);
1723
1724         return 0;
1725 }
1726
1727 /* iommu interrupt handling. Most of it is MSI-like. */
1728
1729 enum faulttype {
1730         DMA_REMAP,
1731         INTR_REMAP,
1732         UNKNOWN,
1733 };
1734
1735 static const char *dma_remap_fault_reasons[] =
1736 {
1737         "Software",
1738         "Present bit in root entry is clear",
1739         "Present bit in context entry is clear",
1740         "Invalid context entry",
1741         "Access beyond MGAW",
1742         "PTE Write access is not set",
1743         "PTE Read access is not set",
1744         "Next page table ptr is invalid",
1745         "Root table address invalid",
1746         "Context table ptr is invalid",
1747         "non-zero reserved fields in RTP",
1748         "non-zero reserved fields in CTP",
1749         "non-zero reserved fields in PTE",
1750         "PCE for translation request specifies blocking",
1751 };
1752
1753 static const char * const dma_remap_sm_fault_reasons[] = {
1754         "SM: Invalid Root Table Address",
1755         "SM: TTM 0 for request with PASID",
1756         "SM: TTM 0 for page group request",
1757         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1758         "SM: Error attempting to access Root Entry",
1759         "SM: Present bit in Root Entry is clear",
1760         "SM: Non-zero reserved field set in Root Entry",
1761         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1762         "SM: Error attempting to access Context Entry",
1763         "SM: Present bit in Context Entry is clear",
1764         "SM: Non-zero reserved field set in the Context Entry",
1765         "SM: Invalid Context Entry",
1766         "SM: DTE field in Context Entry is clear",
1767         "SM: PASID Enable field in Context Entry is clear",
1768         "SM: PASID is larger than the max in Context Entry",
1769         "SM: PRE field in Context-Entry is clear",
1770         "SM: RID_PASID field error in Context-Entry",
1771         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1772         "SM: Error attempting to access the PASID Directory Entry",
1773         "SM: Present bit in Directory Entry is clear",
1774         "SM: Non-zero reserved field set in PASID Directory Entry",
1775         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1776         "SM: Error attempting to access PASID Table Entry",
1777         "SM: Present bit in PASID Table Entry is clear",
1778         "SM: Non-zero reserved field set in PASID Table Entry",
1779         "SM: Invalid Scalable-Mode PASID Table Entry",
1780         "SM: ERE field is clear in PASID Table Entry",
1781         "SM: SRE field is clear in PASID Table Entry",
1782         "Unknown", "Unknown",/* 0x5E-0x5F */
1783         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1784         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1785         "SM: Error attempting to access first-level paging entry",
1786         "SM: Present bit in first-level paging entry is clear",
1787         "SM: Non-zero reserved field set in first-level paging entry",
1788         "SM: Error attempting to access FL-PML4 entry",
1789         "SM: First-level entry address beyond MGAW in Nested translation",
1790         "SM: Read permission error in FL-PML4 entry in Nested translation",
1791         "SM: Read permission error in first-level paging entry in Nested translation",
1792         "SM: Write permission error in first-level paging entry in Nested translation",
1793         "SM: Error attempting to access second-level paging entry",
1794         "SM: Read/Write permission error in second-level paging entry",
1795         "SM: Non-zero reserved field set in second-level paging entry",
1796         "SM: Invalid second-level page table pointer",
1797         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1798         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1799         "SM: Address in first-level translation is not canonical",
1800         "SM: U/S set 0 for first-level translation with user privilege",
1801         "SM: No execute permission for request with PASID and ER=1",
1802         "SM: Address beyond the DMA hardware max",
1803         "SM: Second-level entry address beyond the max",
1804         "SM: No write permission for Write/AtomicOp request",
1805         "SM: No read permission for Read/AtomicOp request",
1806         "SM: Invalid address-interrupt address",
1807         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1808         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1809 };
1810
1811 static const char *irq_remap_fault_reasons[] =
1812 {
1813         "Detected reserved fields in the decoded interrupt-remapped request",
1814         "Interrupt index exceeded the interrupt-remapping table size",
1815         "Present field in the IRTE entry is clear",
1816         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1817         "Detected reserved fields in the IRTE entry",
1818         "Blocked a compatibility format interrupt request",
1819         "Blocked an interrupt request due to source-id verification failure",
1820 };
1821
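/*
 * Fault reason codes fall into ranges: codes starting at 0x20 index
 * irq_remap_fault_reasons[], codes starting at 0x30 index
 * dma_remap_sm_fault_reasons[], small codes index the legacy
 * dma_remap_fault_reasons[] table, and anything that falls outside those
 * tables is reported as "Unknown".
 */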
1822 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1823 {
1824         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1825                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1826                 *fault_type = INTR_REMAP;
1827                 return irq_remap_fault_reasons[fault_reason - 0x20];
1828         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1829                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1830                 *fault_type = DMA_REMAP;
1831                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1832         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1833                 *fault_type = DMA_REMAP;
1834                 return dma_remap_fault_reasons[fault_reason];
1835         } else {
1836                 *fault_type = UNKNOWN;
1837                 return "Unknown";
1838         }
1839 }
1840
1842 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1843 {
1844         if (iommu->irq == irq)
1845                 return DMAR_FECTL_REG;
1846         else if (iommu->pr_irq == irq)
1847                 return DMAR_PECTL_REG;
1848         else
1849                 BUG();
1850 }
1851
1852 void dmar_msi_unmask(struct irq_data *data)
1853 {
1854         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1855         int reg = dmar_msi_reg(iommu, data->irq);
1856         unsigned long flag;
1857
1858         /* unmask it */
1859         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1860         writel(0, iommu->reg + reg);
1861         /* Read back the register to flush the posted write */
1862         readl(iommu->reg + reg);
1863         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1864 }
1865
1866 void dmar_msi_mask(struct irq_data *data)
1867 {
1868         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1869         int reg = dmar_msi_reg(iommu, data->irq);
1870         unsigned long flag;
1871
1872         /* mask it */
1873         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1874         writel(DMA_FECTL_IM, iommu->reg + reg);
1875         /* Read back the register to flush the posted write */
1876         readl(iommu->reg + reg);
1877         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1878 }
1879
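/*
 * The MSI message registers live at fixed offsets from the matching control
 * register: +4 is the data register, +8 the low address and +12 the high
 * address (FEDATA/FEADDR/FEUADDR for the fault event interrupt and
 * PEDATA/PEADDR/PEUADDR for the page request interrupt).
 */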
1880 void dmar_msi_write(int irq, struct msi_msg *msg)
1881 {
1882         struct intel_iommu *iommu = irq_get_handler_data(irq);
1883         int reg = dmar_msi_reg(iommu, irq);
1884         unsigned long flag;
1885
1886         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1887         writel(msg->data, iommu->reg + reg + 4);
1888         writel(msg->address_lo, iommu->reg + reg + 8);
1889         writel(msg->address_hi, iommu->reg + reg + 12);
1890         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1891 }
1892
1893 void dmar_msi_read(int irq, struct msi_msg *msg)
1894 {
1895         struct intel_iommu *iommu = irq_get_handler_data(irq);
1896         int reg = dmar_msi_reg(iommu, irq);
1897         unsigned long flag;
1898
1899         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1900         msg->data = readl(iommu->reg + reg + 4);
1901         msg->address_lo = readl(iommu->reg + reg + 8);
1902         msg->address_hi = readl(iommu->reg + reg + 12);
1903         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1904 }
1905
1906 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1907                 u8 fault_reason, u32 pasid, u16 source_id,
1908                 unsigned long long addr)
1909 {
1910         const char *reason;
1911         int fault_type;
1912
1913         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1914
1915         if (fault_type == INTR_REMAP)
1916                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1917                         source_id >> 8, PCI_SLOT(source_id & 0xFF),
1918                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1919                         fault_reason, reason);
1920         else
1921                 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1922                        type ? "DMA Read" : "DMA Write",
1923                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1924                        PCI_FUNC(source_id & 0xFF), pasid, addr,
1925                        fault_reason, reason);
1926         return 0;
1927 }
1928
1929 #define PRIMARY_FAULT_REG_LEN (16)
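/*
 * Each primary fault record is 128 bits (PRIMARY_FAULT_REG_LEN bytes): the
 * low qword holds the faulting page address, the dword at offset 8 holds the
 * source-id and PASID-present bit, and the dword at offset 12 holds the F
 * bit, fault reason, request type and PASID value, matching the reads below.
 */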
1930 irqreturn_t dmar_fault(int irq, void *dev_id)
1931 {
1932         struct intel_iommu *iommu = dev_id;
1933         int reg, fault_index;
1934         u32 fault_status;
1935         unsigned long flag;
1936         static DEFINE_RATELIMIT_STATE(rs,
1937                                       DEFAULT_RATELIMIT_INTERVAL,
1938                                       DEFAULT_RATELIMIT_BURST);
1939
1940         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1941         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1942         if (fault_status && __ratelimit(&rs))
1943                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1944
1945         /* TBD: ignore advanced fault log currently */
1946         if (!(fault_status & DMA_FSTS_PPF))
1947                 goto unlock_exit;
1948
1949         fault_index = dma_fsts_fault_record_index(fault_status);
1950         reg = cap_fault_reg_offset(iommu->cap);
1951         while (1) {
1952                 /* When ratelimited, skip printing and simply clear the fault */
1953                 bool ratelimited = !__ratelimit(&rs);
1954                 u8 fault_reason;
1955                 u16 source_id;
1956                 u64 guest_addr;
1957                 u32 pasid;
1958                 int type;
1959                 u32 data;
1960                 bool pasid_present;
1961
1962                 /* highest 32 bits */
1963                 data = readl(iommu->reg + reg +
1964                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1965                 if (!(data & DMA_FRCD_F))
1966                         break;
1967
1968                 if (!ratelimited) {
1969                         fault_reason = dma_frcd_fault_reason(data);
1970                         type = dma_frcd_type(data);
1971
1972                         pasid = dma_frcd_pasid_value(data);
1973                         data = readl(iommu->reg + reg +
1974                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1975                         source_id = dma_frcd_source_id(data);
1976
1977                         pasid_present = dma_frcd_pasid_present(data);
1978                         guest_addr = dmar_readq(iommu->reg + reg +
1979                                         fault_index * PRIMARY_FAULT_REG_LEN);
1980                         guest_addr = dma_frcd_page_addr(guest_addr);
1981                 }
1982
1983                 /* clear the fault */
1984                 writel(DMA_FRCD_F, iommu->reg + reg +
1985                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1986
1987                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1988
1989                 if (!ratelimited)
1990                         /* Use PASID value -1 if no PASID is present */
1991                         dmar_fault_do_one(iommu, type, fault_reason,
1992                                           pasid_present ? pasid : -1,
1993                                           source_id, guest_addr);
1994
1995                 fault_index++;
1996                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1997                         fault_index = 0;
1998                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1999         }
2000
2001         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2002                iommu->reg + DMAR_FSTS_REG);
2003
2004 unlock_exit:
2005         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2006         return IRQ_HANDLED;
2007 }
2008
2009 int dmar_set_interrupt(struct intel_iommu *iommu)
2010 {
2011         int irq, ret;
2012
2013         /*
2014          * Check if the fault interrupt is already initialized.
2015          */
2016         if (iommu->irq)
2017                 return 0;
2018
2019         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2020         if (irq > 0) {
2021                 iommu->irq = irq;
2022         } else {
2023                 pr_err("No free IRQ vectors\n");
2024                 return -EINVAL;
2025         }
2026
2027         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2028         if (ret)
2029                 pr_err("Can't request irq\n");
2030         return ret;
2031 }
2032
2033 int __init enable_drhd_fault_handling(void)
2034 {
2035         struct dmar_drhd_unit *drhd;
2036         struct intel_iommu *iommu;
2037
2038         /*
2039          * Enable fault control interrupt.
2040          */
2041         for_each_iommu(iommu, drhd) {
2042                 u32 fault_status;
2043                 int ret = dmar_set_interrupt(iommu);
2044
2045                 if (ret) {
2046                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2047                                (unsigned long long)drhd->reg_base_addr, ret);
2048                         return -1;
2049                 }
2050
2051                 /*
2052                  * Clear any previous faults.
2053                  */
2054                 dmar_fault(iommu->irq, iommu);
2055                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2056                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2057         }
2058
2059         return 0;
2060 }
2061
2062 /*
2063  * Re-enable Queued Invalidation interface.
2064  */
2065 int dmar_reenable_qi(struct intel_iommu *iommu)
2066 {
2067         if (!ecap_qis(iommu->ecap))
2068                 return -ENOENT;
2069
2070         if (!iommu->qi)
2071                 return -ENOENT;
2072
2073         /*
2074          * First disable queued invalidation.
2075          */
2076         dmar_disable_qi(iommu);
2077         /*
2078          * Then enable queued invalidation again. Since there are no pending
2079          * invalidation requests now, it's safe to re-enable queued
2080          * invalidation.
2081          */
2082         __dmar_enable_qi(iommu);
2083
2084         return 0;
2085 }
2086
2087 /*
2088  * Check interrupt remapping support in DMAR table description.
2089  */
2090 int __init dmar_ir_support(void)
2091 {
2092         struct acpi_table_dmar *dmar = (struct acpi_table_dmar *)dmar_tbl;
2093
2094         if (!dmar)
2095                 return 0;
2096         return dmar->flags & 0x1;
2097 }
2098
2099 /* Check whether DMAR units are in use */
2100 static inline bool dmar_in_use(void)
2101 {
2102         return irq_remapping_enabled || intel_iommu_enabled;
2103 }
2104
2105 static int __init dmar_free_unused_resources(void)
2106 {
2107         struct dmar_drhd_unit *dmaru, *dmaru_n;
2108
2109         if (dmar_in_use())
2110                 return 0;
2111
2112         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2113                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2114
2115         down_write(&dmar_global_lock);
2116         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2117                 list_del(&dmaru->list);
2118                 dmar_free_drhd(dmaru);
2119         }
2120         up_write(&dmar_global_lock);
2121
2122         return 0;
2123 }
2124
2125 late_initcall(dmar_free_unused_resources);
2126 IOMMU_INIT_POST(detect_intel_iommu);
2127
2128 /*
2129  * DMAR Hotplug Support
2130  * For more details, please refer to Intel(R) Virtualization Technology
2131  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2132  * "Remapping Hardware Unit Hot Plug".
2133  */
2134 static guid_t dmar_hp_guid =
2135         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2136                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2137
2138 /*
2139  * Currently there's only one revision and the BIOS will not check the revision ID,
2140  * so use 0 for safety.
2141  */
2142 #define DMAR_DSM_REV_ID                 0
2143 #define DMAR_DSM_FUNC_DRHD              1
2144 #define DMAR_DSM_FUNC_ATSR              2
2145 #define DMAR_DSM_FUNC_RHSA              3
2146 #define DMAR_DSM_FUNC_SATC              4
2147
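/*
 * acpi_check_dsm() takes a bitmask of _DSM function indices, so "1 << func"
 * asks whether the firmware advertises that single function for the DMAR
 * hotplug GUID at revision DMAR_DSM_REV_ID.
 */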
2148 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2149 {
2150         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2151 }
2152
2153 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2154                                   dmar_res_handler_t handler, void *arg)
2155 {
2156         int ret = -ENODEV;
2157         union acpi_object *obj;
2158         struct acpi_dmar_header *start;
2159         struct dmar_res_callback callback;
2160         static int res_type[] = {
2161                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2162                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2163                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2164                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2165         };
2166
2167         if (!dmar_detect_dsm(handle, func))
2168                 return 0;
2169
2170         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2171                                       func, NULL, ACPI_TYPE_BUFFER);
2172         if (!obj)
2173                 return -ENODEV;
2174
2175         memset(&callback, 0, sizeof(callback));
2176         callback.cb[res_type[func]] = handler;
2177         callback.arg[res_type[func]] = arg;
2178         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2179         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2180
2181         ACPI_FREE(obj);
2182
2183         return ret;
2184 }
2185
2186 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2187 {
2188         int ret;
2189         struct dmar_drhd_unit *dmaru;
2190
2191         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2192         if (!dmaru)
2193                 return -ENODEV;
2194
2195         ret = dmar_ir_hotplug(dmaru, true);
2196         if (ret == 0)
2197                 ret = dmar_iommu_hotplug(dmaru, true);
2198
2199         return ret;
2200 }
2201
2202 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2203 {
2204         int i, ret;
2205         struct device *dev;
2206         struct dmar_drhd_unit *dmaru;
2207
2208         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2209         if (!dmaru)
2210                 return 0;
2211
2212         /*
2213          * All PCI devices managed by this unit should have been destroyed.
2214          */
2215         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2216                 for_each_active_dev_scope(dmaru->devices,
2217                                           dmaru->devices_cnt, i, dev)
2218                         return -EBUSY;
2219         }
2220
2221         ret = dmar_ir_hotplug(dmaru, false);
2222         if (ret == 0)
2223                 ret = dmar_iommu_hotplug(dmaru, false);
2224
2225         return ret;
2226 }
2227
2228 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2229 {
2230         struct dmar_drhd_unit *dmaru;
2231
2232         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2233         if (dmaru) {
2234                 list_del_rcu(&dmaru->list);
2235                 synchronize_rcu();
2236                 dmar_free_drhd(dmaru);
2237         }
2238
2239         return 0;
2240 }
2241
2242 static int dmar_hotplug_insert(acpi_handle handle)
2243 {
2244         int ret;
2245         int drhd_count = 0;
2246
2247         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2248                                      &dmar_validate_one_drhd, (void *)1);
2249         if (ret)
2250                 goto out;
2251
2252         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2253                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2254         if (ret == 0 && drhd_count == 0) {
2255                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2256                 goto out;
2257         } else if (ret) {
2258                 goto release_drhd;
2259         }
2260
2261         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2262                                      &dmar_parse_one_rhsa, NULL);
2263         if (ret)
2264                 goto release_drhd;
2265
2266         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2267                                      &dmar_parse_one_atsr, NULL);
2268         if (ret)
2269                 goto release_atsr;
2270
2271         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2272                                      &dmar_hp_add_drhd, NULL);
2273         if (!ret)
2274                 return 0;
2275
2276         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2277                                &dmar_hp_remove_drhd, NULL);
2278 release_atsr:
2279         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2280                                &dmar_release_one_atsr, NULL);
2281 release_drhd:
2282         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2283                                &dmar_hp_release_drhd, NULL);
2284 out:
2285         return ret;
2286 }
2287
2288 static int dmar_hotplug_remove(acpi_handle handle)
2289 {
2290         int ret;
2291
2292         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2293                                      &dmar_check_one_atsr, NULL);
2294         if (ret)
2295                 return ret;
2296
2297         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2298                                      &dmar_hp_remove_drhd, NULL);
2299         if (ret == 0) {
2300                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2301                                                &dmar_release_one_atsr, NULL));
2302                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2303                                                &dmar_hp_release_drhd, NULL));
2304         } else {
2305                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2306                                        &dmar_hp_add_drhd, NULL);
2307         }
2308
2309         return ret;
2310 }
2311
2312 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2313                                        void *context, void **retval)
2314 {
2315         acpi_handle *phdl = retval;
2316
2317         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2318                 *phdl = handle;
2319                 return AE_CTRL_TERMINATE;
2320         }
2321
2322         return AE_OK;
2323 }
2324
2325 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2326 {
2327         int ret;
2328         acpi_handle tmp = NULL;
2329         acpi_status status;
2330
2331         if (!dmar_in_use())
2332                 return 0;
2333
2334         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2335                 tmp = handle;
2336         } else {
2337                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2338                                              ACPI_UINT32_MAX,
2339                                              dmar_get_dsm_handle,
2340                                              NULL, NULL, &tmp);
2341                 if (ACPI_FAILURE(status)) {
2342                         pr_warn("Failed to locate _DSM method.\n");
2343                         return -ENXIO;
2344                 }
2345         }
2346         if (tmp == NULL)
2347                 return 0;
2348
2349         down_write(&dmar_global_lock);
2350         if (insert)
2351                 ret = dmar_hotplug_insert(tmp);
2352         else
2353                 ret = dmar_hotplug_remove(tmp);
2354         up_write(&dmar_global_lock);
2355
2356         return ret;
2357 }
2358
2359 int dmar_device_add(acpi_handle handle)
2360 {
2361         return dmar_device_hotplug(handle, true);
2362 }
2363
2364 int dmar_device_remove(acpi_handle handle)
2365 {
2366         return dmar_device_hotplug(handle, false);
2367 }
2368
2369 /*
2370  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2371  *
2372  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2373  * the ACPI DMAR table. This means that the platform boot firmware has made
2374  * sure no device can issue DMA outside of RMRR regions.
2375  */
2376 bool dmar_platform_optin(void)
2377 {
2378         struct acpi_table_dmar *dmar;
2379         acpi_status status;
2380         bool ret;
2381
2382         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2383                                 (struct acpi_table_header **)&dmar);
2384         if (ACPI_FAILURE(status))
2385                 return false;
2386
2387         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2388         acpi_put_table((struct acpi_table_header *)dmar);
2389
2390         return ret;
2391 }
2392 EXPORT_SYMBOL_GPL(dmar_platform_optin);