Merge tag 'x86_fpu_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
[sfrench/cifs-2.6.git] / drivers / iommu / intel / dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/timer.h>
23 #include <linux/irq.h>
24 #include <linux/interrupt.h>
25 #include <linux/tboot.h>
26 #include <linux/dmi.h>
27 #include <linux/slab.h>
28 #include <linux/iommu.h>
29 #include <linux/numa.h>
30 #include <linux/limits.h>
31 #include <asm/irq_remapping.h>
32
33 #include "iommu.h"
34 #include "../irq_remapping.h"
35 #include "perf.h"
36 #include "trace.h"
37 #include "perfmon.h"
38
39 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
40 struct dmar_res_callback {
41         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
42         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
43         bool                    ignore_unhandled;
44         bool                    print_entry;
45 };
46
47 /*
48  * Assumptions:
49  * 1) The hotplug framework guarantees that DMAR unit will be hot-added
50  *    before IO devices managed by that unit.
51  * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
52  *    after IO devices managed by that unit.
53  * 3) Hotplug events are rare.
54  *
55  * Locking rules for DMA and interrupt remapping related global data structures:
56  * 1) Use dmar_global_lock in process context
57  * 2) Use RCU in interrupt context
58  */
59 DECLARE_RWSEM(dmar_global_lock);
60 LIST_HEAD(dmar_drhd_units);
61
62 struct acpi_table_header * __initdata dmar_tbl;
63 static int dmar_dev_scope_status = 1;
64 static DEFINE_IDA(dmar_seq_ids);
65
66 static int alloc_iommu(struct dmar_drhd_unit *drhd);
67 static void free_iommu(struct intel_iommu *iommu);
68
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * add INCLUDE_ALL at the tail, so scan the list will find it at
73          * the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /*
133          * Ignore devices that have a domain number higher than what can
134          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135          */
136         if (pci_domain_nr(dev->bus) > U16_MAX)
137                 return NULL;
138
139         /* Only generate path[] for device addition event */
140         if (event == BUS_NOTIFY_ADD_DEVICE)
141                 for (tmp = dev; tmp; tmp = tmp->bus->self)
142                         level++;
143
144         size = struct_size(info, path, level);
145         if (size <= sizeof(dmar_pci_notify_info_buf)) {
146                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147         } else {
148                 info = kzalloc(size, GFP_KERNEL);
149                 if (!info) {
150                         if (dmar_dev_scope_status == 0)
151                                 dmar_dev_scope_status = -ENOMEM;
152                         return NULL;
153                 }
154         }
155
156         info->event = event;
157         info->dev = dev;
158         info->seg = pci_domain_nr(dev->bus);
159         info->level = level;
160         if (event == BUS_NOTIFY_ADD_DEVICE) {
161                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
162                         level--;
163                         info->path[level].bus = tmp->bus->number;
164                         info->path[level].device = PCI_SLOT(tmp->devfn);
165                         info->path[level].function = PCI_FUNC(tmp->devfn);
166                         if (pci_is_root_bus(tmp->bus))
167                                 info->bus = tmp->bus->number;
168                 }
169         }
170
171         return info;
172 }
173
174 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
175 {
176         if ((void *)info != dmar_pci_notify_info_buf)
177                 kfree(info);
178 }
179
180 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
181                                 struct acpi_dmar_pci_path *path, int count)
182 {
183         int i;
184
185         if (info->bus != bus)
186                 goto fallback;
187         if (info->level != count)
188                 goto fallback;
189
190         for (i = 0; i < count; i++) {
191                 if (path[i].device != info->path[i].device ||
192                     path[i].function != info->path[i].function)
193                         goto fallback;
194         }
195
196         return true;
197
198 fallback:
199
200         if (count != 1)
201                 return false;
202
203         i = info->level - 1;
204         if (bus              == info->path[i].bus &&
205             path[0].device   == info->path[i].device &&
206             path[0].function == info->path[i].function) {
207                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
208                         bus, path[0].device, path[0].function);
209                 return true;
210         }
211
212         return false;
213 }
214
215 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
216 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
217                           void *start, void*end, u16 segment,
218                           struct dmar_dev_scope *devices,
219                           int devices_cnt)
220 {
221         int i, level;
222         struct device *tmp, *dev = &info->dev->dev;
223         struct acpi_dmar_device_scope *scope;
224         struct acpi_dmar_pci_path *path;
225
226         if (segment != info->seg)
227                 return 0;
228
229         for (; start < end; start += scope->length) {
230                 scope = start;
231                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
232                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
233                         continue;
234
235                 path = (struct acpi_dmar_pci_path *)(scope + 1);
236                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
237                 if (!dmar_match_pci_path(info, scope->bus, path, level))
238                         continue;
239
240                 /*
241                  * We expect devices with endpoint scope to have normal PCI
242                  * headers, and devices with bridge scope to have bridge PCI
243                  * headers.  However PCI NTB devices may be listed in the
244                  * DMAR table with bridge scope, even though they have a
245                  * normal PCI header.  NTB devices are identified by class
246                  * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
247                  * for this special case.
248                  */
249                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
250                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
251                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
252                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
253                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
254                         pr_warn("Device scope type does not match for %s\n",
255                                 pci_name(info->dev));
256                         return -EINVAL;
257                 }
258
259                 for_each_dev_scope(devices, devices_cnt, i, tmp)
260                         if (tmp == NULL) {
261                                 devices[i].bus = info->dev->bus->number;
262                                 devices[i].devfn = info->dev->devfn;
263                                 rcu_assign_pointer(devices[i].dev,
264                                                    get_device(dev));
265                                 return 1;
266                         }
267                 BUG_ON(i >= devices_cnt);
268         }
269
270         return 0;
271 }
272
273 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
274                           struct dmar_dev_scope *devices, int count)
275 {
276         int index;
277         struct device *tmp;
278
279         if (info->seg != segment)
280                 return 0;
281
282         for_each_active_dev_scope(devices, count, index, tmp)
283                 if (tmp == &info->dev->dev) {
284                         RCU_INIT_POINTER(devices[index].dev, NULL);
285                         synchronize_rcu();
286                         put_device(tmp);
287                         return 1;
288                 }
289
290         return 0;
291 }
292
293 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
294 {
295         int ret = 0;
296         struct dmar_drhd_unit *dmaru;
297         struct acpi_dmar_hardware_unit *drhd;
298
299         for_each_drhd_unit(dmaru) {
300                 if (dmaru->include_all)
301                         continue;
302
303                 drhd = container_of(dmaru->hdr,
304                                     struct acpi_dmar_hardware_unit, header);
305                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
306                                 ((void *)drhd) + drhd->header.length,
307                                 dmaru->segment,
308                                 dmaru->devices, dmaru->devices_cnt);
309                 if (ret)
310                         break;
311         }
312         if (ret >= 0)
313                 ret = dmar_iommu_notify_scope_dev(info);
314         if (ret < 0 && dmar_dev_scope_status == 0)
315                 dmar_dev_scope_status = ret;
316
317         if (ret >= 0)
318                 intel_irq_remap_add_device(info);
319
320         return ret;
321 }
322
323 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
324 {
325         struct dmar_drhd_unit *dmaru;
326
327         for_each_drhd_unit(dmaru)
328                 if (dmar_remove_dev_scope(info, dmaru->segment,
329                         dmaru->devices, dmaru->devices_cnt))
330                         break;
331         dmar_iommu_notify_scope_dev(info);
332 }
333
334 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
335 {
336         struct pci_dev *physfn = pci_physfn(pdev);
337
338         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
339 }
340
341 static int dmar_pci_bus_notifier(struct notifier_block *nb,
342                                  unsigned long action, void *data)
343 {
344         struct pci_dev *pdev = to_pci_dev(data);
345         struct dmar_pci_notify_info *info;
346
347         /* Only care about add/remove events for physical functions.
348          * For VFs we actually do the lookup based on the corresponding
349          * PF in device_to_iommu() anyway. */
350         if (pdev->is_virtfn) {
351                 /*
352                  * Ensure that the VF device inherits the irq domain of the
353                  * PF device. Ideally the device would inherit the domain
354                  * from the bus, but DMAR can have multiple units per bus
355                  * which makes this impossible. The VF 'bus' could inherit
356                  * from the PF device, but that's yet another x86'sism to
357                  * inflict on everybody else.
358                  */
359                 if (action == BUS_NOTIFY_ADD_DEVICE)
360                         vf_inherit_msi_domain(pdev);
361                 return NOTIFY_DONE;
362         }
363
364         if (action != BUS_NOTIFY_ADD_DEVICE &&
365             action != BUS_NOTIFY_REMOVED_DEVICE)
366                 return NOTIFY_DONE;
367
368         info = dmar_alloc_pci_notify_info(pdev, action);
369         if (!info)
370                 return NOTIFY_DONE;
371
372         down_write(&dmar_global_lock);
373         if (action == BUS_NOTIFY_ADD_DEVICE)
374                 dmar_pci_bus_add_dev(info);
375         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
376                 dmar_pci_bus_del_dev(info);
377         up_write(&dmar_global_lock);
378
379         dmar_free_pci_notify_info(info);
380
381         return NOTIFY_OK;
382 }
383
384 static struct notifier_block dmar_pci_bus_nb = {
385         .notifier_call = dmar_pci_bus_notifier,
386         .priority = 1,
387 };
388
389 static struct dmar_drhd_unit *
390 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
391 {
392         struct dmar_drhd_unit *dmaru;
393
394         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
395                                 dmar_rcu_check())
396                 if (dmaru->segment == drhd->segment &&
397                     dmaru->reg_base_addr == drhd->address)
398                         return dmaru;
399
400         return NULL;
401 }
402
403 /*
404  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
405  * structure which uniquely represent one DMA remapping hardware unit
406  * present in the platform
407  */
408 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
409 {
410         struct acpi_dmar_hardware_unit *drhd;
411         struct dmar_drhd_unit *dmaru;
412         int ret;
413
414         drhd = (struct acpi_dmar_hardware_unit *)header;
415         dmaru = dmar_find_dmaru(drhd);
416         if (dmaru)
417                 goto out;
418
419         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
420         if (!dmaru)
421                 return -ENOMEM;
422
423         /*
424          * If header is allocated from slab by ACPI _DSM method, we need to
425          * copy the content because the memory buffer will be freed on return.
426          */
427         dmaru->hdr = (void *)(dmaru + 1);
428         memcpy(dmaru->hdr, header, header->length);
429         dmaru->reg_base_addr = drhd->address;
430         dmaru->segment = drhd->segment;
431         /* The size of the register set is 2 ^ N 4 KB pages. */
432         dmaru->reg_size = 1UL << (drhd->size + 12);
433         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
434         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
435                                               ((void *)drhd) + drhd->header.length,
436                                               &dmaru->devices_cnt);
437         if (dmaru->devices_cnt && dmaru->devices == NULL) {
438                 kfree(dmaru);
439                 return -ENOMEM;
440         }
441
442         ret = alloc_iommu(dmaru);
443         if (ret) {
444                 dmar_free_dev_scope(&dmaru->devices,
445                                     &dmaru->devices_cnt);
446                 kfree(dmaru);
447                 return ret;
448         }
449         dmar_register_drhd_unit(dmaru);
450
451 out:
452         if (arg)
453                 (*(int *)arg)++;
454
455         return 0;
456 }
457
458 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
459 {
460         if (dmaru->devices && dmaru->devices_cnt)
461                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
462         if (dmaru->iommu)
463                 free_iommu(dmaru->iommu);
464         kfree(dmaru);
465 }
466
467 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
468                                       void *arg)
469 {
470         struct acpi_dmar_andd *andd = (void *)header;
471
472         /* Check for NUL termination within the designated length */
473         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
474                 pr_warn(FW_BUG
475                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
476                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
477                            dmi_get_system_info(DMI_BIOS_VENDOR),
478                            dmi_get_system_info(DMI_BIOS_VERSION),
479                            dmi_get_system_info(DMI_PRODUCT_VERSION));
480                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
481                 return -EINVAL;
482         }
483         pr_info("ANDD device: %x name: %s\n", andd->device_number,
484                 andd->device_name);
485
486         return 0;
487 }
488
#ifdef CONFIG_ACPI_NUMA
/*
 * Apply one RHSA structure: set the NUMA node of the IOMMU whose DRHD unit
 * has the register base address named by the structure. A proximity domain
 * that maps to a node which is not online yields NUMA_NO_NODE.
 *
 * Return: always 0; an RHSA without matching DRHD unit is only reported
 * as a firmware bug.
 */
static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_rhsa *rhsa = (struct acpi_dmar_rhsa *)header;
	struct dmar_drhd_unit *unit;
	int node;

	for_each_drhd_unit(unit) {
		if (unit->reg_base_addr != rhsa->base_address)
			continue;

		node = pxm_to_node(rhsa->proximity_domain);
		if (node != NUMA_NO_NODE && !node_online(node))
			node = NUMA_NO_NODE;
		unit->iommu->node = node;

		return 0;
	}

	pr_warn(FW_BUG
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);

	return 0;
}
#else
#define dmar_parse_one_rhsa		dmar_res_noop
#endif
520
521 static void
522 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
523 {
524         struct acpi_dmar_hardware_unit *drhd;
525         struct acpi_dmar_reserved_memory *rmrr;
526         struct acpi_dmar_atsr *atsr;
527         struct acpi_dmar_rhsa *rhsa;
528         struct acpi_dmar_satc *satc;
529
530         switch (header->type) {
531         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
532                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
533                                     header);
534                 pr_info("DRHD base: %#016Lx flags: %#x\n",
535                         (unsigned long long)drhd->address, drhd->flags);
536                 break;
537         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
538                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
539                                     header);
540                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
541                         (unsigned long long)rmrr->base_address,
542                         (unsigned long long)rmrr->end_address);
543                 break;
544         case ACPI_DMAR_TYPE_ROOT_ATS:
545                 atsr = container_of(header, struct acpi_dmar_atsr, header);
546                 pr_info("ATSR flags: %#x\n", atsr->flags);
547                 break;
548         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
549                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
550                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
551                        (unsigned long long)rhsa->base_address,
552                        rhsa->proximity_domain);
553                 break;
554         case ACPI_DMAR_TYPE_NAMESPACE:
555                 /* We don't print this here because we need to sanity-check
556                    it first. So print it in dmar_parse_one_andd() instead. */
557                 break;
558         case ACPI_DMAR_TYPE_SATC:
559                 satc = container_of(header, struct acpi_dmar_satc, header);
560                 pr_info("SATC flags: 0x%x\n", satc->flags);
561                 break;
562         }
563 }
564
565 /**
566  * dmar_table_detect - checks to see if the platform supports DMAR devices
567  */
568 static int __init dmar_table_detect(void)
569 {
570         acpi_status status = AE_OK;
571
572         /* if we could find DMAR table, then there are DMAR devices */
573         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
574
575         if (ACPI_SUCCESS(status) && !dmar_tbl) {
576                 pr_warn("Unable to map DMAR\n");
577                 status = AE_NOT_FOUND;
578         }
579
580         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
581 }
582
583 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
584                                        size_t len, struct dmar_res_callback *cb)
585 {
586         struct acpi_dmar_header *iter, *next;
587         struct acpi_dmar_header *end = ((void *)start) + len;
588
589         for (iter = start; iter < end; iter = next) {
590                 next = (void *)iter + iter->length;
591                 if (iter->length == 0) {
592                         /* Avoid looping forever on bad ACPI tables */
593                         pr_debug(FW_BUG "Invalid 0-length structure\n");
594                         break;
595                 } else if (next > end) {
596                         /* Avoid passing table end */
597                         pr_warn(FW_BUG "Record passes table end\n");
598                         return -EINVAL;
599                 }
600
601                 if (cb->print_entry)
602                         dmar_table_print_dmar_entry(iter);
603
604                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
605                         /* continue for forward compatibility */
606                         pr_debug("Unknown DMAR structure type %d\n",
607                                  iter->type);
608                 } else if (cb->cb[iter->type]) {
609                         int ret;
610
611                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
612                         if (ret)
613                                 return ret;
614                 } else if (!cb->ignore_unhandled) {
615                         pr_warn("No handler for DMAR structure type %d\n",
616                                 iter->type);
617                         return -EINVAL;
618                 }
619         }
620
621         return 0;
622 }
623
624 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
625                                        struct dmar_res_callback *cb)
626 {
627         return dmar_walk_remapping_entries((void *)(dmar + 1),
628                         dmar->header.length - sizeof(*dmar), cb);
629 }
630
631 /**
632  * parse_dmar_table - parses the DMA reporting table
633  */
634 static int __init
635 parse_dmar_table(void)
636 {
637         struct acpi_table_dmar *dmar;
638         int drhd_count = 0;
639         int ret;
640         struct dmar_res_callback cb = {
641                 .print_entry = true,
642                 .ignore_unhandled = true,
643                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
644                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
645                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
646                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
647                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
648                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
649                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
650         };
651
652         /*
653          * Do it again, earlier dmar_tbl mapping could be mapped with
654          * fixed map.
655          */
656         dmar_table_detect();
657
658         /*
659          * ACPI tables may not be DMA protected by tboot, so use DMAR copy
660          * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
661          */
662         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
663
664         dmar = (struct acpi_table_dmar *)dmar_tbl;
665         if (!dmar)
666                 return -ENODEV;
667
668         if (dmar->width < PAGE_SHIFT - 1) {
669                 pr_warn("Invalid DMAR haw\n");
670                 return -EINVAL;
671         }
672
673         pr_info("Host address width %d\n", dmar->width + 1);
674         ret = dmar_walk_dmar_table(dmar, &cb);
675         if (ret == 0 && drhd_count == 0)
676                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
677
678         return ret;
679 }
680
681 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
682                                  int cnt, struct pci_dev *dev)
683 {
684         int index;
685         struct device *tmp;
686
687         while (dev) {
688                 for_each_active_dev_scope(devices, cnt, index, tmp)
689                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
690                                 return 1;
691
692                 /* Check our parent */
693                 dev = dev->bus->self;
694         }
695
696         return 0;
697 }
698
699 struct dmar_drhd_unit *
700 dmar_find_matched_drhd_unit(struct pci_dev *dev)
701 {
702         struct dmar_drhd_unit *dmaru;
703         struct acpi_dmar_hardware_unit *drhd;
704
705         dev = pci_physfn(dev);
706
707         rcu_read_lock();
708         for_each_drhd_unit(dmaru) {
709                 drhd = container_of(dmaru->hdr,
710                                     struct acpi_dmar_hardware_unit,
711                                     header);
712
713                 if (dmaru->include_all &&
714                     drhd->segment == pci_domain_nr(dev->bus))
715                         goto out;
716
717                 if (dmar_pci_device_match(dmaru->devices,
718                                           dmaru->devices_cnt, dev))
719                         goto out;
720         }
721         dmaru = NULL;
722 out:
723         rcu_read_unlock();
724
725         return dmaru;
726 }
727
728 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
729                                               struct acpi_device *adev)
730 {
731         struct dmar_drhd_unit *dmaru;
732         struct acpi_dmar_hardware_unit *drhd;
733         struct acpi_dmar_device_scope *scope;
734         struct device *tmp;
735         int i;
736         struct acpi_dmar_pci_path *path;
737
738         for_each_drhd_unit(dmaru) {
739                 drhd = container_of(dmaru->hdr,
740                                     struct acpi_dmar_hardware_unit,
741                                     header);
742
743                 for (scope = (void *)(drhd + 1);
744                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
745                      scope = ((void *)scope) + scope->length) {
746                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
747                                 continue;
748                         if (scope->enumeration_id != device_number)
749                                 continue;
750
751                         path = (void *)(scope + 1);
752                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
753                                 dev_name(&adev->dev), dmaru->reg_base_addr,
754                                 scope->bus, path->device, path->function);
755                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
756                                 if (tmp == NULL) {
757                                         dmaru->devices[i].bus = scope->bus;
758                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
759                                                                             path->function);
760                                         rcu_assign_pointer(dmaru->devices[i].dev,
761                                                            get_device(&adev->dev));
762                                         return;
763                                 }
764                         BUG_ON(i >= dmaru->devices_cnt);
765                 }
766         }
767         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
768                 device_number, dev_name(&adev->dev));
769 }
770
771 static int __init dmar_acpi_dev_scope_init(void)
772 {
773         struct acpi_dmar_andd *andd;
774
775         if (dmar_tbl == NULL)
776                 return -ENODEV;
777
778         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
779              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
780              andd = ((void *)andd) + andd->header.length) {
781                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
782                         acpi_handle h;
783                         struct acpi_device *adev;
784
785                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
786                                                           andd->device_name,
787                                                           &h))) {
788                                 pr_err("Failed to find handle for ACPI object %s\n",
789                                        andd->device_name);
790                                 continue;
791                         }
792                         adev = acpi_fetch_acpi_dev(h);
793                         if (!adev) {
794                                 pr_err("Failed to get device for ACPI object %s\n",
795                                        andd->device_name);
796                                 continue;
797                         }
798                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
799                 }
800         }
801         return 0;
802 }
803
804 int __init dmar_dev_scope_init(void)
805 {
806         struct pci_dev *dev = NULL;
807         struct dmar_pci_notify_info *info;
808
809         if (dmar_dev_scope_status != 1)
810                 return dmar_dev_scope_status;
811
812         if (list_empty(&dmar_drhd_units)) {
813                 dmar_dev_scope_status = -ENODEV;
814         } else {
815                 dmar_dev_scope_status = 0;
816
817                 dmar_acpi_dev_scope_init();
818
819                 for_each_pci_dev(dev) {
820                         if (dev->is_virtfn)
821                                 continue;
822
823                         info = dmar_alloc_pci_notify_info(dev,
824                                         BUS_NOTIFY_ADD_DEVICE);
825                         if (!info) {
826                                 pci_dev_put(dev);
827                                 return dmar_dev_scope_status;
828                         } else {
829                                 dmar_pci_bus_add_dev(info);
830                                 dmar_free_pci_notify_info(info);
831                         }
832                 }
833         }
834
835         return dmar_dev_scope_status;
836 }
837
838 void __init dmar_register_bus_notifier(void)
839 {
840         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
841 }
842
843
844 int __init dmar_table_init(void)
845 {
846         static int dmar_table_initialized;
847         int ret;
848
849         if (dmar_table_initialized == 0) {
850                 ret = parse_dmar_table();
851                 if (ret < 0) {
852                         if (ret != -ENODEV)
853                                 pr_info("Parse DMAR table failure.\n");
854                 } else  if (list_empty(&dmar_drhd_units)) {
855                         pr_info("No DMAR devices found\n");
856                         ret = -ENODEV;
857                 }
858
859                 if (ret < 0)
860                         dmar_table_initialized = ret;
861                 else
862                         dmar_table_initialized = 1;
863         }
864
865         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
866 }
867
868 static void warn_invalid_dmar(u64 addr, const char *message)
869 {
870         pr_warn_once(FW_BUG
871                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
872                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
873                 addr, message,
874                 dmi_get_system_info(DMI_BIOS_VENDOR),
875                 dmi_get_system_info(DMI_BIOS_VERSION),
876                 dmi_get_system_info(DMI_PRODUCT_VERSION));
877         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
878 }
879
880 static int __ref
881 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
882 {
883         struct acpi_dmar_hardware_unit *drhd;
884         void __iomem *addr;
885         u64 cap, ecap;
886
887         drhd = (void *)entry;
888         if (!drhd->address) {
889                 warn_invalid_dmar(0, "");
890                 return -EINVAL;
891         }
892
893         if (arg)
894                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
895         else
896                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
897         if (!addr) {
898                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
899                 return -EINVAL;
900         }
901
902         cap = dmar_readq(addr + DMAR_CAP_REG);
903         ecap = dmar_readq(addr + DMAR_ECAP_REG);
904
905         if (arg)
906                 iounmap(addr);
907         else
908                 early_iounmap(addr, VTD_PAGE_SIZE);
909
910         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
911                 warn_invalid_dmar(drhd->address, " returns all ones");
912                 return -EINVAL;
913         }
914
915         return 0;
916 }
917
918 void __init detect_intel_iommu(void)
919 {
920         int ret;
921         struct dmar_res_callback validate_drhd_cb = {
922                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
923                 .ignore_unhandled = true,
924         };
925
926         down_write(&dmar_global_lock);
927         ret = dmar_table_detect();
928         if (!ret)
929                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
930                                            &validate_drhd_cb);
931         if (!ret && !no_iommu && !iommu_detected &&
932             (!dmar_disabled || dmar_platform_optin())) {
933                 iommu_detected = 1;
934                 /* Make sure ACS will be enabled */
935                 pci_request_acs();
936         }
937
938 #ifdef CONFIG_X86
939         if (!ret) {
940                 x86_init.iommu.iommu_init = intel_iommu_init;
941                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
942         }
943
944 #endif
945
946         if (dmar_tbl) {
947                 acpi_put_table(dmar_tbl);
948                 dmar_tbl = NULL;
949         }
950         up_write(&dmar_global_lock);
951 }
952
953 static void unmap_iommu(struct intel_iommu *iommu)
954 {
955         iounmap(iommu->reg);
956         release_mem_region(iommu->reg_phys, iommu->reg_size);
957 }
958
959 /**
960  * map_iommu: map the iommu's registers
961  * @iommu: the iommu to map
962  * @drhd: DMA remapping hardware definition structure
963  *
964  * Memory map the iommu's registers.  Start w/ a single page, and
965  * possibly expand if that turns out to be insufficent.
966  */
967 static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
968 {
969         u64 phys_addr = drhd->reg_base_addr;
970         int map_size, err=0;
971
972         iommu->reg_phys = phys_addr;
973         iommu->reg_size = drhd->reg_size;
974
975         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
976                 pr_err("Can't reserve memory\n");
977                 err = -EBUSY;
978                 goto out;
979         }
980
981         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
982         if (!iommu->reg) {
983                 pr_err("Can't map the region\n");
984                 err = -ENOMEM;
985                 goto release;
986         }
987
988         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
989         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
990
991         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
992                 err = -EINVAL;
993                 warn_invalid_dmar(phys_addr, " returns all ones");
994                 goto unmap;
995         }
996         if (ecap_vcs(iommu->ecap))
997                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
998
999         /* the registers might be more than one page */
1000         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1001                          cap_max_fault_reg_offset(iommu->cap));
1002         map_size = VTD_PAGE_ALIGN(map_size);
1003         if (map_size > iommu->reg_size) {
1004                 iounmap(iommu->reg);
1005                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1006                 iommu->reg_size = map_size;
1007                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1008                                         iommu->name)) {
1009                         pr_err("Can't reserve memory\n");
1010                         err = -EBUSY;
1011                         goto out;
1012                 }
1013                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1014                 if (!iommu->reg) {
1015                         pr_err("Can't map the region\n");
1016                         err = -ENOMEM;
1017                         goto release;
1018                 }
1019         }
1020
1021         if (cap_ecmds(iommu->cap)) {
1022                 int i;
1023
1024                 for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) {
1025                         iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG +
1026                                                        i * DMA_ECMD_REG_STEP);
1027                 }
1028         }
1029
1030         err = 0;
1031         goto out;
1032
1033 unmap:
1034         iounmap(iommu->reg);
1035 release:
1036         release_mem_region(iommu->reg_phys, iommu->reg_size);
1037 out:
1038         return err;
1039 }
1040
1041 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1042 {
1043         struct intel_iommu *iommu;
1044         u32 ver, sts;
1045         int agaw = -1;
1046         int msagaw = -1;
1047         int err;
1048
1049         if (!drhd->reg_base_addr) {
1050                 warn_invalid_dmar(0, "");
1051                 return -EINVAL;
1052         }
1053
1054         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1055         if (!iommu)
1056                 return -ENOMEM;
1057
1058         iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
1059                                         DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
1060         if (iommu->seq_id < 0) {
1061                 pr_err("Failed to allocate seq_id\n");
1062                 err = iommu->seq_id;
1063                 goto error;
1064         }
1065         sprintf(iommu->name, "dmar%d", iommu->seq_id);
1066
1067         err = map_iommu(iommu, drhd);
1068         if (err) {
1069                 pr_err("Failed to map %s\n", iommu->name);
1070                 goto error_free_seq_id;
1071         }
1072
1073         err = -EINVAL;
1074         if (!cap_sagaw(iommu->cap) &&
1075             (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
1076                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1077                         iommu->name);
1078                 drhd->ignored = 1;
1079         }
1080
1081         if (!drhd->ignored) {
1082                 agaw = iommu_calculate_agaw(iommu);
1083                 if (agaw < 0) {
1084                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1085                                iommu->seq_id);
1086                         drhd->ignored = 1;
1087                 }
1088         }
1089         if (!drhd->ignored) {
1090                 msagaw = iommu_calculate_max_sagaw(iommu);
1091                 if (msagaw < 0) {
1092                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1093                                iommu->seq_id);
1094                         drhd->ignored = 1;
1095                         agaw = -1;
1096                 }
1097         }
1098         iommu->agaw = agaw;
1099         iommu->msagaw = msagaw;
1100         iommu->segment = drhd->segment;
1101
1102         iommu->node = NUMA_NO_NODE;
1103
1104         ver = readl(iommu->reg + DMAR_VER_REG);
1105         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1106                 iommu->name,
1107                 (unsigned long long)drhd->reg_base_addr,
1108                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1109                 (unsigned long long)iommu->cap,
1110                 (unsigned long long)iommu->ecap);
1111
1112         /* Reflect status in gcmd */
1113         sts = readl(iommu->reg + DMAR_GSTS_REG);
1114         if (sts & DMA_GSTS_IRES)
1115                 iommu->gcmd |= DMA_GCMD_IRE;
1116         if (sts & DMA_GSTS_TES)
1117                 iommu->gcmd |= DMA_GCMD_TE;
1118         if (sts & DMA_GSTS_QIES)
1119                 iommu->gcmd |= DMA_GCMD_QIE;
1120
1121         if (alloc_iommu_pmu(iommu))
1122                 pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id);
1123
1124         raw_spin_lock_init(&iommu->register_lock);
1125
1126         /*
1127          * A value of N in PSS field of eCap register indicates hardware
1128          * supports PASID field of N+1 bits.
1129          */
1130         if (pasid_supported(iommu))
1131                 iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap);
1132
1133         /*
1134          * This is only for hotplug; at boot time intel_iommu_enabled won't
1135          * be set yet. When intel_iommu_init() runs, it registers the units
1136          * present at boot time, then sets intel_iommu_enabled.
1137          */
1138         if (intel_iommu_enabled && !drhd->ignored) {
1139                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1140                                              intel_iommu_groups,
1141                                              "%s", iommu->name);
1142                 if (err)
1143                         goto err_unmap;
1144
1145                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1146                 if (err)
1147                         goto err_sysfs;
1148
1149                 iommu_pmu_register(iommu);
1150         }
1151
1152         drhd->iommu = iommu;
1153         iommu->drhd = drhd;
1154
1155         return 0;
1156
1157 err_sysfs:
1158         iommu_device_sysfs_remove(&iommu->iommu);
1159 err_unmap:
1160         free_iommu_pmu(iommu);
1161         unmap_iommu(iommu);
1162 error_free_seq_id:
1163         ida_free(&dmar_seq_ids, iommu->seq_id);
1164 error:
1165         kfree(iommu);
1166         return err;
1167 }
1168
1169 static void free_iommu(struct intel_iommu *iommu)
1170 {
1171         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1172                 iommu_pmu_unregister(iommu);
1173                 iommu_device_unregister(&iommu->iommu);
1174                 iommu_device_sysfs_remove(&iommu->iommu);
1175         }
1176
1177         free_iommu_pmu(iommu);
1178
1179         if (iommu->irq) {
1180                 if (iommu->pr_irq) {
1181                         free_irq(iommu->pr_irq, iommu);
1182                         dmar_free_hwirq(iommu->pr_irq);
1183                         iommu->pr_irq = 0;
1184                 }
1185                 free_irq(iommu->irq, iommu);
1186                 dmar_free_hwirq(iommu->irq);
1187                 iommu->irq = 0;
1188         }
1189
1190         if (iommu->qi) {
1191                 free_page((unsigned long)iommu->qi->desc);
1192                 kfree(iommu->qi->desc_status);
1193                 kfree(iommu->qi);
1194         }
1195
1196         if (iommu->reg)
1197                 unmap_iommu(iommu);
1198
1199         ida_free(&dmar_seq_ids, iommu->seq_id);
1200         kfree(iommu);
1201 }
1202
1203 /*
1204  * Reclaim all the submitted descriptors which have completed its work.
1205  */
1206 static inline void reclaim_free_desc(struct q_inval *qi)
1207 {
1208         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1209                qi->desc_status[qi->free_tail] == QI_ABORT) {
1210                 qi->desc_status[qi->free_tail] = QI_FREE;
1211                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1212                 qi->free_cnt++;
1213         }
1214 }
1215
1216 static const char *qi_type_string(u8 type)
1217 {
1218         switch (type) {
1219         case QI_CC_TYPE:
1220                 return "Context-cache Invalidation";
1221         case QI_IOTLB_TYPE:
1222                 return "IOTLB Invalidation";
1223         case QI_DIOTLB_TYPE:
1224                 return "Device-TLB Invalidation";
1225         case QI_IEC_TYPE:
1226                 return "Interrupt Entry Cache Invalidation";
1227         case QI_IWD_TYPE:
1228                 return "Invalidation Wait";
1229         case QI_EIOTLB_TYPE:
1230                 return "PASID-based IOTLB Invalidation";
1231         case QI_PC_TYPE:
1232                 return "PASID-cache Invalidation";
1233         case QI_DEIOTLB_TYPE:
1234                 return "PASID-based Device-TLB Invalidation";
1235         case QI_PGRP_RESP_TYPE:
1236                 return "Page Group Response";
1237         default:
1238                 return "UNKNOWN";
1239         }
1240 }
1241
1242 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1243 {
1244         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1245         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1246         struct qi_desc *desc = iommu->qi->desc + head;
1247
1248         if (fault & DMA_FSTS_IQE)
1249                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1250                        DMAR_IQER_REG_IQEI(iqe_err));
1251         if (fault & DMA_FSTS_ITE)
1252                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1253                        DMAR_IQER_REG_ITESID(iqe_err));
1254         if (fault & DMA_FSTS_ICE)
1255                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1256                        DMAR_IQER_REG_ICESID(iqe_err));
1257
1258         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1259                qi_type_string(desc->qw0 & 0xf),
1260                (unsigned long long)desc->qw0,
1261                (unsigned long long)desc->qw1);
1262
1263         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1264         head <<= qi_shift(iommu);
1265         desc = iommu->qi->desc + head;
1266
1267         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1268                qi_type_string(desc->qw0 & 0xf),
1269                (unsigned long long)desc->qw0,
1270                (unsigned long long)desc->qw1);
1271 }
1272
1273 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1274 {
1275         u32 fault;
1276         int head, tail;
1277         struct q_inval *qi = iommu->qi;
1278         int shift = qi_shift(iommu);
1279
1280         if (qi->desc_status[wait_index] == QI_ABORT)
1281                 return -EAGAIN;
1282
1283         fault = readl(iommu->reg + DMAR_FSTS_REG);
1284         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1285                 qi_dump_fault(iommu, fault);
1286
1287         /*
1288          * If IQE happens, the head points to the descriptor associated
1289          * with the error. No new descriptors are fetched until the IQE
1290          * is cleared.
1291          */
1292         if (fault & DMA_FSTS_IQE) {
1293                 head = readl(iommu->reg + DMAR_IQH_REG);
1294                 if ((head >> shift) == index) {
1295                         struct qi_desc *desc = qi->desc + head;
1296
1297                         /*
1298                          * desc->qw2 and desc->qw3 are either reserved or
1299                          * used by software as private data. We won't print
1300                          * out these two qw's for security consideration.
1301                          */
1302                         memcpy(desc, qi->desc + (wait_index << shift),
1303                                1 << shift);
1304                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1305                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1306                         return -EINVAL;
1307                 }
1308         }
1309
1310         /*
1311          * If ITE happens, all pending wait_desc commands are aborted.
1312          * No new descriptors are fetched until the ITE is cleared.
1313          */
1314         if (fault & DMA_FSTS_ITE) {
1315                 head = readl(iommu->reg + DMAR_IQH_REG);
1316                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1317                 head |= 1;
1318                 tail = readl(iommu->reg + DMAR_IQT_REG);
1319                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1320
1321                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1322                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1323
1324                 do {
1325                         if (qi->desc_status[head] == QI_IN_USE)
1326                                 qi->desc_status[head] = QI_ABORT;
1327                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1328                 } while (head != tail);
1329
1330                 if (qi->desc_status[wait_index] == QI_ABORT)
1331                         return -EAGAIN;
1332         }
1333
1334         if (fault & DMA_FSTS_ICE) {
1335                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1336                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1337         }
1338
1339         return 0;
1340 }
1341
1342 /*
1343  * Function to submit invalidation descriptors of all types to the queued
1344  * invalidation interface(QI). Multiple descriptors can be submitted at a
1345  * time, a wait descriptor will be appended to each submission to ensure
1346  * hardware has completed the invalidation before return. Wait descriptors
1347  * can be part of the submission but it will not be polled for completion.
1348  */
1349 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1350                    unsigned int count, unsigned long options)
1351 {
1352         struct q_inval *qi = iommu->qi;
1353         s64 devtlb_start_ktime = 0;
1354         s64 iotlb_start_ktime = 0;
1355         s64 iec_start_ktime = 0;
1356         struct qi_desc wait_desc;
1357         int wait_index, index;
1358         unsigned long flags;
1359         int offset, shift;
1360         int rc, i;
1361         u64 type;
1362
1363         if (!qi)
1364                 return 0;
1365
1366         type = desc->qw0 & GENMASK_ULL(3, 0);
1367
1368         if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1369             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1370                 iotlb_start_ktime = ktime_to_ns(ktime_get());
1371
1372         if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1373             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1374                 devtlb_start_ktime = ktime_to_ns(ktime_get());
1375
1376         if (type == QI_IEC_TYPE &&
1377             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1378                 iec_start_ktime = ktime_to_ns(ktime_get());
1379
1380 restart:
1381         rc = 0;
1382
1383         raw_spin_lock_irqsave(&qi->q_lock, flags);
1384         /*
1385          * Check if we have enough empty slots in the queue to submit,
1386          * the calculation is based on:
1387          * # of desc + 1 wait desc + 1 space between head and tail
1388          */
1389         while (qi->free_cnt < count + 2) {
1390                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1391                 cpu_relax();
1392                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1393         }
1394
1395         index = qi->free_head;
1396         wait_index = (index + count) % QI_LENGTH;
1397         shift = qi_shift(iommu);
1398
1399         for (i = 0; i < count; i++) {
1400                 offset = ((index + i) % QI_LENGTH) << shift;
1401                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1402                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1403                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1404                                 desc[i].qw2, desc[i].qw3);
1405         }
1406         qi->desc_status[wait_index] = QI_IN_USE;
1407
1408         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1409                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1410         if (options & QI_OPT_WAIT_DRAIN)
1411                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1412         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1413         wait_desc.qw2 = 0;
1414         wait_desc.qw3 = 0;
1415
1416         offset = wait_index << shift;
1417         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1418
1419         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1420         qi->free_cnt -= count + 1;
1421
1422         /*
1423          * update the HW tail register indicating the presence of
1424          * new descriptors.
1425          */
1426         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1427
1428         while (qi->desc_status[wait_index] != QI_DONE) {
1429                 /*
1430                  * We will leave the interrupts disabled, to prevent interrupt
1431                  * context to queue another cmd while a cmd is already submitted
1432                  * and waiting for completion on this cpu. This is to avoid
1433                  * a deadlock where the interrupt context can wait indefinitely
1434                  * for free slots in the queue.
1435                  */
1436                 rc = qi_check_fault(iommu, index, wait_index);
1437                 if (rc)
1438                         break;
1439
1440                 raw_spin_unlock(&qi->q_lock);
1441                 cpu_relax();
1442                 raw_spin_lock(&qi->q_lock);
1443         }
1444
1445         for (i = 0; i < count; i++)
1446                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1447
1448         reclaim_free_desc(qi);
1449         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1450
1451         if (rc == -EAGAIN)
1452                 goto restart;
1453
1454         if (iotlb_start_ktime)
1455                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1456                                 ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1457
1458         if (devtlb_start_ktime)
1459                 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1460                                 ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1461
1462         if (iec_start_ktime)
1463                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1464                                 ktime_to_ns(ktime_get()) - iec_start_ktime);
1465
1466         return rc;
1467 }
1468
1469 /*
1470  * Flush the global interrupt entry cache.
1471  */
1472 void qi_global_iec(struct intel_iommu *iommu)
1473 {
1474         struct qi_desc desc;
1475
1476         desc.qw0 = QI_IEC_TYPE;
1477         desc.qw1 = 0;
1478         desc.qw2 = 0;
1479         desc.qw3 = 0;
1480
1481         /* should never fail */
1482         qi_submit_sync(iommu, &desc, 1, 0);
1483 }
1484
1485 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1486                       u64 type)
1487 {
1488         struct qi_desc desc;
1489
1490         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1491                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1492         desc.qw1 = 0;
1493         desc.qw2 = 0;
1494         desc.qw3 = 0;
1495
1496         qi_submit_sync(iommu, &desc, 1, 0);
1497 }
1498
1499 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1500                     unsigned int size_order, u64 type)
1501 {
1502         u8 dw = 0, dr = 0;
1503
1504         struct qi_desc desc;
1505         int ih = 0;
1506
1507         if (cap_write_drain(iommu->cap))
1508                 dw = 1;
1509
1510         if (cap_read_drain(iommu->cap))
1511                 dr = 1;
1512
1513         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1514                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1515         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1516                 | QI_IOTLB_AM(size_order);
1517         desc.qw2 = 0;
1518         desc.qw3 = 0;
1519
1520         qi_submit_sync(iommu, &desc, 1, 0);
1521 }
1522
1523 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1524                         u16 qdep, u64 addr, unsigned mask)
1525 {
1526         struct qi_desc desc;
1527
1528         if (mask) {
1529                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1530                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1531         } else
1532                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1533
1534         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1535                 qdep = 0;
1536
1537         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1538                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1539         desc.qw2 = 0;
1540         desc.qw3 = 0;
1541
1542         qi_submit_sync(iommu, &desc, 1, 0);
1543 }
1544
1545 /* PASID-based IOTLB invalidation */
1546 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1547                      unsigned long npages, bool ih)
1548 {
1549         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1550
1551         /*
1552          * npages == -1 means a PASID-selective invalidation, otherwise,
1553          * a positive value for Page-selective-within-PASID invalidation.
1554          * 0 is not a valid input.
1555          */
1556         if (WARN_ON(!npages)) {
1557                 pr_err("Invalid input npages = %ld\n", npages);
1558                 return;
1559         }
1560
1561         if (npages == -1) {
1562                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1563                                 QI_EIOTLB_DID(did) |
1564                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1565                                 QI_EIOTLB_TYPE;
1566                 desc.qw1 = 0;
1567         } else {
1568                 int mask = ilog2(__roundup_pow_of_two(npages));
1569                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1570
1571                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1572                         addr = ALIGN_DOWN(addr, align);
1573
1574                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1575                                 QI_EIOTLB_DID(did) |
1576                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1577                                 QI_EIOTLB_TYPE;
1578                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1579                                 QI_EIOTLB_IH(ih) |
1580                                 QI_EIOTLB_AM(mask);
1581         }
1582
1583         qi_submit_sync(iommu, &desc, 1, 0);
1584 }
1585
1586 /* PASID-based device IOTLB Invalidate */
1587 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1588                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1589 {
1590         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1591         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1592
1593         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1594                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1595                 QI_DEV_IOTLB_PFSID(pfsid);
1596
1597         /*
1598          * If S bit is 0, we only flush a single page. If S bit is set,
1599          * The least significant zero bit indicates the invalidation address
1600          * range. VT-d spec 6.5.2.6.
1601          * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1602          * size order = 0 is PAGE_SIZE 4KB
1603          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1604          * ECAP.
1605          */
1606         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1607                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1608                                     addr, size_order);
1609
1610         /* Take page address */
1611         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1612
1613         if (size_order) {
1614                 /*
1615                  * Existing 0s in address below size_order may be the least
1616                  * significant bit, we must set them to 1s to avoid having
1617                  * smaller size than desired.
1618                  */
1619                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1620                                         VTD_PAGE_SHIFT);
1621                 /* Clear size_order bit to indicate size */
1622                 desc.qw1 &= ~mask;
1623                 /* Set the S bit to indicate flushing more than 1 page */
1624                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1625         }
1626
1627         qi_submit_sync(iommu, &desc, 1, 0);
1628 }
1629
1630 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1631                           u64 granu, u32 pasid)
1632 {
1633         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1634
1635         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1636                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1637         qi_submit_sync(iommu, &desc, 1, 0);
1638 }
1639
1640 /*
1641  * Disable Queued Invalidation interface.
1642  */
1643 void dmar_disable_qi(struct intel_iommu *iommu)
1644 {
1645         unsigned long flags;
1646         u32 sts;
1647         cycles_t start_time = get_cycles();
1648
1649         if (!ecap_qis(iommu->ecap))
1650                 return;
1651
1652         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1653
1654         sts =  readl(iommu->reg + DMAR_GSTS_REG);
1655         if (!(sts & DMA_GSTS_QIES))
1656                 goto end;
1657
1658         /*
1659          * Give a chance to HW to complete the pending invalidation requests.
1660          */
1661         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1662                 readl(iommu->reg + DMAR_IQH_REG)) &&
1663                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1664                 cpu_relax();
1665
1666         iommu->gcmd &= ~DMA_GCMD_QIE;
1667         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1668
1669         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1670                       !(sts & DMA_GSTS_QIES), sts);
1671 end:
1672         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1673 }
1674
1675 /*
1676  * Enable queued invalidation.
1677  */
1678 static void __dmar_enable_qi(struct intel_iommu *iommu)
1679 {
1680         u32 sts;
1681         unsigned long flags;
1682         struct q_inval *qi = iommu->qi;
1683         u64 val = virt_to_phys(qi->desc);
1684
1685         qi->free_head = qi->free_tail = 0;
1686         qi->free_cnt = QI_LENGTH;
1687
1688         /*
1689          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1690          * is present.
1691          */
1692         if (ecap_smts(iommu->ecap))
1693                 val |= (1 << 11) | 1;
1694
1695         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1696
1697         /* write zero to the tail reg */
1698         writel(0, iommu->reg + DMAR_IQT_REG);
1699
1700         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1701
1702         iommu->gcmd |= DMA_GCMD_QIE;
1703         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1704
1705         /* Make sure hardware complete it */
1706         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1707
1708         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1709 }
1710
1711 /*
1712  * Enable Queued Invalidation interface. This is a must to support
1713  * interrupt-remapping. Also used by DMA-remapping, which replaces
1714  * register based IOTLB invalidation.
1715  */
1716 int dmar_enable_qi(struct intel_iommu *iommu)
1717 {
1718         struct q_inval *qi;
1719         struct page *desc_page;
1720
1721         if (!ecap_qis(iommu->ecap))
1722                 return -ENOENT;
1723
1724         /*
1725          * queued invalidation is already setup and enabled.
1726          */
1727         if (iommu->qi)
1728                 return 0;
1729
1730         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1731         if (!iommu->qi)
1732                 return -ENOMEM;
1733
1734         qi = iommu->qi;
1735
1736         /*
1737          * Need two pages to accommodate 256 descriptors of 256 bits each
1738          * if the remapping hardware supports scalable mode translation.
1739          */
1740         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1741                                      !!ecap_smts(iommu->ecap));
1742         if (!desc_page) {
1743                 kfree(qi);
1744                 iommu->qi = NULL;
1745                 return -ENOMEM;
1746         }
1747
1748         qi->desc = page_address(desc_page);
1749
1750         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1751         if (!qi->desc_status) {
1752                 free_page((unsigned long) qi->desc);
1753                 kfree(qi);
1754                 iommu->qi = NULL;
1755                 return -ENOMEM;
1756         }
1757
1758         raw_spin_lock_init(&qi->q_lock);
1759
1760         __dmar_enable_qi(iommu);
1761
1762         return 0;
1763 }
1764
/* IOMMU interrupt handling. Most of it is MSI-like. */
1766
/* Category a fault reason code is sorted into when it is decoded. */
enum faulttype {
	DMA_REMAP = 0,	/* DMA remapping fault */
	INTR_REMAP,	/* interrupt remapping fault */
	UNKNOWN,	/* reason code not found in any table */
};
1772
/*
 * Legacy-mode DMA remapping fault reasons, indexed directly by the
 * fault reason code. Both the strings and the pointer array are const,
 * matching dma_remap_sm_fault_reasons.
 */
static const char * const dma_remap_fault_reasons[] = {
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};
1790
/*
 * Scalable-mode DMA remapping fault reasons. Entry i describes fault
 * reason code 0x30 + i; codes without a message are filled with "Unknown".
 */
static const char * const dma_remap_sm_fault_reasons[] = {
	/* 0x30-0x32 */
	"SM: Invalid Root Table Address",
	"SM: TTM 0 for request with PASID",
	"SM: TTM 0 for page group request",
	/* 0x33-0x37 */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown",
	/* 0x38-0x3A */
	"SM: Error attempting to access Root Entry",
	"SM: Present bit in Root Entry is clear",
	"SM: Non-zero reserved field set in Root Entry",
	/* 0x3B-0x3F */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown",
	/* 0x40-0x48 */
	"SM: Error attempting to access Context Entry",
	"SM: Present bit in Context Entry is clear",
	"SM: Non-zero reserved field set in the Context Entry",
	"SM: Invalid Context Entry",
	"SM: DTE field in Context Entry is clear",
	"SM: PASID Enable field in Context Entry is clear",
	"SM: PASID is larger than the max in Context Entry",
	"SM: PRE field in Context-Entry is clear",
	"SM: RID_PASID field error in Context-Entry",
	/* 0x49-0x4F */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown",
	/* 0x50-0x52 */
	"SM: Error attempting to access the PASID Directory Entry",
	"SM: Present bit in Directory Entry is clear",
	"SM: Non-zero reserved field set in PASID Directory Entry",
	/* 0x53-0x57 */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown",
	/* 0x58-0x5D */
	"SM: Error attempting to access PASID Table Entry",
	"SM: Present bit in PASID Table Entry is clear",
	"SM: Non-zero reserved field set in PASID Table Entry",
	"SM: Invalid Scalable-Mode PASID Table Entry",
	"SM: ERE field is clear in PASID Table Entry",
	"SM: SRE field is clear in PASID Table Entry",
	/* 0x5E-0x5F */
	"Unknown", "Unknown",
	/* 0x60-0x67 */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown", "Unknown",
	/* 0x68-0x6F */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown", "Unknown",
	/* 0x70-0x7C */
	"SM: Error attempting to access first-level paging entry",
	"SM: Present bit in first-level paging entry is clear",
	"SM: Non-zero reserved field set in first-level paging entry",
	"SM: Error attempting to access FL-PML4 entry",
	"SM: First-level entry address beyond MGAW in Nested translation",
	"SM: Read permission error in FL-PML4 entry in Nested translation",
	"SM: Read permission error in first-level paging entry in Nested translation",
	"SM: Write permission error in first-level paging entry in Nested translation",
	"SM: Error attempting to access second-level paging entry",
	"SM: Read/Write permission error in second-level paging entry",
	"SM: Non-zero reserved field set in second-level paging entry",
	"SM: Invalid second-level page table pointer",
	"SM: A/D bit update needed in second-level entry when set up in no snoop",
	/* 0x7D-0x7F */
	"Unknown", "Unknown", "Unknown",
	/* 0x80-0x87 */
	"SM: Address in first-level translation is not canonical",
	"SM: U/S set 0 for first-level translation with user privilege",
	"SM: No execute permission for request with PASID and ER=1",
	"SM: Address beyond the DMA hardware max",
	"SM: Second-level entry address beyond the max",
	"SM: No write permission for Write/AtomicOp request",
	"SM: No read permission for Read/AtomicOp request",
	"SM: Invalid address-interrupt address",
	/* 0x88-0x8F */
	"Unknown", "Unknown", "Unknown", "Unknown",
	"Unknown", "Unknown", "Unknown", "Unknown",
	/* 0x90 */
	"SM: A/D bit update needed in first-level entry when set up in no snoop",
};
1848
/*
 * Interrupt remapping fault reasons. Entry i describes fault reason
 * code 0x20 + i. Both the strings and the pointer array are const,
 * matching dma_remap_sm_fault_reasons.
 */
static const char * const irq_remap_fault_reasons[] = {
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
1859
1860 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1861 {
1862         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1863                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1864                 *fault_type = INTR_REMAP;
1865                 return irq_remap_fault_reasons[fault_reason - 0x20];
1866         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1867                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1868                 *fault_type = DMA_REMAP;
1869                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1870         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1871                 *fault_type = DMA_REMAP;
1872                 return dma_remap_fault_reasons[fault_reason];
1873         } else {
1874                 *fault_type = UNKNOWN;
1875                 return "Unknown";
1876         }
1877 }
1878
1879
1880 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1881 {
1882         if (iommu->irq == irq)
1883                 return DMAR_FECTL_REG;
1884         else if (iommu->pr_irq == irq)
1885                 return DMAR_PECTL_REG;
1886         else if (iommu->perf_irq == irq)
1887                 return DMAR_PERFINTRCTL_REG;
1888         else
1889                 BUG();
1890 }
1891
1892 void dmar_msi_unmask(struct irq_data *data)
1893 {
1894         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1895         int reg = dmar_msi_reg(iommu, data->irq);
1896         unsigned long flag;
1897
1898         /* unmask it */
1899         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1900         writel(0, iommu->reg + reg);
1901         /* Read a reg to force flush the post write */
1902         readl(iommu->reg + reg);
1903         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1904 }
1905
1906 void dmar_msi_mask(struct irq_data *data)
1907 {
1908         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1909         int reg = dmar_msi_reg(iommu, data->irq);
1910         unsigned long flag;
1911
1912         /* mask it */
1913         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1914         writel(DMA_FECTL_IM, iommu->reg + reg);
1915         /* Read a reg to force flush the post write */
1916         readl(iommu->reg + reg);
1917         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1918 }
1919
1920 void dmar_msi_write(int irq, struct msi_msg *msg)
1921 {
1922         struct intel_iommu *iommu = irq_get_handler_data(irq);
1923         int reg = dmar_msi_reg(iommu, irq);
1924         unsigned long flag;
1925
1926         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1927         writel(msg->data, iommu->reg + reg + 4);
1928         writel(msg->address_lo, iommu->reg + reg + 8);
1929         writel(msg->address_hi, iommu->reg + reg + 12);
1930         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1931 }
1932
1933 void dmar_msi_read(int irq, struct msi_msg *msg)
1934 {
1935         struct intel_iommu *iommu = irq_get_handler_data(irq);
1936         int reg = dmar_msi_reg(iommu, irq);
1937         unsigned long flag;
1938
1939         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1940         msg->data = readl(iommu->reg + reg + 4);
1941         msg->address_lo = readl(iommu->reg + reg + 8);
1942         msg->address_hi = readl(iommu->reg + reg + 12);
1943         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1944 }
1945
1946 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1947                 u8 fault_reason, u32 pasid, u16 source_id,
1948                 unsigned long long addr)
1949 {
1950         const char *reason;
1951         int fault_type;
1952
1953         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1954
1955         if (fault_type == INTR_REMAP) {
1956                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1957                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1958                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1959                        fault_reason, reason);
1960
1961                 return 0;
1962         }
1963
1964         if (pasid == INVALID_IOASID)
1965                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1966                        type ? "DMA Read" : "DMA Write",
1967                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1968                        PCI_FUNC(source_id & 0xFF), addr,
1969                        fault_reason, reason);
1970         else
1971                 pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1972                        type ? "DMA Read" : "DMA Write", pasid,
1973                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1974                        PCI_FUNC(source_id & 0xFF), addr,
1975                        fault_reason, reason);
1976
1977         dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1978
1979         return 0;
1980 }
1981
1982 #define PRIMARY_FAULT_REG_LEN (16)
1983 irqreturn_t dmar_fault(int irq, void *dev_id)
1984 {
1985         struct intel_iommu *iommu = dev_id;
1986         int reg, fault_index;
1987         u32 fault_status;
1988         unsigned long flag;
1989         static DEFINE_RATELIMIT_STATE(rs,
1990                                       DEFAULT_RATELIMIT_INTERVAL,
1991                                       DEFAULT_RATELIMIT_BURST);
1992
1993         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1994         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1995         if (fault_status && __ratelimit(&rs))
1996                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1997
1998         /* TBD: ignore advanced fault log currently */
1999         if (!(fault_status & DMA_FSTS_PPF))
2000                 goto unlock_exit;
2001
2002         fault_index = dma_fsts_fault_record_index(fault_status);
2003         reg = cap_fault_reg_offset(iommu->cap);
2004         while (1) {
2005                 /* Disable printing, simply clear the fault when ratelimited */
2006                 bool ratelimited = !__ratelimit(&rs);
2007                 u8 fault_reason;
2008                 u16 source_id;
2009                 u64 guest_addr;
2010                 u32 pasid;
2011                 int type;
2012                 u32 data;
2013                 bool pasid_present;
2014
2015                 /* highest 32 bits */
2016                 data = readl(iommu->reg + reg +
2017                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
2018                 if (!(data & DMA_FRCD_F))
2019                         break;
2020
2021                 if (!ratelimited) {
2022                         fault_reason = dma_frcd_fault_reason(data);
2023                         type = dma_frcd_type(data);
2024
2025                         pasid = dma_frcd_pasid_value(data);
2026                         data = readl(iommu->reg + reg +
2027                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
2028                         source_id = dma_frcd_source_id(data);
2029
2030                         pasid_present = dma_frcd_pasid_present(data);
2031                         guest_addr = dmar_readq(iommu->reg + reg +
2032                                         fault_index * PRIMARY_FAULT_REG_LEN);
2033                         guest_addr = dma_frcd_page_addr(guest_addr);
2034                 }
2035
2036                 /* clear the fault */
2037                 writel(DMA_FRCD_F, iommu->reg + reg +
2038                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
2039
2040                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2041
2042                 if (!ratelimited)
2043                         /* Using pasid -1 if pasid is not present */
2044                         dmar_fault_do_one(iommu, type, fault_reason,
2045                                           pasid_present ? pasid : INVALID_IOASID,
2046                                           source_id, guest_addr);
2047
2048                 fault_index++;
2049                 if (fault_index >= cap_num_fault_regs(iommu->cap))
2050                         fault_index = 0;
2051                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2052         }
2053
2054         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2055                iommu->reg + DMAR_FSTS_REG);
2056
2057 unlock_exit:
2058         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2059         return IRQ_HANDLED;
2060 }
2061
2062 int dmar_set_interrupt(struct intel_iommu *iommu)
2063 {
2064         int irq, ret;
2065
2066         /*
2067          * Check if the fault interrupt is already initialized.
2068          */
2069         if (iommu->irq)
2070                 return 0;
2071
2072         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2073         if (irq > 0) {
2074                 iommu->irq = irq;
2075         } else {
2076                 pr_err("No free IRQ vectors\n");
2077                 return -EINVAL;
2078         }
2079
2080         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2081         if (ret)
2082                 pr_err("Can't request irq\n");
2083         return ret;
2084 }
2085
2086 int __init enable_drhd_fault_handling(void)
2087 {
2088         struct dmar_drhd_unit *drhd;
2089         struct intel_iommu *iommu;
2090
2091         /*
2092          * Enable fault control interrupt.
2093          */
2094         for_each_iommu(iommu, drhd) {
2095                 u32 fault_status;
2096                 int ret = dmar_set_interrupt(iommu);
2097
2098                 if (ret) {
2099                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
2100                                (unsigned long long)drhd->reg_base_addr, ret);
2101                         return -1;
2102                 }
2103
2104                 /*
2105                  * Clear any previous faults.
2106                  */
2107                 dmar_fault(iommu->irq, iommu);
2108                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2109                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2110         }
2111
2112         return 0;
2113 }
2114
2115 /*
2116  * Re-enable Queued Invalidation interface.
2117  */
2118 int dmar_reenable_qi(struct intel_iommu *iommu)
2119 {
2120         if (!ecap_qis(iommu->ecap))
2121                 return -ENOENT;
2122
2123         if (!iommu->qi)
2124                 return -ENOENT;
2125
2126         /*
2127          * First disable queued invalidation.
2128          */
2129         dmar_disable_qi(iommu);
2130         /*
2131          * Then enable queued invalidation again. Since there is no pending
2132          * invalidation requests now, it's safe to re-enable queued
2133          * invalidation.
2134          */
2135         __dmar_enable_qi(iommu);
2136
2137         return 0;
2138 }
2139
2140 /*
2141  * Check interrupt remapping support in DMAR table description.
2142  */
2143 int __init dmar_ir_support(void)
2144 {
2145         struct acpi_table_dmar *dmar;
2146         dmar = (struct acpi_table_dmar *)dmar_tbl;
2147         if (!dmar)
2148                 return 0;
2149         return dmar->flags & 0x1;
2150 }
2151
2152 /* Check whether DMAR units are in use */
2153 static inline bool dmar_in_use(void)
2154 {
2155         return irq_remapping_enabled || intel_iommu_enabled;
2156 }
2157
2158 static int __init dmar_free_unused_resources(void)
2159 {
2160         struct dmar_drhd_unit *dmaru, *dmaru_n;
2161
2162         if (dmar_in_use())
2163                 return 0;
2164
2165         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2166                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2167
2168         down_write(&dmar_global_lock);
2169         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2170                 list_del(&dmaru->list);
2171                 dmar_free_drhd(dmaru);
2172         }
2173         up_write(&dmar_global_lock);
2174
2175         return 0;
2176 }
2177
2178 late_initcall(dmar_free_unused_resources);
2179
2180 /*
2181  * DMAR Hotplug Support
2182  * For more details, please refer to Intel(R) Virtualization Technology
2183  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
2184  * "Remapping Hardware Unit Hot Plug".
2185  */
2186 static guid_t dmar_hp_guid =
2187         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2188                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2189
2190 /*
2191  * Currently there's only one revision and BIOS will not check the revision id,
2192  * so use 0 for safety.
2193  */
2194 #define DMAR_DSM_REV_ID                 0
2195 #define DMAR_DSM_FUNC_DRHD              1
2196 #define DMAR_DSM_FUNC_ATSR              2
2197 #define DMAR_DSM_FUNC_RHSA              3
2198 #define DMAR_DSM_FUNC_SATC              4
2199
2200 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2201 {
2202         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2203 }
2204
2205 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2206                                   dmar_res_handler_t handler, void *arg)
2207 {
2208         int ret = -ENODEV;
2209         union acpi_object *obj;
2210         struct acpi_dmar_header *start;
2211         struct dmar_res_callback callback;
2212         static int res_type[] = {
2213                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2214                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2215                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2216                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2217         };
2218
2219         if (!dmar_detect_dsm(handle, func))
2220                 return 0;
2221
2222         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2223                                       func, NULL, ACPI_TYPE_BUFFER);
2224         if (!obj)
2225                 return -ENODEV;
2226
2227         memset(&callback, 0, sizeof(callback));
2228         callback.cb[res_type[func]] = handler;
2229         callback.arg[res_type[func]] = arg;
2230         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2231         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2232
2233         ACPI_FREE(obj);
2234
2235         return ret;
2236 }
2237
2238 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2239 {
2240         int ret;
2241         struct dmar_drhd_unit *dmaru;
2242
2243         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2244         if (!dmaru)
2245                 return -ENODEV;
2246
2247         ret = dmar_ir_hotplug(dmaru, true);
2248         if (ret == 0)
2249                 ret = dmar_iommu_hotplug(dmaru, true);
2250
2251         return ret;
2252 }
2253
2254 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2255 {
2256         int i, ret;
2257         struct device *dev;
2258         struct dmar_drhd_unit *dmaru;
2259
2260         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2261         if (!dmaru)
2262                 return 0;
2263
2264         /*
2265          * All PCI devices managed by this unit should have been destroyed.
2266          */
2267         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2268                 for_each_active_dev_scope(dmaru->devices,
2269                                           dmaru->devices_cnt, i, dev)
2270                         return -EBUSY;
2271         }
2272
2273         ret = dmar_ir_hotplug(dmaru, false);
2274         if (ret == 0)
2275                 ret = dmar_iommu_hotplug(dmaru, false);
2276
2277         return ret;
2278 }
2279
2280 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2281 {
2282         struct dmar_drhd_unit *dmaru;
2283
2284         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2285         if (dmaru) {
2286                 list_del_rcu(&dmaru->list);
2287                 synchronize_rcu();
2288                 dmar_free_drhd(dmaru);
2289         }
2290
2291         return 0;
2292 }
2293
2294 static int dmar_hotplug_insert(acpi_handle handle)
2295 {
2296         int ret;
2297         int drhd_count = 0;
2298
2299         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2300                                      &dmar_validate_one_drhd, (void *)1);
2301         if (ret)
2302                 goto out;
2303
2304         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2305                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2306         if (ret == 0 && drhd_count == 0) {
2307                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2308                 goto out;
2309         } else if (ret) {
2310                 goto release_drhd;
2311         }
2312
2313         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2314                                      &dmar_parse_one_rhsa, NULL);
2315         if (ret)
2316                 goto release_drhd;
2317
2318         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2319                                      &dmar_parse_one_atsr, NULL);
2320         if (ret)
2321                 goto release_atsr;
2322
2323         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2324                                      &dmar_hp_add_drhd, NULL);
2325         if (!ret)
2326                 return 0;
2327
2328         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2329                                &dmar_hp_remove_drhd, NULL);
2330 release_atsr:
2331         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2332                                &dmar_release_one_atsr, NULL);
2333 release_drhd:
2334         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2335                                &dmar_hp_release_drhd, NULL);
2336 out:
2337         return ret;
2338 }
2339
2340 static int dmar_hotplug_remove(acpi_handle handle)
2341 {
2342         int ret;
2343
2344         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2345                                      &dmar_check_one_atsr, NULL);
2346         if (ret)
2347                 return ret;
2348
2349         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2350                                      &dmar_hp_remove_drhd, NULL);
2351         if (ret == 0) {
2352                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2353                                                &dmar_release_one_atsr, NULL));
2354                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2355                                                &dmar_hp_release_drhd, NULL));
2356         } else {
2357                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2358                                        &dmar_hp_add_drhd, NULL);
2359         }
2360
2361         return ret;
2362 }
2363
2364 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2365                                        void *context, void **retval)
2366 {
2367         acpi_handle *phdl = retval;
2368
2369         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2370                 *phdl = handle;
2371                 return AE_CTRL_TERMINATE;
2372         }
2373
2374         return AE_OK;
2375 }
2376
2377 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2378 {
2379         int ret;
2380         acpi_handle tmp = NULL;
2381         acpi_status status;
2382
2383         if (!dmar_in_use())
2384                 return 0;
2385
2386         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2387                 tmp = handle;
2388         } else {
2389                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2390                                              ACPI_UINT32_MAX,
2391                                              dmar_get_dsm_handle,
2392                                              NULL, NULL, &tmp);
2393                 if (ACPI_FAILURE(status)) {
2394                         pr_warn("Failed to locate _DSM method.\n");
2395                         return -ENXIO;
2396                 }
2397         }
2398         if (tmp == NULL)
2399                 return 0;
2400
2401         down_write(&dmar_global_lock);
2402         if (insert)
2403                 ret = dmar_hotplug_insert(tmp);
2404         else
2405                 ret = dmar_hotplug_remove(tmp);
2406         up_write(&dmar_global_lock);
2407
2408         return ret;
2409 }
2410
2411 int dmar_device_add(acpi_handle handle)
2412 {
2413         return dmar_device_hotplug(handle, true);
2414 }
2415
2416 int dmar_device_remove(acpi_handle handle)
2417 {
2418         return dmar_device_hotplug(handle, false);
2419 }
2420
2421 /*
2422  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2423  *
2424  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2425  * the ACPI DMAR table. This means that the platform boot firmware has made
2426  * sure no device can issue DMA outside of RMRR regions.
2427  */
2428 bool dmar_platform_optin(void)
2429 {
2430         struct acpi_table_dmar *dmar;
2431         acpi_status status;
2432         bool ret;
2433
2434         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2435                                 (struct acpi_table_header **)&dmar);
2436         if (ACPI_FAILURE(status))
2437                 return false;
2438
2439         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2440         acpi_put_table((struct acpi_table_header *)dmar);
2441
2442         return ret;
2443 }
2444 EXPORT_SYMBOL_GPL(dmar_platform_optin);