// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
#include "../../iommu-sva.h"
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 "Disable MSI-based polling for CMD_SYNC completion.");
44 enum arm_smmu_msi_index {
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
53 ARM_SMMU_EVTQ_IRQ_CFG0,
54 ARM_SMMU_EVTQ_IRQ_CFG1,
55 ARM_SMMU_EVTQ_IRQ_CFG2,
57 [GERROR_MSI_INDEX] = {
58 ARM_SMMU_GERROR_IRQ_CFG0,
59 ARM_SMMU_GERROR_IRQ_CFG1,
60 ARM_SMMU_GERROR_IRQ_CFG2,
63 ARM_SMMU_PRIQ_IRQ_CFG0,
64 ARM_SMMU_PRIQ_IRQ_CFG1,
65 ARM_SMMU_PRIQ_IRQ_CFG2,
69 struct arm_smmu_option_prop {
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
78 * Special value used by SVA when a process dies, to quiesce a CD without
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
89 static void parse_driver_options(struct arm_smmu_device *smmu)
94 if (of_property_read_bool(smmu->dev->of_node,
95 arm_smmu_options[i].prop)) {
96 smmu->options |= arm_smmu_options[i].opt;
97 dev_notice(smmu->dev, "option %s\n",
98 arm_smmu_options[i].prop);
100 } while (arm_smmu_options[++i].opt);
103 /* Low-level queue manipulation functions */
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
106 u32 space, prod, cons;
108 prod = Q_IDX(q, q->prod);
109 cons = Q_IDX(q, q->cons);
111 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 space = (1 << q->max_n_shift) - (prod - cons);
119 static bool queue_full(struct arm_smmu_ll_queue *q)
121 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
127 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
133 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 * Ensure that all CPU accesses (reads and writes) to the queue
143 * are complete before we update the cons pointer.
146 writel_relaxed(q->llq.cons, q->cons_reg);
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
151 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
157 struct arm_smmu_ll_queue *llq = &q->llq;
159 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
162 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 Q_IDX(llq, llq->cons);
164 queue_sync_cons_out(q);
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
173 * We can't use the _relaxed() variant here, as we must prevent
174 * speculative reads of the queue before we have determined that
175 * prod has indeed moved.
177 prod = readl(q->prod_reg);
179 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
188 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 struct arm_smmu_queue_poll *qp)
197 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
203 if (ktime_compare(ktime_get(), qp->timeout) > 0)
208 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
223 for (i = 0; i < n_dwords; ++i)
224 *dst++ = cpu_to_le64(*src++);
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
231 for (i = 0; i < n_dwords; ++i)
232 *dst++ = le64_to_cpu(*src++);
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
237 if (queue_empty(&q->llq))
240 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 queue_inc_cons(&q->llq);
242 queue_sync_cons_out(q);
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
249 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
252 switch (ent->opcode) {
253 case CMDQ_OP_TLBI_EL2_ALL:
254 case CMDQ_OP_TLBI_NSNH_ALL:
256 case CMDQ_OP_PREFETCH_CFG:
257 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
259 case CMDQ_OP_CFGI_CD:
260 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 case CMDQ_OP_CFGI_STE:
263 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 case CMDQ_OP_CFGI_CD_ALL:
267 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 case CMDQ_OP_CFGI_ALL:
270 /* Cover the entire SID range */
271 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 case CMDQ_OP_TLBI_NH_VA:
274 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
276 case CMDQ_OP_TLBI_EL2_VA:
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
285 case CMDQ_OP_TLBI_S2_IPA:
286 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
294 case CMDQ_OP_TLBI_NH_ASID:
295 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
297 case CMDQ_OP_TLBI_S12_VMALL:
298 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
300 case CMDQ_OP_TLBI_EL2_ASID:
301 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
303 case CMDQ_OP_ATC_INV:
304 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
311 case CMDQ_OP_PRI_RESP:
312 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 switch (ent->pri.resp) {
324 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
327 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
331 case CMDQ_OP_CMD_SYNC:
332 if (ent->sync.msiaddr) {
333 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
336 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
338 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 struct arm_smmu_queue *q, u32 prod)
356 struct arm_smmu_cmdq_ent ent = {
357 .opcode = CMDQ_OP_CMD_SYNC,
361 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 * payload, so the write will zero the entire command on that platform.
364 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
369 arm_smmu_cmdq_build_cmd(cmd, &ent);
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 struct arm_smmu_queue *q)
375 static const char * const cerror_str[] = {
376 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
377 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
378 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
379 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
383 u64 cmd[CMDQ_ENT_DWORDS];
384 u32 cons = readl_relaxed(q->cons_reg);
385 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 struct arm_smmu_cmdq_ent cmd_sync = {
387 .opcode = CMDQ_OP_CMD_SYNC,
390 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
394 case CMDQ_ERR_CERROR_ABT_IDX:
395 dev_err(smmu->dev, "retrying command fetch\n");
397 case CMDQ_ERR_CERROR_NONE_IDX:
399 case CMDQ_ERR_CERROR_ATC_INV_IDX:
401 * ATC Invalidation Completion timeout. CONS is still pointing
402 * at the CMD_SYNC. Attempt to complete other pending commands
403 * by repeating the CMD_SYNC, though we might well end up back
404 * here since the ATC invalidation may still be pending.
407 case CMDQ_ERR_CERROR_ILL_IDX:
413 * We may have concurrent producers, so we need to be careful
414 * not to touch any of the shadow cmdq state.
416 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 dev_err(smmu->dev, "skipping command in error state:\n");
418 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
421 /* Convert the erroneous command into a CMD_SYNC */
422 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
424 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
429 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
449 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 * lock counter. When held in exclusive state, the lock counter is set
451 * to INT_MIN so these increments won't hurt as the value will remain
454 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
458 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
464 (void)atomic_dec_return_release(&cmdq->lock);
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
469 if (atomic_read(&cmdq->lock) == 1)
472 arm_smmu_cmdq_shared_unlock(cmdq);
/*
 * Try to take the cmdq lock exclusively (sets the counter to INT_MIN) with
 * IRQs disabled; restores IRQs and yields false if the lock is contended.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
/* Release the exclusive cmdq lock (RELEASE) and restore IRQ state */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 u32 sprod, u32 eprod, bool set)
535 u32 swidx, sbidx, ewidx, ebidx;
536 struct arm_smmu_ll_queue llq = {
537 .max_n_shift = cmdq->q.llq.max_n_shift,
541 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
544 while (llq.prod != eprod) {
547 u32 limit = BITS_PER_LONG;
549 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
552 ptr = &cmdq->valid_map[swidx];
554 if ((swidx == ewidx) && (sbidx < ebidx))
557 mask = GENMASK(limit - 1, sbidx);
560 * The valid bit is the inverse of the wrap bit. This means
561 * that a zero-initialised queue is invalid and, after marking
562 * all entries as valid, they become invalid again when we
566 atomic_long_xor(mask, ptr);
570 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
574 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 u32 sprod, u32 eprod)
582 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 u32 sprod, u32 eprod)
589 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 struct arm_smmu_ll_queue *llq)
597 struct arm_smmu_queue_poll qp;
598 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
602 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 * that fails, spin until somebody else updates it for us.
605 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 llq->val = READ_ONCE(cmdq->q.llq.val);
612 queue_poll_init(smmu, &qp);
614 llq->val = READ_ONCE(cmdq->q.llq.val);
615 if (!queue_full(llq))
618 ret = queue_poll(&qp);
625 * Wait until the SMMU signals a CMD_SYNC completion MSI.
626 * Must be called with the cmdq lock held in some capacity.
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 struct arm_smmu_ll_queue *llq)
632 struct arm_smmu_queue_poll qp;
633 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
636 queue_poll_init(smmu, &qp);
639 * The MSI won't generate an event, since it's being written back
640 * into the command queue.
643 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
649 * Wait until the SMMU cons index passes llq->prod.
650 * Must be called with the cmdq lock held in some capacity.
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 struct arm_smmu_ll_queue *llq)
655 struct arm_smmu_queue_poll qp;
656 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 u32 prod = llq->prod;
660 queue_poll_init(smmu, &qp);
661 llq->val = READ_ONCE(cmdq->q.llq.val);
663 if (queue_consumed(llq, prod))
666 ret = queue_poll(&qp);
669 * This needs to be a readl() so that our subsequent call
670 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
672 * Specifically, we need to ensure that we observe all
673 * shared_lock()s by other CMD_SYNCs that share our owner,
674 * so that a failing call to tryunlock() means that we're
675 * the last one out and therefore we can safely advance
676 * cmdq->q.llq.cons. Roughly speaking:
678 * CPU 0 CPU1 CPU2 (us)
688 * <control dependency>
694 * Requires us to see CPU 0's shared_lock() acquisition.
696 llq->cons = readl(cmdq->q.cons_reg);
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 struct arm_smmu_ll_queue *llq)
705 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
708 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
715 struct arm_smmu_ll_queue llq = {
716 .max_n_shift = cmdq->q.llq.max_n_shift,
720 for (i = 0; i < n; ++i) {
721 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
723 prod = queue_inc_prod_n(&llq, i);
724 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 u64 *cmds, int n, bool sync)
747 u64 cmd_sync[CMDQ_ENT_DWORDS];
751 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 struct arm_smmu_ll_queue llq, head;
755 llq.max_n_shift = cmdq->q.llq.max_n_shift;
757 /* 1. Allocate some space in the queue */
758 local_irq_save(flags);
759 llq.val = READ_ONCE(cmdq->q.llq.val);
763 while (!queue_has_space(&llq, n + sync)) {
764 local_irq_restore(flags);
765 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 local_irq_save(flags);
770 head.cons = llq.cons;
771 head.prod = queue_inc_prod_n(&llq, n + sync) |
772 CMDQ_PROD_OWNED_FLAG;
774 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
780 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
785 * 2. Write our commands into the queue
786 * Dependency ordering from the cmpxchg() loop above.
788 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
790 prod = queue_inc_prod_n(&llq, n);
791 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
795 * In order to determine completion of our CMD_SYNC, we must
796 * ensure that the queue can't wrap twice without us noticing.
797 * We achieve that by taking the cmdq lock as shared before
798 * marking our slot as valid.
800 arm_smmu_cmdq_shared_lock(cmdq);
803 /* 3. Mark our slots as valid, ensuring commands are visible first */
805 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
807 /* 4. If we are the owner, take control of the SMMU hardware */
809 /* a. Wait for previous owner to finish */
810 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
812 /* b. Stop gathering work by clearing the owned flag */
813 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 &cmdq->q.llq.atomic.prod);
815 prod &= ~CMDQ_PROD_OWNED_FLAG;
818 * c. Wait for any gathered work to be written to the queue.
819 * Note that we read our own entries so that we have the control
820 * dependency required by (d).
822 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
825 * d. Advance the hardware prod pointer
826 * Control dependency ordering from the entries becoming valid.
828 writel_relaxed(prod, cmdq->q.prod_reg);
831 * e. Tell the next owner we're done
832 * Make sure we've updated the hardware first, so that we don't
833 * race to update prod and potentially move it backwards.
835 atomic_set_release(&cmdq->owner_prod, prod);
838 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
840 llq.prod = queue_inc_prod_n(&llq, n);
841 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
843 dev_err_ratelimited(smmu->dev,
844 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
846 readl_relaxed(cmdq->q.prod_reg),
847 readl_relaxed(cmdq->q.cons_reg));
851 * Try to unlock the cmdq lock. This will fail if we're the last
852 * reader, in which case we can safely update cmdq->q.llq.cons
854 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 arm_smmu_cmdq_shared_unlock(cmdq);
860 local_irq_restore(flags);
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 struct arm_smmu_cmdq_ent *ent,
868 u64 cmd[CMDQ_ENT_DWORDS];
870 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
876 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 struct arm_smmu_cmdq_ent *ent)
882 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 struct arm_smmu_cmdq_ent *ent)
888 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 struct arm_smmu_cmdq_batch *cmds,
893 struct arm_smmu_cmdq_ent *cmd)
897 if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
903 if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
908 index = cmds->num * CMDQ_ENT_DWORDS;
909 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 struct arm_smmu_cmdq_batch *cmds)
921 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
924 static int arm_smmu_page_response(struct device *dev,
925 struct iommu_fault_event *unused,
926 struct iommu_page_response *resp)
928 struct arm_smmu_cmdq_ent cmd = {0};
929 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 int sid = master->streams[0].id;
932 if (master->stall_enabled) {
933 cmd.opcode = CMDQ_OP_RESUME;
934 cmd.resume.sid = sid;
935 cmd.resume.stag = resp->grpid;
936 switch (resp->code) {
937 case IOMMU_PAGE_RESP_INVALID:
938 case IOMMU_PAGE_RESP_FAILURE:
939 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
941 case IOMMU_PAGE_RESP_SUCCESS:
942 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
951 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
953 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 * RESUME consumption guarantees that the stalled transaction will be
955 * terminated... at some point in the future. PRI_RESP is fire and
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
965 struct arm_smmu_cmdq_ent cmd = {
966 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
967 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
971 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
974 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
979 struct arm_smmu_master *master;
980 struct arm_smmu_cmdq_batch cmds;
981 struct arm_smmu_device *smmu = smmu_domain->smmu;
982 struct arm_smmu_cmdq_ent cmd = {
983 .opcode = CMDQ_OP_CFGI_CD,
992 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994 for (i = 0; i < master->num_streams; i++) {
995 cmd.cfgi.sid = master->streams[i].id;
996 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
999 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1001 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1004 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005 struct arm_smmu_l1_ctx_desc *l1_desc)
1007 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1009 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010 &l1_desc->l2ptr_dma, GFP_KERNEL);
1011 if (!l1_desc->l2ptr) {
1013 "failed to allocate context descriptor table\n");
1019 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020 struct arm_smmu_l1_ctx_desc *l1_desc)
1022 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1025 /* See comment in arm_smmu_write_ctx_desc() */
1026 WRITE_ONCE(*dst, cpu_to_le64(val));
1029 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1034 struct arm_smmu_l1_ctx_desc *l1_desc;
1035 struct arm_smmu_device *smmu = smmu_domain->smmu;
1036 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1038 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1041 idx = ssid >> CTXDESC_SPLIT;
1042 l1_desc = &cdcfg->l1_desc[idx];
1043 if (!l1_desc->l2ptr) {
1044 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1047 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049 /* An invalid L1CD can be cached */
1050 arm_smmu_sync_cd(smmu_domain, ssid, false);
1052 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1056 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057 struct arm_smmu_ctx_desc *cd)
1060 * This function handles the following cases:
1062 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1063 * (2) Install a secondary CD, for SID+SSID traffic.
1064 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065 * CD, then invalidate the old entry and mappings.
1066 * (4) Quiesce the context without clearing the valid bit. Disable
1067 * translation, and ignore any translation fault.
1068 * (5) Remove a secondary CD.
1074 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1077 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1081 val = le64_to_cpu(cdptr[0]);
1082 cd_live = !!(val & CTXDESC_CD_0_V);
1084 if (!cd) { /* (5) */
1086 } else if (cd == &quiet_cd) { /* (4) */
1087 val |= CTXDESC_CD_0_TCR_EPD0;
1088 } else if (cd_live) { /* (3) */
1089 val &= ~CTXDESC_CD_0_ASID;
1090 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1092 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093 * this substream's traffic
1095 } else { /* (1) and (2) */
1096 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1098 cdptr[3] = cpu_to_le64(cd->mair);
1101 * STE is live, and the SMMU might read dwords of this CD in any
1102 * order. Ensure that it observes valid values before reading
1105 arm_smmu_sync_cd(smmu_domain, ssid, true);
1111 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1114 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1117 if (smmu_domain->stall_enabled)
1118 val |= CTXDESC_CD_0_S;
1122 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123 * "Configuration structures and configuration invalidation completion"
1125 * The size of single-copy atomic reads made by the SMMU is
1126 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127 * field within an aligned 64-bit span of a structure can be altered
1128 * without first making the structure invalid.
1130 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131 arm_smmu_sync_cd(smmu_domain, ssid, true);
1135 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1139 size_t max_contexts;
1140 struct arm_smmu_device *smmu = smmu_domain->smmu;
1141 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1144 max_contexts = 1 << cfg->s1cdmax;
1146 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147 max_contexts <= CTXDESC_L2_ENTRIES) {
1148 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149 cdcfg->num_l1_ents = max_contexts;
1151 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1153 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155 CTXDESC_L2_ENTRIES);
1157 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158 sizeof(*cdcfg->l1_desc),
1160 if (!cdcfg->l1_desc)
1163 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1166 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1168 if (!cdcfg->cdtab) {
1169 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1177 if (cdcfg->l1_desc) {
1178 devm_kfree(smmu->dev, cdcfg->l1_desc);
1179 cdcfg->l1_desc = NULL;
1184 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1187 size_t size, l1size;
1188 struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1191 if (cdcfg->l1_desc) {
1192 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1194 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195 if (!cdcfg->l1_desc[i].l2ptr)
1198 dmam_free_coherent(smmu->dev, size,
1199 cdcfg->l1_desc[i].l2ptr,
1200 cdcfg->l1_desc[i].l2ptr_dma);
1202 devm_kfree(smmu->dev, cdcfg->l1_desc);
1203 cdcfg->l1_desc = NULL;
1205 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1207 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1210 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211 cdcfg->cdtab_dma = 0;
1212 cdcfg->cdtab = NULL;
1215 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1218 struct arm_smmu_ctx_desc *old_cd;
1223 free = refcount_dec_and_test(&cd->refs);
1225 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226 WARN_ON(old_cd != cd);
1231 /* Stream table manipulation functions */
/*
 * Encode one stream-table L1 descriptor: the span (number of STEs covered)
 * and the DMA address of the L2 table, then publish it with a single
 * WRITE_ONCE so the SMMU never observes a torn descriptor.
 */
1233 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1237 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1240 /* See comment in arm_smmu_write_ctx_desc() */
1241 WRITE_ONCE(*dst, cpu_to_le64(val));
/*
 * Invalidate the SMMU's cached copy of the STE for @sid by issuing a
 * CMDQ_OP_CFGI_STE command and waiting for completion (the _with_sync
 * variant appends a CMD_SYNC).
 */
1244 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1246 struct arm_smmu_cmdq_ent cmd = {
1247 .opcode = CMDQ_OP_CFGI_STE,
1254 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/*
 * Write (or rewrite) the stream table entry for @sid on behalf of @master,
 * following the ordering rules described in the comment below so that a
 * live STE is never observed half-written by the SMMU. The target config
 * (abort / bypass / S1 translation / S2 translation) is derived from
 * master->domain; a NULL domain means detach (bypass or abort depending on
 * the disable_bypass module parameter).
 */
1257 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1261 * This is hideously complicated, but we only really care about
1262 * three cases at the moment:
1264 * 1. Invalid (all zero) -> bypass/fault (init)
1265 * 2. Bypass/fault -> translation/bypass (attach)
1266 * 3. Translation/bypass -> bypass/fault (detach)
1268 * Given that we can't update the STE atomically and the SMMU
1269 * doesn't read the thing in a defined order, that leaves us
1270 * with the following maintenance requirements:
1272 * 1. Update Config, return (init time STEs aren't live)
1273 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274 * 3. Update Config, sync
1276 u64 val = le64_to_cpu(dst[0]);
1277 bool ste_live = false;
1278 struct arm_smmu_device *smmu = NULL;
1279 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281 struct arm_smmu_domain *smmu_domain = NULL;
1282 struct arm_smmu_cmdq_ent prefetch_cmd = {
1283 .opcode = CMDQ_OP_PREFETCH_CFG,
1290 smmu_domain = master->domain;
1291 smmu = master->smmu;
	/* Pick the stage config that drives the STE contents. */
1295 switch (smmu_domain->stage) {
1296 case ARM_SMMU_DOMAIN_S1:
1297 s1_cfg = &smmu_domain->s1_cfg;
1299 case ARM_SMMU_DOMAIN_S2:
1300 case ARM_SMMU_DOMAIN_NESTED:
1301 s2_cfg = &smmu_domain->s2_cfg;
	/*
	 * Classify the current (old) STE from its V bit and Config field.
	 * NOTE(review): the assignments to ste_live in these cases are
	 * elided in this excerpt — confirm against the full source.
	 */
1308 if (val & STRTAB_STE_0_V) {
1309 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310 case STRTAB_STE_0_CFG_BYPASS:
1312 case STRTAB_STE_0_CFG_S1_TRANS:
1313 case STRTAB_STE_0_CFG_S2_TRANS:
1316 case STRTAB_STE_0_CFG_ABORT:
		/* An aborting STE is only expected when bypass is disabled. */
1317 BUG_ON(!disable_bypass);
1320 BUG(); /* STE corruption */
1324 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1325 val = STRTAB_STE_0_V;
	/* Detach / no translation config: install bypass or abort. */
1328 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329 if (!smmu_domain && disable_bypass)
1330 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1332 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1334 dst[0] = cpu_to_le64(val);
1335 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336 STRTAB_STE_1_SHCFG_INCOMING));
1337 dst[2] = 0; /* Nuke the VMID */
1339 * The SMMU can perform negative caching, so we must sync
1340 * the STE regardless of whether the old value was live.
1343 arm_smmu_sync_ste_for_sid(smmu, sid);
	/* Stage-1: point STE at the CD table and set S1 attributes. */
1348 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1352 dst[1] = cpu_to_le64(
1353 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357 FIELD_PREP(STRTAB_STE_1_STRW, strw));
	/* Disable stalling faults unless the master opted in. */
1359 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360 !master->stall_enabled)
1361 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1363 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	/* Stage-2: program VMID, VTCR and the stage-2 TTB. */
1371 dst[2] = cpu_to_le64(
1372 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1375 STRTAB_STE_2_S2ENDI |
1377 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1380 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1382 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1385 if (master->ats_enabled)
1386 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387 STRTAB_STE_1_EATS_TRANS));
	/* Sync dwords 1-3, then publish dword 0, then sync again. */
1389 arm_smmu_sync_ste_for_sid(smmu, sid);
1390 /* See comment in arm_smmu_write_ctx_desc() */
1391 WRITE_ONCE(dst[0], cpu_to_le64(val));
1392 arm_smmu_sync_ste_for_sid(smmu, sid);
1394 /* It's likely that we'll want to use the new STE soon */
1395 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
/*
 * Initialise @nent consecutive STEs to a non-translating state: abort when
 * disable_bypass is set (unless @force requests bypass), bypass otherwise.
 * Used for init-time stream tables, so no CFGI sync is issued here.
 */
1399 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1402 u64 val = STRTAB_STE_0_V;
1404 if (disable_bypass && !force)
1405 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1407 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1409 for (i = 0; i < nent; ++i) {
1410 strtab[0] = cpu_to_le64(val);
		/* Bypassed traffic inherits the incoming shareability. */
1411 strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412 STRTAB_STE_1_SHCFG_INCOMING));
1414 strtab += STRTAB_STE_DWORDS;
/*
 * Lazily allocate the second-level stream table covering @sid, initialise
 * its STEs to the default bypass/abort state, and hook it into the L1
 * descriptor. Returns 0 on success or a negative errno (error paths are
 * elided in this excerpt).
 */
1418 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1422 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
	/* One leaf table: 2^STRTAB_SPLIT STEs of STRTAB_STE_DWORDS * 8 bytes. */
1428 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1431 desc->span = STRTAB_SPLIT + 1;
1432 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1436 "failed to allocate l2 stream table for SID %u\n",
1441 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442 arm_smmu_write_strtab_l1_desc(strtab, desc);
/*
 * Look up the master owning stream ID @sid via a binary search of the
 * device's rb-tree of streams. Caller must hold smmu->streams_mutex
 * (asserted below). Returns the owning master; the not-found return is
 * elided in this excerpt.
 */
1446 static struct arm_smmu_master *
1447 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1449 struct rb_node *node;
1450 struct arm_smmu_stream *stream;
1452 lockdep_assert_held(&smmu->streams_mutex);
1454 node = smmu->streams.rb_node;
1456 stream = rb_entry(node, struct arm_smmu_stream, node);
1457 if (stream->id < sid)
1458 node = node->rb_right;
1459 else if (stream->id > sid)
1460 node = node->rb_left;
1462 return stream->master;
1468 /* IRQ and event handlers */
/*
 * Translate one event-queue record into a struct iommu_fault and report it
 * to the core via iommu_report_device_fault(). Stalled (recoverable)
 * faults become page requests; everything else is reported as an
 * unrecoverable DMA fault. If nobody handles a page request, a FAILURE
 * response is sent back so the transaction is aborted.
 */
1469 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1474 struct arm_smmu_master *master;
1475 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1476 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1477 struct iommu_fault_event fault_evt = { };
1478 struct iommu_fault *flt = &fault_evt.fault;
	/* Map the event ID onto a generic IOMMU fault reason. */
1480 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1481 case EVT_ID_TRANSLATION_FAULT:
1482 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1484 case EVT_ID_ADDR_SIZE_FAULT:
1485 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1487 case EVT_ID_ACCESS_FAULT:
1488 reason = IOMMU_FAULT_REASON_ACCESS;
1490 case EVT_ID_PERMISSION_FAULT:
1491 reason = IOMMU_FAULT_REASON_PERMISSION;
1497 /* Stage-2 is always pinned at the moment */
1498 if (evt[1] & EVTQ_1_S2)
	/* Build the access-permission mask from the event flags. */
1501 if (evt[1] & EVTQ_1_RnW)
1502 perm |= IOMMU_FAULT_PERM_READ;
1504 perm |= IOMMU_FAULT_PERM_WRITE;
1506 if (evt[1] & EVTQ_1_InD)
1507 perm |= IOMMU_FAULT_PERM_EXEC;
1509 if (evt[1] & EVTQ_1_PnU)
1510 perm |= IOMMU_FAULT_PERM_PRIV;
	/* Stalled fault: recoverable, report as a page request. */
1512 if (evt[1] & EVTQ_1_STALL) {
1513 flt->type = IOMMU_FAULT_PAGE_REQ;
1514 flt->prm = (struct iommu_fault_page_request) {
1515 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1516 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1518 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1522 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1523 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
	/* Non-stalled fault: unrecoverable. */
1526 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1527 flt->event = (struct iommu_fault_unrecoverable) {
1529 .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1531 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1535 flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1536 flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
	/* Resolve the SID to a master under streams_mutex. */
1540 mutex_lock(&smmu->streams_mutex);
1541 master = arm_smmu_find_master(smmu, sid);
1547 ret = iommu_report_device_fault(master->dev, &fault_evt);
1548 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1549 /* Nobody cared, abort the access */
1550 struct iommu_page_response resp = {
1551 .pasid = flt->prm.pasid,
1552 .grpid = flt->prm.grpid,
1553 .code = IOMMU_PAGE_RESP_FAILURE,
1555 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1559 mutex_unlock(&smmu->streams_mutex);
/*
 * Threaded IRQ handler for the event queue: drain all pending records,
 * hand each to arm_smmu_handle_evt(), and rate-limit the raw dump of
 * events that were not consumed. Overflow is reported but otherwise
 * unrecoverable.
 */
1563 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1566 struct arm_smmu_device *smmu = dev;
1567 struct arm_smmu_queue *q = &smmu->evtq.q;
1568 struct arm_smmu_ll_queue *llq = &q->llq;
1569 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1570 DEFAULT_RATELIMIT_BURST);
1571 u64 evt[EVTQ_ENT_DWORDS];
1574 while (!queue_remove_raw(q, evt)) {
1575 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1577 ret = arm_smmu_handle_evt(smmu, evt);
		/* Only dump events that were not handled, and not too often. */
1578 if (!ret || !__ratelimit(&rs))
1581 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1582 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1583 dev_info(smmu->dev, "\t0x%016llx\n",
1584 (unsigned long long)evt[i]);
1590 * Not much we can do on overflow, so scream and pretend we're
1593 if (queue_sync_prod_in(q) == -EOVERFLOW)
1594 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1595 } while (!queue_empty(llq));
1597 /* Sync our overflow flag, as we believe we're up to speed */
1598 queue_sync_cons_ovf(q);
/*
 * Handle one PRI queue record. PRI is not actually supported by this
 * driver path ("unexpected PRI request"), so the request is logged and,
 * when a response is expected, answered with PRI_RESP_DENY.
 */
1602 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1608 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1609 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1610 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1611 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1612 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1614 dev_info(smmu->dev, "unexpected PRI request received:\n");
1616 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1617 sid, ssid, grpid, last ? "L" : "",
1618 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1619 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1620 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1621 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1622 evt[1] & PRIQ_1_ADDR_MASK);
	/* Deny the page request so the endpoint doesn't wait forever. */
1625 struct arm_smmu_cmdq_ent cmd = {
1626 .opcode = CMDQ_OP_PRI_RESP,
1627 .substream_valid = ssv,
1632 .resp = PRI_RESP_DENY,
1636 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
/*
 * Threaded IRQ handler for the PRI queue: drain all pending records into
 * arm_smmu_handle_ppr(), report overflow, then resync the consumer-side
 * overflow flag.
 */
1640 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1642 struct arm_smmu_device *smmu = dev;
1643 struct arm_smmu_queue *q = &smmu->priq.q;
1644 struct arm_smmu_ll_queue *llq = &q->llq;
1645 u64 evt[PRIQ_ENT_DWORDS];
1648 while (!queue_remove_raw(q, evt))
1649 arm_smmu_handle_ppr(smmu, evt);
1651 if (queue_sync_prod_in(q) == -EOVERFLOW)
1652 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1653 } while (!queue_empty(llq));
1655 /* Sync our overflow flag, as we believe we're up to speed */
1656 queue_sync_cons_ovf(q);
1660 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
/*
 * Global error (GERROR) interrupt handler. Active errors are the bits
 * that differ between GERROR and GERRORN; each is logged, SFM disables
 * the device entirely, CMDQ errors are skipped over, and finally GERRORN
 * is written back to acknowledge everything we observed.
 */
1662 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1664 u32 gerror, gerrorn, active;
1665 struct arm_smmu_device *smmu = dev;
1667 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1668 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
	/* Bits toggled since the last acknowledge are the pending errors. */
1670 active = gerror ^ gerrorn;
1671 if (!(active & GERROR_ERR_MASK))
1672 return IRQ_NONE; /* No errors pending */
1675 "unexpected global error reported (0x%08x), this could be serious\n",
1678 if (active & GERROR_SFM_ERR) {
1679 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1680 arm_smmu_device_disable(smmu);
1683 if (active & GERROR_MSI_GERROR_ABT_ERR)
1684 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1686 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1687 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1689 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1690 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1692 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1693 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1695 if (active & GERROR_PRIQ_ABT_ERR)
1696 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1698 if (active & GERROR_EVTQ_ABT_ERR)
1699 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1701 if (active & GERROR_CMDQ_ERR)
1702 arm_smmu_cmdq_skip_err(smmu);
	/* Acknowledge: make GERRORN match what we read from GERROR. */
1704 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
/*
 * Threaded half of the combined interrupt: service the event queue and,
 * when PRI is supported, the PRI queue as well.
 */
1708 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1710 struct arm_smmu_device *smmu = dev;
1712 arm_smmu_evtq_thread(irq, dev);
1713 if (smmu->features & ARM_SMMU_FEAT_PRI)
1714 arm_smmu_priq_thread(irq, dev);
/*
 * Hard-IRQ half of the combined interrupt: handle global errors inline,
 * then always wake the thread to drain the queues.
 */
1719 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1721 arm_smmu_gerror_handler(irq, dev);
1722 return IRQ_WAKE_THREAD;
/*
 * Build a CMDQ_OP_ATC_INV command covering [iova, iova + size). A size of
 * 0 is visible below as the "invalidate everything" case. Because the ATS
 * invalidate address must be aligned to a power-of-two range, the range is
 * rounded up to the smallest covering power-of-two span (over-invalidation
 * is preferred over issuing multiple commands — see comment below).
 */
1726 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1727 struct arm_smmu_cmdq_ent *cmd)
1731 /* ATC invalidates are always on 4096-bytes pages */
1732 size_t inval_grain_shift = 12;
1733 unsigned long page_start, page_end;
1738 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1739 * prefix. In that case all ATC entries within the address range are
1740 * invalidated, including those that were requested with a PASID! There
1741 * is no way to invalidate only entries without PASID.
1743 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1744 * traffic), translation requests without PASID create ATC entries
1745 * without PASID, which must be invalidated with substream_valid clear.
1746 * This has the unpleasant side-effect of invalidating all PASID-tagged
1747 * ATC entries within the address range.
1749 *cmd = (struct arm_smmu_cmdq_ent) {
1750 .opcode = CMDQ_OP_ATC_INV,
1751 .substream_valid = !!ssid,
	/* size == 0 requests a full-ATC invalidation. */
1756 cmd->atc.size = ATC_INV_SIZE_ALL;
1760 page_start = iova >> inval_grain_shift;
1761 page_end = (iova + size - 1) >> inval_grain_shift;
1764 * In an ATS Invalidate Request, the address must be aligned on the
1765 * range size, which must be a power of two number of page sizes. We
1766 * thus have to choose between grossly over-invalidating the region, or
1767 * splitting the invalidation into multiple commands. For simplicity
1768 * we'll go with the first solution, but should refine it in the future
1769 * if multiple commands are shown to be more efficient.
1771 * Find the smallest power of two that covers the range. The most
1772 * significant differing bit between the start and end addresses,
1773 * fls(start ^ end), indicates the required span. For example:
1775 * We want to invalidate pages [8; 11]. This is already the ideal range:
1776 * x = 0b1000 ^ 0b1011 = 0b11
1777 * span = 1 << fls(x) = 4
1779 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1780 * x = 0b0111 ^ 0b1010 = 0b1101
1781 * span = 1 << fls(x) = 16
1783 log2_span = fls_long(page_start ^ page_end);
1784 span_mask = (1ULL << log2_span) - 1;
	/* Align the start of the range down to the span. */
1786 page_start &= ~span_mask;
1788 cmd->atc.addr = page_start << inval_grain_shift;
1789 cmd->atc.size = log2_span;
/*
 * Invalidate the entire ATC of one master: build a full-range (ssid 0,
 * size 0) ATC_INV command, batch one copy per stream ID the master owns,
 * and submit the batch. Returns the batch-submit result.
 */
1792 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1795 struct arm_smmu_cmdq_ent cmd;
1796 struct arm_smmu_cmdq_batch cmds;
1798 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1801 for (i = 0; i < master->num_streams; i++) {
1802 cmd.atc.sid = master->streams[i].id;
1803 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1806 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
/*
 * Invalidate the ATC range [iova, iova + size) for every ATS-enabled
 * master attached to @smmu_domain. Bails out early when the SMMU has no
 * ATS support or no ATS masters are attached (see the ordering comment
 * below for why nr_ats_masters can be read without the devices_lock).
 */
1809 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1810 unsigned long iova, size_t size)
1813 unsigned long flags;
1814 struct arm_smmu_cmdq_ent cmd;
1815 struct arm_smmu_master *master;
1816 struct arm_smmu_cmdq_batch cmds;
1818 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1822 * Ensure that we've completed prior invalidation of the main TLBs
1823 * before we read 'nr_ats_masters' in case of a concurrent call to
1824 * arm_smmu_enable_ats():
1826 * // unmap() // arm_smmu_enable_ats()
1827 * TLBI+SYNC atomic_inc(&nr_ats_masters);
1829 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1831 * Ensures that we always see the incremented 'nr_ats_masters' count if
1832 * ATS was enabled at the PCI device before completion of the TLBI.
1835 if (!atomic_read(&smmu_domain->nr_ats_masters))
1838 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
	/* Batch one command per stream of every ATS-enabled master. */
1842 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1843 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1844 if (!master->ats_enabled)
1847 for (i = 0; i < master->num_streams; i++) {
1848 cmd.atc.sid = master->streams[i].id;
1849 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1852 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1854 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1857 /* IO_PGTABLE API */
/*
 * io-pgtable tlb_flush_all callback: invalidate the whole TLB context for
 * the domain — by ASID for stage-1, by VMID (S12 VMALL) for stage-2 —
 * then invalidate the ATCs of all attached ATS masters.
 */
1858 static void arm_smmu_tlb_inv_context(void *cookie)
1860 struct arm_smmu_domain *smmu_domain = cookie;
1861 struct arm_smmu_device *smmu = smmu_domain->smmu;
1862 struct arm_smmu_cmdq_ent cmd;
1865 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1866 * PTEs previously cleared by unmaps on the current CPU not yet visible
1867 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1868 * insertion to guarantee those are observed before the TLBI. Do be
1871 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1872 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1874 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1875 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1876 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	/* iova/size of 0 means "invalidate everything" for the ATCs. */
1878 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
/*
 * Issue TLBI commands for [iova, iova + size) using the partially-built
 * @cmd (opcode and ASID/VMID already set by the caller). With
 * ARM_SMMU_FEAT_RANGE_INV the hardware range extension (TG/TTL/SCALE/NUM)
 * is used to cover large ranges in few commands; otherwise one command
 * per @granule is batched.
 */
1881 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1882 unsigned long iova, size_t size,
1884 struct arm_smmu_domain *smmu_domain)
1886 struct arm_smmu_device *smmu = smmu_domain->smmu;
1887 unsigned long end = iova + size, num_pages = 0, tg = 0;
1888 size_t inv_range = granule;
1889 struct arm_smmu_cmdq_batch cmds;
1894 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1895 /* Get the leaf page size */
1896 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1898 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1899 cmd->tlbi.tg = (tg - 10) / 2;
1902 * Determine what level the granule is at. For non-leaf, io-pgtable
1903 * assumes .tlb_flush_walk can invalidate multiple levels at once,
1904 * so ignore the nominal last-level granule and leave TTL=0.
1907 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1909 num_pages = size >> tg;
1914 while (iova < end) {
1915 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1917 * On each iteration of the loop, the range is 5 bits
1918 * worth of the aligned size remaining.
1919 * The range in pages is:
1921 * range = (num_pages & (0x1f << __ffs(num_pages)))
1923 unsigned long scale, num;
1925 /* Determine the power of 2 multiple number of pages */
1926 scale = __ffs(num_pages);
1927 cmd->tlbi.scale = scale;
1929 /* Determine how many chunks of 2^scale size we have */
1930 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1931 cmd->tlbi.num = num - 1;
1933 /* range is num * 2^scale * pgsize */
1934 inv_range = num << (scale + tg);
1936 /* Clear out the lower order bits for the next iteration */
1937 num_pages -= num << scale;
1940 cmd->tlbi.addr = iova;
1941 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
		/* The iova advance by inv_range is elided in this excerpt. */
1944 arm_smmu_cmdq_batch_submit(smmu, &cmds);
/*
 * Range-invalidate a domain's TLB entries and then the ATCs of its ATS
 * masters. Stage-1 uses a VA invalidation keyed by ASID (EL2 variant when
 * the SMMU runs with E2H); stage-2 uses an IPA invalidation keyed by VMID.
 */
1947 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1948 size_t granule, bool leaf,
1949 struct arm_smmu_domain *smmu_domain)
1951 struct arm_smmu_cmdq_ent cmd = {
1957 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1958 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1959 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1960 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1962 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1963 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1965 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1968 * Unfortunately, this can't be leaf-only since we may have
1969 * zapped an entire table.
1971 arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
/*
 * Range-invalidate stage-1 TLB entries for an explicit @asid (used by the
 * SVA code, which manages ASIDs outside the domain's own CD). Chooses the
 * EL2 VA opcode when the SMMU runs with E2H.
 */
1974 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1975 size_t granule, bool leaf,
1976 struct arm_smmu_domain *smmu_domain)
1978 struct arm_smmu_cmdq_ent cmd = {
1979 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1980 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1987 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
/*
 * io-pgtable tlb_add_page callback: defer the invalidation by adding the
 * page to the iotlb gather; the actual TLBI is issued later from
 * arm_smmu_iotlb_sync().
 */
1990 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1991 unsigned long iova, size_t granule,
1994 struct arm_smmu_domain *smmu_domain = cookie;
1995 struct iommu_domain *domain = &smmu_domain->domain;
1997 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
/*
 * io-pgtable tlb_flush_walk callback: non-leaf range invalidation
 * (leaf = false) delegated to arm_smmu_tlb_inv_range_domain().
 */
2000 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2001 size_t granule, void *cookie)
2003 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
/* TLB maintenance callbacks handed to io-pgtable for this driver. */
2006 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2007 .tlb_flush_all = arm_smmu_tlb_inv_context,
2008 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2009 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
/*
 * iommu_ops->capable: report per-device capabilities. Cache coherency is
 * derived from the SMMU's COHERENCY feature bit; the NOEXEC /
 * DEFERRED_FLUSH results are elided in this excerpt.
 */
2013 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2015 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2018 case IOMMU_CAP_CACHE_COHERENCY:
2019 /* Assume that a coherent TCU implies coherent TBUs */
2020 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2021 case IOMMU_CAP_NOEXEC:
2022 case IOMMU_CAP_DEFERRED_FLUSH:
/*
 * iommu_ops->domain_alloc: SVA domains are delegated to the SVA code;
 * otherwise only UNMANAGED, DMA and IDENTITY types are accepted. The
 * returned domain is only partially initialised — page tables and stage
 * configuration are finalised on first attach (see the comment below).
 */
2029 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2031 struct arm_smmu_domain *smmu_domain;
2033 if (type == IOMMU_DOMAIN_SVA)
2034 return arm_smmu_sva_domain_alloc();
2036 if (type != IOMMU_DOMAIN_UNMANAGED &&
2037 type != IOMMU_DOMAIN_DMA &&
2038 type != IOMMU_DOMAIN_IDENTITY)
2042 * Allocate the domain and initialise some of its data structures.
2043 * We can't really do anything meaningful until we've added a
2046 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2050 mutex_init(&smmu_domain->init_mutex);
2051 INIT_LIST_HEAD(&smmu_domain->devices);
2052 spin_lock_init(&smmu_domain->devices_lock);
2053 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2055 return &smmu_domain->domain;
/*
 * Allocate a free index from a bitmap of 2^span bits (used for VMIDs).
 * test_and_set_bit() in the loop condition makes the claim atomic,
 * retrying if another CPU raced us to the same bit; the exhausted-bitmap
 * error return is elided in this excerpt.
 */
2058 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2060 int idx, size = 1 << span;
2063 idx = find_first_zero_bit(map, size);
2066 } while (test_and_set_bit(idx, map));
/* Release an index previously claimed by arm_smmu_bitmap_alloc(). */
2071 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2073 clear_bit(idx, map);
/*
 * iommu_ops->domain_free: release the io-pgtable, then the stage-specific
 * resources — CD tables and ASID for stage-1 (under arm_smmu_asid_lock so
 * SVA cannot race with the teardown), or the VMID bitmap slot for stage-2.
 */
2076 static void arm_smmu_domain_free(struct iommu_domain *domain)
2078 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2079 struct arm_smmu_device *smmu = smmu_domain->smmu;
2081 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2083 /* Free the CD and ASID, if we allocated them */
2084 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2085 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2087 /* Prevent SVA from touching the CD while we're freeing it */
2088 mutex_lock(&arm_smmu_asid_lock);
2089 if (cfg->cdcfg.cdtab)
2090 arm_smmu_free_cd_tables(smmu_domain);
2091 arm_smmu_free_asid(&cfg->cd);
2092 mutex_unlock(&arm_smmu_asid_lock);
2094 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2096 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
/*
 * Finalise a stage-1 domain: allocate an ASID from the global xarray,
 * size the CD table to the master's SSID width, allocate the CD tables,
 * fill in the context descriptor (TTBR/TCR/MAIR from the io-pgtable
 * config) and write CD 0. All of it runs under arm_smmu_asid_lock so SVA
 * cannot reuse the ASID before the CD is written. Error unwinding frees
 * the CD tables and ASID (labels elided in this excerpt).
 */
2102 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2103 struct arm_smmu_master *master,
2104 struct io_pgtable_cfg *pgtbl_cfg)
2108 struct arm_smmu_device *smmu = smmu_domain->smmu;
2109 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2110 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2112 refcount_set(&cfg->cd.refs, 1);
2114 /* Prevent SVA from modifying the ASID until it is written to the CD */
2115 mutex_lock(&arm_smmu_asid_lock);
	/* ASID 0 is reserved; allocate within the hardware's asid_bits. */
2116 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2117 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2121 cfg->s1cdmax = master->ssid_bits;
2123 smmu_domain->stall_enabled = master->stall_enabled;
2125 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2129 cfg->cd.asid = (u16)asid;
2130 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2131 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2132 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2133 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2134 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2135 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2136 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2137 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2138 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2141 * Note that this will end up calling arm_smmu_sync_cd() before
2142 * the master has been added to the devices list for this domain.
2143 * This isn't an issue because the STE hasn't been installed yet.
2145 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2147 goto out_free_cd_tables;
2149 mutex_unlock(&arm_smmu_asid_lock);
	/* Error unwind: free CD tables, then the ASID, then unlock. */
2153 arm_smmu_free_cd_tables(smmu_domain);
2155 arm_smmu_free_asid(&cfg->cd);
2157 mutex_unlock(&arm_smmu_asid_lock);
/*
 * Finalise a stage-2 domain: allocate a VMID from the device's bitmap and
 * derive the STE's VTCR field plus VTTBR from the io-pgtable stage-2
 * config. @master is unused here but keeps the signature parallel to the
 * stage-1 finaliser (both are called through the same function pointer).
 */
2161 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2162 struct arm_smmu_master *master,
2163 struct io_pgtable_cfg *pgtbl_cfg)
2166 struct arm_smmu_device *smmu = smmu_domain->smmu;
2167 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2168 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2170 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2174 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2175 cfg->vmid = (u16)vmid;
2176 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2177 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2178 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2179 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2180 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2181 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2182 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2183 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
/*
 * Finish initialising a domain on first attach: choose the translation
 * stage the hardware supports, allocate the io-pgtable, export the
 * geometry/pgsize to the core, and run the stage-specific finaliser.
 * IDENTITY domains short-circuit to the bypass stage with no page tables.
 * Called with smmu_domain->init_mutex held (taken in arm_smmu_attach_dev).
 */
2187 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2188 struct arm_smmu_master *master)
2191 unsigned long ias, oas;
2192 enum io_pgtable_fmt fmt;
2193 struct io_pgtable_cfg pgtbl_cfg;
2194 struct io_pgtable_ops *pgtbl_ops;
2195 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2196 struct arm_smmu_master *,
2197 struct io_pgtable_cfg *);
2198 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2199 struct arm_smmu_device *smmu = smmu_domain->smmu;
2201 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2202 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2206 /* Restrict the stage to what we can actually support */
2207 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2208 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2209 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2210 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2212 switch (smmu_domain->stage) {
2213 case ARM_SMMU_DOMAIN_S1:
	/* VAX extends the stage-1 VA space from 48 to 52 bits. */
2214 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2215 ias = min_t(unsigned long, ias, VA_BITS);
2217 fmt = ARM_64_LPAE_S1;
2218 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2220 case ARM_SMMU_DOMAIN_NESTED:
2221 case ARM_SMMU_DOMAIN_S2:
2224 fmt = ARM_64_LPAE_S2;
2225 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2231 pgtbl_cfg = (struct io_pgtable_cfg) {
2232 .pgsize_bitmap = smmu->pgsize_bitmap,
2235 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2236 .tlb = &arm_smmu_flush_ops,
2237 .iommu_dev = smmu->dev,
2240 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	/* Export the geometry the io-pgtable actually granted us. */
2244 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2245 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2246 domain->geometry.force_aperture = true;
2248 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2250 free_io_pgtable_ops(pgtbl_ops);
2254 smmu_domain->pgtbl_ops = pgtbl_ops;
/*
 * Return a pointer to the STE for @sid: a two-level walk through l1_desc
 * when the 2-level stream table feature is in use, otherwise a direct
 * index into the linear table.
 */
2258 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2261 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2263 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2264 struct arm_smmu_strtab_l1_desc *l1_desc;
2267 /* Two-level walk */
2268 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2269 l1_desc = &cfg->l1_desc[idx];
2270 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2271 step = &l1_desc->l2ptr[idx];
2273 /* Simple linear lookup */
2274 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
/*
 * (Re)write the STE for every stream ID owned by @master, skipping
 * duplicate IDs that bridged PCI devices can produce. The new STE
 * contents are derived from master->domain inside
 * arm_smmu_write_strtab_ent().
 */
2280 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2283 struct arm_smmu_device *smmu = master->smmu;
2285 for (i = 0; i < master->num_streams; ++i) {
2286 u32 sid = master->streams[i].id;
2287 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2289 /* Bridged PCI devices may end up with duplicated IDs */
2290 for (j = 0; j < i; j++)
2291 if (master->streams[j].id == sid)
2296 arm_smmu_write_strtab_ent(master, sid, step);
/*
 * ATS can be used for @master only if the SMMU supports it, the firmware
 * marked the PCI root complex as ATS-capable (IOMMU_FWSPEC_PCI_RC_ATS),
 * and the endpoint itself is a PCI device with an ATS capability.
 */
2300 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2302 struct device *dev = master->dev;
2303 struct arm_smmu_device *smmu = master->smmu;
2304 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2306 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2309 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2312 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
/*
 * Enable ATS at the PCI endpoint for a master whose STE already has
 * EATS set (master->ats_enabled). nr_ats_masters is incremented and a
 * full ATC invalidation issued *before* pci_enable_ats() so that
 * concurrent unmaps observe the master — see the ordering comment in
 * arm_smmu_atc_inv_domain().
 */
2315 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2318 struct pci_dev *pdev;
2319 struct arm_smmu_device *smmu = master->smmu;
2320 struct arm_smmu_domain *smmu_domain = master->domain;
2322 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2323 if (!master->ats_enabled)
2326 /* Smallest Translation Unit: log2 of the smallest supported granule */
2327 stu = __ffs(smmu->pgsize_bitmap);
2328 pdev = to_pci_dev(master->dev);
2330 atomic_inc(&smmu_domain->nr_ats_masters);
	/* Start from a clean ATC before the endpoint begins translating. */
2331 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2332 if (pci_enable_ats(pdev, stu))
2333 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
/*
 * Disable ATS for @master: turn it off at the PCI endpoint first, then
 * invalidate the whole ATC via the SMMU, then drop the domain's ATS
 * master count (the reverse of arm_smmu_enable_ats()'s ordering).
 */
2336 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2338 struct arm_smmu_domain *smmu_domain = master->domain;
2340 if (!master->ats_enabled)
2343 pci_disable_ats(to_pci_dev(master->dev));
2345 * Ensure ATS is disabled at the endpoint before we issue the
2346 * ATC invalidation via the SMMU.
2349 arm_smmu_atc_inv_master(master);
2350 atomic_dec(&smmu_domain->nr_ats_masters);
/*
 * Enable PASID at the PCI endpoint and clamp the master's usable SSID
 * width to what both the endpoint (pci_max_pasids) and the SMMU
 * (smmu->ssid_bits) support. Non-PCI and PASID-less devices are left
 * unchanged (early-return lines elided in this excerpt).
 */
2353 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2358 struct pci_dev *pdev;
2360 if (!dev_is_pci(master->dev))
2363 pdev = to_pci_dev(master->dev);
2365 features = pci_pasid_features(pdev);
2369 num_pasids = pci_max_pasids(pdev);
2370 if (num_pasids <= 0)
2373 ret = pci_enable_pasid(pdev, features);
2375 dev_err(&pdev->dev, "Failed to enable PASID\n");
2379 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2380 master->smmu->ssid_bits);
/*
 * Undo arm_smmu_enable_pasid(): zero the master's SSID width and disable
 * PASID at the endpoint, but only for PCI devices that actually had it
 * enabled.
 */
2384 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2386 struct pci_dev *pdev;
2388 if (!dev_is_pci(master->dev))
2391 pdev = to_pci_dev(master->dev);
2393 if (!pdev->pasid_enabled)
2396 master->ssid_bits = 0;
2397 pci_disable_pasid(pdev);
/*
 * Detach @master from its current domain: disable ATS, drop it from the
 * domain's device list, clear the association, and rewrite its STEs
 * (arm_smmu_install_ste_for_dev with domain == NULL yields bypass or
 * abort depending on disable_bypass).
 */
2400 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2402 unsigned long flags;
2403 struct arm_smmu_domain *smmu_domain = master->domain;
2408 arm_smmu_disable_ats(master);
2410 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2411 list_del(&master->domain_head);
2412 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2414 master->domain = NULL;
2415 master->ats_enabled = false;
2416 arm_smmu_install_ste_for_dev(master);
/*
 * iommu_ops->attach_dev: detach the master from any previous domain,
 * finalise @domain lazily on first attach (under init_mutex), verify the
 * domain is compatible with this master (same SMMU instance, matching
 * SSID width and stall setting for stage-1), then install the STEs, link
 * the master into the domain's device list and enable ATS if possible.
 */
2419 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2422 unsigned long flags;
2423 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2424 struct arm_smmu_device *smmu;
2425 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2426 struct arm_smmu_master *master;
2431 master = dev_iommu_priv_get(dev);
2432 smmu = master->smmu;
2435 * Checking that SVA is disabled ensures that this device isn't bound to
2436 * any mm, and can be safely detached from its old domain. Bonds cannot
2437 * be removed concurrently since we're holding the group mutex.
2439 if (arm_smmu_master_sva_enabled(master)) {
2440 dev_err(dev, "cannot attach - SVA enabled\n");
2444 arm_smmu_detach_dev(master);
2446 mutex_lock(&smmu_domain->init_mutex);
	/* First attach finalises the domain against this SMMU instance. */
2448 if (!smmu_domain->smmu) {
2449 smmu_domain->smmu = smmu;
2450 ret = arm_smmu_domain_finalise(domain, master);
2452 smmu_domain->smmu = NULL;
	/* A domain is tied to one SMMU and one stage-1 configuration. */
2455 } else if (smmu_domain->smmu != smmu) {
2458 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2459 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2462 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2463 smmu_domain->stall_enabled != master->stall_enabled) {
2468 master->domain = smmu_domain;
2471 * The SMMU does not support enabling ATS with bypass. When the STE is
2472 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2473 * Translated transactions are denied as though ATS is disabled for the
2474 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2475 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2477 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2478 master->ats_enabled = arm_smmu_ats_supported(master);
2480 arm_smmu_install_ste_for_dev(master);
2482 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2483 list_add(&master->domain_head, &smmu_domain->devices);
2484 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2486 arm_smmu_enable_ats(master);
2489 mutex_unlock(&smmu_domain->init_mutex);
/*
 * iommu_domain_ops->map_pages: delegate straight to the io-pgtable ops.
 * NOTE(review): the NULL-ops guard appears elided in this excerpt — confirm.
 */
2493 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2494 phys_addr_t paddr, size_t pgsize, size_t pgcount,
2495 int prot, gfp_t gfp, size_t *mapped)
2497 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2502 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
/*
 * iommu_domain_ops->unmap_pages: delegate to the io-pgtable ops; TLB
 * invalidation is deferred to the @gather / iotlb_sync path.
 * NOTE(review): the NULL-ops guard appears elided in this excerpt — confirm.
 */
2505 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2506 size_t pgsize, size_t pgcount,
2507 struct iommu_iotlb_gather *gather)
2509 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2515 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
/*
 * iommu_domain_ops->flush_iotlb_all: full TLB invalidation for the domain.
 * Skipped when the domain has not yet been attached to an SMMU instance.
 */
2518 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2520 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2522 if (smmu_domain->smmu)
2523 arm_smmu_tlb_inv_context(smmu_domain);
/*
 * iommu_domain_ops->iotlb_sync: flush the range accumulated in @gather.
 * A zero gather->pgsize means nothing was queued, so there is nothing to do.
 */
2526 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2527 struct iommu_iotlb_gather *gather)
2529 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2531 if (!gather->pgsize)
2534 arm_smmu_tlb_inv_range_domain(gather->start,
2535 gather->end - gather->start + 1,
2536 gather->pgsize, true, smmu_domain);
/*
 * iommu_domain_ops->iova_to_phys: software page-table walk via io-pgtable.
 * NOTE(review): return type and NULL-ops guard are elided in this excerpt.
 */
2540 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2542 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2547 return ops->iova_to_phys(ops, iova);
2550 static struct platform_driver arm_smmu_driver;
/*
 * Look up the arm_smmu_device bound to @fwnode by searching devices bound
 * to this driver; returns NULL if no matching probed instance exists.
 */
2553 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2555 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2558 return dev ? dev_get_drvdata(dev) : NULL;
/*
 * Check @sid fits inside the stream table: num_l1_ents entries for a linear
 * table, scaled by 2^STRTAB_SPLIT L2 entries per L1 descriptor when 2-level.
 * NOTE(review): the final comparison/return is elided in this excerpt.
 */
2561 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2563 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2565 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2566 limit *= 1UL << STRTAB_SPLIT;
/*
 * Validate @sid against the stream table and, for 2-level tables, make sure
 * the L2 table covering it is allocated and initialised.
 */
2571 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2573 /* Check the SIDs are in range of the SMMU and our stream table */
2574 if (!arm_smmu_sid_in_range(smmu, sid))
2577 /* Ensure l2 strtab is initialised */
2578 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2579 return arm_smmu_init_l2_strtab(smmu, sid);
/*
 * Register @master's stream IDs with @smmu: allocate one arm_smmu_stream per
 * fwspec ID, initialise the covering strtab entries, and insert each stream
 * into the SID rb-tree (used for event/fault -> device lookup). Duplicate
 * SIDs are rejected with a warning. On error, streams inserted so far are
 * removed and the array freed.
 * NOTE(review): ret/i declarations and some error-path lines are elided in
 * this excerpt — confirm against full source.
 */
2584 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2585 struct arm_smmu_master *master)
2589 struct arm_smmu_stream *new_stream, *cur_stream;
2590 struct rb_node **new_node, *parent_node = NULL;
2591 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2593 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2595 if (!master->streams)
2597 master->num_streams = fwspec->num_ids;
2599 mutex_lock(&smmu->streams_mutex);
2600 for (i = 0; i < fwspec->num_ids; i++) {
2601 u32 sid = fwspec->ids[i];
2603 new_stream = &master->streams[i];
2604 new_stream->id = sid;
2605 new_stream->master = master;
2607 ret = arm_smmu_init_sid_strtab(smmu, sid);
2611 /* Insert into SID tree */
2612 new_node = &(smmu->streams.rb_node);
2614 cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2616 parent_node = *new_node;
2617 if (cur_stream->id > new_stream->id) {
2618 new_node = &((*new_node)->rb_left);
2619 } else if (cur_stream->id < new_stream->id) {
2620 new_node = &((*new_node)->rb_right);
2622 dev_warn(master->dev,
2623 "stream %u already in tree\n",
2632 rb_link_node(&new_stream->node, parent_node, new_node);
2633 rb_insert_color(&new_stream->node, &smmu->streams);
/* Error unwind: remove the streams already inserted, newest first */
2637 for (i--; i >= 0; i--)
2638 rb_erase(&master->streams[i].node, &smmu->streams);
2639 kfree(master->streams);
2641 mutex_unlock(&smmu->streams_mutex);
/*
 * Undo arm_smmu_insert_master(): drop every stream from the SID rb-tree
 * under streams_mutex and free the per-master stream array. A master that
 * never completed insertion (no smmu or no streams) is a no-op.
 */
2646 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2649 struct arm_smmu_device *smmu = master->smmu;
2650 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2652 if (!smmu || !master->streams)
2655 mutex_lock(&smmu->streams_mutex);
2656 for (i = 0; i < fwspec->num_ids; i++)
2657 rb_erase(&master->streams[i].node, &smmu->streams);
2658 mutex_unlock(&smmu->streams_mutex);
2660 kfree(master->streams);
2663 static struct iommu_ops arm_smmu_ops;
/*
 * iommu_ops->probe_device: bind a newly discovered device to its SMMU.
 * Allocates the per-device arm_smmu_master, registers its stream IDs,
 * determines usable SSID bits (capped by HW and by the linear CD-table
 * limit when 2-level CD tables are absent), enables PCI PASID, and records
 * stall capability. Returns the iommu_device handle or an ERR_PTR.
 * NOTE(review): some declarations and error-label lines are elided in this
 * excerpt — confirm against full source.
 */
2665 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2668 struct arm_smmu_device *smmu;
2669 struct arm_smmu_master *master;
2670 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2672 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2673 return ERR_PTR(-ENODEV);
2675 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2676 return ERR_PTR(-EBUSY);
2678 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2680 return ERR_PTR(-ENODEV);
2682 master = kzalloc(sizeof(*master), GFP_KERNEL);
2684 return ERR_PTR(-ENOMEM);
2687 master->smmu = smmu;
2688 INIT_LIST_HEAD(&master->bonds);
2689 dev_iommu_priv_set(dev, master);
2691 ret = arm_smmu_insert_master(smmu, master);
2693 goto err_free_master;
/* ssid_bits: firmware-declared PASID width, clamped to what HW supports */
2695 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2696 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2699 * Note that PASID must be enabled before, and disabled after ATS:
2700 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2702 * Behavior is undefined if this bit is Set and the value of the PASID
2703 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2706 arm_smmu_enable_pasid(master);
2708 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2709 master->ssid_bits = min_t(u8, master->ssid_bits,
2710 CTXDESC_LINEAR_CDMAX);
2712 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2713 device_property_read_bool(dev, "dma-can-stall")) ||
2714 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2715 master->stall_enabled = true;
2717 return &smmu->iommu;
2721 dev_iommu_priv_set(dev, NULL);
2722 return ERR_PTR(ret);
/*
 * iommu_ops->release_device: tear down per-device state in the reverse
 * order of probe — detach, disable PASID, unregister streams. SVA should
 * already be disabled; warn and unhook from the IOPF queue if not.
 * NOTE(review): the trailing kfree(master) appears elided in this excerpt.
 */
2725 static void arm_smmu_release_device(struct device *dev)
2727 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2729 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2730 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2731 arm_smmu_detach_dev(master);
2732 arm_smmu_disable_pasid(master);
2733 arm_smmu_remove_master(master);
/*
 * iommu_ops->device_group: PCI devices group by RID aliasing; everything
 * else gets its own group.
 * NOTE(review): the return statement is elided in this excerpt.
 */
2737 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2739 struct iommu_group *group;
2742 * We don't support devices sharing stream IDs other than PCI RID
2743 * aliases, since the necessary ID-to-device lookup becomes rather
2744 * impractical given a potential sparse 32-bit stream ID space.
2746 if (dev_is_pci(dev))
2747 group = pci_device_group(dev);
2749 group = generic_device_group(dev);
/*
 * iommu_domain_ops->enable_nesting: switch the domain to nested stage.
 * Only permitted before the domain is attached (smmu still NULL); the
 * error path for an already-finalised domain is elided in this excerpt.
 */
2754 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2756 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2759 mutex_lock(&smmu_domain->init_mutex);
2760 if (smmu_domain->smmu)
2763 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2764 mutex_unlock(&smmu_domain->init_mutex);
/* iommu_ops->of_xlate: record the single-cell stream ID from the DT phandle */
2769 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2771 return iommu_fwspec_add_ids(dev, args->args, 1);
/*
 * iommu_ops->get_resv_regions: reserve the software MSI doorbell window
 * plus any DMA-API reserved regions for @dev.
 * NOTE(review): "®ion" looks like a text-mangled "&region" — confirm
 * against the pristine source; the NULL check after allocation is elided.
 */
2774 static void arm_smmu_get_resv_regions(struct device *dev,
2775 struct list_head *head)
2777 struct iommu_resv_region *region;
2778 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2780 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2781 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2785 list_add_tail(®ion->list, head);
2787 iommu_dma_get_resv_regions(dev, head);
/*
 * iommu_ops->dev_enable_feat: turn on IOPF or SVA for @dev. Each feature is
 * rejected when unsupported or already enabled (error returns elided in this
 * excerpt); enabling SVA is delegated to arm_smmu_master_enable_sva().
 */
2790 static int arm_smmu_dev_enable_feature(struct device *dev,
2791 enum iommu_dev_features feat)
2793 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2799 case IOMMU_DEV_FEAT_IOPF:
2800 if (!arm_smmu_master_iopf_supported(master))
2802 if (master->iopf_enabled)
2804 master->iopf_enabled = true;
2806 case IOMMU_DEV_FEAT_SVA:
2807 if (!arm_smmu_master_sva_supported(master))
2809 if (arm_smmu_master_sva_enabled(master))
2811 return arm_smmu_master_enable_sva(master);
/*
 * iommu_ops->dev_disable_feat: mirror of dev_enable_feature. IOPF cannot
 * be disabled while SVA still depends on it (error returns elided in this
 * excerpt); disabling SVA is delegated to arm_smmu_master_disable_sva().
 */
2817 static int arm_smmu_dev_disable_feature(struct device *dev,
2818 enum iommu_dev_features feat)
2820 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2826 case IOMMU_DEV_FEAT_IOPF:
2827 if (!master->iopf_enabled)
2829 if (master->sva_enabled)
2831 master->iopf_enabled = false;
2833 case IOMMU_DEV_FEAT_SVA:
2834 if (!arm_smmu_master_sva_enabled(master))
2836 return arm_smmu_master_disable_sva(master);
2843 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2844 * PCIe link and save the data to memory by DMA. The hardware is restricted to
2845 * use identity mapping only.
2847 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2848 (pdev)->device == 0xa12e)
/*
 * iommu_ops->def_domain_type: force an identity domain for the HiSilicon
 * PTT device quirk above; 0 (no preference) otherwise — the default return
 * is elided in this excerpt.
 */
2850 static int arm_smmu_def_domain_type(struct device *dev)
2852 if (dev_is_pci(dev)) {
2853 struct pci_dev *pdev = to_pci_dev(dev);
2855 if (IS_HISI_PTT_DEVICE(pdev))
2856 return IOMMU_DOMAIN_IDENTITY;
/*
 * iommu_ops->remove_dev_pasid: look up the SVA domain bound at @pasid and
 * hand teardown to the SVA code. Only SVA-type PASID domains are expected.
 */
2862 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2864 struct iommu_domain *domain;
2866 domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2867 if (WARN_ON(IS_ERR(domain)) || !domain)
2870 arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
/*
 * Driver entry points for the IOMMU core. pgsize_bitmap starts as -1UL and
 * is narrowed in arm_smmu_device_hw_probe() once the supported granules are
 * known; per-domain ops live in default_domain_ops below.
 */
2873 static struct iommu_ops arm_smmu_ops = {
2874 .capable = arm_smmu_capable,
2875 .domain_alloc = arm_smmu_domain_alloc,
2876 .probe_device = arm_smmu_probe_device,
2877 .release_device = arm_smmu_release_device,
2878 .device_group = arm_smmu_device_group,
2879 .of_xlate = arm_smmu_of_xlate,
2880 .get_resv_regions = arm_smmu_get_resv_regions,
2881 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
2882 .dev_enable_feat = arm_smmu_dev_enable_feature,
2883 .dev_disable_feat = arm_smmu_dev_disable_feature,
2884 .page_response = arm_smmu_page_response,
2885 .def_domain_type = arm_smmu_def_domain_type,
2886 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2887 .owner = THIS_MODULE,
2888 .default_domain_ops = &(const struct iommu_domain_ops) {
2889 .attach_dev = arm_smmu_attach_dev,
2890 .map_pages = arm_smmu_map_pages,
2891 .unmap_pages = arm_smmu_unmap_pages,
2892 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2893 .iotlb_sync = arm_smmu_iotlb_sync,
2894 .iova_to_phys = arm_smmu_iova_to_phys,
2895 .enable_nesting = arm_smmu_enable_nesting,
2896 .free = arm_smmu_domain_free,
2900 /* Probing and initialisation functions */
/*
 * Allocate and configure one HW queue (cmdq/evtq/priq): grab DMA-coherent
 * memory for 2^max_n_shift entries of @dwords 64-bit words each, shrinking
 * the queue (down to a page) until the allocation succeeds, then record the
 * prod/cons register offsets and program q_base (address + log2 size).
 * NOTE(review): the retry loop structure, the "page" parameter and the
 * error return are partially elided in this excerpt — confirm.
 */
2901 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2902 struct arm_smmu_queue *q,
2904 unsigned long prod_off,
2905 unsigned long cons_off,
2906 size_t dwords, const char *name)
/* Queue byte size: entries * dwords * 8 bytes per dword */
2911 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2912 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2914 if (q->base || qsz < PAGE_SIZE)
2917 q->llq.max_n_shift--;
2922 "failed to allocate queue (0x%zx bytes) for %s\n",
/* HW requires the base to be naturally aligned to the queue size */
2927 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2928 dev_info(smmu->dev, "allocated %u entries for %s\n",
2929 1 << q->llq.max_n_shift, name);
2932 q->prod_reg = page + prod_off;
2933 q->cons_reg = page + cons_off;
2934 q->ent_dwords = dwords;
2936 q->q_base = Q_BASE_RWA;
2937 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2938 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2940 q->llq.prod = q->llq.cons = 0;
/*
 * Initialise the lock-free command-queue bookkeeping: owner/lock atomics
 * and a one-bit-per-entry validity bitmap used by the batching algorithm.
 * NOTE(review): the -ENOMEM return is elided in this excerpt.
 */
2944 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2946 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2947 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2949 atomic_set(&cmdq->owner_prod, 0);
2950 atomic_set(&cmdq->lock, 0);
2952 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2954 if (!cmdq->valid_map)
/*
 * Allocate all HW queues: cmdq (plus its batching state), evtq (plus an
 * IOPF queue when SVA+stalls are supported), and priq only when the PRI
 * feature is present. Note cmdq prod/cons live in page 0 (smmu->base)
 * while evtq/priq registers live in page 1.
 * NOTE(review): intermediate "if (ret) return ret;" lines are elided in
 * this excerpt.
 */
2960 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2965 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2966 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2967 CMDQ_ENT_DWORDS, "cmdq");
2971 ret = arm_smmu_cmdq_init(smmu);
2976 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2977 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2978 EVTQ_ENT_DWORDS, "evtq");
2982 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2983 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2984 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2985 if (!smmu->evtq.iopf)
2990 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2993 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2994 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2995 PRIQ_ENT_DWORDS, "priq");
/*
 * Allocate the L1 descriptor shadow array and write an (initially empty)
 * L1 descriptor for every entry of the 2-level stream table.
 * NOTE(review): the -ENOMEM check and final return are elided here.
 */
2998 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3001 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3002 void *strtab = smmu->strtab_cfg.strtab;
3004 cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3005 sizeof(*cfg->l1_desc), GFP_KERNEL);
3009 for (i = 0; i < cfg->num_l1_ents; ++i) {
3010 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3011 strtab += STRTAB_L1_DESC_DWORDS << 3;
/*
 * Build a 2-level stream table: size the L1 to cover SIDSIZE (capped at
 * STRTAB_L1_SZ_SHIFT worth of descriptors), allocate it DMA-coherent,
 * program STRTAB_BASE_CFG for 2-level format, and initialise the L1
 * descriptors. L2 tables are allocated lazily per SID.
 */
3017 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3022 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3024 /* Calculate the L1 size, capped to the SIDSIZE. */
3025 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3026 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3027 cfg->num_l1_ents = 1 << size;
3029 size += STRTAB_SPLIT;
3030 if (size < smmu->sid_bits)
3032 "2-level strtab only covers %u/%u bits of SID\n",
3033 size, smmu->sid_bits);
3035 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3036 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3040 "failed to allocate l1 stream table (%u bytes)\n",
3044 cfg->strtab = strtab;
3046 /* Configure strtab_base_cfg for 2 levels */
3047 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3048 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3049 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3050 cfg->strtab_base_cfg = reg;
3052 return arm_smmu_init_l1_strtab(smmu);
/*
 * Build a linear stream table: one STE per possible SID (2^sid_bits),
 * allocated DMA-coherent, with every entry pre-initialised to bypass.
 * NOTE(review): allocation-failure handling and the return are elided.
 */
3055 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3060 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3062 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3063 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3067 "failed to allocate linear stream table (%u bytes)\n",
3071 cfg->strtab = strtab;
3072 cfg->num_l1_ents = 1 << smmu->sid_bits;
3074 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3075 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3076 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3077 cfg->strtab_base_cfg = reg;
3079 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
/*
 * Set up the stream table (2-level when supported, else linear), record
 * the STRTAB_BASE value to program at reset, and reserve VMID 0 for
 * stage-2 bypass STEs.
 */
3083 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3088 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3089 ret = arm_smmu_init_strtab_2lvl(smmu);
3091 ret = arm_smmu_init_strtab_linear(smmu);
3096 /* Set the strtab base address */
3097 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3098 reg |= STRTAB_BASE_RA;
3099 smmu->strtab_cfg.strtab_base = reg;
3101 /* Allocate the first VMID for stage-2 bypass STEs */
3102 set_bit(0, smmu->vmid_map);
/*
 * Allocate all in-memory data structures: SID rb-tree bookkeeping, the HW
 * queues, then the stream table.
 */
3106 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3110 mutex_init(&smmu->streams_mutex);
3111 smmu->streams = RB_ROOT;
3113 ret = arm_smmu_init_queues(smmu);
3117 return arm_smmu_init_strtab(smmu);
/*
 * Write @val to @reg_off and poll the corresponding ACK register until it
 * reflects the same value, or time out after ARM_SMMU_POLL_TIMEOUT_US.
 */
3120 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3121 unsigned int reg_off, unsigned int ack_off)
3125 writel_relaxed(val, smmu->base + reg_off);
3126 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3127 1, ARM_SMMU_POLL_TIMEOUT_US);
3130 /* GBPA is "special" */
/*
 * Read-modify-write GBPA: wait for any in-flight UPDATE to finish, apply
 * @set/@clr, write back with GBPA_UPDATE, and wait for HW to latch it.
 * NOTE(review): the read-modify lines between the two polls are partially
 * elided in this excerpt.
 */
3131 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3134 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3136 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3137 1, ARM_SMMU_POLL_TIMEOUT_US);
3143 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3144 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3145 1, ARM_SMMU_POLL_TIMEOUT_US);
3148 dev_err(smmu->dev, "GBPA not responding to update\n");
/* devm teardown action: release the platform MSIs allocated in setup_msis */
3152 static void arm_smmu_free_msis(void *data)
3154 struct device *dev = data;
3155 platform_msi_domain_free_irqs(dev);
/*
 * platform-MSI write callback: program the doorbell address, payload and
 * memory attributes for one MSI index into the SMMU's IRQ_CFG0/1/2
 * registers (offsets taken from arm_smmu_msi_cfg[]).
 */
3158 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3160 phys_addr_t doorbell;
3161 struct device *dev = msi_desc_to_dev(desc);
3162 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3163 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3165 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3166 doorbell &= MSI_CFG0_ADDR_MASK;
3168 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3169 writel_relaxed(msg->data, smmu->base + cfg[1]);
3170 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
/*
 * Allocate MSIs for the event queue, global errors and (if present) the
 * PRI queue, falling back to wired interrupts when the SMMU lacks MSI
 * support, no MSI domain exists, or allocation fails. The MSI address
 * registers are cleared first so a failed path leaves them inert.
 */
3173 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3175 int ret, nvec = ARM_SMMU_MAX_MSIS;
3176 struct device *dev = smmu->dev;
3178 /* Clear the MSI address regs */
3179 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3180 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3182 if (smmu->features & ARM_SMMU_FEAT_PRI)
3183 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3187 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3190 if (!dev->msi.domain) {
3191 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3195 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3196 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3198 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3202 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3203 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3204 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3206 /* Add callback to free MSIs on teardown */
3207 devm_add_action(dev, arm_smmu_free_msis, dev);
/*
 * Request per-source IRQ lines (evtq, gerror, priq) after trying to route
 * them over MSIs. Failure to get any individual line is non-fatal: the
 * SMMU still operates, minus that reporting path, hence the warnings.
 */
3210 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3214 arm_smmu_setup_msis(smmu);
3216 /* Request interrupt lines */
3217 irq = smmu->evtq.q.irq;
3219 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3220 arm_smmu_evtq_thread,
3222 "arm-smmu-v3-evtq", smmu);
3224 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3226 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3229 irq = smmu->gerr_irq;
3231 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3232 0, "arm-smmu-v3-gerror", smmu);
3234 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3236 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
/* PRIQ interrupt only exists when the PRI feature was detected */
3239 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3240 irq = smmu->priq.q.irq;
3242 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3243 arm_smmu_priq_thread,
3249 "failed to enable priq irq\n");
3251 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
/*
 * Disable IRQ generation, wire up either the single combined IRQ (Cavium
 * ThunderX2 quirk) or per-source IRQs, then re-enable generation for the
 * evtq/gerror (and priq, when present) sources via IRQ_CTRL.
 */
3256 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3259 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3261 /* Disable IRQs first */
3262 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3263 ARM_SMMU_IRQ_CTRLACK);
3265 dev_err(smmu->dev, "failed to disable irqs\n");
3269 irq = smmu->combined_irq;
3272 * Cavium ThunderX2 implementation doesn't support unique irq
3273 * lines. Use a single irq line for all the SMMUv3 interrupts.
3275 ret = devm_request_threaded_irq(smmu->dev, irq,
3276 arm_smmu_combined_irq_handler,
3277 arm_smmu_combined_irq_thread,
3279 "arm-smmu-v3-combined-irq", smmu);
3281 dev_warn(smmu->dev, "failed to enable combined irq\n");
3283 arm_smmu_setup_unique_irqs(smmu);
3285 if (smmu->features & ARM_SMMU_FEAT_PRI)
3286 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3288 /* Enable interrupt generation on the SMMU */
3289 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3290 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3292 dev_warn(smmu->dev, "failed to enable irqs\n");
/* Disable the SMMU and all queue processing by clearing CR0 (synced) */
3297 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3301 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3303 dev_err(smmu->dev, "failed to clear cr0\n");
/*
 * Full HW bring-up sequence: disable a possibly-running SMMU, program
 * CR1/CR2 and the stream-table base, enable the command queue, flush
 * cached configuration and TLBs, enable the event queue, PRI queue and
 * ATS checking (where supported), hook up IRQs, and finally either enable
 * translation (CR0.SMMUEN) or configure global bypass via GBPA per the
 * @bypass argument and the disable_bypass module parameter.
 * NOTE(review): many "if (ret) return ret;" lines and the success return
 * are elided in this excerpt — confirm against full source.
 */
3308 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3312 struct arm_smmu_cmdq_ent cmd;
3314 /* Clear CR0 and sync (disables SMMU and queue processing) */
3315 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3316 if (reg & CR0_SMMUEN) {
3317 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3318 WARN_ON(is_kdump_kernel() && !disable_bypass);
3319 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3322 ret = arm_smmu_device_disable(smmu);
3326 /* CR1 (table and queue memory attributes) */
3327 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3328 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3329 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3330 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3331 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3332 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3333 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3335 /* CR2 (random crap) */
3336 reg = CR2_PTM | CR2_RECINVSID;
3338 if (smmu->features & ARM_SMMU_FEAT_E2H)
3341 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
/* Stream table base + format */
3344 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3345 smmu->base + ARM_SMMU_STRTAB_BASE);
3346 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3347 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
/* Command queue */
3350 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3351 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3352 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3354 enables = CR0_CMDQEN;
3355 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3358 dev_err(smmu->dev, "failed to enable command queue\n");
3362 /* Invalidate any cached configuration */
3363 cmd.opcode = CMDQ_OP_CFGI_ALL;
3364 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3366 /* Invalidate any stale TLB entries */
3367 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3368 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3369 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3372 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3373 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Event queue (registers in page 1) */
3376 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3377 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3378 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3380 enables |= CR0_EVTQEN;
3381 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3384 dev_err(smmu->dev, "failed to enable event queue\n");
/* PRI queue, only when the feature exists */
3389 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3390 writeq_relaxed(smmu->priq.q.q_base,
3391 smmu->base + ARM_SMMU_PRIQ_BASE);
3392 writel_relaxed(smmu->priq.q.llq.prod,
3393 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3394 writel_relaxed(smmu->priq.q.llq.cons,
3395 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3397 enables |= CR0_PRIQEN;
3398 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3401 dev_err(smmu->dev, "failed to enable PRI queue\n");
3406 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3407 enables |= CR0_ATSCHK;
3408 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3411 dev_err(smmu->dev, "failed to enable ATS check\n");
3416 ret = arm_smmu_setup_irqs(smmu);
3418 dev_err(smmu->dev, "failed to setup irqs\n");
/* In a kdump kernel, keep the queues that could fault the old state off */
3422 if (is_kdump_kernel())
3423 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3425 /* Enable the SMMU interface, or ensure bypass */
3426 if (!bypass || disable_bypass) {
3427 enables |= CR0_SMMUEN;
3429 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3433 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3436 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3443 #define IIDR_IMPLEMENTER_ARM 0x43b
3444 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
3445 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
/*
 * Apply implementation-specific erratum workarounds keyed off the IIDR
 * register (implementer/product/variant/revision). MMU-600 early revisions
 * lose SEV and nesting; MMU-700 loses BTM and nesting and requires forced
 * CMD_SYNC.
 */
3447 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3450 unsigned int implementer, productid, variant, revision;
3452 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3453 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3454 productid = FIELD_GET(IIDR_PRODUCTID, reg);
3455 variant = FIELD_GET(IIDR_VARIANT, reg);
3456 revision = FIELD_GET(IIDR_REVISION, reg);
3458 switch (implementer) {
3459 case IIDR_IMPLEMENTER_ARM:
3460 switch (productid) {
3461 case IIDR_PRODUCTID_ARM_MMU_600:
3462 /* Arm erratum 1076982 */
3463 if (variant == 0 && revision <= 2)
3464 smmu->features &= ~ARM_SMMU_FEAT_SEV;
3465 /* Arm erratum 1209401 */
3467 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3469 case IIDR_PRODUCTID_ARM_MMU_700:
3470 /* Arm erratum 2812531 */
3471 smmu->features &= ~ARM_SMMU_FEAT_BTM;
3472 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3473 /* Arm errata 2268618, 2812531 */
3474 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
/*
 * Read the ID registers (IDR0/1/3/5) and populate smmu->features, queue
 * sizes, SID/SSID widths, page-size bitmap and address sizes. Also merges
 * the result into the global arm_smmu_ops.pgsize_bitmap, sets the table
 * walker's DMA mask, applies IIDR errata and probes SVA support.
 * NOTE(review): several case bodies / feature-set lines are elided in this
 * excerpt — confirm against full source.
 */
3481 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3484 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
/* IDR0: structure formats, endianness, translation stages, misc features */
3487 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3489 /* 2-level structures */
3490 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3491 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3493 if (reg & IDR0_CD2L)
3494 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3497 * Translation table endianness.
3498 * We currently require the same endianness as the CPU, but this
3499 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3501 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3502 case IDR0_TTENDIAN_MIXED:
3503 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3506 case IDR0_TTENDIAN_BE:
3507 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3510 case IDR0_TTENDIAN_LE:
3511 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3515 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3519 /* Boolean feature flags */
3520 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3521 smmu->features |= ARM_SMMU_FEAT_PRI;
3523 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3524 smmu->features |= ARM_SMMU_FEAT_ATS;
3527 smmu->features |= ARM_SMMU_FEAT_SEV;
3529 if (reg & IDR0_MSI) {
3530 smmu->features |= ARM_SMMU_FEAT_MSI;
3531 if (coherent && !disable_msipolling)
3532 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3535 if (reg & IDR0_HYP) {
3536 smmu->features |= ARM_SMMU_FEAT_HYP;
3537 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3538 smmu->features |= ARM_SMMU_FEAT_E2H;
3542 * The coherency feature as set by FW is used in preference to the ID
3543 * register, but warn on mismatch.
3545 if (!!(reg & IDR0_COHACC) != coherent)
3546 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3547 coherent ? "true" : "false");
3549 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3550 case IDR0_STALL_MODEL_FORCE:
3551 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3553 case IDR0_STALL_MODEL_STALL:
3554 smmu->features |= ARM_SMMU_FEAT_STALLS;
3558 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3561 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3563 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3564 dev_err(smmu->dev, "no translation support!\n");
3568 /* We only support the AArch64 table format at present */
3569 switch (FIELD_GET(IDR0_TTF, reg)) {
3570 case IDR0_TTF_AARCH32_64:
3573 case IDR0_TTF_AARCH64:
3576 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3580 /* ASID/VMID sizes */
3581 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3582 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
/* IDR1: queue sizes and SID/SSID widths */
3585 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3586 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3587 dev_err(smmu->dev, "embedded implementation not supported\n");
3591 /* Queue sizes, capped to ensure natural alignment */
3592 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3593 FIELD_GET(IDR1_CMDQS, reg));
3594 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3596 * We don't support splitting up batches, so one batch of
3597 * commands plus an extra sync needs to fit inside the command
3598 * queue. There's also no way we can handle the weird alignment
3599 * restrictions on the base pointer for a unit-length queue.
3601 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3602 CMDQ_BATCH_ENTRIES);
3606 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3607 FIELD_GET(IDR1_EVTQS, reg));
3608 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3609 FIELD_GET(IDR1_PRIQS, reg));
3611 /* SID/SSID sizes */
3612 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3613 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3614 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3617 * If the SMMU supports fewer bits than would fill a single L2 stream
3618 * table, use a linear table instead.
3620 if (smmu->sid_bits <= STRTAB_SPLIT)
3621 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
/* IDR3: range invalidation */
3624 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3625 if (FIELD_GET(IDR3_RIL, reg))
3626 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
/* IDR5: stall limits, granule sizes and address sizes */
3629 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3631 /* Maximum number of outstanding stalls */
3632 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3635 if (reg & IDR5_GRAN64K)
3636 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3637 if (reg & IDR5_GRAN16K)
3638 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3639 if (reg & IDR5_GRAN4K)
3640 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3642 /* Input address size */
3643 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3644 smmu->features |= ARM_SMMU_FEAT_VAX;
3646 /* Output address size */
3647 switch (FIELD_GET(IDR5_OAS, reg)) {
3648 case IDR5_OAS_32_BIT:
3651 case IDR5_OAS_36_BIT:
3654 case IDR5_OAS_40_BIT:
3657 case IDR5_OAS_42_BIT:
3660 case IDR5_OAS_44_BIT:
3663 case IDR5_OAS_52_BIT:
3665 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3669 "unknown output address size. Truncating to 48-bit\n");
3671 case IDR5_OAS_48_BIT:
/* First probed instance seeds the global pgsize_bitmap; later ones widen it */
3675 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3676 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3678 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3680 /* Set the DMA mask for our table walker */
3681 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3683 "failed to set DMA mask for table walker\n");
3685 smmu->ias = max(smmu->ias, smmu->oas);
3687 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3688 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3689 smmu->features |= ARM_SMMU_FEAT_NESTING;
3691 arm_smmu_device_iidr_probe(smmu);
3693 if (arm_smmu_sva_supported(smmu))
3694 smmu->features |= ARM_SMMU_FEAT_SVA;
3696 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3697 smmu->ias, smmu->oas, smmu->features);
/*
 * Translate the IORT SMMUv3 model field into driver option flags for
 * known quirky implementations (Cavium CN99xx, HiSilicon Hi161x).
 */
3702 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3705 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3706 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3708 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3709 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3713 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
/*
 * ACPI/IORT probe path: pull the SMMUv3 node from platform data, apply
 * model-specific options, and honour the coherent-access override flag.
 */
3716 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3717 struct arm_smmu_device *smmu)
3719 struct acpi_iort_smmu_v3 *iort_smmu;
3720 struct device *dev = smmu->dev;
3721 struct acpi_iort_node *node;
3723 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3725 /* Retrieve SMMUv3 specific data */
3726 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3728 acpi_smmu_get_options(iort_smmu->model, smmu);
3730 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3731 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
/* !ACPI stub — presumably returns an error code; body elided in this excerpt */
3736 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3737 struct arm_smmu_device *smmu)
/*
 * Devicetree probe path: validate #iommu-cells (must be 1), parse
 * DT-declared driver options, and pick up dma-coherent.
 */
3743 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3744 struct arm_smmu_device *smmu)
3746 struct device *dev = &pdev->dev;
3750 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3751 dev_err(dev, "missing #iommu-cells property\n");
3752 else if (cells != 1)
3753 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3757 parse_driver_options(smmu);
3759 if (of_dma_is_coherent(dev->of_node))
3760 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3765 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3767 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3773 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3774 resource_size_t size)
3776 struct resource res = DEFINE_RES_MEM(start, size);
3778 return devm_ioremap_resource(dev, &res);
3781 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3783 struct list_head rmr_list;
3784 struct iommu_resv_region *e;
3786 INIT_LIST_HEAD(&rmr_list);
3787 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3789 list_for_each_entry(e, &rmr_list, list) {
3791 struct iommu_iort_rmr_data *rmr;
3794 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3795 for (i = 0; i < rmr->num_sids; i++) {
3796 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3798 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3803 step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3804 arm_smmu_init_bypass_stes(step, 1, true);
3808 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3811 static int arm_smmu_device_probe(struct platform_device *pdev)
3814 struct resource *res;
3815 resource_size_t ioaddr;
3816 struct arm_smmu_device *smmu;
3817 struct device *dev = &pdev->dev;
3820 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3826 ret = arm_smmu_device_dt_probe(pdev, smmu);
3828 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3833 /* Set bypass mode according to firmware probing result */
3837 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3840 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3841 dev_err(dev, "MMIO region too small (%pr)\n", res);
3844 ioaddr = res->start;
3847 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3848 * the PMCG registers which are reserved by the PMU driver.
3850 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3851 if (IS_ERR(smmu->base))
3852 return PTR_ERR(smmu->base);
3854 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3855 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3857 if (IS_ERR(smmu->page1))
3858 return PTR_ERR(smmu->page1);
3860 smmu->page1 = smmu->base;
3863 /* Interrupt lines */
3865 irq = platform_get_irq_byname_optional(pdev, "combined");
3867 smmu->combined_irq = irq;
3869 irq = platform_get_irq_byname_optional(pdev, "eventq");
3871 smmu->evtq.q.irq = irq;
3873 irq = platform_get_irq_byname_optional(pdev, "priq");
3875 smmu->priq.q.irq = irq;
3877 irq = platform_get_irq_byname_optional(pdev, "gerror");
3879 smmu->gerr_irq = irq;
3882 ret = arm_smmu_device_hw_probe(smmu);
3886 /* Initialise in-memory data structures */
3887 ret = arm_smmu_init_structures(smmu);
3891 /* Record our private device structure */
3892 platform_set_drvdata(pdev, smmu);
3894 /* Check for RMRs and install bypass STEs if any */
3895 arm_smmu_rmr_install_bypass_ste(smmu);
3897 /* Reset the device */
3898 ret = arm_smmu_device_reset(smmu, bypass);
3902 /* And we're up. Go go go! */
3903 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3904 "smmu3.%pa", &ioaddr);
3908 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3910 dev_err(dev, "Failed to register iommu\n");
3911 iommu_device_sysfs_remove(&smmu->iommu);
3918 static void arm_smmu_device_remove(struct platform_device *pdev)
3920 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3922 iommu_device_unregister(&smmu->iommu);
3923 iommu_device_sysfs_remove(&smmu->iommu);
3924 arm_smmu_device_disable(smmu);
3925 iopf_queue_free(smmu->evtq.iopf);
/* Shutdown hook: just stop the hardware; no teardown of software state. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}
3935 static const struct of_device_id arm_smmu_of_match[] = {
3936 { .compatible = "arm,smmu-v3", },
3939 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
/* Drain pending SVA notifier callbacks before the module text goes away. */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}
3947 static struct platform_driver arm_smmu_driver = {
3949 .name = "arm-smmu-v3",
3950 .of_match_table = arm_smmu_of_match,
3951 .suppress_bind_attrs = true,
3953 .probe = arm_smmu_device_probe,
3954 .remove_new = arm_smmu_device_remove,
3955 .shutdown = arm_smmu_device_shutdown,
3957 module_driver(arm_smmu_driver, platform_driver_register,
3958 arm_smmu_driver_unregister);
3960 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3961 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3962 MODULE_ALIAS("platform:arm-smmu-v3");
3963 MODULE_LICENSE("GPL v2");