// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
31 #include <linux/amba/bus.h>
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 "Disable MSI-based polling for CMD_SYNC completion.");
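/*
 * Both parameters are mode 0444, so they cannot be flipped at runtime and are
 * normally set on the kernel command line. An illustrative example, assuming
 * the usual "<module>.<param>" prefix for this driver:
 *
 *	arm_smmu_v3.disable_bypass=0 arm_smmu_v3.disable_msipolling=1
 */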
46 enum arm_smmu_msi_index {
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 ARM_SMMU_EVTQ_IRQ_CFG0,
56 ARM_SMMU_EVTQ_IRQ_CFG1,
57 ARM_SMMU_EVTQ_IRQ_CFG2,
59 [GERROR_MSI_INDEX] = {
60 ARM_SMMU_GERROR_IRQ_CFG0,
61 ARM_SMMU_GERROR_IRQ_CFG1,
62 ARM_SMMU_GERROR_IRQ_CFG2,
65 ARM_SMMU_PRIQ_IRQ_CFG0,
66 ARM_SMMU_PRIQ_IRQ_CFG1,
67 ARM_SMMU_PRIQ_IRQ_CFG2,
71 struct arm_smmu_option_prop {
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling stall, instead of a fault.
 */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
91 static void parse_driver_options(struct arm_smmu_device *smmu)
96 if (of_property_read_bool(smmu->dev->of_node,
97 arm_smmu_options[i].prop)) {
98 smmu->options |= arm_smmu_options[i].opt;
99 dev_notice(smmu->dev, "option %s\n",
100 arm_smmu_options[i].prop);
102 } while (arm_smmu_options[++i].opt);
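/*
 * A sketch of how a quirk takes effect once parsed: later code only tests the
 * bit in smmu->options, e.g. ARM_SMMU_OPT_SKIP_PREFETCH makes
 * arm_smmu_write_strtab_ent() skip the CMDQ_OP_PREFETCH_CFG it would
 * otherwise issue for a freshly written STE.
 */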
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
108 u32 space, prod, cons;
110 prod = Q_IDX(q, q->prod);
111 cons = Q_IDX(q, q->cons);
113 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 space = (1 << q->max_n_shift) - (prod - cons);
121 static bool queue_full(struct arm_smmu_ll_queue *q)
123 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
129 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
135 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
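/*
 * A worked example of the prod/cons encoding used by the helpers above (a
 * sketch with a deliberately tiny queue): each pointer carries an index in
 * its low max_n_shift bits plus a wrap bit just above them. With
 * max_n_shift = 3 (8 entries), prod = wrap 0/idx 5 and cons = wrap 0/idx 2
 * have equal wrap bits, so queue_has_space() sees 8 - (5 - 2) = 5 free slots.
 * Once the producer laps the buffer, prod = wrap 1/idx 2 against
 * cons = wrap 0/idx 2 gives equal indices but differing wrap bits, which is
 * exactly queue_full(); equal indices *and* equal wrap bits is queue_empty().
 */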
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
144 * Ensure that all CPU accesses (reads and writes) to the queue
145 * are complete before we update the cons pointer.
148 writel_relaxed(q->llq.cons, q->cons_reg);
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
153 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
163 * We can't use the _relaxed() variant here, as we must prevent
164 * speculative reads of the queue before we have determined that
165 * prod has indeed moved.
167 prod = readl(q->prod_reg);
169 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
178 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 struct arm_smmu_queue_poll *qp)
187 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
193 if (ktime_compare(ktime_get(), qp->timeout) > 0)
198 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
213 for (i = 0; i < n_dwords; ++i)
214 *dst++ = cpu_to_le64(*src++);
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
221 for (i = 0; i < n_dwords; ++i)
222 *dst++ = le64_to_cpu(*src++);
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
227 if (queue_empty(&q->llq))
230 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 queue_inc_cons(&q->llq);
232 queue_sync_cons_out(q);
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
239 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
242 switch (ent->opcode) {
243 case CMDQ_OP_TLBI_EL2_ALL:
244 case CMDQ_OP_TLBI_NSNH_ALL:
246 case CMDQ_OP_PREFETCH_CFG:
247 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
249 case CMDQ_OP_CFGI_CD:
250 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
252 case CMDQ_OP_CFGI_STE:
253 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
256 case CMDQ_OP_CFGI_CD_ALL:
257 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
259 case CMDQ_OP_CFGI_ALL:
260 /* Cover the entire SID range */
261 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
263 case CMDQ_OP_TLBI_NH_VA:
264 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
266 case CMDQ_OP_TLBI_EL2_VA:
267 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
275 case CMDQ_OP_TLBI_S2_IPA:
276 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
284 case CMDQ_OP_TLBI_NH_ASID:
285 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
287 case CMDQ_OP_TLBI_S12_VMALL:
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
290 case CMDQ_OP_TLBI_EL2_ASID:
291 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
293 case CMDQ_OP_ATC_INV:
294 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
301 case CMDQ_OP_PRI_RESP:
302 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 switch (ent->pri.resp) {
314 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
317 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
321 case CMDQ_OP_CMD_SYNC:
322 if (ent->sync.msiaddr) {
323 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
326 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
328 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
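/*
 * An illustrative way to drive this builder (a sketch; it mirrors
 * arm_smmu_tlb_inv_asid() further down rather than adding a new interface):
 * callers describe the command in a struct arm_smmu_cmdq_ent and let this
 * function pack the opcode-specific fields into the raw command words, e.g.
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode		= CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid	= asid,
 *	};
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *
 *	if (arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		// unknown opcode, nothing queued
 *
 * The issue/batch helpers below wrap exactly this sequence.
 */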
338 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
343 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
344 struct arm_smmu_queue *q, u32 prod)
346 struct arm_smmu_cmdq_ent ent = {
347 .opcode = CMDQ_OP_CMD_SYNC,
351 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
352 * payload, so the write will zero the entire command on that platform.
354 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
355 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
359 arm_smmu_cmdq_build_cmd(cmd, &ent);
362 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
363 struct arm_smmu_queue *q)
365 static const char * const cerror_str[] = {
366 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
367 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
368 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
369 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
373 u64 cmd[CMDQ_ENT_DWORDS];
374 u32 cons = readl_relaxed(q->cons_reg);
375 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
376 struct arm_smmu_cmdq_ent cmd_sync = {
377 .opcode = CMDQ_OP_CMD_SYNC,
380 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
381 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
384 case CMDQ_ERR_CERROR_ABT_IDX:
385 dev_err(smmu->dev, "retrying command fetch\n");
387 case CMDQ_ERR_CERROR_NONE_IDX:
389 case CMDQ_ERR_CERROR_ATC_INV_IDX:
391 * ATC Invalidation Completion timeout. CONS is still pointing
392 * at the CMD_SYNC. Attempt to complete other pending commands
393 * by repeating the CMD_SYNC, though we might well end up back
394 * here since the ATC invalidation may still be pending.
397 case CMDQ_ERR_CERROR_ILL_IDX:
403 * We may have concurrent producers, so we need to be careful
404 * not to touch any of the shadow cmdq state.
406 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
407 dev_err(smmu->dev, "skipping command in error state:\n");
408 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
409 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
411 /* Convert the erroneous command into a CMD_SYNC */
412 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
414 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
417 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
419 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
423 * Command queue locking.
424 * This is a form of bastardised rwlock with the following major changes:
426 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 * Neither have barrier semantics, and instead provide only a control
 * dependency.
430 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
431 * fails if the caller appears to be the last lock holder (yes, this is
432 * racy). All successful UNLOCK routines have RELEASE semantics.
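/*
 * A sketch of the lock word's states, as implied by the helpers below: 0 is
 * unlocked, a positive value counts the shared holders, and INT_MIN marks the
 * lock as held exclusively (so shared_lock()'s relaxed increments leave it
 * negative and its cmpxchg() loop waits for the value to turn non-negative
 * again). The exclusive unlock simply stores 0 with release semantics.
 */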
434 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
439 * We can try to avoid the cmpxchg() loop by simply incrementing the
440 * lock counter. When held in exclusive state, the lock counter is set
441 * to INT_MIN so these increments won't hurt as the value will remain
444 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
448 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
449 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
452 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
454 (void)atomic_dec_return_release(&cmdq->lock);
457 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
459 if (atomic_read(&cmdq->lock) == 1)
462 arm_smmu_cmdq_shared_unlock(cmdq);
466 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
469 local_irq_save(flags); \
470 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
472 local_irq_restore(flags); \
476 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
478 atomic_set_release(&cmdq->lock, 0); \
479 local_irq_restore(flags); \
484 * Command queue insertion.
485 * This is made fiddly by our attempts to achieve some sort of scalability
486 * since there is one queue shared amongst all of the CPUs in the system. If
487 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
488 * then you'll *love* this monstrosity.
490 * The basic idea is to split the queue up into ranges of commands that are
491 * owned by a given CPU; the owner may not have written all of the commands
492 * itself, but is responsible for advancing the hardware prod pointer when
493 * the time comes. The algorithm is roughly:
495 * 1. Allocate some space in the queue. At this point we also discover
496 * whether the head of the queue is currently owned by another CPU,
497 * or whether we are the owner.
499 * 2. Write our commands into our allocated slots in the queue.
501 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
503 * 4. If we are an owner:
504 * a. Wait for the previous owner to finish.
505 * b. Mark the queue head as unowned, which tells us the range
506 * that we are responsible for publishing.
507 * c. Wait for all commands in our owned range to become valid.
508 * d. Advance the hardware prod pointer.
509 * e. Tell the next owner we've finished.
511 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
512 * owner), then we need to stick around until it has completed:
513 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
514 * to clear the first 4 bytes.
515 * b. Otherwise, we spin waiting for the hardware cons pointer to
516 * advance past our command.
518 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
522 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
523 u32 sprod, u32 eprod, bool set)
525 u32 swidx, sbidx, ewidx, ebidx;
526 struct arm_smmu_ll_queue llq = {
527 .max_n_shift = cmdq->q.llq.max_n_shift,
531 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
532 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
534 while (llq.prod != eprod) {
537 u32 limit = BITS_PER_LONG;
539 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
540 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
542 ptr = &cmdq->valid_map[swidx];
544 if ((swidx == ewidx) && (sbidx < ebidx))
547 mask = GENMASK(limit - 1, sbidx);
550 * The valid bit is the inverse of the wrap bit. This means
551 * that a zero-initialised queue is invalid and, after marking
 * all entries as valid, they become invalid again when we
 * wrap.
 */
556 atomic_long_xor(mask, ptr);
560 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
561 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
564 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
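/*
 * A worked example of the word-by-word walk above (a sketch assuming 64-bit
 * longs and a queue of at least 128 entries): for the range [idx 60, idx 70)
 * we get swidx = 0, sbidx = 60 and ewidx = 1, ebidx = 6. The first pass stays
 * in word 0 with mask = GENMASK(63, 60) and then advances prod by
 * limit - sbidx = 4 entries; the second pass starts at word 1, bit 0, is
 * capped at ebidx bits, and the loop ends once llq.prod reaches eprod.
 * Setting flips the bits with the xor (valid is the inverse of wrap), while
 * polling waits for the masked bits to reach the expected value.
 */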
568 /* Mark all entries in the range [sprod, eprod) as valid */
569 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
570 u32 sprod, u32 eprod)
572 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
575 /* Wait for all entries in the range [sprod, eprod) to become valid */
576 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
577 u32 sprod, u32 eprod)
579 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
582 /* Wait for the command queue to become non-full */
583 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
584 struct arm_smmu_ll_queue *llq)
587 struct arm_smmu_queue_poll qp;
588 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
592 * Try to update our copy of cons by grabbing exclusive cmdq access. If
593 * that fails, spin until somebody else updates it for us.
595 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
596 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
597 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
598 llq->val = READ_ONCE(cmdq->q.llq.val);
602 queue_poll_init(smmu, &qp);
604 llq->val = READ_ONCE(cmdq->q.llq.val);
605 if (!queue_full(llq))
608 ret = queue_poll(&qp);
615 * Wait until the SMMU signals a CMD_SYNC completion MSI.
616 * Must be called with the cmdq lock held in some capacity.
618 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
619 struct arm_smmu_ll_queue *llq)
622 struct arm_smmu_queue_poll qp;
623 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
624 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
626 queue_poll_init(smmu, &qp);
629 * The MSI won't generate an event, since it's being written back
630 * into the command queue.
633 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
634 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
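/*
 * The "MSI" here never reaches an interrupt controller: the CMD_SYNC was
 * built with msiaddr pointing back at the first word of its own slot in the
 * command queue (see arm_smmu_cmdq_build_sync_cmd()), so completion shows up
 * as the SMMU overwriting that word with zero, which is what the
 * smp_cond_load_relaxed() above is waiting for.
 */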
639 * Wait until the SMMU cons index passes llq->prod.
640 * Must be called with the cmdq lock held in some capacity.
642 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
643 struct arm_smmu_ll_queue *llq)
645 struct arm_smmu_queue_poll qp;
646 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
647 u32 prod = llq->prod;
650 queue_poll_init(smmu, &qp);
651 llq->val = READ_ONCE(cmdq->q.llq.val);
653 if (queue_consumed(llq, prod))
656 ret = queue_poll(&qp);
659 * This needs to be a readl() so that our subsequent call
660 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
662 * Specifically, we need to ensure that we observe all
663 * shared_lock()s by other CMD_SYNCs that share our owner,
664 * so that a failing call to tryunlock() means that we're
665 * the last one out and therefore we can safely advance
666 * cmdq->q.llq.cons. Roughly speaking:
 * CPU 0 takes shared_lock() and publishes its command; the owning CPU
 * (CPU 1) polls the valid map, a <control dependency>, before its
 * writel() of prod; we (CPU 2) then perform this readl() of cons
 * followed by tryunlock().
 *
 * Requires us to see CPU 0's shared_lock() acquisition.
 */
686 llq->cons = readl(cmdq->q.cons_reg);
692 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
693 struct arm_smmu_ll_queue *llq)
695 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
696 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
698 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
701 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
705 struct arm_smmu_ll_queue llq = {
706 .max_n_shift = cmdq->q.llq.max_n_shift,
710 for (i = 0; i < n; ++i) {
711 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
713 prod = queue_inc_prod_n(&llq, i);
714 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
719 * This is the actual insertion function, and provides the following
720 * ordering guarantees to callers:
722 * - There is a dma_wmb() before publishing any commands to the queue.
723 * This can be relied upon to order prior writes to data structures
724 * in memory (such as a CD or an STE) before the command.
726 * - On completion of a CMD_SYNC, there is a control dependency.
727 * This can be relied upon to order subsequent writes to memory (e.g.
728 * freeing an IOVA) after completion of the CMD_SYNC.
730 * - Command insertion is totally ordered, so if two CPUs each race to
731 * insert their own list of commands then all of the commands from one
732 * CPU will appear before any of the commands from the other CPU.
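/*
 * A sketch of how a caller leans on these guarantees: to publish a new
 * context descriptor, arm_smmu_write_ctx_desc() writes the CD in memory and
 * then calls arm_smmu_sync_cd(), which submits CMDQ_OP_CFGI_CD commands with
 * a trailing CMD_SYNC through this path. The dma_wmb() in step 3 orders the
 * CD writes before the commands become visible, and returning from the
 * CMD_SYNC poll orders whatever the caller does next after the invalidation.
 */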
734 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
735 u64 *cmds, int n, bool sync)
737 u64 cmd_sync[CMDQ_ENT_DWORDS];
741 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
742 struct arm_smmu_ll_queue llq, head;
745 llq.max_n_shift = cmdq->q.llq.max_n_shift;
747 /* 1. Allocate some space in the queue */
748 local_irq_save(flags);
749 llq.val = READ_ONCE(cmdq->q.llq.val);
753 while (!queue_has_space(&llq, n + sync)) {
754 local_irq_restore(flags);
755 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
756 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
757 local_irq_save(flags);
760 head.cons = llq.cons;
761 head.prod = queue_inc_prod_n(&llq, n + sync) |
762 CMDQ_PROD_OWNED_FLAG;
764 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
770 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
771 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
772 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
775 * 2. Write our commands into the queue
776 * Dependency ordering from the cmpxchg() loop above.
778 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
780 prod = queue_inc_prod_n(&llq, n);
781 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
782 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
785 * In order to determine completion of our CMD_SYNC, we must
786 * ensure that the queue can't wrap twice without us noticing.
787 * We achieve that by taking the cmdq lock as shared before
788 * marking our slot as valid.
790 arm_smmu_cmdq_shared_lock(cmdq);
793 /* 3. Mark our slots as valid, ensuring commands are visible first */
795 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
797 /* 4. If we are the owner, take control of the SMMU hardware */
799 /* a. Wait for previous owner to finish */
800 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
802 /* b. Stop gathering work by clearing the owned flag */
803 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
804 &cmdq->q.llq.atomic.prod);
805 prod &= ~CMDQ_PROD_OWNED_FLAG;
808 * c. Wait for any gathered work to be written to the queue.
809 * Note that we read our own entries so that we have the control
810 * dependency required by (d).
812 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
815 * d. Advance the hardware prod pointer
816 * Control dependency ordering from the entries becoming valid.
818 writel_relaxed(prod, cmdq->q.prod_reg);
821 * e. Tell the next owner we're done
822 * Make sure we've updated the hardware first, so that we don't
823 * race to update prod and potentially move it backwards.
825 atomic_set_release(&cmdq->owner_prod, prod);
828 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
830 llq.prod = queue_inc_prod_n(&llq, n);
831 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
833 dev_err_ratelimited(smmu->dev,
834 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
836 readl_relaxed(cmdq->q.prod_reg),
837 readl_relaxed(cmdq->q.cons_reg));
841 * Try to unlock the cmdq lock. This will fail if we're the last
842 * reader, in which case we can safely update cmdq->q.llq.cons
844 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
845 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
846 arm_smmu_cmdq_shared_unlock(cmdq);
850 local_irq_restore(flags);
854 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
855 struct arm_smmu_cmdq_ent *ent,
858 u64 cmd[CMDQ_ENT_DWORDS];
860 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
861 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
866 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
869 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 struct arm_smmu_cmdq_ent *ent)
872 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
875 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
876 struct arm_smmu_cmdq_ent *ent)
878 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
881 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
882 struct arm_smmu_cmdq_batch *cmds,
883 struct arm_smmu_cmdq_ent *cmd)
887 if (cmds->num == CMDQ_BATCH_ENTRIES) {
888 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
892 index = cmds->num * CMDQ_ENT_DWORDS;
893 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
894 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
902 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
903 struct arm_smmu_cmdq_batch *cmds)
905 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
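/*
 * Typical use of the batch interface, mirroring arm_smmu_sync_cd() below (a
 * sketch; for_each_target() stands in for whatever loop generates the
 * commands):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD };
 *
 *	cmds.num = 0;
 *	for_each_target(...) {
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * arm_smmu_cmdq_batch_add() flushes automatically (without a sync) whenever
 * the on-stack buffer fills up.
 */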
908 static int arm_smmu_page_response(struct device *dev,
909 struct iommu_fault_event *unused,
910 struct iommu_page_response *resp)
912 struct arm_smmu_cmdq_ent cmd = {0};
913 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
914 int sid = master->streams[0].id;
916 if (master->stall_enabled) {
917 cmd.opcode = CMDQ_OP_RESUME;
918 cmd.resume.sid = sid;
919 cmd.resume.stag = resp->grpid;
920 switch (resp->code) {
921 case IOMMU_PAGE_RESP_INVALID:
922 case IOMMU_PAGE_RESP_FAILURE:
923 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
925 case IOMMU_PAGE_RESP_SUCCESS:
926 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
935 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
937 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
938 * RESUME consumption guarantees that the stalled transaction will be
939 * terminated... at some point in the future. PRI_RESP is fire and
946 /* Context descriptor manipulation functions */
947 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
949 struct arm_smmu_cmdq_ent cmd = {
950 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
951 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
955 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
958 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
963 struct arm_smmu_master *master;
964 struct arm_smmu_cmdq_batch cmds;
965 struct arm_smmu_device *smmu = smmu_domain->smmu;
966 struct arm_smmu_cmdq_ent cmd = {
967 .opcode = CMDQ_OP_CFGI_CD,
976 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
977 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
978 for (i = 0; i < master->num_streams; i++) {
979 cmd.cfgi.sid = master->streams[i].id;
980 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
983 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
985 arm_smmu_cmdq_batch_submit(smmu, &cmds);
988 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
989 struct arm_smmu_l1_ctx_desc *l1_desc)
991 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
993 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
994 &l1_desc->l2ptr_dma, GFP_KERNEL);
995 if (!l1_desc->l2ptr) {
997 "failed to allocate context descriptor table\n");
1003 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1004 struct arm_smmu_l1_ctx_desc *l1_desc)
1006 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1009 /* See comment in arm_smmu_write_ctx_desc() */
1010 WRITE_ONCE(*dst, cpu_to_le64(val));
1013 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1018 struct arm_smmu_l1_ctx_desc *l1_desc;
1019 struct arm_smmu_device *smmu = smmu_domain->smmu;
1020 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1022 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1023 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1025 idx = ssid >> CTXDESC_SPLIT;
1026 l1_desc = &cdcfg->l1_desc[idx];
1027 if (!l1_desc->l2ptr) {
1028 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1031 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1032 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1033 /* An invalid L1CD can be cached */
1034 arm_smmu_sync_cd(smmu_domain, ssid, false);
1036 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1037 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
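/*
 * A sketch of the lookup above: with the two-level format the SSID is simply
 * split in two, the top bits (ssid >> CTXDESC_SPLIT) selecting an L1
 * descriptor whose leaf table is allocated lazily here, and the low
 * CTXDESC_SPLIT bits indexing the CD inside that leaf. The linear format is
 * the degenerate case where the SSID indexes the CD table directly.
 */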
1040 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1041 struct arm_smmu_ctx_desc *cd)
1044 * This function handles the following cases:
1046 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1047 * (2) Install a secondary CD, for SID+SSID traffic.
1048 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1049 * CD, then invalidate the old entry and mappings.
1050 * (4) Quiesce the context without clearing the valid bit. Disable
1051 * translation, and ignore any translation fault.
1052 * (5) Remove a secondary CD.
1058 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1061 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1065 val = le64_to_cpu(cdptr[0]);
1066 cd_live = !!(val & CTXDESC_CD_0_V);
1068 if (!cd) { /* (5) */
1070 } else if (cd == &quiet_cd) { /* (4) */
1071 val |= CTXDESC_CD_0_TCR_EPD0;
1072 } else if (cd_live) { /* (3) */
1073 val &= ~CTXDESC_CD_0_ASID;
1074 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1076 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1077 * this substream's traffic
1079 } else { /* (1) and (2) */
1080 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1082 cdptr[3] = cpu_to_le64(cd->mair);
1085 * STE is live, and the SMMU might read dwords of this CD in any
1086 * order. Ensure that it observes valid values before reading
1089 arm_smmu_sync_cd(smmu_domain, ssid, true);
1095 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1096 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1098 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1101 if (smmu_domain->stall_enabled)
1102 val |= CTXDESC_CD_0_S;
1106 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1107 * "Configuration structures and configuration invalidation completion"
1109 * The size of single-copy atomic reads made by the SMMU is
1110 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1111 * field within an aligned 64-bit span of a structure can be altered
1112 * without first making the structure invalid.
1114 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1115 arm_smmu_sync_cd(smmu_domain, ssid, true);
1119 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1123 size_t max_contexts;
1124 struct arm_smmu_device *smmu = smmu_domain->smmu;
1125 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1126 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1128 max_contexts = 1 << cfg->s1cdmax;
1130 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1131 max_contexts <= CTXDESC_L2_ENTRIES) {
1132 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1133 cdcfg->num_l1_ents = max_contexts;
1135 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1137 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1138 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1139 CTXDESC_L2_ENTRIES);
1141 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1142 sizeof(*cdcfg->l1_desc),
1144 if (!cdcfg->l1_desc)
1147 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1150 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1152 if (!cdcfg->cdtab) {
1153 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1161 if (cdcfg->l1_desc) {
1162 devm_kfree(smmu->dev, cdcfg->l1_desc);
1163 cdcfg->l1_desc = NULL;
1168 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1171 size_t size, l1size;
1172 struct arm_smmu_device *smmu = smmu_domain->smmu;
1173 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1175 if (cdcfg->l1_desc) {
1176 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1178 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1179 if (!cdcfg->l1_desc[i].l2ptr)
1182 dmam_free_coherent(smmu->dev, size,
1183 cdcfg->l1_desc[i].l2ptr,
1184 cdcfg->l1_desc[i].l2ptr_dma);
1186 devm_kfree(smmu->dev, cdcfg->l1_desc);
1187 cdcfg->l1_desc = NULL;
1189 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1191 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1194 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1195 cdcfg->cdtab_dma = 0;
1196 cdcfg->cdtab = NULL;
1199 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1202 struct arm_smmu_ctx_desc *old_cd;
1207 free = refcount_dec_and_test(&cd->refs);
1209 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1210 WARN_ON(old_cd != cd);
1215 /* Stream table manipulation functions */
1217 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1221 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1222 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1224 /* See comment in arm_smmu_write_ctx_desc() */
1225 WRITE_ONCE(*dst, cpu_to_le64(val));
1228 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1230 struct arm_smmu_cmdq_ent cmd = {
1231 .opcode = CMDQ_OP_CFGI_STE,
1238 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1241 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1245 * This is hideously complicated, but we only really care about
1246 * three cases at the moment:
1248 * 1. Invalid (all zero) -> bypass/fault (init)
1249 * 2. Bypass/fault -> translation/bypass (attach)
1250 * 3. Translation/bypass -> bypass/fault (detach)
1252 * Given that we can't update the STE atomically and the SMMU
1253 * doesn't read the thing in a defined order, that leaves us
1254 * with the following maintenance requirements:
1256 * 1. Update Config, return (init time STEs aren't live)
1257 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1258 * 3. Update Config, sync
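/*
 * Requirement 2 is what the tail of this function implements, roughly:
 * dwords 1-3 of the STE are written first, a CFGI_STE + CMD_SYNC is issued,
 * then dword 0 (which carries the valid bit and the config) is published with
 * WRITE_ONCE() and followed by a second sync, so the SMMU can never observe a
 * valid STE whose remaining dwords are stale.
 */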
1260 u64 val = le64_to_cpu(dst[0]);
1261 bool ste_live = false;
1262 struct arm_smmu_device *smmu = NULL;
1263 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1264 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1265 struct arm_smmu_domain *smmu_domain = NULL;
1266 struct arm_smmu_cmdq_ent prefetch_cmd = {
1267 .opcode = CMDQ_OP_PREFETCH_CFG,
1274 smmu_domain = master->domain;
1275 smmu = master->smmu;
1279 switch (smmu_domain->stage) {
1280 case ARM_SMMU_DOMAIN_S1:
1281 s1_cfg = &smmu_domain->s1_cfg;
1283 case ARM_SMMU_DOMAIN_S2:
1284 case ARM_SMMU_DOMAIN_NESTED:
1285 s2_cfg = &smmu_domain->s2_cfg;
1292 if (val & STRTAB_STE_0_V) {
1293 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1294 case STRTAB_STE_0_CFG_BYPASS:
1296 case STRTAB_STE_0_CFG_S1_TRANS:
1297 case STRTAB_STE_0_CFG_S2_TRANS:
1300 case STRTAB_STE_0_CFG_ABORT:
1301 BUG_ON(!disable_bypass);
1304 BUG(); /* STE corruption */
1308 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1309 val = STRTAB_STE_0_V;
1312 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1313 if (!smmu_domain && disable_bypass)
1314 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1316 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1318 dst[0] = cpu_to_le64(val);
1319 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1320 STRTAB_STE_1_SHCFG_INCOMING));
1321 dst[2] = 0; /* Nuke the VMID */
1323 * The SMMU can perform negative caching, so we must sync
1324 * the STE regardless of whether the old value was live.
1327 arm_smmu_sync_ste_for_sid(smmu, sid);
1332 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1333 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1336 dst[1] = cpu_to_le64(
1337 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1338 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1339 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1340 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1341 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1343 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1344 !master->stall_enabled)
1345 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1347 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1348 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1349 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1350 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1355 dst[2] = cpu_to_le64(
1356 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1357 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1359 STRTAB_STE_2_S2ENDI |
1361 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1364 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1366 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1369 if (master->ats_enabled)
1370 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1371 STRTAB_STE_1_EATS_TRANS));
1373 arm_smmu_sync_ste_for_sid(smmu, sid);
1374 /* See comment in arm_smmu_write_ctx_desc() */
1375 WRITE_ONCE(dst[0], cpu_to_le64(val));
1376 arm_smmu_sync_ste_for_sid(smmu, sid);
1378 /* It's likely that we'll want to use the new STE soon */
1379 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1380 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1383 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1387 for (i = 0; i < nent; ++i) {
1388 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1389 strtab += STRTAB_STE_DWORDS;
1393 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1397 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1398 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1403 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1404 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1406 desc->span = STRTAB_SPLIT + 1;
1407 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1411 "failed to allocate l2 stream table for SID %u\n",
1416 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1417 arm_smmu_write_strtab_l1_desc(strtab, desc);
1421 static struct arm_smmu_master *
1422 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1424 struct rb_node *node;
1425 struct arm_smmu_stream *stream;
1427 lockdep_assert_held(&smmu->streams_mutex);
1429 node = smmu->streams.rb_node;
1431 stream = rb_entry(node, struct arm_smmu_stream, node);
1432 if (stream->id < sid)
1433 node = node->rb_right;
1434 else if (stream->id > sid)
1435 node = node->rb_left;
1437 return stream->master;
1443 /* IRQ and event handlers */
1444 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1449 struct arm_smmu_master *master;
1450 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1451 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1452 struct iommu_fault_event fault_evt = { };
1453 struct iommu_fault *flt = &fault_evt.fault;
1455 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1456 case EVT_ID_TRANSLATION_FAULT:
1457 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1459 case EVT_ID_ADDR_SIZE_FAULT:
1460 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1462 case EVT_ID_ACCESS_FAULT:
1463 reason = IOMMU_FAULT_REASON_ACCESS;
1465 case EVT_ID_PERMISSION_FAULT:
1466 reason = IOMMU_FAULT_REASON_PERMISSION;
1472 /* Stage-2 is always pinned at the moment */
1473 if (evt[1] & EVTQ_1_S2)
1476 if (evt[1] & EVTQ_1_RnW)
1477 perm |= IOMMU_FAULT_PERM_READ;
1479 perm |= IOMMU_FAULT_PERM_WRITE;
1481 if (evt[1] & EVTQ_1_InD)
1482 perm |= IOMMU_FAULT_PERM_EXEC;
1484 if (evt[1] & EVTQ_1_PnU)
1485 perm |= IOMMU_FAULT_PERM_PRIV;
1487 if (evt[1] & EVTQ_1_STALL) {
1488 flt->type = IOMMU_FAULT_PAGE_REQ;
1489 flt->prm = (struct iommu_fault_page_request) {
1490 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1491 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1493 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1497 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1498 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1501 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1502 flt->event = (struct iommu_fault_unrecoverable) {
1504 .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1506 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1510 flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1511 flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1515 mutex_lock(&smmu->streams_mutex);
1516 master = arm_smmu_find_master(smmu, sid);
1522 ret = iommu_report_device_fault(master->dev, &fault_evt);
1523 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1524 /* Nobody cared, abort the access */
1525 struct iommu_page_response resp = {
1526 .pasid = flt->prm.pasid,
1527 .grpid = flt->prm.grpid,
1528 .code = IOMMU_PAGE_RESP_FAILURE,
1530 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1534 mutex_unlock(&smmu->streams_mutex);
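/*
 * For stalled faults the round trip looks roughly like this: the event is
 * reported above as an IOMMU_FAULT_PAGE_REQ carrying the stall tag in grpid,
 * and whoever answers it (including the failure path just above) ends up in
 * arm_smmu_page_response(), which translates the response code into a
 * CMDQ_OP_RESUME bearing the same tag so the stalled transaction is retried
 * or terminated.
 */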
1538 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1541 struct arm_smmu_device *smmu = dev;
1542 struct arm_smmu_queue *q = &smmu->evtq.q;
1543 struct arm_smmu_ll_queue *llq = &q->llq;
1544 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1545 DEFAULT_RATELIMIT_BURST);
1546 u64 evt[EVTQ_ENT_DWORDS];
1549 while (!queue_remove_raw(q, evt)) {
1550 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1552 ret = arm_smmu_handle_evt(smmu, evt);
1553 if (!ret || !__ratelimit(&rs))
1556 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1557 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1558 dev_info(smmu->dev, "\t0x%016llx\n",
1559 (unsigned long long)evt[i]);
1564 * Not much we can do on overflow, so scream and pretend we're
1567 if (queue_sync_prod_in(q) == -EOVERFLOW)
1568 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1569 } while (!queue_empty(llq));
1571 /* Sync our overflow flag, as we believe we're up to speed */
1572 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1573 Q_IDX(llq, llq->cons);
1577 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1583 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1584 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1585 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1586 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1587 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1589 dev_info(smmu->dev, "unexpected PRI request received:\n");
1591 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1592 sid, ssid, grpid, last ? "L" : "",
1593 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1594 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1595 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1596 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1597 evt[1] & PRIQ_1_ADDR_MASK);
1600 struct arm_smmu_cmdq_ent cmd = {
1601 .opcode = CMDQ_OP_PRI_RESP,
1602 .substream_valid = ssv,
1607 .resp = PRI_RESP_DENY,
1611 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1615 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1617 struct arm_smmu_device *smmu = dev;
1618 struct arm_smmu_queue *q = &smmu->priq.q;
1619 struct arm_smmu_ll_queue *llq = &q->llq;
1620 u64 evt[PRIQ_ENT_DWORDS];
1623 while (!queue_remove_raw(q, evt))
1624 arm_smmu_handle_ppr(smmu, evt);
1626 if (queue_sync_prod_in(q) == -EOVERFLOW)
1627 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1628 } while (!queue_empty(llq));
1630 /* Sync our overflow flag, as we believe we're up to speed */
1631 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1632 Q_IDX(llq, llq->cons);
1633 queue_sync_cons_out(q);
1637 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1639 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1641 u32 gerror, gerrorn, active;
1642 struct arm_smmu_device *smmu = dev;
1644 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1645 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1647 active = gerror ^ gerrorn;
1648 if (!(active & GERROR_ERR_MASK))
1649 return IRQ_NONE; /* No errors pending */
1652 "unexpected global error reported (0x%08x), this could be serious\n",
1655 if (active & GERROR_SFM_ERR) {
1656 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1657 arm_smmu_device_disable(smmu);
1660 if (active & GERROR_MSI_GERROR_ABT_ERR)
1661 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1663 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1664 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1666 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1667 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1669 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1670 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1672 if (active & GERROR_PRIQ_ABT_ERR)
1673 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1675 if (active & GERROR_EVTQ_ABT_ERR)
1676 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1678 if (active & GERROR_CMDQ_ERR)
1679 arm_smmu_cmdq_skip_err(smmu);
1681 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1685 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1687 struct arm_smmu_device *smmu = dev;
1689 arm_smmu_evtq_thread(irq, dev);
1690 if (smmu->features & ARM_SMMU_FEAT_PRI)
1691 arm_smmu_priq_thread(irq, dev);
1696 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1698 arm_smmu_gerror_handler(irq, dev);
1699 return IRQ_WAKE_THREAD;
1703 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1704 struct arm_smmu_cmdq_ent *cmd)
1708 /* ATC invalidates are always on 4096-bytes pages */
1709 size_t inval_grain_shift = 12;
1710 unsigned long page_start, page_end;
1715 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1716 * prefix. In that case all ATC entries within the address range are
1717 * invalidated, including those that were requested with a PASID! There
1718 * is no way to invalidate only entries without PASID.
1720 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1721 * traffic), translation requests without PASID create ATC entries
1722 * without PASID, which must be invalidated with substream_valid clear.
1723 * This has the unpleasant side-effect of invalidating all PASID-tagged
1724 * ATC entries within the address range.
1726 *cmd = (struct arm_smmu_cmdq_ent) {
1727 .opcode = CMDQ_OP_ATC_INV,
1728 .substream_valid = !!ssid,
1733 cmd->atc.size = ATC_INV_SIZE_ALL;
1737 page_start = iova >> inval_grain_shift;
1738 page_end = (iova + size - 1) >> inval_grain_shift;
1741 * In an ATS Invalidate Request, the address must be aligned on the
1742 * range size, which must be a power of two number of page sizes. We
1743 * thus have to choose between grossly over-invalidating the region, or
1744 * splitting the invalidation into multiple commands. For simplicity
1745 * we'll go with the first solution, but should refine it in the future
1746 * if multiple commands are shown to be more efficient.
1748 * Find the smallest power of two that covers the range. The most
1749 * significant differing bit between the start and end addresses,
1750 * fls(start ^ end), indicates the required span. For example:
1752 * We want to invalidate pages [8; 11]. This is already the ideal range:
1753 * x = 0b1000 ^ 0b1011 = 0b11
1754 * span = 1 << fls(x) = 4
1756 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1757 * x = 0b0111 ^ 0b1010 = 0b1101
1758 * span = 1 << fls(x) = 16
1760 log2_span = fls_long(page_start ^ page_end);
1761 span_mask = (1ULL << log2_span) - 1;
1763 page_start &= ~span_mask;
1765 cmd->atc.addr = page_start << inval_grain_shift;
1766 cmd->atc.size = log2_span;
1769 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1772 struct arm_smmu_cmdq_ent cmd;
1773 struct arm_smmu_cmdq_batch cmds;
1775 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1778 for (i = 0; i < master->num_streams; i++) {
1779 cmd.atc.sid = master->streams[i].id;
1780 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1783 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1786 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1787 unsigned long iova, size_t size)
1790 unsigned long flags;
1791 struct arm_smmu_cmdq_ent cmd;
1792 struct arm_smmu_master *master;
1793 struct arm_smmu_cmdq_batch cmds;
1795 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1799 * Ensure that we've completed prior invalidation of the main TLBs
1800 * before we read 'nr_ats_masters' in case of a concurrent call to
1801 * arm_smmu_enable_ats():
1803 * // unmap() // arm_smmu_enable_ats()
1804 * TLBI+SYNC atomic_inc(&nr_ats_masters);
1806 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1808 * Ensures that we always see the incremented 'nr_ats_masters' count if
1809 * ATS was enabled at the PCI device before completion of the TLBI.
1812 if (!atomic_read(&smmu_domain->nr_ats_masters))
1815 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1819 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1820 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1821 if (!master->ats_enabled)
1824 for (i = 0; i < master->num_streams; i++) {
1825 cmd.atc.sid = master->streams[i].id;
1826 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1829 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1831 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1834 /* IO_PGTABLE API */
1835 static void arm_smmu_tlb_inv_context(void *cookie)
1837 struct arm_smmu_domain *smmu_domain = cookie;
1838 struct arm_smmu_device *smmu = smmu_domain->smmu;
1839 struct arm_smmu_cmdq_ent cmd;
1842 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1843 * PTEs previously cleared by unmaps on the current CPU not yet visible
1844 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
 * insertion to guarantee those are observed before the TLBI. Do be
 * careful, 007.
 */
1848 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1849 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1851 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1852 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1853 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1855 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1858 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1859 unsigned long iova, size_t size,
1861 struct arm_smmu_domain *smmu_domain)
1863 struct arm_smmu_device *smmu = smmu_domain->smmu;
1864 unsigned long end = iova + size, num_pages = 0, tg = 0;
1865 size_t inv_range = granule;
1866 struct arm_smmu_cmdq_batch cmds;
1871 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1872 /* Get the leaf page size */
1873 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1875 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1876 cmd->tlbi.tg = (tg - 10) / 2;
1878 /* Determine what level the granule is at */
1879 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1881 num_pages = size >> tg;
1886 while (iova < end) {
1887 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1889 * On each iteration of the loop, the range is 5 bits
1890 * worth of the aligned size remaining.
1891 * The range in pages is:
1893 * range = (num_pages & (0x1f << __ffs(num_pages)))
1895 unsigned long scale, num;
1897 /* Determine the power of 2 multiple number of pages */
1898 scale = __ffs(num_pages);
1899 cmd->tlbi.scale = scale;
1901 /* Determine how many chunks of 2^scale size we have */
1902 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1903 cmd->tlbi.num = num - 1;
1905 /* range is num * 2^scale * pgsize */
1906 inv_range = num << (scale + tg);
1908 /* Clear out the lower order bits for the next iteration */
1909 num_pages -= num << scale;
1912 cmd->tlbi.addr = iova;
1913 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1916 arm_smmu_cmdq_batch_submit(smmu, &cmds);
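/*
 * A worked example of the range encoding above (a sketch assuming 4KiB leaf
 * pages, i.e. tg = 12 and cmd->tlbi.tg = 1): invalidating 35 pages gives
 * num_pages = 0x23. The first pass picks scale = __ffs(0x23) = 0 and num = 3,
 * covering three pages; num_pages drops to 0x20, so the second pass picks
 * scale = 5 and num = 1, covering the remaining 32 pages with one command.
 * Without ARM_SMMU_FEAT_RANGE_INV the loop simply emits one invalidation per
 * granule instead.
 */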
1919 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1920 size_t granule, bool leaf,
1921 struct arm_smmu_domain *smmu_domain)
1923 struct arm_smmu_cmdq_ent cmd = {
1929 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1930 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1931 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1932 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1934 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1935 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1937 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1940 * Unfortunately, this can't be leaf-only since we may have
1941 * zapped an entire table.
1943 arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1946 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1947 size_t granule, bool leaf,
1948 struct arm_smmu_domain *smmu_domain)
1950 struct arm_smmu_cmdq_ent cmd = {
1951 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1952 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1959 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1962 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1963 unsigned long iova, size_t granule,
1966 struct arm_smmu_domain *smmu_domain = cookie;
1967 struct iommu_domain *domain = &smmu_domain->domain;
1969 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1972 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1973 size_t granule, void *cookie)
1975 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1978 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1979 .tlb_flush_all = arm_smmu_tlb_inv_context,
1980 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1981 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
1985 static bool arm_smmu_capable(enum iommu_cap cap)
1988 case IOMMU_CAP_CACHE_COHERENCY:
1990 case IOMMU_CAP_NOEXEC:
1997 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1999 struct arm_smmu_domain *smmu_domain;
2001 if (type != IOMMU_DOMAIN_UNMANAGED &&
2002 type != IOMMU_DOMAIN_DMA &&
2003 type != IOMMU_DOMAIN_DMA_FQ &&
2004 type != IOMMU_DOMAIN_IDENTITY)
2008 * Allocate the domain and initialise some of its data structures.
 * We can't really do anything meaningful until we've added a
 * master.
 */
2012 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2016 mutex_init(&smmu_domain->init_mutex);
2017 INIT_LIST_HEAD(&smmu_domain->devices);
2018 spin_lock_init(&smmu_domain->devices_lock);
2019 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2021 return &smmu_domain->domain;
2024 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2026 int idx, size = 1 << span;
2029 idx = find_first_zero_bit(map, size);
2032 } while (test_and_set_bit(idx, map));
2037 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2039 clear_bit(idx, map);
2042 static void arm_smmu_domain_free(struct iommu_domain *domain)
2044 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2045 struct arm_smmu_device *smmu = smmu_domain->smmu;
2047 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2049 /* Free the CD and ASID, if we allocated them */
2050 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2051 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2053 /* Prevent SVA from touching the CD while we're freeing it */
2054 mutex_lock(&arm_smmu_asid_lock);
2055 if (cfg->cdcfg.cdtab)
2056 arm_smmu_free_cd_tables(smmu_domain);
2057 arm_smmu_free_asid(&cfg->cd);
2058 mutex_unlock(&arm_smmu_asid_lock);
2060 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2062 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2068 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2069 struct arm_smmu_master *master,
2070 struct io_pgtable_cfg *pgtbl_cfg)
2074 struct arm_smmu_device *smmu = smmu_domain->smmu;
2075 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2076 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2078 refcount_set(&cfg->cd.refs, 1);
2080 /* Prevent SVA from modifying the ASID until it is written to the CD */
2081 mutex_lock(&arm_smmu_asid_lock);
2082 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2083 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2087 cfg->s1cdmax = master->ssid_bits;
2089 smmu_domain->stall_enabled = master->stall_enabled;
2091 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2095 cfg->cd.asid = (u16)asid;
2096 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2097 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2098 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2099 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2100 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2101 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2102 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2103 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2104 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2107 * Note that this will end up calling arm_smmu_sync_cd() before
2108 * the master has been added to the devices list for this domain.
2109 * This isn't an issue because the STE hasn't been installed yet.
2111 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2113 goto out_free_cd_tables;
2115 mutex_unlock(&arm_smmu_asid_lock);
2119 arm_smmu_free_cd_tables(smmu_domain);
2121 arm_smmu_free_asid(&cfg->cd);
2123 mutex_unlock(&arm_smmu_asid_lock);
2127 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2128 struct arm_smmu_master *master,
2129 struct io_pgtable_cfg *pgtbl_cfg)
2132 struct arm_smmu_device *smmu = smmu_domain->smmu;
2133 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2134 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2136 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2140 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2141 cfg->vmid = (u16)vmid;
2142 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2143 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2144 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2145 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2146 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2147 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2148 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2149 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2153 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2154 struct arm_smmu_master *master)
2157 unsigned long ias, oas;
2158 enum io_pgtable_fmt fmt;
2159 struct io_pgtable_cfg pgtbl_cfg;
2160 struct io_pgtable_ops *pgtbl_ops;
2161 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2162 struct arm_smmu_master *,
2163 struct io_pgtable_cfg *);
2164 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165 struct arm_smmu_device *smmu = smmu_domain->smmu;
2167 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2168 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2172 /* Restrict the stage to what we can actually support */
2173 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2174 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2175 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2176 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2178 switch (smmu_domain->stage) {
2179 case ARM_SMMU_DOMAIN_S1:
2180 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2181 ias = min_t(unsigned long, ias, VA_BITS);
2183 fmt = ARM_64_LPAE_S1;
2184 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2186 case ARM_SMMU_DOMAIN_NESTED:
2187 case ARM_SMMU_DOMAIN_S2:
2190 fmt = ARM_64_LPAE_S2;
2191 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2197 pgtbl_cfg = (struct io_pgtable_cfg) {
2198 .pgsize_bitmap = smmu->pgsize_bitmap,
2201 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2202 .tlb = &arm_smmu_flush_ops,
2203 .iommu_dev = smmu->dev,
2206 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2210 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2211 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2212 domain->geometry.force_aperture = true;
2214 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2216 free_io_pgtable_ops(pgtbl_ops);
2220 smmu_domain->pgtbl_ops = pgtbl_ops;
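/*
 * Return a pointer to the STE for @sid. With a two-level stream table the
 * upper SID bits select an L1 descriptor and the low STRTAB_SPLIT bits
 * index into its L2 span; e.g. with STRTAB_SPLIT == 8, SID 0x1234 lands
 * on L1 descriptor 0x12, STE 0x34 within that span. Linear tables are
 * indexed directly by SID.
 */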
2224 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2227 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2229 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2230 struct arm_smmu_strtab_l1_desc *l1_desc;
2233 /* Two-level walk */
2234 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2235 l1_desc = &cfg->l1_desc[idx];
2236 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2237 step = &l1_desc->l2ptr[idx];
2239 /* Simple linear lookup */
2240 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
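/*
 * (Re)write the STE for every stream ID owned by @master, skipping
 * duplicate IDs that can arise from PCI bridge aliasing.
 */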
2246 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2249 struct arm_smmu_device *smmu = master->smmu;
2251 for (i = 0; i < master->num_streams; ++i) {
2252 u32 sid = master->streams[i].id;
2253 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2255 /* Bridged PCI devices may end up with duplicated IDs */
2256 for (j = 0; j < i; j++)
2257 if (master->streams[j].id == sid)
2262 arm_smmu_write_strtab_ent(master, sid, step);
2266 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2268 struct device *dev = master->dev;
2269 struct arm_smmu_device *smmu = master->smmu;
2270 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2272 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2275 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2278 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
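/*
 * Enable ATS at the endpoint once the STE advertises it: derive the
 * smallest translation unit from the supported page sizes, account for
 * the new ATS master and invalidate the ATC before turning ATS on at the
 * PCI function.
 */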
2281 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2284 struct pci_dev *pdev;
2285 struct arm_smmu_device *smmu = master->smmu;
2286 struct arm_smmu_domain *smmu_domain = master->domain;
2288 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2289 if (!master->ats_enabled)
2292 /* Smallest Translation Unit: log2 of the smallest supported granule */
2293 stu = __ffs(smmu->pgsize_bitmap);
2294 pdev = to_pci_dev(master->dev);
2296 atomic_inc(&smmu_domain->nr_ats_masters);
2297 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2298 if (pci_enable_ats(pdev, stu))
2299 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2302 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2304 struct arm_smmu_domain *smmu_domain = master->domain;
2306 if (!master->ats_enabled)
2309 pci_disable_ats(to_pci_dev(master->dev));
2311 * Ensure ATS is disabled at the endpoint before we issue the
2312 * ATC invalidation via the SMMU.
2315 arm_smmu_atc_inv_master(master);
2316 atomic_dec(&smmu_domain->nr_ats_masters);
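/*
 * Enable PCI PASID on the endpoint and clamp master->ssid_bits to the
 * number of PASIDs supported by both the device and the SMMU.
 */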
2319 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2324 struct pci_dev *pdev;
2326 if (!dev_is_pci(master->dev))
2329 pdev = to_pci_dev(master->dev);
2331 features = pci_pasid_features(pdev);
2335 num_pasids = pci_max_pasids(pdev);
2336 if (num_pasids <= 0)
2339 ret = pci_enable_pasid(pdev, features);
2341 dev_err(&pdev->dev, "Failed to enable PASID\n");
2345 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2346 master->smmu->ssid_bits);
2350 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2352 struct pci_dev *pdev;
2354 if (!dev_is_pci(master->dev))
2357 pdev = to_pci_dev(master->dev);
2359 if (!pdev->pasid_enabled)
2362 master->ssid_bits = 0;
2363 pci_disable_pasid(pdev);
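/*
 * Detach @master from its current domain: disable ATS, drop it from the
 * domain's device list and rewrite its STEs now that master->domain is
 * NULL (bypass or abort, depending on disable_bypass).
 */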
2366 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2368 unsigned long flags;
2369 struct arm_smmu_domain *smmu_domain = master->domain;
2374 arm_smmu_disable_ats(master);
2376 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2377 list_del(&master->domain_head);
2378 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2380 master->domain = NULL;
2381 master->ats_enabled = false;
2382 arm_smmu_install_ste_for_dev(master);
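/*
 * iommu_ops attach_dev callback: refuse to attach while SVA is enabled,
 * detach from any previous domain, finalise the domain against this SMMU
 * on first attach, reject incompatible SMMU/SSID/stall configurations,
 * then install the STEs and enable ATS.
 */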
2385 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2388 unsigned long flags;
2389 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2390 struct arm_smmu_device *smmu;
2391 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2392 struct arm_smmu_master *master;
2397 master = dev_iommu_priv_get(dev);
2398 smmu = master->smmu;
2401 * Checking that SVA is disabled ensures that this device isn't bound to
2402 * any mm, and can be safely detached from its old domain. Bonds cannot
2403 * be removed concurrently since we're holding the group mutex.
2405 if (arm_smmu_master_sva_enabled(master)) {
2406 dev_err(dev, "cannot attach - SVA enabled\n");
2410 arm_smmu_detach_dev(master);
2412 mutex_lock(&smmu_domain->init_mutex);
2414 if (!smmu_domain->smmu) {
2415 smmu_domain->smmu = smmu;
2416 ret = arm_smmu_domain_finalise(domain, master);
2418 smmu_domain->smmu = NULL;
2421 } else if (smmu_domain->smmu != smmu) {
2423 "cannot attach to SMMU %s (upstream of %s)\n",
2424 dev_name(smmu_domain->smmu->dev),
2425 dev_name(smmu->dev));
2428 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2429 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2431 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2432 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2435 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2436 smmu_domain->stall_enabled != master->stall_enabled) {
2437 dev_err(dev, "cannot attach to stall-%s domain\n",
2438 smmu_domain->stall_enabled ? "enabled" : "disabled");
2443 master->domain = smmu_domain;
2445 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2446 master->ats_enabled = arm_smmu_ats_supported(master);
2448 arm_smmu_install_ste_for_dev(master);
2450 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2451 list_add(&master->domain_head, &smmu_domain->devices);
2452 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2454 arm_smmu_enable_ats(master);
2457 mutex_unlock(&smmu_domain->init_mutex);
2461 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2462 phys_addr_t paddr, size_t pgsize, size_t pgcount,
2463 int prot, gfp_t gfp, size_t *mapped)
2465 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2470 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2473 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2474 size_t pgsize, size_t pgcount,
2475 struct iommu_iotlb_gather *gather)
2477 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2478 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2483 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2486 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2488 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2490 if (smmu_domain->smmu)
2491 arm_smmu_tlb_inv_context(smmu_domain);
2494 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2495 struct iommu_iotlb_gather *gather)
2497 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2499 if (!gather->pgsize)
2502 arm_smmu_tlb_inv_range_domain(gather->start,
2503 gather->end - gather->start + 1,
2504 gather->pgsize, true, smmu_domain);
2508 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2510 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2515 return ops->iova_to_phys(ops, iova);
2518 static struct platform_driver arm_smmu_driver;
2521 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2523 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2526 return dev ? dev_get_drvdata(dev) : NULL;
2529 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2531 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2533 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2534 limit *= 1UL << STRTAB_SPLIT;
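/*
 * Record @master's stream IDs: validate each SID against the stream-table
 * limits, initialise L2 stream-table entries where needed, and insert the
 * streams into the SID rb-tree used to look up masters for events and PRI
 * requests.
 */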
2539 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2540 struct arm_smmu_master *master)
2544 struct arm_smmu_stream *new_stream, *cur_stream;
2545 struct rb_node **new_node, *parent_node = NULL;
2546 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2548 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2550 if (!master->streams)
2552 master->num_streams = fwspec->num_ids;
2554 mutex_lock(&smmu->streams_mutex);
2555 for (i = 0; i < fwspec->num_ids; i++) {
2556 u32 sid = fwspec->ids[i];
2558 new_stream = &master->streams[i];
2559 new_stream->id = sid;
2560 new_stream->master = master;
2563 * Check the SIDs are in range of the SMMU and our stream table
2565 if (!arm_smmu_sid_in_range(smmu, sid)) {
2570 /* Ensure l2 strtab is initialised */
2571 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2572 ret = arm_smmu_init_l2_strtab(smmu, sid);
2577 /* Insert into SID tree */
2578 new_node = &(smmu->streams.rb_node);
2580 cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2582 parent_node = *new_node;
2583 if (cur_stream->id > new_stream->id) {
2584 new_node = &((*new_node)->rb_left);
2585 } else if (cur_stream->id < new_stream->id) {
2586 new_node = &((*new_node)->rb_right);
2588 dev_warn(master->dev,
2589 "stream %u already in tree\n",
2598 rb_link_node(&new_stream->node, parent_node, new_node);
2599 rb_insert_color(&new_stream->node, &smmu->streams);
2603 for (i--; i >= 0; i--)
2604 rb_erase(&master->streams[i].node, &smmu->streams);
2605 kfree(master->streams);
2607 mutex_unlock(&smmu->streams_mutex);
2612 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2615 struct arm_smmu_device *smmu = master->smmu;
2616 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2618 if (!smmu || !master->streams)
2621 mutex_lock(&smmu->streams_mutex);
2622 for (i = 0; i < fwspec->num_ids; i++)
2623 rb_erase(&master->streams[i].node, &smmu->streams);
2624 mutex_unlock(&smmu->streams_mutex);
2626 kfree(master->streams);
2629 static struct iommu_ops arm_smmu_ops;
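/*
 * iommu_ops probe_device callback: allocate a master for the device
 * described by its fwspec, register its stream IDs, enable PASID where
 * supported and work out the usable SSID width and stall capability.
 */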
2631 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2634 struct arm_smmu_device *smmu;
2635 struct arm_smmu_master *master;
2636 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2638 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2639 return ERR_PTR(-ENODEV);
2641 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2642 return ERR_PTR(-EBUSY);
2644 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2646 return ERR_PTR(-ENODEV);
2648 master = kzalloc(sizeof(*master), GFP_KERNEL);
2650 return ERR_PTR(-ENOMEM);
2653 master->smmu = smmu;
2654 INIT_LIST_HEAD(&master->bonds);
2655 dev_iommu_priv_set(dev, master);
2657 ret = arm_smmu_insert_master(smmu, master);
2659 goto err_free_master;
2661 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2662 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2665 * Note that PASID must be enabled before, and disabled after ATS:
2666 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2668 * Behavior is undefined if this bit is Set and the value of the PASID
2669 * Enable, Execute Requested Enable, or Privileged Mode Requested bits are changed.
2672 arm_smmu_enable_pasid(master);
2674 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2675 master->ssid_bits = min_t(u8, master->ssid_bits,
2676 CTXDESC_LINEAR_CDMAX);
2678 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2679 device_property_read_bool(dev, "dma-can-stall")) ||
2680 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2681 master->stall_enabled = true;
2683 return &smmu->iommu;
2687 dev_iommu_priv_set(dev, NULL);
2688 return ERR_PTR(ret);
2691 static void arm_smmu_release_device(struct device *dev)
2693 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2694 struct arm_smmu_master *master;
2696 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2699 master = dev_iommu_priv_get(dev);
2700 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2701 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2702 arm_smmu_detach_dev(master);
2703 arm_smmu_disable_pasid(master);
2704 arm_smmu_remove_master(master);
2706 iommu_fwspec_free(dev);
2709 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2711 struct iommu_group *group;
2714 * We don't support devices sharing stream IDs other than PCI RID
2715 * aliases, since the necessary ID-to-device lookup becomes rather
2716 * impractical given a potential sparse 32-bit stream ID space.
2718 if (dev_is_pci(dev))
2719 group = pci_device_group(dev);
2721 group = generic_device_group(dev);
2726 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2728 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2731 mutex_lock(&smmu_domain->init_mutex);
2732 if (smmu_domain->smmu)
2735 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2736 mutex_unlock(&smmu_domain->init_mutex);
2741 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2743 return iommu_fwspec_add_ids(dev, args->args, 1);
2746 static void arm_smmu_get_resv_regions(struct device *dev,
2747 struct list_head *head)
2749 struct iommu_resv_region *region;
2750 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2752 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2753 prot, IOMMU_RESV_SW_MSI);
2757 list_add_tail(&region->list, head);
2759 iommu_dma_get_resv_regions(dev, head);
2762 static bool arm_smmu_dev_has_feature(struct device *dev,
2763 enum iommu_dev_features feat)
2765 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2771 case IOMMU_DEV_FEAT_IOPF:
2772 return arm_smmu_master_iopf_supported(master);
2773 case IOMMU_DEV_FEAT_SVA:
2774 return arm_smmu_master_sva_supported(master);
2780 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2781 enum iommu_dev_features feat)
2783 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2789 case IOMMU_DEV_FEAT_IOPF:
2790 return master->iopf_enabled;
2791 case IOMMU_DEV_FEAT_SVA:
2792 return arm_smmu_master_sva_enabled(master);
2798 static int arm_smmu_dev_enable_feature(struct device *dev,
2799 enum iommu_dev_features feat)
2801 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2803 if (!arm_smmu_dev_has_feature(dev, feat))
2806 if (arm_smmu_dev_feature_enabled(dev, feat))
2810 case IOMMU_DEV_FEAT_IOPF:
2811 master->iopf_enabled = true;
2813 case IOMMU_DEV_FEAT_SVA:
2814 return arm_smmu_master_enable_sva(master);
2820 static int arm_smmu_dev_disable_feature(struct device *dev,
2821 enum iommu_dev_features feat)
2823 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2825 if (!arm_smmu_dev_feature_enabled(dev, feat))
2829 case IOMMU_DEV_FEAT_IOPF:
2830 if (master->sva_enabled)
2832 master->iopf_enabled = false;
2834 case IOMMU_DEV_FEAT_SVA:
2835 return arm_smmu_master_disable_sva(master);
2841 static struct iommu_ops arm_smmu_ops = {
2842 .capable = arm_smmu_capable,
2843 .domain_alloc = arm_smmu_domain_alloc,
2844 .domain_free = arm_smmu_domain_free,
2845 .attach_dev = arm_smmu_attach_dev,
2846 .map_pages = arm_smmu_map_pages,
2847 .unmap_pages = arm_smmu_unmap_pages,
2848 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2849 .iotlb_sync = arm_smmu_iotlb_sync,
2850 .iova_to_phys = arm_smmu_iova_to_phys,
2851 .probe_device = arm_smmu_probe_device,
2852 .release_device = arm_smmu_release_device,
2853 .device_group = arm_smmu_device_group,
2854 .enable_nesting = arm_smmu_enable_nesting,
2855 .of_xlate = arm_smmu_of_xlate,
2856 .get_resv_regions = arm_smmu_get_resv_regions,
2857 .put_resv_regions = generic_iommu_put_resv_regions,
2858 .dev_has_feat = arm_smmu_dev_has_feature,
2859 .dev_feat_enabled = arm_smmu_dev_feature_enabled,
2860 .dev_enable_feat = arm_smmu_dev_enable_feature,
2861 .dev_disable_feat = arm_smmu_dev_disable_feature,
2862 .sva_bind = arm_smmu_sva_bind,
2863 .sva_unbind = arm_smmu_sva_unbind,
2864 .sva_get_pasid = arm_smmu_sva_get_pasid,
2865 .page_response = arm_smmu_page_response,
2866 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2867 .owner = THIS_MODULE,
2870 /* Probing and initialisation functions */
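/*
 * Allocate a DMA-coherent queue, halving its size until the allocation
 * succeeds (but never below one page), then record the prod/cons register
 * locations and encode the base address and log2 size into Q_BASE.
 */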
2871 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2872 struct arm_smmu_queue *q,
2874 unsigned long prod_off,
2875 unsigned long cons_off,
2876 size_t dwords, const char *name)
2881 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2882 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2884 if (q->base || qsz < PAGE_SIZE)
2887 q->llq.max_n_shift--;
2892 "failed to allocate queue (0x%zx bytes) for %s\n",
2897 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2898 dev_info(smmu->dev, "allocated %u entries for %s\n",
2899 1 << q->llq.max_n_shift, name);
2902 q->prod_reg = page + prod_off;
2903 q->cons_reg = page + cons_off;
2904 q->ent_dwords = dwords;
2906 q->q_base = Q_BASE_RWA;
2907 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2908 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2910 q->llq.prod = q->llq.cons = 0;
2914 static void arm_smmu_cmdq_free_bitmap(void *data)
2916 unsigned long *bitmap = data;
2917 bitmap_free(bitmap);
2920 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2923 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2924 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2925 atomic_long_t *bitmap;
2927 atomic_set(&cmdq->owner_prod, 0);
2928 atomic_set(&cmdq->lock, 0);
2930 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2932 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2935 cmdq->valid_map = bitmap;
2936 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2942 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2947 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2948 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2949 CMDQ_ENT_DWORDS, "cmdq");
2953 ret = arm_smmu_cmdq_init(smmu);
2958 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2959 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2960 EVTQ_ENT_DWORDS, "evtq");
2964 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2965 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2966 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2967 if (!smmu->evtq.iopf)
2972 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2975 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2976 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2977 PRIQ_ENT_DWORDS, "priq");
2980 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2983 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2984 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2985 void *strtab = smmu->strtab_cfg.strtab;
2987 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2991 for (i = 0; i < cfg->num_l1_ents; ++i) {
2992 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2993 strtab += STRTAB_L1_DESC_DWORDS << 3;
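/*
 * Set up a two-level stream table. The L1 size is derived from
 * STRTAB_L1_SZ_SHIFT and capped by the SID width; with the usual
 * STRTAB_L1_SZ_SHIFT of 20 that allows up to 128K L1 descriptors (1MB),
 * each covering 2^STRTAB_SPLIT STEs, and a warning is printed if this
 * still cannot cover every SID bit.
 */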
2999 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3004 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3006 /* Calculate the L1 size, capped to the SIDSIZE. */
3007 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3008 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3009 cfg->num_l1_ents = 1 << size;
3011 size += STRTAB_SPLIT;
3012 if (size < smmu->sid_bits)
3014 "2-level strtab only covers %u/%u bits of SID\n",
3015 size, smmu->sid_bits);
3017 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3018 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3022 "failed to allocate l1 stream table (%u bytes)\n",
3026 cfg->strtab = strtab;
3028 /* Configure strtab_base_cfg for 2 levels */
3029 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3030 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3031 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3032 cfg->strtab_base_cfg = reg;
3034 return arm_smmu_init_l1_strtab(smmu);
3037 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3042 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3044 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3045 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3049 "failed to allocate linear stream table (%u bytes)\n",
3053 cfg->strtab = strtab;
3054 cfg->num_l1_ents = 1 << smmu->sid_bits;
3056 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3057 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3058 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3059 cfg->strtab_base_cfg = reg;
3061 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3065 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3070 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3071 ret = arm_smmu_init_strtab_2lvl(smmu);
3073 ret = arm_smmu_init_strtab_linear(smmu);
3078 /* Set the strtab base address */
3079 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3080 reg |= STRTAB_BASE_RA;
3081 smmu->strtab_cfg.strtab_base = reg;
3083 /* Allocate the first VMID for stage-2 bypass STEs */
3084 set_bit(0, smmu->vmid_map);
3088 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3092 mutex_init(&smmu->streams_mutex);
3093 smmu->streams = RB_ROOT;
3095 ret = arm_smmu_init_queues(smmu);
3099 return arm_smmu_init_strtab(smmu);
3102 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3103 unsigned int reg_off, unsigned int ack_off)
3107 writel_relaxed(val, smmu->base + reg_off);
3108 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3109 1, ARM_SMMU_POLL_TIMEOUT_US);
3112 /* GBPA is "special" */
3113 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3116 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3118 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3119 1, ARM_SMMU_POLL_TIMEOUT_US);
3125 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3126 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3127 1, ARM_SMMU_POLL_TIMEOUT_US);
3130 dev_err(smmu->dev, "GBPA not responding to update\n");
3134 static void arm_smmu_free_msis(void *data)
3136 struct device *dev = data;
3137 platform_msi_domain_free_irqs(dev);
3140 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3142 phys_addr_t doorbell;
3143 struct device *dev = msi_desc_to_dev(desc);
3144 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3145 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3147 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3148 doorbell &= MSI_CFG0_ADDR_MASK;
3150 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3151 writel_relaxed(msg->data, smmu->base + cfg[1]);
3152 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
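/*
 * Clear the MSI doorbell registers, then allocate platform MSIs for the
 * event, gerror and PRI queues when the SMMU and its MSI domain support
 * them, falling back to wired IRQs otherwise.
 */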
3155 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3157 struct msi_desc *desc;
3158 int ret, nvec = ARM_SMMU_MAX_MSIS;
3159 struct device *dev = smmu->dev;
3161 /* Clear the MSI address regs */
3162 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3163 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3165 if (smmu->features & ARM_SMMU_FEAT_PRI)
3166 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3170 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3173 if (!dev->msi.domain) {
3174 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3178 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3179 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3181 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3185 for_each_msi_entry(desc, dev) {
3186 switch (desc->platform.msi_index) {
3187 case EVTQ_MSI_INDEX:
3188 smmu->evtq.q.irq = desc->irq;
3190 case GERROR_MSI_INDEX:
3191 smmu->gerr_irq = desc->irq;
3193 case PRIQ_MSI_INDEX:
3194 smmu->priq.q.irq = desc->irq;
3196 default: /* Unknown */
3201 /* Add callback to free MSIs on teardown */
3202 devm_add_action(dev, arm_smmu_free_msis, dev);
3205 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3209 arm_smmu_setup_msis(smmu);
3211 /* Request interrupt lines */
3212 irq = smmu->evtq.q.irq;
3214 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3215 arm_smmu_evtq_thread,
3217 "arm-smmu-v3-evtq", smmu);
3219 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3221 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3224 irq = smmu->gerr_irq;
3226 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3227 0, "arm-smmu-v3-gerror", smmu);
3229 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3231 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3234 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3235 irq = smmu->priq.q.irq;
3237 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3238 arm_smmu_priq_thread,
3244 "failed to enable priq irq\n");
3246 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3251 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3254 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3256 /* Disable IRQs first */
3257 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3258 ARM_SMMU_IRQ_CTRLACK);
3260 dev_err(smmu->dev, "failed to disable irqs\n");
3264 irq = smmu->combined_irq;
3267 * Cavium ThunderX2 implementation doesn't support unique irq
3268 * lines. Use a single irq line for all the SMMUv3 interrupts.
3270 ret = devm_request_threaded_irq(smmu->dev, irq,
3271 arm_smmu_combined_irq_handler,
3272 arm_smmu_combined_irq_thread,
3274 "arm-smmu-v3-combined-irq", smmu);
3276 dev_warn(smmu->dev, "failed to enable combined irq\n");
3278 arm_smmu_setup_unique_irqs(smmu);
3280 if (smmu->features & ARM_SMMU_FEAT_PRI)
3281 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3283 /* Enable interrupt generation on the SMMU */
3284 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3285 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3287 dev_warn(smmu->dev, "failed to enable irqs\n");
3292 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3296 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3298 dev_err(smmu->dev, "failed to clear cr0\n");
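/*
 * Bring the SMMU up from scratch: disable it (forcing GBPA to abort if it
 * was left enabled), programme the table/queue attributes, stream-table
 * and queue base registers, invalidate cached configuration and TLBs, set
 * up interrupts and finally enable translation, or bypass via GBPA when
 * no translation is wanted.
 */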
3303 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3307 struct arm_smmu_cmdq_ent cmd;
3309 /* Clear CR0 and sync (disables SMMU and queue processing) */
3310 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3311 if (reg & CR0_SMMUEN) {
3312 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3313 WARN_ON(is_kdump_kernel() && !disable_bypass);
3314 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3317 ret = arm_smmu_device_disable(smmu);
3321 /* CR1 (table and queue memory attributes) */
3322 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3323 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3324 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3325 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3326 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3327 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3328 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3330 /* CR2 (random crap) */
3331 reg = CR2_PTM | CR2_RECINVSID;
3333 if (smmu->features & ARM_SMMU_FEAT_E2H)
3336 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3339 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3340 smmu->base + ARM_SMMU_STRTAB_BASE);
3341 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3342 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3345 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3346 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3347 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3349 enables = CR0_CMDQEN;
3350 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3353 dev_err(smmu->dev, "failed to enable command queue\n");
3357 /* Invalidate any cached configuration */
3358 cmd.opcode = CMDQ_OP_CFGI_ALL;
3359 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3361 /* Invalidate any stale TLB entries */
3362 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3363 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3364 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3367 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3368 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3371 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3372 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3373 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3375 enables |= CR0_EVTQEN;
3376 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3379 dev_err(smmu->dev, "failed to enable event queue\n");
3384 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3385 writeq_relaxed(smmu->priq.q.q_base,
3386 smmu->base + ARM_SMMU_PRIQ_BASE);
3387 writel_relaxed(smmu->priq.q.llq.prod,
3388 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3389 writel_relaxed(smmu->priq.q.llq.cons,
3390 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3392 enables |= CR0_PRIQEN;
3393 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3396 dev_err(smmu->dev, "failed to enable PRI queue\n");
3401 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3402 enables |= CR0_ATSCHK;
3403 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3406 dev_err(smmu->dev, "failed to enable ATS check\n");
3411 ret = arm_smmu_setup_irqs(smmu);
3413 dev_err(smmu->dev, "failed to setup irqs\n");
3417 if (is_kdump_kernel())
3418 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3420 /* Enable the SMMU interface, or ensure bypass */
3421 if (!bypass || disable_bypass) {
3422 enables |= CR0_SMMUEN;
3424 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3428 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3431 dev_err(smmu->dev, "failed to enable SMMU interface\n");
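/*
 * Probe the ID registers: IDR0 for translation stages and feature
 * support, IDR1 for queue and SID/SSID sizes, IDR3 for range
 * invalidation and IDR5 for page sizes and address widths, then derive
 * the driver's feature flags from them.
 */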
3438 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3441 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3444 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3446 /* 2-level structures */
3447 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3448 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3450 if (reg & IDR0_CD2L)
3451 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3454 * Translation table endianness.
3455 * We currently require the same endianness as the CPU, but this
3456 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3458 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3459 case IDR0_TTENDIAN_MIXED:
3460 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3463 case IDR0_TTENDIAN_BE:
3464 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3467 case IDR0_TTENDIAN_LE:
3468 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3472 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3476 /* Boolean feature flags */
3477 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3478 smmu->features |= ARM_SMMU_FEAT_PRI;
3480 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3481 smmu->features |= ARM_SMMU_FEAT_ATS;
3484 smmu->features |= ARM_SMMU_FEAT_SEV;
3486 if (reg & IDR0_MSI) {
3487 smmu->features |= ARM_SMMU_FEAT_MSI;
3488 if (coherent && !disable_msipolling)
3489 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3492 if (reg & IDR0_HYP) {
3493 smmu->features |= ARM_SMMU_FEAT_HYP;
3494 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3495 smmu->features |= ARM_SMMU_FEAT_E2H;
3499 * The coherency feature as set by FW is used in preference to the ID
3500 * register, but warn on mismatch.
3502 if (!!(reg & IDR0_COHACC) != coherent)
3503 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3504 coherent ? "true" : "false");
3506 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3507 case IDR0_STALL_MODEL_FORCE:
3508 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3510 case IDR0_STALL_MODEL_STALL:
3511 smmu->features |= ARM_SMMU_FEAT_STALLS;
3515 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3518 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3520 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3521 dev_err(smmu->dev, "no translation support!\n");
3525 /* We only support the AArch64 table format at present */
3526 switch (FIELD_GET(IDR0_TTF, reg)) {
3527 case IDR0_TTF_AARCH32_64:
3530 case IDR0_TTF_AARCH64:
3533 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3537 /* ASID/VMID sizes */
3538 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3539 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3542 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3543 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3544 dev_err(smmu->dev, "embedded implementation not supported\n");
3548 /* Queue sizes, capped to ensure natural alignment */
3549 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3550 FIELD_GET(IDR1_CMDQS, reg));
3551 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3553 * We don't support splitting up batches, so one batch of
3554 * commands plus an extra sync needs to fit inside the command
3555 * queue. There's also no way we can handle the weird alignment
3556 * restrictions on the base pointer for a unit-length queue.
3558 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3559 CMDQ_BATCH_ENTRIES);
3563 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3564 FIELD_GET(IDR1_EVTQS, reg));
3565 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3566 FIELD_GET(IDR1_PRIQS, reg));
3568 /* SID/SSID sizes */
3569 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3570 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3573 * If the SMMU supports fewer bits than would fill a single L2 stream
3574 * table, use a linear table instead.
3576 if (smmu->sid_bits <= STRTAB_SPLIT)
3577 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3580 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3581 if (FIELD_GET(IDR3_RIL, reg))
3582 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3585 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3587 /* Maximum number of outstanding stalls */
3588 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3591 if (reg & IDR5_GRAN64K)
3592 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3593 if (reg & IDR5_GRAN16K)
3594 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3595 if (reg & IDR5_GRAN4K)
3596 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3598 /* Input address size */
3599 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3600 smmu->features |= ARM_SMMU_FEAT_VAX;
3602 /* Output address size */
3603 switch (FIELD_GET(IDR5_OAS, reg)) {
3604 case IDR5_OAS_32_BIT:
3607 case IDR5_OAS_36_BIT:
3610 case IDR5_OAS_40_BIT:
3613 case IDR5_OAS_42_BIT:
3616 case IDR5_OAS_44_BIT:
3619 case IDR5_OAS_52_BIT:
3621 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3625 "unknown output address size. Truncating to 48-bit\n");
3627 case IDR5_OAS_48_BIT:
3631 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3632 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3634 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3636 /* Set the DMA mask for our table walker */
3637 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3639 "failed to set DMA mask for table walker\n");
3641 smmu->ias = max(smmu->ias, smmu->oas);
3643 if (arm_smmu_sva_supported(smmu))
3644 smmu->features |= ARM_SMMU_FEAT_SVA;
3646 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3647 smmu->ias, smmu->oas, smmu->features);
3652 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3655 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3656 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3658 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3659 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3663 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3666 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3667 struct arm_smmu_device *smmu)
3669 struct acpi_iort_smmu_v3 *iort_smmu;
3670 struct device *dev = smmu->dev;
3671 struct acpi_iort_node *node;
3673 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3675 /* Retrieve SMMUv3 specific data */
3676 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3678 acpi_smmu_get_options(iort_smmu->model, smmu);
3680 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3681 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3686 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3687 struct arm_smmu_device *smmu)
3693 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3694 struct arm_smmu_device *smmu)
3696 struct device *dev = &pdev->dev;
3700 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3701 dev_err(dev, "missing #iommu-cells property\n");
3702 else if (cells != 1)
3703 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3707 parse_driver_options(smmu);
3709 if (of_dma_is_coherent(dev->of_node))
3710 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3715 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3717 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3723 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3728 if (pci_bus_type.iommu_ops != ops) {
3729 err = bus_set_iommu(&pci_bus_type, ops);
3734 #ifdef CONFIG_ARM_AMBA
3735 if (amba_bustype.iommu_ops != ops) {
3736 err = bus_set_iommu(&amba_bustype, ops);
3738 goto err_reset_pci_ops;
3741 if (platform_bus_type.iommu_ops != ops) {
3742 err = bus_set_iommu(&platform_bus_type, ops);
3744 goto err_reset_amba_ops;
3750 #ifdef CONFIG_ARM_AMBA
3751 bus_set_iommu(&amba_bustype, NULL);
3753 err_reset_pci_ops: __maybe_unused;
3755 bus_set_iommu(&pci_bus_type, NULL);
3760 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3761 resource_size_t size)
3763 struct resource res = DEFINE_RES_MEM(start, size);
3765 return devm_ioremap_resource(dev, &res);
3768 static int arm_smmu_device_probe(struct platform_device *pdev)
3771 struct resource *res;
3772 resource_size_t ioaddr;
3773 struct arm_smmu_device *smmu;
3774 struct device *dev = &pdev->dev;
3777 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3783 ret = arm_smmu_device_dt_probe(pdev, smmu);
3785 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3790 /* Set bypass mode according to firmware probing result */
3794 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3795 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3796 dev_err(dev, "MMIO region too small (%pr)\n", res);
3799 ioaddr = res->start;
3802 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3803 * the PMCG registers which are reserved by the PMU driver.
3805 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3806 if (IS_ERR(smmu->base))
3807 return PTR_ERR(smmu->base);
3809 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3810 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3812 if (IS_ERR(smmu->page1))
3813 return PTR_ERR(smmu->page1);
3815 smmu->page1 = smmu->base;
3818 /* Interrupt lines */
3820 irq = platform_get_irq_byname_optional(pdev, "combined");
3822 smmu->combined_irq = irq;
3824 irq = platform_get_irq_byname_optional(pdev, "eventq");
3826 smmu->evtq.q.irq = irq;
3828 irq = platform_get_irq_byname_optional(pdev, "priq");
3830 smmu->priq.q.irq = irq;
3832 irq = platform_get_irq_byname_optional(pdev, "gerror");
3834 smmu->gerr_irq = irq;
3837 ret = arm_smmu_device_hw_probe(smmu);
3841 /* Initialise in-memory data structures */
3842 ret = arm_smmu_init_structures(smmu);
3846 /* Record our private device structure */
3847 platform_set_drvdata(pdev, smmu);
3849 /* Reset the device */
3850 ret = arm_smmu_device_reset(smmu, bypass);
3854 /* And we're up. Go go go! */
3855 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3856 "smmu3.%pa", &ioaddr);
3860 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3862 dev_err(dev, "Failed to register iommu\n");
3863 goto err_sysfs_remove;
3866 ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3868 goto err_unregister_device;
3872 err_unregister_device:
3873 iommu_device_unregister(&smmu->iommu);
3875 iommu_device_sysfs_remove(&smmu->iommu);
3879 static int arm_smmu_device_remove(struct platform_device *pdev)
3881 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3883 arm_smmu_set_bus_ops(NULL);
3884 iommu_device_unregister(&smmu->iommu);
3885 iommu_device_sysfs_remove(&smmu->iommu);
3886 arm_smmu_device_disable(smmu);
3887 iopf_queue_free(smmu->evtq.iopf);
3892 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3894 arm_smmu_device_remove(pdev);
3897 static const struct of_device_id arm_smmu_of_match[] = {
3898 { .compatible = "arm,smmu-v3", },
3901 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3903 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3905 arm_smmu_sva_notifier_synchronize();
3906 platform_driver_unregister(drv);
3909 static struct platform_driver arm_smmu_driver = {
3911 .name = "arm-smmu-v3",
3912 .of_match_table = arm_smmu_of_match,
3913 .suppress_bind_attrs = true,
3915 .probe = arm_smmu_device_probe,
3916 .remove = arm_smmu_device_remove,
3917 .shutdown = arm_smmu_device_shutdown,
3919 module_driver(arm_smmu_driver, platform_driver_register,
3920 arm_smmu_driver_unregister);
3922 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3923 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3924 MODULE_ALIAS("platform:arm-smmu-v3");
3925 MODULE_LICENSE("GPL v2");