drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30
31 #include <linux/amba/bus.h>
32
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44         "Disable MSI-based polling for CMD_SYNC completion.");
45
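/*
 * The event queue, global error and PRI queue interrupts can each be
 * delivered as an MSI instead of a wired IRQ. For every MSI index, the
 * table below lists the register trio used to program it: CFG0 takes the
 * doorbell address, CFG1 the payload and CFG2 the memory attributes of
 * the write.
 */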
46 enum arm_smmu_msi_index {
47         EVTQ_MSI_INDEX,
48         GERROR_MSI_INDEX,
49         PRIQ_MSI_INDEX,
50         ARM_SMMU_MAX_MSIS,
51 };
52
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54         [EVTQ_MSI_INDEX] = {
55                 ARM_SMMU_EVTQ_IRQ_CFG0,
56                 ARM_SMMU_EVTQ_IRQ_CFG1,
57                 ARM_SMMU_EVTQ_IRQ_CFG2,
58         },
59         [GERROR_MSI_INDEX] = {
60                 ARM_SMMU_GERROR_IRQ_CFG0,
61                 ARM_SMMU_GERROR_IRQ_CFG1,
62                 ARM_SMMU_GERROR_IRQ_CFG2,
63         },
64         [PRIQ_MSI_INDEX] = {
65                 ARM_SMMU_PRIQ_IRQ_CFG0,
66                 ARM_SMMU_PRIQ_IRQ_CFG1,
67                 ARM_SMMU_PRIQ_IRQ_CFG2,
68         },
69 };
70
71 struct arm_smmu_option_prop {
72         u32 opt;
73         const char *prop;
74 };
75
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88         { 0, NULL},
89 };
90
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93         int i = 0;
94
95         do {
96                 if (of_property_read_bool(smmu->dev->of_node,
97                                                 arm_smmu_options[i].prop)) {
98                         smmu->options |= arm_smmu_options[i].opt;
99                         dev_notice(smmu->dev, "option %s\n",
100                                 arm_smmu_options[i].prop);
101                 }
102         } while (arm_smmu_options[++i].opt);
103 }
104
105 /* Low-level queue manipulation functions */
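/*
 * Queue pointers are kept in the hardware format: the low max_n_shift bits
 * are the entry index (Q_IDX), the next bit up is the wrap flag (Q_WRP),
 * which toggles on every pass over the queue, and bit 31 is the overflow
 * flag (Q_OVF) used by the event/PRI queues. Comparing the wrap flags is
 * what lets us tell a full queue from an empty one when the indices match.
 *
 * Illustrative example: with max_n_shift == 8 (256 entries), prod == 0x105
 * (wrap 1, index 5) and cons == 0x0fa (wrap 0, index 250), the wrap flags
 * differ, so queue_has_space() computes cons - prod = 250 - 5 = 245 free
 * slots, i.e. 11 entries are still in flight.
 */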
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108         u32 space, prod, cons;
109
110         prod = Q_IDX(q, q->prod);
111         cons = Q_IDX(q, q->cons);
112
113         if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114                 space = (1 << q->max_n_shift) - (prod - cons);
115         else
116                 space = cons - prod;
117
118         return space >= n;
119 }
120
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132
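/*
 * Wrap-aware "has the cons pointer passed @prod?" check, used when spinning
 * for a CMD_SYNC to be consumed. When the wrap flags match, cons must have
 * advanced strictly beyond prod's index; when they differ, cons has either
 * lapped prod (index <= prod's, i.e. consumed) or is still catching up from
 * the previous lap (index > prod's, i.e. not yet consumed).
 */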
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135         return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136                 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137                ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138                 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143         /*
144          * Ensure that all CPU accesses (reads and writes) to the queue
145          * are complete before we update the cons pointer.
146          */
147         __iomb();
148         writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154         q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159         u32 prod;
160         int ret = 0;
161
162         /*
163          * We can't use the _relaxed() variant here, as we must prevent
164          * speculative reads of the queue before we have determined that
165          * prod has indeed moved.
166          */
167         prod = readl(q->prod_reg);
168
169         if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170                 ret = -EOVERFLOW;
171
172         q->llq.prod = prod;
173         return ret;
174 }
175
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179         return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183                             struct arm_smmu_queue_poll *qp)
184 {
185         qp->delay = 1;
186         qp->spin_cnt = 0;
187         qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188         qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193         if (ktime_compare(ktime_get(), qp->timeout) > 0)
194                 return -ETIMEDOUT;
195
196         if (qp->wfe) {
197                 wfe();
198         } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199                 cpu_relax();
200         } else {
201                 udelay(qp->delay);
202                 qp->delay *= 2;
203                 qp->spin_cnt = 0;
204         }
205
206         return 0;
207 }
208
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211         int i;
212
213         for (i = 0; i < n_dwords; ++i)
214                 *dst++ = cpu_to_le64(*src++);
215 }
216
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219         int i;
220
221         for (i = 0; i < n_dwords; ++i)
222                 *dst++ = le64_to_cpu(*src++);
223 }
224
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227         if (queue_empty(&q->llq))
228                 return -EAGAIN;
229
230         queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231         queue_inc_cons(&q->llq);
232         queue_sync_cons_out(q);
233         return 0;
234 }
235
236 /* High-level queue accessors */
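/*
 * Encode an arm_smmu_cmdq_ent into the 16-byte (CMDQ_ENT_DWORDS) wire
 * format. Returns -EINVAL for a malformed entry and -ENOENT for an opcode
 * we don't know how to build; callers warn and drop the command rather
 * than corrupt the queue.
 */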
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239         memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241
242         switch (ent->opcode) {
243         case CMDQ_OP_TLBI_EL2_ALL:
244         case CMDQ_OP_TLBI_NSNH_ALL:
245                 break;
246         case CMDQ_OP_PREFETCH_CFG:
247                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248                 break;
249         case CMDQ_OP_CFGI_CD:
250                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251                 fallthrough;
252         case CMDQ_OP_CFGI_STE:
253                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255                 break;
256         case CMDQ_OP_CFGI_CD_ALL:
257                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258                 break;
259         case CMDQ_OP_CFGI_ALL:
260                 /* Cover the entire SID range */
261                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262                 break;
263         case CMDQ_OP_TLBI_NH_VA:
264                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265                 fallthrough;
266         case CMDQ_OP_TLBI_EL2_VA:
267                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274                 break;
275         case CMDQ_OP_TLBI_S2_IPA:
276                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283                 break;
284         case CMDQ_OP_TLBI_NH_ASID:
285                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286                 fallthrough;
287         case CMDQ_OP_TLBI_S12_VMALL:
288                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289                 break;
290         case CMDQ_OP_TLBI_EL2_ASID:
291                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292                 break;
293         case CMDQ_OP_ATC_INV:
294                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297                 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298                 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299                 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300                 break;
301         case CMDQ_OP_PRI_RESP:
302                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306                 switch (ent->pri.resp) {
307                 case PRI_RESP_DENY:
308                 case PRI_RESP_FAIL:
309                 case PRI_RESP_SUCC:
310                         break;
311                 default:
312                         return -EINVAL;
313                 }
314                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315                 break;
316         case CMDQ_OP_RESUME:
317                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318                 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319                 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320                 break;
321         case CMDQ_OP_CMD_SYNC:
322                 if (ent->sync.msiaddr) {
323                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324                         cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325                 } else {
326                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327                 }
328                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330                 break;
331         default:
332                 return -ENOENT;
333         }
334
335         return 0;
336 }
337
338 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
339 {
340         return &smmu->cmdq;
341 }
342
343 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
344                                          struct arm_smmu_queue *q, u32 prod)
345 {
346         struct arm_smmu_cmdq_ent ent = {
347                 .opcode = CMDQ_OP_CMD_SYNC,
348         };
349
350         /*
351          * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
352          * payload, so the write will zero the entire command on that platform.
353          */
354         if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
355                 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
356                                    q->ent_dwords * 8;
357         }
358
359         arm_smmu_cmdq_build_cmd(cmd, &ent);
360 }
361
362 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
363                                      struct arm_smmu_queue *q)
364 {
365         static const char * const cerror_str[] = {
366                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
367                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
368                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
369                 [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
370         };
371
372         int i;
373         u64 cmd[CMDQ_ENT_DWORDS];
374         u32 cons = readl_relaxed(q->cons_reg);
375         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
376         struct arm_smmu_cmdq_ent cmd_sync = {
377                 .opcode = CMDQ_OP_CMD_SYNC,
378         };
379
380         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
381                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
382
383         switch (idx) {
384         case CMDQ_ERR_CERROR_ABT_IDX:
385                 dev_err(smmu->dev, "retrying command fetch\n");
386                 return;
387         case CMDQ_ERR_CERROR_NONE_IDX:
388                 return;
389         case CMDQ_ERR_CERROR_ATC_INV_IDX:
390                 /*
391                  * ATC Invalidation Completion timeout. CONS is still pointing
392                  * at the CMD_SYNC. Attempt to complete other pending commands
393                  * by repeating the CMD_SYNC, though we might well end up back
394                  * here since the ATC invalidation may still be pending.
395                  */
396                 return;
397         case CMDQ_ERR_CERROR_ILL_IDX:
398         default:
399                 break;
400         }
401
402         /*
403          * We may have concurrent producers, so we need to be careful
404          * not to touch any of the shadow cmdq state.
405          */
406         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
407         dev_err(smmu->dev, "skipping command in error state:\n");
408         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
409                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
410
411         /* Convert the erroneous command into a CMD_SYNC */
412         arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
413
414         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
415 }
416
417 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
418 {
419         __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
420 }
421
422 /*
423  * Command queue locking.
424  * This is a form of bastardised rwlock with the following major changes:
425  *
426  * - The only LOCK routines are exclusive_trylock() and shared_lock().
427  *   Neither have barrier semantics, and instead provide only a control
428  *   dependency.
429  *
430  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
431  *   fails if the caller appears to be the last lock holder (yes, this is
432  *   racy). All successful UNLOCK routines have RELEASE semantics.
433  */
434 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
435 {
436         int val;
437
438         /*
439          * We can try to avoid the cmpxchg() loop by simply incrementing the
440          * lock counter. When held in exclusive state, the lock counter is set
441          * to INT_MIN so these increments won't hurt as the value will remain
442          * negative.
443          */
444         if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
445                 return;
446
447         do {
448                 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
449         } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
450 }
451
452 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
453 {
454         (void)atomic_dec_return_release(&cmdq->lock);
455 }
456
457 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
458 {
459         if (atomic_read(&cmdq->lock) == 1)
460                 return false;
461
462         arm_smmu_cmdq_shared_unlock(cmdq);
463         return true;
464 }
465
466 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)            \
467 ({                                                                      \
468         bool __ret;                                                     \
469         local_irq_save(flags);                                          \
470         __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
471         if (!__ret)                                                     \
472                 local_irq_restore(flags);                               \
473         __ret;                                                          \
474 })
475
476 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)          \
477 ({                                                                      \
478         atomic_set_release(&cmdq->lock, 0);                             \
479         local_irq_restore(flags);                                       \
480 })
481
482
483 /*
484  * Command queue insertion.
485  * This is made fiddly by our attempts to achieve some sort of scalability
486  * since there is one queue shared amongst all of the CPUs in the system.  If
487  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
488  * then you'll *love* this monstrosity.
489  *
490  * The basic idea is to split the queue up into ranges of commands that are
491  * owned by a given CPU; the owner may not have written all of the commands
492  * itself, but is responsible for advancing the hardware prod pointer when
493  * the time comes. The algorithm is roughly:
494  *
495  *      1. Allocate some space in the queue. At this point we also discover
496  *         whether the head of the queue is currently owned by another CPU,
497  *         or whether we are the owner.
498  *
499  *      2. Write our commands into our allocated slots in the queue.
500  *
501  *      3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
502  *
503  *      4. If we are an owner:
504  *              a. Wait for the previous owner to finish.
505  *              b. Mark the queue head as unowned, which tells us the range
506  *                 that we are responsible for publishing.
507  *              c. Wait for all commands in our owned range to become valid.
508  *              d. Advance the hardware prod pointer.
509  *              e. Tell the next owner we've finished.
510  *
511  *      5. If we are inserting a CMD_SYNC (we may or may not have been an
512  *         owner), then we need to stick around until it has completed:
513  *              a. If we have MSIs, the SMMU can write back into the CMD_SYNC
514  *                 to clear the first 4 bytes.
515  *              b. Otherwise, we spin waiting for the hardware cons pointer to
516  *                 advance past our command.
517  *
518  * The devil is in the details, particularly the use of locking for handling
519  * SYNC completion and freeing up space in the queue before we think that it is
520  * full.
521  */
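/*
 * The valid bitmap implements step 3 above: one bit per command slot,
 * toggled with XOR so that "valid" means "bit equals the inverse of the
 * wrap flag for this lap". Writers set the bits covering their own range;
 * the owner polls the bits for the whole gathered range before publishing
 * it to the hardware.
 */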
522 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
523                                                u32 sprod, u32 eprod, bool set)
524 {
525         u32 swidx, sbidx, ewidx, ebidx;
526         struct arm_smmu_ll_queue llq = {
527                 .max_n_shift    = cmdq->q.llq.max_n_shift,
528                 .prod           = sprod,
529         };
530
531         ewidx = BIT_WORD(Q_IDX(&llq, eprod));
532         ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
533
534         while (llq.prod != eprod) {
535                 unsigned long mask;
536                 atomic_long_t *ptr;
537                 u32 limit = BITS_PER_LONG;
538
539                 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
540                 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
541
542                 ptr = &cmdq->valid_map[swidx];
543
544                 if ((swidx == ewidx) && (sbidx < ebidx))
545                         limit = ebidx;
546
547                 mask = GENMASK(limit - 1, sbidx);
548
549                 /*
550                  * The valid bit is the inverse of the wrap bit. This means
551                  * that a zero-initialised queue is invalid and, after marking
552                  * all entries as valid, they become invalid again when we
553                  * wrap.
554                  */
555                 if (set) {
556                         atomic_long_xor(mask, ptr);
557                 } else { /* Poll */
558                         unsigned long valid;
559
560                         valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
561                         atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
562                 }
563
564                 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
565         }
566 }
567
568 /* Mark all entries in the range [sprod, eprod) as valid */
569 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
570                                         u32 sprod, u32 eprod)
571 {
572         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
573 }
574
575 /* Wait for all entries in the range [sprod, eprod) to become valid */
576 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
577                                          u32 sprod, u32 eprod)
578 {
579         __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
580 }
581
582 /* Wait for the command queue to become non-full */
583 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
584                                              struct arm_smmu_ll_queue *llq)
585 {
586         unsigned long flags;
587         struct arm_smmu_queue_poll qp;
588         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
589         int ret = 0;
590
591         /*
592          * Try to update our copy of cons by grabbing exclusive cmdq access. If
593          * that fails, spin until somebody else updates it for us.
594          */
595         if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
596                 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
597                 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
598                 llq->val = READ_ONCE(cmdq->q.llq.val);
599                 return 0;
600         }
601
602         queue_poll_init(smmu, &qp);
603         do {
604                 llq->val = READ_ONCE(cmdq->q.llq.val);
605                 if (!queue_full(llq))
606                         break;
607
608                 ret = queue_poll(&qp);
609         } while (!ret);
610
611         return ret;
612 }
613
614 /*
615  * Wait until the SMMU signals a CMD_SYNC completion MSI.
616  * Must be called with the cmdq lock held in some capacity.
617  */
618 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
619                                           struct arm_smmu_ll_queue *llq)
620 {
621         int ret = 0;
622         struct arm_smmu_queue_poll qp;
623         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
624         u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
625
626         queue_poll_init(smmu, &qp);
627
628         /*
629          * The MSI won't generate an event, since it's being written back
630          * into the command queue.
631          */
632         qp.wfe = false;
633         smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
634         llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
635         return ret;
636 }
637
638 /*
639  * Wait until the SMMU cons index passes llq->prod.
640  * Must be called with the cmdq lock held in some capacity.
641  */
642 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
643                                                struct arm_smmu_ll_queue *llq)
644 {
645         struct arm_smmu_queue_poll qp;
646         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
647         u32 prod = llq->prod;
648         int ret = 0;
649
650         queue_poll_init(smmu, &qp);
651         llq->val = READ_ONCE(cmdq->q.llq.val);
652         do {
653                 if (queue_consumed(llq, prod))
654                         break;
655
656                 ret = queue_poll(&qp);
657
658                 /*
659                  * This needs to be a readl() so that our subsequent call
660                  * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
661                  *
662                  * Specifically, we need to ensure that we observe all
663                  * shared_lock()s by other CMD_SYNCs that share our owner,
664                  * so that a failing call to tryunlock() means that we're
665                  * the last one out and therefore we can safely advance
666                  * cmdq->q.llq.cons. Roughly speaking:
667                  *
668                  * CPU 0                CPU1                    CPU2 (us)
669                  *
670                  * if (sync)
671                  *      shared_lock();
672                  *
673                  * dma_wmb();
674                  * set_valid_map();
675                  *
676                  *                      if (owner) {
677                  *                              poll_valid_map();
678                  *                              <control dependency>
679                  *                              writel(prod_reg);
680                  *
681                  *                                              readl(cons_reg);
682                  *                                              tryunlock();
683                  *
684                  * Requires us to see CPU 0's shared_lock() acquisition.
685                  */
686                 llq->cons = readl(cmdq->q.cons_reg);
687         } while (!ret);
688
689         return ret;
690 }
691
692 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
693                                          struct arm_smmu_ll_queue *llq)
694 {
695         if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
696                 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
697
698         return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
699 }
700
701 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
702                                         u32 prod, int n)
703 {
704         int i;
705         struct arm_smmu_ll_queue llq = {
706                 .max_n_shift    = cmdq->q.llq.max_n_shift,
707                 .prod           = prod,
708         };
709
710         for (i = 0; i < n; ++i) {
711                 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
712
713                 prod = queue_inc_prod_n(&llq, i);
714                 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
715         }
716 }
717
718 /*
719  * This is the actual insertion function, and provides the following
720  * ordering guarantees to callers:
721  *
722  * - There is a dma_wmb() before publishing any commands to the queue.
723  *   This can be relied upon to order prior writes to data structures
724  *   in memory (such as a CD or an STE) before the command.
725  *
726  * - On completion of a CMD_SYNC, there is a control dependency.
727  *   This can be relied upon to order subsequent writes to memory (e.g.
728  *   freeing an IOVA) after completion of the CMD_SYNC.
729  *
730  * - Command insertion is totally ordered, so if two CPUs each race to
731  *   insert their own list of commands then all of the commands from one
732  *   CPU will appear before any of the commands from the other CPU.
733  */
734 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
735                                        u64 *cmds, int n, bool sync)
736 {
737         u64 cmd_sync[CMDQ_ENT_DWORDS];
738         u32 prod;
739         unsigned long flags;
740         bool owner;
741         struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
742         struct arm_smmu_ll_queue llq, head;
743         int ret = 0;
744
745         llq.max_n_shift = cmdq->q.llq.max_n_shift;
746
747         /* 1. Allocate some space in the queue */
748         local_irq_save(flags);
749         llq.val = READ_ONCE(cmdq->q.llq.val);
750         do {
751                 u64 old;
752
753                 while (!queue_has_space(&llq, n + sync)) {
754                         local_irq_restore(flags);
755                         if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
756                                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
757                         local_irq_save(flags);
758                 }
759
760                 head.cons = llq.cons;
761                 head.prod = queue_inc_prod_n(&llq, n + sync) |
762                                              CMDQ_PROD_OWNED_FLAG;
763
764                 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
765                 if (old == llq.val)
766                         break;
767
768                 llq.val = old;
769         } while (1);
770         owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
771         head.prod &= ~CMDQ_PROD_OWNED_FLAG;
772         llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
773
774         /*
775          * 2. Write our commands into the queue
776          * Dependency ordering from the cmpxchg() loop above.
777          */
778         arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
779         if (sync) {
780                 prod = queue_inc_prod_n(&llq, n);
781                 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
782                 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
783
784                 /*
785                  * In order to determine completion of our CMD_SYNC, we must
786                  * ensure that the queue can't wrap twice without us noticing.
787                  * We achieve that by taking the cmdq lock as shared before
788                  * marking our slot as valid.
789                  */
790                 arm_smmu_cmdq_shared_lock(cmdq);
791         }
792
793         /* 3. Mark our slots as valid, ensuring commands are visible first */
794         dma_wmb();
795         arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
796
797         /* 4. If we are the owner, take control of the SMMU hardware */
798         if (owner) {
799                 /* a. Wait for previous owner to finish */
800                 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
801
802                 /* b. Stop gathering work by clearing the owned flag */
803                 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
804                                                    &cmdq->q.llq.atomic.prod);
805                 prod &= ~CMDQ_PROD_OWNED_FLAG;
806
807                 /*
808                  * c. Wait for any gathered work to be written to the queue.
809                  * Note that we read our own entries so that we have the control
810                  * dependency required by (d).
811                  */
812                 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
813
814                 /*
815                  * d. Advance the hardware prod pointer
816                  * Control dependency ordering from the entries becoming valid.
817                  */
818                 writel_relaxed(prod, cmdq->q.prod_reg);
819
820                 /*
821                  * e. Tell the next owner we're done
822                  * Make sure we've updated the hardware first, so that we don't
823                  * race to update prod and potentially move it backwards.
824                  */
825                 atomic_set_release(&cmdq->owner_prod, prod);
826         }
827
828         /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
829         if (sync) {
830                 llq.prod = queue_inc_prod_n(&llq, n);
831                 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
832                 if (ret) {
833                         dev_err_ratelimited(smmu->dev,
834                                             "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
835                                             llq.prod,
836                                             readl_relaxed(cmdq->q.prod_reg),
837                                             readl_relaxed(cmdq->q.cons_reg));
838                 }
839
840                 /*
841                  * Try to unlock the cmdq lock. This will fail if we're the last
842                  * reader, in which case we can safely update cmdq->q.llq.cons
843                  */
844                 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
845                         WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
846                         arm_smmu_cmdq_shared_unlock(cmdq);
847                 }
848         }
849
850         local_irq_restore(flags);
851         return ret;
852 }
853
854 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
855                                      struct arm_smmu_cmdq_ent *ent,
856                                      bool sync)
857 {
858         u64 cmd[CMDQ_ENT_DWORDS];
859
860         if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
861                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
862                          ent->opcode);
863                 return -EINVAL;
864         }
865
866         return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
867 }
868
869 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870                                    struct arm_smmu_cmdq_ent *ent)
871 {
872         return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
873 }
874
875 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
876                                              struct arm_smmu_cmdq_ent *ent)
877 {
878         return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
879 }
880
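/*
 * Command batching: callers accumulate up to CMDQ_BATCH_ENTRIES commands
 * and submit them with a single CMD_SYNC, amortising the cost of the
 * insertion machinery above. arm_smmu_cmdq_batch_add() flushes the buffer
 * (without a sync) whenever it fills up; see arm_smmu_sync_cd() below for
 * a typical user, which zeroes cmds.num, adds one command per stream and
 * then calls arm_smmu_cmdq_batch_submit().
 */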
881 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
882                                     struct arm_smmu_cmdq_batch *cmds,
883                                     struct arm_smmu_cmdq_ent *cmd)
884 {
885         int index;
886
887         if (cmds->num == CMDQ_BATCH_ENTRIES) {
888                 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
889                 cmds->num = 0;
890         }
891
892         index = cmds->num * CMDQ_ENT_DWORDS;
893         if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
894                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
895                          cmd->opcode);
896                 return;
897         }
898
899         cmds->num++;
900 }
901
902 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
903                                       struct arm_smmu_cmdq_batch *cmds)
904 {
905         return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
906 }
907
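/*
 * Respond to a previously reported page/stall fault. Only stall-based
 * faults are handled here: the response becomes a CMDQ_OP_RESUME (retry on
 * success, abort otherwise) keyed by the stall tag carried in resp->grpid,
 * and is always issued on the master's first stream ID. PRI responses
 * would use CMDQ_OP_PRI_RESP instead, but PRI is not wired up in this path.
 */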
908 static int arm_smmu_page_response(struct device *dev,
909                                   struct iommu_fault_event *unused,
910                                   struct iommu_page_response *resp)
911 {
912         struct arm_smmu_cmdq_ent cmd = {0};
913         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
914         int sid = master->streams[0].id;
915
916         if (master->stall_enabled) {
917                 cmd.opcode              = CMDQ_OP_RESUME;
918                 cmd.resume.sid          = sid;
919                 cmd.resume.stag         = resp->grpid;
920                 switch (resp->code) {
921                 case IOMMU_PAGE_RESP_INVALID:
922                 case IOMMU_PAGE_RESP_FAILURE:
923                         cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
924                         break;
925                 case IOMMU_PAGE_RESP_SUCCESS:
926                         cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
927                         break;
928                 default:
929                         return -EINVAL;
930                 }
931         } else {
932                 return -ENODEV;
933         }
934
935         arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
936         /*
937          * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
938          * RESUME consumption guarantees that the stalled transaction will be
939          * terminated... at some point in the future. PRI_RESP is fire and
940          * forget.
941          */
942
943         return 0;
944 }
945
946 /* Context descriptor manipulation functions */
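/*
 * Invalidate all TLB entries tagged with @asid and wait for completion.
 * When the SMMU uses the VHE (E2H) layout, stage-1 translations are tagged
 * for the EL2 regime (see the STRW_EL2 setting in
 * arm_smmu_write_strtab_ent()), so the EL2 variant of the TLBI is needed.
 */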
947 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
948 {
949         struct arm_smmu_cmdq_ent cmd = {
950                 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
951                         CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
952                 .tlbi.asid = asid,
953         };
954
955         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
956 }
957
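/*
 * Invalidate cached copies of the context descriptor for @ssid on every
 * master attached to the domain: one CFGI_CD per stream ID, batched and
 * finished with a CMD_SYNC. @leaf is false when the L1 descriptor itself
 * (not just the CD) may have been cached and must be invalidated too.
 */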
958 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
959                              int ssid, bool leaf)
960 {
961         size_t i;
962         unsigned long flags;
963         struct arm_smmu_master *master;
964         struct arm_smmu_cmdq_batch cmds;
965         struct arm_smmu_device *smmu = smmu_domain->smmu;
966         struct arm_smmu_cmdq_ent cmd = {
967                 .opcode = CMDQ_OP_CFGI_CD,
968                 .cfgi   = {
969                         .ssid   = ssid,
970                         .leaf   = leaf,
971                 },
972         };
973
974         cmds.num = 0;
975
976         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
977         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
978                 for (i = 0; i < master->num_streams; i++) {
979                         cmd.cfgi.sid = master->streams[i].id;
980                         arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
981                 }
982         }
983         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
984
985         arm_smmu_cmdq_batch_submit(smmu, &cmds);
986 }
987
988 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
989                                         struct arm_smmu_l1_ctx_desc *l1_desc)
990 {
991         size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
992
993         l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
994                                              &l1_desc->l2ptr_dma, GFP_KERNEL);
995         if (!l1_desc->l2ptr) {
996                 dev_warn(smmu->dev,
997                          "failed to allocate context descriptor table\n");
998                 return -ENOMEM;
999         }
1000         return 0;
1001 }
1002
1003 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1004                                       struct arm_smmu_l1_ctx_desc *l1_desc)
1005 {
1006         u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1007                   CTXDESC_L1_DESC_V;
1008
1009         /* See comment in arm_smmu_write_ctx_desc() */
1010         WRITE_ONCE(*dst, cpu_to_le64(val));
1011 }
1012
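/*
 * Return a pointer to the context descriptor slot for @ssid, allocating
 * and publishing a second-level table on demand when the two-level CD
 * format is in use. Returns NULL if the leaf table cannot be allocated.
 */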
1013 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1014                                    u32 ssid)
1015 {
1016         __le64 *l1ptr;
1017         unsigned int idx;
1018         struct arm_smmu_l1_ctx_desc *l1_desc;
1019         struct arm_smmu_device *smmu = smmu_domain->smmu;
1020         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1021
1022         if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1023                 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1024
1025         idx = ssid >> CTXDESC_SPLIT;
1026         l1_desc = &cdcfg->l1_desc[idx];
1027         if (!l1_desc->l2ptr) {
1028                 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1029                         return NULL;
1030
1031                 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1032                 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1033                 /* An invalid L1CD can be cached */
1034                 arm_smmu_sync_cd(smmu_domain, ssid, false);
1035         }
1036         idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1037         return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1038 }
1039
1040 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1041                             struct arm_smmu_ctx_desc *cd)
1042 {
1043         /*
1044          * This function handles the following cases:
1045          *
1046          * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1047          * (2) Install a secondary CD, for SID+SSID traffic.
1048          * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1049          *     CD, then invalidate the old entry and mappings.
1050          * (4) Quiesce the context without clearing the valid bit. Disable
1051          *     translation, and ignore any translation fault.
1052          * (5) Remove a secondary CD.
1053          */
1054         u64 val;
1055         bool cd_live;
1056         __le64 *cdptr;
1057
1058         if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1059                 return -E2BIG;
1060
1061         cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1062         if (!cdptr)
1063                 return -ENOMEM;
1064
1065         val = le64_to_cpu(cdptr[0]);
1066         cd_live = !!(val & CTXDESC_CD_0_V);
1067
1068         if (!cd) { /* (5) */
1069                 val = 0;
1070         } else if (cd == &quiet_cd) { /* (4) */
1071                 val |= CTXDESC_CD_0_TCR_EPD0;
1072         } else if (cd_live) { /* (3) */
1073                 val &= ~CTXDESC_CD_0_ASID;
1074                 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1075                 /*
1076                  * Until CD+TLB invalidation, both ASIDs may be used for tagging
1077                  * this substream's traffic
1078                  */
1079         } else { /* (1) and (2) */
1080                 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1081                 cdptr[2] = 0;
1082                 cdptr[3] = cpu_to_le64(cd->mair);
1083
1084                 /*
1085                  * STE is live, and the SMMU might read dwords of this CD in any
1086                  * order. Ensure that it observes valid values before reading
1087                  * V=1.
1088                  */
1089                 arm_smmu_sync_cd(smmu_domain, ssid, true);
1090
1091                 val = cd->tcr |
1092 #ifdef __BIG_ENDIAN
1093                         CTXDESC_CD_0_ENDI |
1094 #endif
1095                         CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1096                         (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1097                         CTXDESC_CD_0_AA64 |
1098                         FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1099                         CTXDESC_CD_0_V;
1100
1101                 if (smmu_domain->stall_enabled)
1102                         val |= CTXDESC_CD_0_S;
1103         }
1104
1105         /*
1106          * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1107          * "Configuration structures and configuration invalidation completion"
1108          *
1109          *   The size of single-copy atomic reads made by the SMMU is
1110          *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1111          *   field within an aligned 64-bit span of a structure can be altered
1112          *   without first making the structure invalid.
1113          */
1114         WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1115         arm_smmu_sync_cd(smmu_domain, ssid, true);
1116         return 0;
1117 }
1118
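/*
 * Allocate the CD table(s) for a stage-1 domain. A linear table is used
 * when the SMMU lacks 2-level CD support or when all 2^s1cdmax contexts
 * fit in a single leaf (CTXDESC_L2_ENTRIES); otherwise only the L1 table
 * is allocated here and the leaves are populated lazily by
 * arm_smmu_get_cd_ptr().
 */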
1119 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1120 {
1121         int ret;
1122         size_t l1size;
1123         size_t max_contexts;
1124         struct arm_smmu_device *smmu = smmu_domain->smmu;
1125         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1126         struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1127
1128         max_contexts = 1 << cfg->s1cdmax;
1129
1130         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1131             max_contexts <= CTXDESC_L2_ENTRIES) {
1132                 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1133                 cdcfg->num_l1_ents = max_contexts;
1134
1135                 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1136         } else {
1137                 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1138                 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1139                                                   CTXDESC_L2_ENTRIES);
1140
1141                 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1142                                               sizeof(*cdcfg->l1_desc),
1143                                               GFP_KERNEL);
1144                 if (!cdcfg->l1_desc)
1145                         return -ENOMEM;
1146
1147                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1148         }
1149
1150         cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1151                                            GFP_KERNEL);
1152         if (!cdcfg->cdtab) {
1153                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1154                 ret = -ENOMEM;
1155                 goto err_free_l1;
1156         }
1157
1158         return 0;
1159
1160 err_free_l1:
1161         if (cdcfg->l1_desc) {
1162                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1163                 cdcfg->l1_desc = NULL;
1164         }
1165         return ret;
1166 }
1167
1168 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1169 {
1170         int i;
1171         size_t size, l1size;
1172         struct arm_smmu_device *smmu = smmu_domain->smmu;
1173         struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1174
1175         if (cdcfg->l1_desc) {
1176                 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1177
1178                 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1179                         if (!cdcfg->l1_desc[i].l2ptr)
1180                                 continue;
1181
1182                         dmam_free_coherent(smmu->dev, size,
1183                                            cdcfg->l1_desc[i].l2ptr,
1184                                            cdcfg->l1_desc[i].l2ptr_dma);
1185                 }
1186                 devm_kfree(smmu->dev, cdcfg->l1_desc);
1187                 cdcfg->l1_desc = NULL;
1188
1189                 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1190         } else {
1191                 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1192         }
1193
1194         dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1195         cdcfg->cdtab_dma = 0;
1196         cdcfg->cdtab = NULL;
1197 }
1198
1199 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1200 {
1201         bool free;
1202         struct arm_smmu_ctx_desc *old_cd;
1203
1204         if (!cd->asid)
1205                 return false;
1206
1207         free = refcount_dec_and_test(&cd->refs);
1208         if (free) {
1209                 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1210                 WARN_ON(old_cd != cd);
1211         }
1212         return free;
1213 }
1214
1215 /* Stream table manipulation functions */
1216 static void
1217 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1218 {
1219         u64 val = 0;
1220
1221         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1222         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1223
1224         /* See comment in arm_smmu_write_ctx_desc() */
1225         WRITE_ONCE(*dst, cpu_to_le64(val));
1226 }
1227
1228 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1229 {
1230         struct arm_smmu_cmdq_ent cmd = {
1231                 .opcode = CMDQ_OP_CFGI_STE,
1232                 .cfgi   = {
1233                         .sid    = sid,
1234                         .leaf   = true,
1235                 },
1236         };
1237
1238         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1239 }
1240
1241 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1242                                       __le64 *dst)
1243 {
1244         /*
1245          * This is hideously complicated, but we only really care about
1246          * three cases at the moment:
1247          *
1248          * 1. Invalid (all zero) -> bypass/fault (init)
1249          * 2. Bypass/fault -> translation/bypass (attach)
1250          * 3. Translation/bypass -> bypass/fault (detach)
1251          *
1252          * Given that we can't update the STE atomically and the SMMU
1253          * doesn't read the thing in a defined order, that leaves us
1254          * with the following maintenance requirements:
1255          *
1256          * 1. Update Config, return (init time STEs aren't live)
1257          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1258          * 3. Update Config, sync
1259          */
1260         u64 val = le64_to_cpu(dst[0]);
1261         bool ste_live = false;
1262         struct arm_smmu_device *smmu = NULL;
1263         struct arm_smmu_s1_cfg *s1_cfg = NULL;
1264         struct arm_smmu_s2_cfg *s2_cfg = NULL;
1265         struct arm_smmu_domain *smmu_domain = NULL;
1266         struct arm_smmu_cmdq_ent prefetch_cmd = {
1267                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1268                 .prefetch       = {
1269                         .sid    = sid,
1270                 },
1271         };
1272
1273         if (master) {
1274                 smmu_domain = master->domain;
1275                 smmu = master->smmu;
1276         }
1277
1278         if (smmu_domain) {
1279                 switch (smmu_domain->stage) {
1280                 case ARM_SMMU_DOMAIN_S1:
1281                         s1_cfg = &smmu_domain->s1_cfg;
1282                         break;
1283                 case ARM_SMMU_DOMAIN_S2:
1284                 case ARM_SMMU_DOMAIN_NESTED:
1285                         s2_cfg = &smmu_domain->s2_cfg;
1286                         break;
1287                 default:
1288                         break;
1289                 }
1290         }
1291
1292         if (val & STRTAB_STE_0_V) {
1293                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1294                 case STRTAB_STE_0_CFG_BYPASS:
1295                         break;
1296                 case STRTAB_STE_0_CFG_S1_TRANS:
1297                 case STRTAB_STE_0_CFG_S2_TRANS:
1298                         ste_live = true;
1299                         break;
1300                 case STRTAB_STE_0_CFG_ABORT:
1301                         BUG_ON(!disable_bypass);
1302                         break;
1303                 default:
1304                         BUG(); /* STE corruption */
1305                 }
1306         }
1307
1308         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1309         val = STRTAB_STE_0_V;
1310
1311         /* Bypass/fault */
1312         if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1313                 if (!smmu_domain && disable_bypass)
1314                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1315                 else
1316                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1317
1318                 dst[0] = cpu_to_le64(val);
1319                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1320                                                 STRTAB_STE_1_SHCFG_INCOMING));
1321                 dst[2] = 0; /* Nuke the VMID */
1322                 /*
1323                  * The SMMU can perform negative caching, so we must sync
1324                  * the STE regardless of whether the old value was live.
1325                  */
1326                 if (smmu)
1327                         arm_smmu_sync_ste_for_sid(smmu, sid);
1328                 return;
1329         }
1330
1331         if (s1_cfg) {
1332                 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1333                         STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1334
1335                 BUG_ON(ste_live);
1336                 dst[1] = cpu_to_le64(
1337                          FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1338                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1339                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1340                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1341                          FIELD_PREP(STRTAB_STE_1_STRW, strw));
1342
1343                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1344                     !master->stall_enabled)
1345                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1346
1347                 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1348                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1349                         FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1350                         FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1351         }
1352
1353         if (s2_cfg) {
1354                 BUG_ON(ste_live);
1355                 dst[2] = cpu_to_le64(
1356                          FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1357                          FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1358 #ifdef __BIG_ENDIAN
1359                          STRTAB_STE_2_S2ENDI |
1360 #endif
1361                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1362                          STRTAB_STE_2_S2R);
1363
1364                 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1365
1366                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1367         }
1368
1369         if (master->ats_enabled)
1370                 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1371                                                  STRTAB_STE_1_EATS_TRANS));
1372
1373         arm_smmu_sync_ste_for_sid(smmu, sid);
1374         /* See comment in arm_smmu_write_ctx_desc() */
1375         WRITE_ONCE(dst[0], cpu_to_le64(val));
1376         arm_smmu_sync_ste_for_sid(smmu, sid);
1377
1378         /* It's likely that we'll want to use the new STE soon */
1379         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1380                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1381 }
1382
1383 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1384 {
1385         unsigned int i;
1386
1387         for (i = 0; i < nent; ++i) {
1388                 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1389                 strtab += STRTAB_STE_DWORDS;
1390         }
1391 }
1392
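/*
 * Lazily allocate the second-level stream table covering @sid, initialise
 * its 2^STRTAB_SPLIT entries to bypass/abort and point the corresponding
 * L1 descriptor at it. Calling this for a SID whose leaf already exists is
 * a no-op.
 */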
1393 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1394 {
1395         size_t size;
1396         void *strtab;
1397         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1398         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1399
1400         if (desc->l2ptr)
1401                 return 0;
1402
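             /*
              * Each L2 table holds 1 << STRTAB_SPLIT STEs of STRTAB_STE_DWORDS
              * 64-bit words each; the "+ 3" accounts for the 8 bytes per
              * dword. For example, with STRTAB_SPLIT == 8 and 8-dword STEs
              * this works out to 256 * 64 bytes == 16KiB per table.
              */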
1403         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1404         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1405
1406         desc->span = STRTAB_SPLIT + 1;
1407         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1408                                           GFP_KERNEL);
1409         if (!desc->l2ptr) {
1410                 dev_err(smmu->dev,
1411                         "failed to allocate l2 stream table for SID %u\n",
1412                         sid);
1413                 return -ENOMEM;
1414         }
1415
1416         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1417         arm_smmu_write_strtab_l1_desc(strtab, desc);
1418         return 0;
1419 }
1420
1421 static struct arm_smmu_master *
1422 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1423 {
1424         struct rb_node *node;
1425         struct arm_smmu_stream *stream;
1426
1427         lockdep_assert_held(&smmu->streams_mutex);
1428
1429         node = smmu->streams.rb_node;
1430         while (node) {
1431                 stream = rb_entry(node, struct arm_smmu_stream, node);
1432                 if (stream->id < sid)
1433                         node = node->rb_right;
1434                 else if (stream->id > sid)
1435                         node = node->rb_left;
1436                 else
1437                         return stream->master;
1438         }
1439
1440         return NULL;
1441 }
1442
1443 /* IRQ and event handlers */
1444 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1445 {
1446         int ret;
1447         u32 reason;
1448         u32 perm = 0;
1449         struct arm_smmu_master *master;
1450         bool ssid_valid = evt[0] & EVTQ_0_SSV;
1451         u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1452         struct iommu_fault_event fault_evt = { };
1453         struct iommu_fault *flt = &fault_evt.fault;
1454
1455         switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1456         case EVT_ID_TRANSLATION_FAULT:
1457                 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1458                 break;
1459         case EVT_ID_ADDR_SIZE_FAULT:
1460                 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1461                 break;
1462         case EVT_ID_ACCESS_FAULT:
1463                 reason = IOMMU_FAULT_REASON_ACCESS;
1464                 break;
1465         case EVT_ID_PERMISSION_FAULT:
1466                 reason = IOMMU_FAULT_REASON_PERMISSION;
1467                 break;
1468         default:
1469                 return -EOPNOTSUPP;
1470         }
1471
1472         /* Stage-2 is always pinned at the moment */
1473         if (evt[1] & EVTQ_1_S2)
1474                 return -EFAULT;
1475
1476         if (evt[1] & EVTQ_1_RnW)
1477                 perm |= IOMMU_FAULT_PERM_READ;
1478         else
1479                 perm |= IOMMU_FAULT_PERM_WRITE;
1480
1481         if (evt[1] & EVTQ_1_InD)
1482                 perm |= IOMMU_FAULT_PERM_EXEC;
1483
1484         if (evt[1] & EVTQ_1_PnU)
1485                 perm |= IOMMU_FAULT_PERM_PRIV;
1486
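             /*
              * A stalled transaction can be resumed or terminated once the
              * fault has been handled, so report it as a recoverable page
              * request; anything else is reported as an unrecoverable DMA
              * fault.
              */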
1487         if (evt[1] & EVTQ_1_STALL) {
1488                 flt->type = IOMMU_FAULT_PAGE_REQ;
1489                 flt->prm = (struct iommu_fault_page_request) {
1490                         .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1491                         .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1492                         .perm = perm,
1493                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1494                 };
1495
1496                 if (ssid_valid) {
1497                         flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1498                         flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1499                 }
1500         } else {
1501                 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1502                 flt->event = (struct iommu_fault_unrecoverable) {
1503                         .reason = reason,
1504                         .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1505                         .perm = perm,
1506                         .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1507                 };
1508
1509                 if (ssid_valid) {
1510                         flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1511                         flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1512                 }
1513         }
1514
1515         mutex_lock(&smmu->streams_mutex);
1516         master = arm_smmu_find_master(smmu, sid);
1517         if (!master) {
1518                 ret = -EINVAL;
1519                 goto out_unlock;
1520         }
1521
1522         ret = iommu_report_device_fault(master->dev, &fault_evt);
1523         if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1524                 /* Nobody cared, abort the access */
1525                 struct iommu_page_response resp = {
1526                         .pasid          = flt->prm.pasid,
1527                         .grpid          = flt->prm.grpid,
1528                         .code           = IOMMU_PAGE_RESP_FAILURE,
1529                 };
1530                 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1531         }
1532
1533 out_unlock:
1534         mutex_unlock(&smmu->streams_mutex);
1535         return ret;
1536 }
1537
1538 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1539 {
1540         int i, ret;
1541         struct arm_smmu_device *smmu = dev;
1542         struct arm_smmu_queue *q = &smmu->evtq.q;
1543         struct arm_smmu_ll_queue *llq = &q->llq;
1544         static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1545                                       DEFAULT_RATELIMIT_BURST);
1546         u64 evt[EVTQ_ENT_DWORDS];
1547
1548         do {
1549                 while (!queue_remove_raw(q, evt)) {
1550                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1551
1552                         ret = arm_smmu_handle_evt(smmu, evt);
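                             /* Only dump events that we failed to handle, and rate-limit it */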
1553                         if (!ret || !__ratelimit(&rs))
1554                                 continue;
1555
1556                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1557                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1558                                 dev_info(smmu->dev, "\t0x%016llx\n",
1559                                          (unsigned long long)evt[i]);
1560
1561                 }
1562
1563                 /*
1564                  * Not much we can do on overflow, so scream and pretend we're
1565                  * trying harder.
1566                  */
1567                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1568                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1569         } while (!queue_empty(llq));
1570
1571         /* Sync our overflow flag, as we believe we're up to speed */
1572         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1573                     Q_IDX(llq, llq->cons);
1574         return IRQ_HANDLED;
1575 }
1576
1577 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1578 {
1579         u32 sid, ssid;
1580         u16 grpid;
1581         bool ssv, last;
1582
1583         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1584         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1585         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1586         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1587         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1588
1589         dev_info(smmu->dev, "unexpected PRI request received:\n");
1590         dev_info(smmu->dev,
1591                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1592                  sid, ssid, grpid, last ? "L" : "",
1593                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1594                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1595                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1596                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1597                  evt[1] & PRIQ_1_ADDR_MASK);
1598
1599         if (last) {
1600                 struct arm_smmu_cmdq_ent cmd = {
1601                         .opcode                 = CMDQ_OP_PRI_RESP,
1602                         .substream_valid        = ssv,
1603                         .pri                    = {
1604                                 .sid    = sid,
1605                                 .ssid   = ssid,
1606                                 .grpid  = grpid,
1607                                 .resp   = PRI_RESP_DENY,
1608                         },
1609                 };
1610
1611                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1612         }
1613 }
1614
1615 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1616 {
1617         struct arm_smmu_device *smmu = dev;
1618         struct arm_smmu_queue *q = &smmu->priq.q;
1619         struct arm_smmu_ll_queue *llq = &q->llq;
1620         u64 evt[PRIQ_ENT_DWORDS];
1621
1622         do {
1623                 while (!queue_remove_raw(q, evt))
1624                         arm_smmu_handle_ppr(smmu, evt);
1625
1626                 if (queue_sync_prod_in(q) == -EOVERFLOW)
1627                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1628         } while (!queue_empty(llq));
1629
1630         /* Sync our overflow flag, as we believe we're up to speed */
1631         llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1632                     Q_IDX(llq, llq->cons);
1633         queue_sync_cons_out(q);
1634         return IRQ_HANDLED;
1635 }
1636
1637 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1638
1639 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1640 {
1641         u32 gerror, gerrorn, active;
1642         struct arm_smmu_device *smmu = dev;
1643
1644         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1645         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1646
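             /*
              * GERROR accumulates error bits and GERRORN holds the values we
              * last acknowledged, so the bits that differ are errors not yet
              * handled. Writing the observed GERROR value back to GERRORN at
              * the end of this handler acknowledges them.
              */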
1647         active = gerror ^ gerrorn;
1648         if (!(active & GERROR_ERR_MASK))
1649                 return IRQ_NONE; /* No errors pending */
1650
1651         dev_warn(smmu->dev,
1652                  "unexpected global error reported (0x%08x), this could be serious\n",
1653                  active);
1654
1655         if (active & GERROR_SFM_ERR) {
1656                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1657                 arm_smmu_device_disable(smmu);
1658         }
1659
1660         if (active & GERROR_MSI_GERROR_ABT_ERR)
1661                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1662
1663         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1664                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1665
1666         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1667                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1668
1669         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1670                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1671
1672         if (active & GERROR_PRIQ_ABT_ERR)
1673                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1674
1675         if (active & GERROR_EVTQ_ABT_ERR)
1676                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1677
1678         if (active & GERROR_CMDQ_ERR)
1679                 arm_smmu_cmdq_skip_err(smmu);
1680
1681         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1682         return IRQ_HANDLED;
1683 }
1684
1685 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1686 {
1687         struct arm_smmu_device *smmu = dev;
1688
1689         arm_smmu_evtq_thread(irq, dev);
1690         if (smmu->features & ARM_SMMU_FEAT_PRI)
1691                 arm_smmu_priq_thread(irq, dev);
1692
1693         return IRQ_HANDLED;
1694 }
1695
1696 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1697 {
1698         arm_smmu_gerror_handler(irq, dev);
1699         return IRQ_WAKE_THREAD;
1700 }
1701
1702 static void
1703 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1704                         struct arm_smmu_cmdq_ent *cmd)
1705 {
1706         size_t log2_span;
1707         size_t span_mask;
1708         /* ATC invalidates are always on 4096-byte pages */
1709         size_t inval_grain_shift = 12;
1710         unsigned long page_start, page_end;
1711
1712         /*
1713          * ATS and PASID:
1714          *
1715          * If substream_valid is clear, the PCIe TLP is sent without a PASID
1716          * prefix. In that case all ATC entries within the address range are
1717          * invalidated, including those that were requested with a PASID! There
1718          * is no way to invalidate only entries without PASID.
1719          *
1720          * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1721          * traffic), translation requests without PASID create ATC entries
1722          * without PASID, which must be invalidated with substream_valid clear.
1723          * This has the unpleasant side-effect of invalidating all PASID-tagged
1724          * ATC entries within the address range.
1725          */
1726         *cmd = (struct arm_smmu_cmdq_ent) {
1727                 .opcode                 = CMDQ_OP_ATC_INV,
1728                 .substream_valid        = !!ssid,
1729                 .atc.ssid               = ssid,
1730         };
1731
1732         if (!size) {
1733                 cmd->atc.size = ATC_INV_SIZE_ALL;
1734                 return;
1735         }
1736
1737         page_start      = iova >> inval_grain_shift;
1738         page_end        = (iova + size - 1) >> inval_grain_shift;
1739
1740         /*
1741          * In an ATS Invalidate Request, the address must be aligned to the
1742          * range size, which must be a power-of-two number of pages. We
1743          * thus have to choose between grossly over-invalidating the region, or
1744          * splitting the invalidation into multiple commands. For simplicity
1745          * we'll go with the first solution, but should refine it in the future
1746          * if multiple commands are shown to be more efficient.
1747          *
1748          * Find the smallest power of two that covers the range. The most
1749          * significant differing bit between the start and end addresses,
1750          * fls(start ^ end), indicates the required span. For example:
1751          *
1752          * We want to invalidate pages [8; 11]. This is already the ideal range:
1753          *              x = 0b1000 ^ 0b1011 = 0b11
1754          *              span = 1 << fls(x) = 4
1755          *
1756          * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1757          *              x = 0b0111 ^ 0b1010 = 0b1101
1758          *              span = 1 << fls(x) = 16
1759          */
1760         log2_span       = fls_long(page_start ^ page_end);
1761         span_mask       = (1ULL << log2_span) - 1;
1762
1763         page_start      &= ~span_mask;
1764
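             /*
              * Continuing the [7; 10] example above: log2_span == 4, so
              * span_mask == 0xf, page_start is rounded down to 0 and the
              * resulting command invalidates the 16 pages [0; 15].
              */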
1765         cmd->atc.addr   = page_start << inval_grain_shift;
1766         cmd->atc.size   = log2_span;
1767 }
1768
1769 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1770 {
1771         int i;
1772         struct arm_smmu_cmdq_ent cmd;
1773         struct arm_smmu_cmdq_batch cmds;
1774
1775         arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1776
1777         cmds.num = 0;
1778         for (i = 0; i < master->num_streams; i++) {
1779                 cmd.atc.sid = master->streams[i].id;
1780                 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1781         }
1782
1783         return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1784 }
1785
1786 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1787                             unsigned long iova, size_t size)
1788 {
1789         int i;
1790         unsigned long flags;
1791         struct arm_smmu_cmdq_ent cmd;
1792         struct arm_smmu_master *master;
1793         struct arm_smmu_cmdq_batch cmds;
1794
1795         if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1796                 return 0;
1797
1798         /*
1799          * Ensure that we've completed prior invalidation of the main TLBs
1800          * before we read 'nr_ats_masters' in case of a concurrent call to
1801          * arm_smmu_enable_ats():
1802          *
1803          *      // unmap()                      // arm_smmu_enable_ats()
1804          *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
1805          *      smp_mb();                       [...]
1806          *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1807          *
1808          * Ensures that we always see the incremented 'nr_ats_masters' count if
1809          * ATS was enabled at the PCI device before completion of the TLBI.
1810          */
1811         smp_mb();
1812         if (!atomic_read(&smmu_domain->nr_ats_masters))
1813                 return 0;
1814
1815         arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1816
1817         cmds.num = 0;
1818
1819         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1820         list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1821                 if (!master->ats_enabled)
1822                         continue;
1823
1824                 for (i = 0; i < master->num_streams; i++) {
1825                         cmd.atc.sid = master->streams[i].id;
1826                         arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1827                 }
1828         }
1829         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1830
1831         return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1832 }
1833
1834 /* IO_PGTABLE API */
1835 static void arm_smmu_tlb_inv_context(void *cookie)
1836 {
1837         struct arm_smmu_domain *smmu_domain = cookie;
1838         struct arm_smmu_device *smmu = smmu_domain->smmu;
1839         struct arm_smmu_cmdq_ent cmd;
1840
1841         /*
1842          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1843          * PTEs previously cleared by unmaps on the current CPU not yet visible
1844          * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1845          * insertion to guarantee those are observed before the TLBI. Do be
1846          * careful, 007.
1847          */
1848         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1849                 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1850         } else {
1851                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1852                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1853                 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1854         }
1855         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1856 }
1857
1858 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1859                                      unsigned long iova, size_t size,
1860                                      size_t granule,
1861                                      struct arm_smmu_domain *smmu_domain)
1862 {
1863         struct arm_smmu_device *smmu = smmu_domain->smmu;
1864         unsigned long end = iova + size, num_pages = 0, tg = 0;
1865         size_t inv_range = granule;
1866         struct arm_smmu_cmdq_batch cmds;
1867
1868         if (!size)
1869                 return;
1870
1871         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1872                 /* Get the leaf page size */
1873                 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1874
1875                 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1876                 cmd->tlbi.tg = (tg - 10) / 2;
1877
1878                 /* Determine what level the granule is at */
1879                 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
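                     /*
                      * For example, 4KiB leaf pages (tg == 12) encode as
                      * TG == 1; a 4KiB granule then gives TTL == 3 and a
                      * 2MiB block granule gives TTL == 2.
                      */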
1880
1881                 num_pages = size >> tg;
1882         }
1883
1884         cmds.num = 0;
1885
1886         while (iova < end) {
1887                 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1888                         /*
1889                          * On each iteration of the loop, the range is 5 bits
1890                          * worth of the aligned size remaining.
1891                          * The range in pages is:
1892                          *
1893                          * range = (num_pages & (0x1f << __ffs(num_pages)))
1894                          */
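                             /*
                              * For example, 33 pages (num_pages == 0b100001)
                              * take two commands: first scale == 0, num == 1
                              * (a single page), then scale == 5, num == 1
                              * (32 pages).
                              */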
1895                         unsigned long scale, num;
1896
1897                         /* Determine the power of 2 multiple number of pages */
1898                         scale = __ffs(num_pages);
1899                         cmd->tlbi.scale = scale;
1900
1901                         /* Determine how many chunks of 2^scale size we have */
1902                         num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1903                         cmd->tlbi.num = num - 1;
1904
1905                         /* range is num * 2^scale * pgsize */
1906                         inv_range = num << (scale + tg);
1907
1908                         /* Clear out the lower order bits for the next iteration */
1909                         num_pages -= num << scale;
1910                 }
1911
1912                 cmd->tlbi.addr = iova;
1913                 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1914                 iova += inv_range;
1915         }
1916         arm_smmu_cmdq_batch_submit(smmu, &cmds);
1917 }
1918
1919 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1920                                           size_t granule, bool leaf,
1921                                           struct arm_smmu_domain *smmu_domain)
1922 {
1923         struct arm_smmu_cmdq_ent cmd = {
1924                 .tlbi = {
1925                         .leaf   = leaf,
1926                 },
1927         };
1928
1929         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1930                 cmd.opcode      = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1931                                   CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1932                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1933         } else {
1934                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1935                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1936         }
1937         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1938
1939         /*
1940          * Unfortunately, this can't be leaf-only since we may have
1941          * zapped an entire table.
1942          */
1943         arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1944 }
1945
1946 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1947                                  size_t granule, bool leaf,
1948                                  struct arm_smmu_domain *smmu_domain)
1949 {
1950         struct arm_smmu_cmdq_ent cmd = {
1951                 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1952                           CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1953                 .tlbi = {
1954                         .asid   = asid,
1955                         .leaf   = leaf,
1956                 },
1957         };
1958
1959         __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1960 }
1961
1962 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1963                                          unsigned long iova, size_t granule,
1964                                          void *cookie)
1965 {
1966         struct arm_smmu_domain *smmu_domain = cookie;
1967         struct iommu_domain *domain = &smmu_domain->domain;
1968
1969         iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1970 }
1971
1972 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1973                                   size_t granule, void *cookie)
1974 {
1975         arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1976 }
1977
1978 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1979         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1980         .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1981         .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1982 };
1983
1984 /* IOMMU API */
1985 static bool arm_smmu_capable(enum iommu_cap cap)
1986 {
1987         switch (cap) {
1988         case IOMMU_CAP_CACHE_COHERENCY:
1989                 return true;
1990         case IOMMU_CAP_NOEXEC:
1991                 return true;
1992         default:
1993                 return false;
1994         }
1995 }
1996
1997 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1998 {
1999         struct arm_smmu_domain *smmu_domain;
2000
2001         if (type != IOMMU_DOMAIN_UNMANAGED &&
2002             type != IOMMU_DOMAIN_DMA &&
2003             type != IOMMU_DOMAIN_DMA_FQ &&
2004             type != IOMMU_DOMAIN_IDENTITY)
2005                 return NULL;
2006
2007         /*
2008          * Allocate the domain and initialise some of its data structures.
2009          * We can't really do anything meaningful until we've added a
2010          * master.
2011          */
2012         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2013         if (!smmu_domain)
2014                 return NULL;
2015
2016         mutex_init(&smmu_domain->init_mutex);
2017         INIT_LIST_HEAD(&smmu_domain->devices);
2018         spin_lock_init(&smmu_domain->devices_lock);
2019         INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2020
2021         return &smmu_domain->domain;
2022 }
2023
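     /*
      * Allocate a free index from a 2^span-bit map. find_first_zero_bit()
      * and test_and_set_bit() are not atomic with respect to each other, so
      * retry the search if another CPU claims the chosen bit first.
      */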
2024 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2025 {
2026         int idx, size = 1 << span;
2027
2028         do {
2029                 idx = find_first_zero_bit(map, size);
2030                 if (idx == size)
2031                         return -ENOSPC;
2032         } while (test_and_set_bit(idx, map));
2033
2034         return idx;
2035 }
2036
2037 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2038 {
2039         clear_bit(idx, map);
2040 }
2041
2042 static void arm_smmu_domain_free(struct iommu_domain *domain)
2043 {
2044         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2045         struct arm_smmu_device *smmu = smmu_domain->smmu;
2046
2047         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2048
2049         /* Free the CD and ASID, if we allocated them */
2050         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2051                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2052
2053                 /* Prevent SVA from touching the CD while we're freeing it */
2054                 mutex_lock(&arm_smmu_asid_lock);
2055                 if (cfg->cdcfg.cdtab)
2056                         arm_smmu_free_cd_tables(smmu_domain);
2057                 arm_smmu_free_asid(&cfg->cd);
2058                 mutex_unlock(&arm_smmu_asid_lock);
2059         } else {
2060                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2061                 if (cfg->vmid)
2062                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2063         }
2064
2065         kfree(smmu_domain);
2066 }
2067
2068 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2069                                        struct arm_smmu_master *master,
2070                                        struct io_pgtable_cfg *pgtbl_cfg)
2071 {
2072         int ret;
2073         u32 asid;
2074         struct arm_smmu_device *smmu = smmu_domain->smmu;
2075         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2076         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2077
2078         refcount_set(&cfg->cd.refs, 1);
2079
2080         /* Prevent SVA from modifying the ASID until it is written to the CD */
2081         mutex_lock(&arm_smmu_asid_lock);
2082         ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2083                        XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2084         if (ret)
2085                 goto out_unlock;
2086
2087         cfg->s1cdmax = master->ssid_bits;
2088
2089         smmu_domain->stall_enabled = master->stall_enabled;
2090
2091         ret = arm_smmu_alloc_cd_tables(smmu_domain);
2092         if (ret)
2093                 goto out_free_asid;
2094
2095         cfg->cd.asid    = (u16)asid;
2096         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2097         cfg->cd.tcr     = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2098                           FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2099                           FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2100                           FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2101                           FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2102                           FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2103                           CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2104         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2105
2106         /*
2107          * Note that this will end up calling arm_smmu_sync_cd() before
2108          * the master has been added to the devices list for this domain.
2109          * This isn't an issue because the STE hasn't been installed yet.
2110          */
2111         ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2112         if (ret)
2113                 goto out_free_cd_tables;
2114
2115         mutex_unlock(&arm_smmu_asid_lock);
2116         return 0;
2117
2118 out_free_cd_tables:
2119         arm_smmu_free_cd_tables(smmu_domain);
2120 out_free_asid:
2121         arm_smmu_free_asid(&cfg->cd);
2122 out_unlock:
2123         mutex_unlock(&arm_smmu_asid_lock);
2124         return ret;
2125 }
2126
2127 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2128                                        struct arm_smmu_master *master,
2129                                        struct io_pgtable_cfg *pgtbl_cfg)
2130 {
2131         int vmid;
2132         struct arm_smmu_device *smmu = smmu_domain->smmu;
2133         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2134         typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2135
2136         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2137         if (vmid < 0)
2138                 return vmid;
2139
2140         vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2141         cfg->vmid       = (u16)vmid;
2142         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2143         cfg->vtcr       = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2144                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2145                           FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2146                           FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2147                           FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2148                           FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2149                           FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2150         return 0;
2151 }
2152
2153 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2154                                     struct arm_smmu_master *master)
2155 {
2156         int ret;
2157         unsigned long ias, oas;
2158         enum io_pgtable_fmt fmt;
2159         struct io_pgtable_cfg pgtbl_cfg;
2160         struct io_pgtable_ops *pgtbl_ops;
2161         int (*finalise_stage_fn)(struct arm_smmu_domain *,
2162                                  struct arm_smmu_master *,
2163                                  struct io_pgtable_cfg *);
2164         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165         struct arm_smmu_device *smmu = smmu_domain->smmu;
2166
2167         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2168                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2169                 return 0;
2170         }
2171
2172         /* Restrict the stage to what we can actually support */
2173         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2174                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2175         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2176                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2177
2178         switch (smmu_domain->stage) {
2179         case ARM_SMMU_DOMAIN_S1:
2180                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2181                 ias = min_t(unsigned long, ias, VA_BITS);
2182                 oas = smmu->ias;
2183                 fmt = ARM_64_LPAE_S1;
2184                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2185                 break;
2186         case ARM_SMMU_DOMAIN_NESTED:
2187         case ARM_SMMU_DOMAIN_S2:
2188                 ias = smmu->ias;
2189                 oas = smmu->oas;
2190                 fmt = ARM_64_LPAE_S2;
2191                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2192                 break;
2193         default:
2194                 return -EINVAL;
2195         }
2196
2197         pgtbl_cfg = (struct io_pgtable_cfg) {
2198                 .pgsize_bitmap  = smmu->pgsize_bitmap,
2199                 .ias            = ias,
2200                 .oas            = oas,
2201                 .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2202                 .tlb            = &arm_smmu_flush_ops,
2203                 .iommu_dev      = smmu->dev,
2204         };
2205
2206         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2207         if (!pgtbl_ops)
2208                 return -ENOMEM;
2209
2210         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2211         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2212         domain->geometry.force_aperture = true;
2213
2214         ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2215         if (ret < 0) {
2216                 free_io_pgtable_ops(pgtbl_ops);
2217                 return ret;
2218         }
2219
2220         smmu_domain->pgtbl_ops = pgtbl_ops;
2221         return 0;
2222 }
2223
2224 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2225 {
2226         __le64 *step;
2227         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2228
2229         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2230                 struct arm_smmu_strtab_l1_desc *l1_desc;
2231                 int idx;
2232
2233                 /* Two-level walk */
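                     /*
                      * The upper SID bits select the L1 descriptor and the
                      * low STRTAB_SPLIT bits select the STE within that
                      * descriptor's L2 table (e.g. SID[7:0] with
                      * STRTAB_SPLIT == 8).
                      */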
2234                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2235                 l1_desc = &cfg->l1_desc[idx];
2236                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2237                 step = &l1_desc->l2ptr[idx];
2238         } else {
2239                 /* Simple linear lookup */
2240                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2241         }
2242
2243         return step;
2244 }
2245
2246 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2247 {
2248         int i, j;
2249         struct arm_smmu_device *smmu = master->smmu;
2250
2251         for (i = 0; i < master->num_streams; ++i) {
2252                 u32 sid = master->streams[i].id;
2253                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2254
2255                 /* Bridged PCI devices may end up with duplicated IDs */
2256                 for (j = 0; j < i; j++)
2257                         if (master->streams[j].id == sid)
2258                                 break;
2259                 if (j < i)
2260                         continue;
2261
2262                 arm_smmu_write_strtab_ent(master, sid, step);
2263         }
2264 }
2265
2266 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2267 {
2268         struct device *dev = master->dev;
2269         struct arm_smmu_device *smmu = master->smmu;
2270         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2271
2272         if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2273                 return false;
2274
2275         if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2276                 return false;
2277
2278         return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2279 }
2280
2281 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2282 {
2283         size_t stu;
2284         struct pci_dev *pdev;
2285         struct arm_smmu_device *smmu = master->smmu;
2286         struct arm_smmu_domain *smmu_domain = master->domain;
2287
2288         /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2289         if (!master->ats_enabled)
2290                 return;
2291
2292         /* Smallest Translation Unit: log2 of the smallest supported granule */
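             /* e.g. a pgsize_bitmap whose smallest page is 4KiB gives stu == 12 */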
2293         stu = __ffs(smmu->pgsize_bitmap);
2294         pdev = to_pci_dev(master->dev);
2295
2296         atomic_inc(&smmu_domain->nr_ats_masters);
2297         arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2298         if (pci_enable_ats(pdev, stu))
2299                 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2300 }
2301
2302 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2303 {
2304         struct arm_smmu_domain *smmu_domain = master->domain;
2305
2306         if (!master->ats_enabled)
2307                 return;
2308
2309         pci_disable_ats(to_pci_dev(master->dev));
2310         /*
2311          * Ensure ATS is disabled at the endpoint before we issue the
2312          * ATC invalidation via the SMMU.
2313          */
2314         wmb();
2315         arm_smmu_atc_inv_master(master);
2316         atomic_dec(&smmu_domain->nr_ats_masters);
2317 }
2318
2319 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2320 {
2321         int ret;
2322         int features;
2323         int num_pasids;
2324         struct pci_dev *pdev;
2325
2326         if (!dev_is_pci(master->dev))
2327                 return -ENODEV;
2328
2329         pdev = to_pci_dev(master->dev);
2330
2331         features = pci_pasid_features(pdev);
2332         if (features < 0)
2333                 return features;
2334
2335         num_pasids = pci_max_pasids(pdev);
2336         if (num_pasids <= 0)
2337                 return num_pasids;
2338
2339         ret = pci_enable_pasid(pdev, features);
2340         if (ret) {
2341                 dev_err(&pdev->dev, "Failed to enable PASID\n");
2342                 return ret;
2343         }
2344
2345         master->ssid_bits = min_t(u8, ilog2(num_pasids),
2346                                   master->smmu->ssid_bits);
2347         return 0;
2348 }
2349
2350 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2351 {
2352         struct pci_dev *pdev;
2353
2354         if (!dev_is_pci(master->dev))
2355                 return;
2356
2357         pdev = to_pci_dev(master->dev);
2358
2359         if (!pdev->pasid_enabled)
2360                 return;
2361
2362         master->ssid_bits = 0;
2363         pci_disable_pasid(pdev);
2364 }
2365
2366 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2367 {
2368         unsigned long flags;
2369         struct arm_smmu_domain *smmu_domain = master->domain;
2370
2371         if (!smmu_domain)
2372                 return;
2373
2374         arm_smmu_disable_ats(master);
2375
2376         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2377         list_del(&master->domain_head);
2378         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2379
2380         master->domain = NULL;
2381         master->ats_enabled = false;
2382         arm_smmu_install_ste_for_dev(master);
2383 }
2384
2385 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2386 {
2387         int ret = 0;
2388         unsigned long flags;
2389         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2390         struct arm_smmu_device *smmu;
2391         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2392         struct arm_smmu_master *master;
2393
2394         if (!fwspec)
2395                 return -ENOENT;
2396
2397         master = dev_iommu_priv_get(dev);
2398         smmu = master->smmu;
2399
2400         /*
2401          * Checking that SVA is disabled ensures that this device isn't bound to
2402          * any mm, and can be safely detached from its old domain. Bonds cannot
2403          * be removed concurrently since we're holding the group mutex.
2404          */
2405         if (arm_smmu_master_sva_enabled(master)) {
2406                 dev_err(dev, "cannot attach - SVA enabled\n");
2407                 return -EBUSY;
2408         }
2409
2410         arm_smmu_detach_dev(master);
2411
2412         mutex_lock(&smmu_domain->init_mutex);
2413
2414         if (!smmu_domain->smmu) {
2415                 smmu_domain->smmu = smmu;
2416                 ret = arm_smmu_domain_finalise(domain, master);
2417                 if (ret) {
2418                         smmu_domain->smmu = NULL;
2419                         goto out_unlock;
2420                 }
2421         } else if (smmu_domain->smmu != smmu) {
2422                 dev_err(dev,
2423                         "cannot attach to SMMU %s (upstream of %s)\n",
2424                         dev_name(smmu_domain->smmu->dev),
2425                         dev_name(smmu->dev));
2426                 ret = -ENXIO;
2427                 goto out_unlock;
2428         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2429                    master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2430                 dev_err(dev,
2431                         "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2432                         smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2433                 ret = -EINVAL;
2434                 goto out_unlock;
2435         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2436                    smmu_domain->stall_enabled != master->stall_enabled) {
2437                 dev_err(dev, "cannot attach to stall-%s domain\n",
2438                         smmu_domain->stall_enabled ? "enabled" : "disabled");
2439                 ret = -EINVAL;
2440                 goto out_unlock;
2441         }
2442
2443         master->domain = smmu_domain;
2444
2445         if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2446                 master->ats_enabled = arm_smmu_ats_supported(master);
2447
2448         arm_smmu_install_ste_for_dev(master);
2449
2450         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2451         list_add(&master->domain_head, &smmu_domain->devices);
2452         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2453
2454         arm_smmu_enable_ats(master);
2455
2456 out_unlock:
2457         mutex_unlock(&smmu_domain->init_mutex);
2458         return ret;
2459 }
2460
2461 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2462                               phys_addr_t paddr, size_t pgsize, size_t pgcount,
2463                               int prot, gfp_t gfp, size_t *mapped)
2464 {
2465         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2466
2467         if (!ops)
2468                 return -ENODEV;
2469
2470         return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2471 }
2472
2473 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2474                                    size_t pgsize, size_t pgcount,
2475                                    struct iommu_iotlb_gather *gather)
2476 {
2477         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2478         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2479
2480         if (!ops)
2481                 return 0;
2482
2483         return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2484 }
2485
2486 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2487 {
2488         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2489
2490         if (smmu_domain->smmu)
2491                 arm_smmu_tlb_inv_context(smmu_domain);
2492 }
2493
2494 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2495                                 struct iommu_iotlb_gather *gather)
2496 {
2497         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2498
2499         if (!gather->pgsize)
2500                 return;
2501
2502         arm_smmu_tlb_inv_range_domain(gather->start,
2503                                       gather->end - gather->start + 1,
2504                                       gather->pgsize, true, smmu_domain);
2505 }
2506
2507 static phys_addr_t
2508 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2509 {
2510         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2511
2512         if (!ops)
2513                 return 0;
2514
2515         return ops->iova_to_phys(ops, iova);
2516 }
2517
2518 static struct platform_driver arm_smmu_driver;
2519
2520 static
2521 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2522 {
2523         struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2524                                                           fwnode);
2525         put_device(dev);
2526         return dev ? dev_get_drvdata(dev) : NULL;
2527 }
2528
2529 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2530 {
2531         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2532
2533         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2534                 limit *= 1UL << STRTAB_SPLIT;
2535
2536         return sid < limit;
2537 }
2538
2539 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2540                                   struct arm_smmu_master *master)
2541 {
2542         int i;
2543         int ret = 0;
2544         struct arm_smmu_stream *new_stream, *cur_stream;
2545         struct rb_node **new_node, *parent_node = NULL;
2546         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2547
2548         master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2549                                   GFP_KERNEL);
2550         if (!master->streams)
2551                 return -ENOMEM;
2552         master->num_streams = fwspec->num_ids;
2553
2554         mutex_lock(&smmu->streams_mutex);
2555         for (i = 0; i < fwspec->num_ids; i++) {
2556                 u32 sid = fwspec->ids[i];
2557
2558                 new_stream = &master->streams[i];
2559                 new_stream->id = sid;
2560                 new_stream->master = master;
2561
2562                 /*
2563                  * Check that the SID is in range of the SMMU and our stream table
2564                  */
2565                 if (!arm_smmu_sid_in_range(smmu, sid)) {
2566                         ret = -ERANGE;
2567                         break;
2568                 }
2569
2570                 /* Ensure l2 strtab is initialised */
2571                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2572                         ret = arm_smmu_init_l2_strtab(smmu, sid);
2573                         if (ret)
2574                                 break;
2575                 }
2576
2577                 /* Insert into SID tree */
2578                 new_node = &(smmu->streams.rb_node);
2579                 while (*new_node) {
2580                         cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2581                                               node);
2582                         parent_node = *new_node;
2583                         if (cur_stream->id > new_stream->id) {
2584                                 new_node = &((*new_node)->rb_left);
2585                         } else if (cur_stream->id < new_stream->id) {
2586                                 new_node = &((*new_node)->rb_right);
2587                         } else {
2588                                 dev_warn(master->dev,
2589                                          "stream %u already in tree\n",
2590                                          cur_stream->id);
2591                                 ret = -EINVAL;
2592                                 break;
2593                         }
2594                 }
2595                 if (ret)
2596                         break;
2597
2598                 rb_link_node(&new_stream->node, parent_node, new_node);
2599                 rb_insert_color(&new_stream->node, &smmu->streams);
2600         }
2601
2602         if (ret) {
2603                 for (i--; i >= 0; i--)
2604                         rb_erase(&master->streams[i].node, &smmu->streams);
2605                 kfree(master->streams);
2606         }
2607         mutex_unlock(&smmu->streams_mutex);
2608
2609         return ret;
2610 }
2611
2612 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2613 {
2614         int i;
2615         struct arm_smmu_device *smmu = master->smmu;
2616         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2617
2618         if (!smmu || !master->streams)
2619                 return;
2620
2621         mutex_lock(&smmu->streams_mutex);
2622         for (i = 0; i < fwspec->num_ids; i++)
2623                 rb_erase(&master->streams[i].node, &smmu->streams);
2624         mutex_unlock(&smmu->streams_mutex);
2625
2626         kfree(master->streams);
2627 }
2628
2629 static struct iommu_ops arm_smmu_ops;
2630
2631 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2632 {
2633         int ret;
2634         struct arm_smmu_device *smmu;
2635         struct arm_smmu_master *master;
2636         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2637
2638         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2639                 return ERR_PTR(-ENODEV);
2640
2641         if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2642                 return ERR_PTR(-EBUSY);
2643
2644         smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2645         if (!smmu)
2646                 return ERR_PTR(-ENODEV);
2647
2648         master = kzalloc(sizeof(*master), GFP_KERNEL);
2649         if (!master)
2650                 return ERR_PTR(-ENOMEM);
2651
2652         master->dev = dev;
2653         master->smmu = smmu;
2654         INIT_LIST_HEAD(&master->bonds);
2655         dev_iommu_priv_set(dev, master);
2656
2657         ret = arm_smmu_insert_master(smmu, master);
2658         if (ret)
2659                 goto err_free_master;
2660
2661         device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2662         master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2663
2664         /*
2665          * Note that PASID must be enabled before, and disabled after ATS:
2666          * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2667          *
2668          *   Behavior is undefined if this bit is Set and the value of the PASID
2669          *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2670          *   are changed.
2671          */
2672         arm_smmu_enable_pasid(master);
2673
2674         if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2675                 master->ssid_bits = min_t(u8, master->ssid_bits,
2676                                           CTXDESC_LINEAR_CDMAX);
2677
2678         if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2679              device_property_read_bool(dev, "dma-can-stall")) ||
2680             smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2681                 master->stall_enabled = true;
2682
2683         return &smmu->iommu;
2684
2685 err_free_master:
2686         kfree(master);
2687         dev_iommu_priv_set(dev, NULL);
2688         return ERR_PTR(ret);
2689 }
2690
2691 static void arm_smmu_release_device(struct device *dev)
2692 {
2693         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2694         struct arm_smmu_master *master;
2695
2696         if (!fwspec || fwspec->ops != &arm_smmu_ops)
2697                 return;
2698
2699         master = dev_iommu_priv_get(dev);
2700         if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2701                 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2702         arm_smmu_detach_dev(master);
2703         arm_smmu_disable_pasid(master);
2704         arm_smmu_remove_master(master);
2705         kfree(master);
2706         iommu_fwspec_free(dev);
2707 }
2708
2709 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2710 {
2711         struct iommu_group *group;
2712
2713         /*
2714          * We don't support devices sharing stream IDs other than PCI RID
2715          * aliases, since the necessary ID-to-device lookup becomes rather
2716          * impractical given a potential sparse 32-bit stream ID space.
2717          */
2718         if (dev_is_pci(dev))
2719                 group = pci_device_group(dev);
2720         else
2721                 group = generic_device_group(dev);
2722
2723         return group;
2724 }
2725
2726 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2727 {
2728         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2729         int ret = 0;
2730
2731         mutex_lock(&smmu_domain->init_mutex);
2732         if (smmu_domain->smmu)
2733                 ret = -EPERM;
2734         else
2735                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2736         mutex_unlock(&smmu_domain->init_mutex);
2737
2738         return ret;
2739 }
2740
2741 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2742 {
2743         return iommu_fwspec_add_ids(dev, args->args, 1);
2744 }
2745
2746 static void arm_smmu_get_resv_regions(struct device *dev,
2747                                       struct list_head *head)
2748 {
2749         struct iommu_resv_region *region;
2750         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2751
2752         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2753                                          prot, IOMMU_RESV_SW_MSI);
2754         if (!region)
2755                 return;
2756
2757         list_add_tail(&region->list, head);
2758
2759         iommu_dma_get_resv_regions(dev, head);
2760 }
2761
2762 static bool arm_smmu_dev_has_feature(struct device *dev,
2763                                      enum iommu_dev_features feat)
2764 {
2765         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2766
2767         if (!master)
2768                 return false;
2769
2770         switch (feat) {
2771         case IOMMU_DEV_FEAT_IOPF:
2772                 return arm_smmu_master_iopf_supported(master);
2773         case IOMMU_DEV_FEAT_SVA:
2774                 return arm_smmu_master_sva_supported(master);
2775         default:
2776                 return false;
2777         }
2778 }
2779
2780 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2781                                          enum iommu_dev_features feat)
2782 {
2783         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2784
2785         if (!master)
2786                 return false;
2787
2788         switch (feat) {
2789         case IOMMU_DEV_FEAT_IOPF:
2790                 return master->iopf_enabled;
2791         case IOMMU_DEV_FEAT_SVA:
2792                 return arm_smmu_master_sva_enabled(master);
2793         default:
2794                 return false;
2795         }
2796 }
2797
2798 static int arm_smmu_dev_enable_feature(struct device *dev,
2799                                        enum iommu_dev_features feat)
2800 {
2801         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2802
2803         if (!arm_smmu_dev_has_feature(dev, feat))
2804                 return -ENODEV;
2805
2806         if (arm_smmu_dev_feature_enabled(dev, feat))
2807                 return -EBUSY;
2808
2809         switch (feat) {
2810         case IOMMU_DEV_FEAT_IOPF:
2811                 master->iopf_enabled = true;
2812                 return 0;
2813         case IOMMU_DEV_FEAT_SVA:
2814                 return arm_smmu_master_enable_sva(master);
2815         default:
2816                 return -EINVAL;
2817         }
2818 }
2819
2820 static int arm_smmu_dev_disable_feature(struct device *dev,
2821                                         enum iommu_dev_features feat)
2822 {
2823         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2824
2825         if (!arm_smmu_dev_feature_enabled(dev, feat))
2826                 return -EINVAL;
2827
2828         switch (feat) {
2829         case IOMMU_DEV_FEAT_IOPF:
2830                 if (master->sva_enabled)
2831                         return -EBUSY;
2832                 master->iopf_enabled = false;
2833                 return 0;
2834         case IOMMU_DEV_FEAT_SVA:
2835                 return arm_smmu_master_disable_sva(master);
2836         default:
2837                 return -EINVAL;
2838         }
2839 }
2840
2841 static struct iommu_ops arm_smmu_ops = {
2842         .capable                = arm_smmu_capable,
2843         .domain_alloc           = arm_smmu_domain_alloc,
2844         .domain_free            = arm_smmu_domain_free,
2845         .attach_dev             = arm_smmu_attach_dev,
2846         .map_pages              = arm_smmu_map_pages,
2847         .unmap_pages            = arm_smmu_unmap_pages,
2848         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2849         .iotlb_sync             = arm_smmu_iotlb_sync,
2850         .iova_to_phys           = arm_smmu_iova_to_phys,
2851         .probe_device           = arm_smmu_probe_device,
2852         .release_device         = arm_smmu_release_device,
2853         .device_group           = arm_smmu_device_group,
2854         .enable_nesting         = arm_smmu_enable_nesting,
2855         .of_xlate               = arm_smmu_of_xlate,
2856         .get_resv_regions       = arm_smmu_get_resv_regions,
2857         .put_resv_regions       = generic_iommu_put_resv_regions,
2858         .dev_has_feat           = arm_smmu_dev_has_feature,
2859         .dev_feat_enabled       = arm_smmu_dev_feature_enabled,
2860         .dev_enable_feat        = arm_smmu_dev_enable_feature,
2861         .dev_disable_feat       = arm_smmu_dev_disable_feature,
2862         .sva_bind               = arm_smmu_sva_bind,
2863         .sva_unbind             = arm_smmu_sva_unbind,
2864         .sva_get_pasid          = arm_smmu_sva_get_pasid,
2865         .page_response          = arm_smmu_page_response,
2866         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2867         .owner                  = THIS_MODULE,
2868 };
2869
2870 /* Probing and initialisation functions */
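/*
 * Allocate the DMA-coherent memory backing a single queue. If the
 * allocation fails, halve the queue size and retry until it succeeds or
 * the queue would shrink below a page.
 */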
2871 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2872                                    struct arm_smmu_queue *q,
2873                                    void __iomem *page,
2874                                    unsigned long prod_off,
2875                                    unsigned long cons_off,
2876                                    size_t dwords, const char *name)
2877 {
2878         size_t qsz;
2879
2880         do {
2881                 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2882                 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2883                                               GFP_KERNEL);
2884                 if (q->base || qsz < PAGE_SIZE)
2885                         break;
2886
2887                 q->llq.max_n_shift--;
2888         } while (1);
2889
2890         if (!q->base) {
2891                 dev_err(smmu->dev,
2892                         "failed to allocate queue (0x%zx bytes) for %s\n",
2893                         qsz, name);
2894                 return -ENOMEM;
2895         }
2896
2897         if (!WARN_ON(q->base_dma & (qsz - 1))) {
2898                 dev_info(smmu->dev, "allocated %u entries for %s\n",
2899                          1 << q->llq.max_n_shift, name);
2900         }
2901
2902         q->prod_reg     = page + prod_off;
2903         q->cons_reg     = page + cons_off;
2904         q->ent_dwords   = dwords;
2905
2906         q->q_base  = Q_BASE_RWA;
2907         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2908         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2909
2910         q->llq.prod = q->llq.cons = 0;
2911         return 0;
2912 }
2913
2914 static void arm_smmu_cmdq_free_bitmap(void *data)
2915 {
2916         unsigned long *bitmap = data;
2917         bitmap_free(bitmap);
2918 }
2919
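/*
 * Initialise the command queue's shared state: the owner/lock atomics
 * and the bitmap tracking which entries currently hold valid commands.
 */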
2920 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2921 {
2922         int ret = 0;
2923         struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2924         unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2925         atomic_long_t *bitmap;
2926
2927         atomic_set(&cmdq->owner_prod, 0);
2928         atomic_set(&cmdq->lock, 0);
2929
2930         bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2931         if (!bitmap) {
2932                 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2933                 ret = -ENOMEM;
2934         } else {
2935                 cmdq->valid_map = bitmap;
2936                 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2937         }
2938
2939         return ret;
2940 }
2941
2942 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2943 {
2944         int ret;
2945
2946         /* cmdq */
2947         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2948                                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2949                                       CMDQ_ENT_DWORDS, "cmdq");
2950         if (ret)
2951                 return ret;
2952
2953         ret = arm_smmu_cmdq_init(smmu);
2954         if (ret)
2955                 return ret;
2956
2957         /* evtq */
2958         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2959                                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2960                                       EVTQ_ENT_DWORDS, "evtq");
2961         if (ret)
2962                 return ret;
2963
2964         if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2965             (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2966                 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2967                 if (!smmu->evtq.iopf)
2968                         return -ENOMEM;
2969         }
2970
2971         /* priq */
2972         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2973                 return 0;
2974
2975         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2976                                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2977                                        PRIQ_ENT_DWORDS, "priq");
2978 }
2979
2980 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2981 {
2982         unsigned int i;
2983         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2984         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2985         void *strtab = smmu->strtab_cfg.strtab;
2986
2987         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2988         if (!cfg->l1_desc)
2989                 return -ENOMEM;
2990
2991         for (i = 0; i < cfg->num_l1_ents; ++i) {
2992                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2993                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2994         }
2995
2996         return 0;
2997 }
2998
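/*
 * For a 2-level stream table only the level-1 descriptor table is
 * allocated here; level-2 tables are populated later as individual
 * stream IDs are added.
 */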
2999 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3000 {
3001         void *strtab;
3002         u64 reg;
3003         u32 size, l1size;
3004         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3005
3006         /* Calculate the L1 size, capped to the SIDSIZE. */
3007         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3008         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3009         cfg->num_l1_ents = 1 << size;
3010
3011         size += STRTAB_SPLIT;
3012         if (size < smmu->sid_bits)
3013                 dev_warn(smmu->dev,
3014                          "2-level strtab only covers %u/%u bits of SID\n",
3015                          size, smmu->sid_bits);
3016
3017         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3018         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3019                                      GFP_KERNEL);
3020         if (!strtab) {
3021                 dev_err(smmu->dev,
3022                         "failed to allocate l1 stream table (%u bytes)\n",
3023                         l1size);
3024                 return -ENOMEM;
3025         }
3026         cfg->strtab = strtab;
3027
3028         /* Configure strtab_base_cfg for 2 levels */
3029         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3030         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3031         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3032         cfg->strtab_base_cfg = reg;
3033
3034         return arm_smmu_init_l1_strtab(smmu);
3035 }
3036
3037 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3038 {
3039         void *strtab;
3040         u64 reg;
3041         u32 size;
3042         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3043
3044         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3045         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3046                                      GFP_KERNEL);
3047         if (!strtab) {
3048                 dev_err(smmu->dev,
3049                         "failed to allocate linear stream table (%u bytes)\n",
3050                         size);
3051                 return -ENOMEM;
3052         }
3053         cfg->strtab = strtab;
3054         cfg->num_l1_ents = 1 << smmu->sid_bits;
3055
3056         /* Configure strtab_base_cfg for a linear table covering all SIDs */
3057         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3058         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3059         cfg->strtab_base_cfg = reg;
3060
3061         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3062         return 0;
3063 }
3064
3065 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3066 {
3067         u64 reg;
3068         int ret;
3069
3070         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3071                 ret = arm_smmu_init_strtab_2lvl(smmu);
3072         else
3073                 ret = arm_smmu_init_strtab_linear(smmu);
3074
3075         if (ret)
3076                 return ret;
3077
3078         /* Set the strtab base address */
3079         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3080         reg |= STRTAB_BASE_RA;
3081         smmu->strtab_cfg.strtab_base = reg;
3082
3083         /* Allocate the first VMID for stage-2 bypass STEs */
3084         set_bit(0, smmu->vmid_map);
3085         return 0;
3086 }
3087
3088 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3089 {
3090         int ret;
3091
3092         mutex_init(&smmu->streams_mutex);
3093         smmu->streams = RB_ROOT;
3094
3095         ret = arm_smmu_init_queues(smmu);
3096         if (ret)
3097                 return ret;
3098
3099         return arm_smmu_init_strtab(smmu);
3100 }
3101
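/*
 * Write a control register and poll its companion ACK register until
 * the new value is reflected, or the poll times out.
 */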
3102 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3103                                    unsigned int reg_off, unsigned int ack_off)
3104 {
3105         u32 reg;
3106
3107         writel_relaxed(val, smmu->base + reg_off);
3108         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3109                                           1, ARM_SMMU_POLL_TIMEOUT_US);
3110 }
3111
3112 /* GBPA is "special": it acks updates via its own UPDATE bit */
3113 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3114 {
3115         int ret;
3116         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3117
3118         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3119                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3120         if (ret)
3121                 return ret;
3122
3123         reg &= ~clr;
3124         reg |= set;
3125         writel_relaxed(reg | GBPA_UPDATE, gbpa);
3126         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3127                                          1, ARM_SMMU_POLL_TIMEOUT_US);
3128
3129         if (ret)
3130                 dev_err(smmu->dev, "GBPA not responding to update\n");
3131         return ret;
3132 }
3133
3134 static void arm_smmu_free_msis(void *data)
3135 {
3136         struct device *dev = data;
3137         platform_msi_domain_free_irqs(dev);
3138 }
3139
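/*
 * MSI write callback: latch the doorbell address, payload and memory
 * attributes into the IRQ_CFG registers of the corresponding interrupt
 * source (evtq, gerror or priq).
 */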
3140 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3141 {
3142         phys_addr_t doorbell;
3143         struct device *dev = msi_desc_to_dev(desc);
3144         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3145         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3146
3147         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3148         doorbell &= MSI_CFG0_ADDR_MASK;
3149
3150         writeq_relaxed(doorbell, smmu->base + cfg[0]);
3151         writel_relaxed(msg->data, smmu->base + cfg[1]);
3152         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3153 }
3154
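/*
 * When the SMMU supports MSIs and an MSI domain is present, allocate
 * platform MSIs for the event queue, global errors and (if implemented)
 * the PRI queue, recording the resulting IRQ numbers; otherwise fall
 * back to wired interrupts.
 */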
3155 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3156 {
3157         struct msi_desc *desc;
3158         int ret, nvec = ARM_SMMU_MAX_MSIS;
3159         struct device *dev = smmu->dev;
3160
3161         /* Clear the MSI address regs */
3162         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3163         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3164
3165         if (smmu->features & ARM_SMMU_FEAT_PRI)
3166                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3167         else
3168                 nvec--;
3169
3170         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3171                 return;
3172
3173         if (!dev->msi.domain) {
3174                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3175                 return;
3176         }
3177
3178         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3179         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3180         if (ret) {
3181                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3182                 return;
3183         }
3184
3185         for_each_msi_entry(desc, dev) {
3186                 switch (desc->platform.msi_index) {
3187                 case EVTQ_MSI_INDEX:
3188                         smmu->evtq.q.irq = desc->irq;
3189                         break;
3190                 case GERROR_MSI_INDEX:
3191                         smmu->gerr_irq = desc->irq;
3192                         break;
3193                 case PRIQ_MSI_INDEX:
3194                         smmu->priq.q.irq = desc->irq;
3195                         break;
3196                 default:        /* Unknown */
3197                         continue;
3198                 }
3199         }
3200
3201         /* Add callback to free MSIs on teardown */
3202         devm_add_action(dev, arm_smmu_free_msis, dev);
3203 }
3204
3205 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3206 {
3207         int irq, ret;
3208
3209         arm_smmu_setup_msis(smmu);
3210
3211         /* Request interrupt lines */
3212         irq = smmu->evtq.q.irq;
3213         if (irq) {
3214                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3215                                                 arm_smmu_evtq_thread,
3216                                                 IRQF_ONESHOT,
3217                                                 "arm-smmu-v3-evtq", smmu);
3218                 if (ret < 0)
3219                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
3220         } else {
3221                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3222         }
3223
3224         irq = smmu->gerr_irq;
3225         if (irq) {
3226                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3227                                        0, "arm-smmu-v3-gerror", smmu);
3228                 if (ret < 0)
3229                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
3230         } else {
3231                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3232         }
3233
3234         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3235                 irq = smmu->priq.q.irq;
3236                 if (irq) {
3237                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3238                                                         arm_smmu_priq_thread,
3239                                                         IRQF_ONESHOT,
3240                                                         "arm-smmu-v3-priq",
3241                                                         smmu);
3242                         if (ret < 0)
3243                                 dev_warn(smmu->dev,
3244                                          "failed to enable priq irq\n");
3245                 } else {
3246                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3247                 }
3248         }
3249 }
3250
3251 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3252 {
3253         int ret, irq;
3254         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3255
3256         /* Disable IRQs first */
3257         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3258                                       ARM_SMMU_IRQ_CTRLACK);
3259         if (ret) {
3260                 dev_err(smmu->dev, "failed to disable irqs\n");
3261                 return ret;
3262         }
3263
3264         irq = smmu->combined_irq;
3265         if (irq) {
3266                 /*
3267                  * Cavium ThunderX2 implementation doesn't support unique irq
3268                  * lines. Use a single irq line for all the SMMUv3 interrupts.
3269                  */
3270                 ret = devm_request_threaded_irq(smmu->dev, irq,
3271                                         arm_smmu_combined_irq_handler,
3272                                         arm_smmu_combined_irq_thread,
3273                                         IRQF_ONESHOT,
3274                                         "arm-smmu-v3-combined-irq", smmu);
3275                 if (ret < 0)
3276                         dev_warn(smmu->dev, "failed to enable combined irq\n");
3277         } else
3278                 arm_smmu_setup_unique_irqs(smmu);
3279
3280         if (smmu->features & ARM_SMMU_FEAT_PRI)
3281                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3282
3283         /* Enable interrupt generation on the SMMU */
3284         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3285                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3286         if (ret)
3287                 dev_warn(smmu->dev, "failed to enable irqs\n");
3288
3289         return 0;
3290 }
3291
3292 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3293 {
3294         int ret;
3295
3296         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3297         if (ret)
3298                 dev_err(smmu->dev, "failed to clear cr0\n");
3299
3300         return ret;
3301 }
3302
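/*
 * Full bring-up sequence: disable the SMMU, program the table and queue
 * attributes, stream table and queue base registers, invalidate cached
 * configuration and TLBs, enable the queues one by one, set up
 * interrupts, and finally enable translation or bypass.
 */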
3303 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3304 {
3305         int ret;
3306         u32 reg, enables;
3307         struct arm_smmu_cmdq_ent cmd;
3308
3309         /* Clear CR0 and sync (disables SMMU and queue processing) */
3310         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3311         if (reg & CR0_SMMUEN) {
3312                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3313                 WARN_ON(is_kdump_kernel() && !disable_bypass);
3314                 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3315         }
3316
3317         ret = arm_smmu_device_disable(smmu);
3318         if (ret)
3319                 return ret;
3320
3321         /* CR1 (table and queue memory attributes) */
3322         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3323               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3324               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3325               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3326               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3327               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3328         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3329
3330         /* CR2 (private TLB maintenance, invalid StreamID recording, E2H) */
3331         reg = CR2_PTM | CR2_RECINVSID;
3332
3333         if (smmu->features & ARM_SMMU_FEAT_E2H)
3334                 reg |= CR2_E2H;
3335
3336         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3337
3338         /* Stream table */
3339         writeq_relaxed(smmu->strtab_cfg.strtab_base,
3340                        smmu->base + ARM_SMMU_STRTAB_BASE);
3341         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3342                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3343
3344         /* Command queue */
3345         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3346         writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3347         writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3348
3349         enables = CR0_CMDQEN;
3350         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3351                                       ARM_SMMU_CR0ACK);
3352         if (ret) {
3353                 dev_err(smmu->dev, "failed to enable command queue\n");
3354                 return ret;
3355         }
3356
3357         /* Invalidate any cached configuration */
3358         cmd.opcode = CMDQ_OP_CFGI_ALL;
3359         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3360
3361         /* Invalidate any stale TLB entries */
3362         if (smmu->features & ARM_SMMU_FEAT_HYP) {
3363                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3364                 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3365         }
3366
3367         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3368         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3369
3370         /* Event queue */
3371         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3372         writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3373         writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3374
3375         enables |= CR0_EVTQEN;
3376         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3377                                       ARM_SMMU_CR0ACK);
3378         if (ret) {
3379                 dev_err(smmu->dev, "failed to enable event queue\n");
3380                 return ret;
3381         }
3382
3383         /* PRI queue */
3384         if (smmu->features & ARM_SMMU_FEAT_PRI) {
3385                 writeq_relaxed(smmu->priq.q.q_base,
3386                                smmu->base + ARM_SMMU_PRIQ_BASE);
3387                 writel_relaxed(smmu->priq.q.llq.prod,
3388                                smmu->page1 + ARM_SMMU_PRIQ_PROD);
3389                 writel_relaxed(smmu->priq.q.llq.cons,
3390                                smmu->page1 + ARM_SMMU_PRIQ_CONS);
3391
3392                 enables |= CR0_PRIQEN;
3393                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3394                                               ARM_SMMU_CR0ACK);
3395                 if (ret) {
3396                         dev_err(smmu->dev, "failed to enable PRI queue\n");
3397                         return ret;
3398                 }
3399         }
3400
3401         if (smmu->features & ARM_SMMU_FEAT_ATS) {
3402                 enables |= CR0_ATSCHK;
3403                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3404                                               ARM_SMMU_CR0ACK);
3405                 if (ret) {
3406                         dev_err(smmu->dev, "failed to enable ATS check\n");
3407                         return ret;
3408                 }
3409         }
3410
3411         ret = arm_smmu_setup_irqs(smmu);
3412         if (ret) {
3413                 dev_err(smmu->dev, "failed to setup irqs\n");
3414                 return ret;
3415         }
3416
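        /*
         * In a kdump kernel, devices programmed by the crashed kernel may
         * still be generating traffic; keep the event and PRI queues
         * disabled so their faults need not be serviced.
         */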
3417         if (is_kdump_kernel())
3418                 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3419
3420         /* Enable the SMMU interface, or ensure bypass */
3421         if (!bypass || disable_bypass) {
3422                 enables |= CR0_SMMUEN;
3423         } else {
3424                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3425                 if (ret)
3426                         return ret;
3427         }
3428         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3429                                       ARM_SMMU_CR0ACK);
3430         if (ret) {
3431                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3432                 return ret;
3433         }
3434
3435         return 0;
3436 }
3437
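/*
 * Read the IDR registers to discover the hardware's capabilities: queue
 * sizes, SID/SSID widths, supported page sizes, input/output address
 * sizes and the feature flags derived from them.
 */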
3438 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3439 {
3440         u32 reg;
3441         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3442
3443         /* IDR0 */
3444         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3445
3446         /* 2-level structures */
3447         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3448                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3449
3450         if (reg & IDR0_CD2L)
3451                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3452
3453         /*
3454          * Translation table endianness.
3455          * We currently require the same endianness as the CPU, but this
3456          * could be changed later by adding a new IO_PGTABLE_QUIRK.
3457          */
3458         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3459         case IDR0_TTENDIAN_MIXED:
3460                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3461                 break;
3462 #ifdef __BIG_ENDIAN
3463         case IDR0_TTENDIAN_BE:
3464                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3465                 break;
3466 #else
3467         case IDR0_TTENDIAN_LE:
3468                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3469                 break;
3470 #endif
3471         default:
3472                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3473                 return -ENXIO;
3474         }
3475
3476         /* Boolean feature flags */
3477         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3478                 smmu->features |= ARM_SMMU_FEAT_PRI;
3479
3480         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3481                 smmu->features |= ARM_SMMU_FEAT_ATS;
3482
3483         if (reg & IDR0_SEV)
3484                 smmu->features |= ARM_SMMU_FEAT_SEV;
3485
3486         if (reg & IDR0_MSI) {
3487                 smmu->features |= ARM_SMMU_FEAT_MSI;
3488                 if (coherent && !disable_msipolling)
3489                         smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3490         }
3491
3492         if (reg & IDR0_HYP) {
3493                 smmu->features |= ARM_SMMU_FEAT_HYP;
3494                 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3495                         smmu->features |= ARM_SMMU_FEAT_E2H;
3496         }
3497
3498         /*
3499          * The coherency feature as set by FW is used in preference to the ID
3500          * register, but warn on mismatch.
3501          */
3502         if (!!(reg & IDR0_COHACC) != coherent)
3503                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3504                          coherent ? "true" : "false");
3505
3506         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3507         case IDR0_STALL_MODEL_FORCE:
3508                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3509                 fallthrough;
3510         case IDR0_STALL_MODEL_STALL:
3511                 smmu->features |= ARM_SMMU_FEAT_STALLS;
3512         }
3513
3514         if (reg & IDR0_S1P)
3515                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3516
3517         if (reg & IDR0_S2P)
3518                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3519
3520         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3521                 dev_err(smmu->dev, "no translation support!\n");
3522                 return -ENXIO;
3523         }
3524
3525         /* We only support the AArch64 table format at present */
3526         switch (FIELD_GET(IDR0_TTF, reg)) {
3527         case IDR0_TTF_AARCH32_64:
3528                 smmu->ias = 40;
3529                 fallthrough;
3530         case IDR0_TTF_AARCH64:
3531                 break;
3532         default:
3533                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3534                 return -ENXIO;
3535         }
3536
3537         /* ASID/VMID sizes */
3538         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3539         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3540
3541         /* IDR1 */
3542         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3543         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3544                 dev_err(smmu->dev, "embedded implementation not supported\n");
3545                 return -ENXIO;
3546         }
3547
3548         /* Queue sizes, capped to ensure natural alignment */
3549         smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3550                                              FIELD_GET(IDR1_CMDQS, reg));
3551         if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3552                 /*
3553                  * We don't support splitting up batches, so one batch of
3554                  * commands plus an extra sync needs to fit inside the command
3555                  * queue. There's also no way we can handle the weird alignment
3556                  * restrictions on the base pointer for a unit-length queue.
3557                  */
3558                 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3559                         CMDQ_BATCH_ENTRIES);
3560                 return -ENXIO;
3561         }
3562
3563         smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3564                                              FIELD_GET(IDR1_EVTQS, reg));
3565         smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3566                                              FIELD_GET(IDR1_PRIQS, reg));
3567
3568         /* SID/SSID sizes */
3569         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3570         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3571
3572         /*
3573          * If the SMMU supports fewer bits than would fill a single L2 stream
3574          * table, use a linear table instead.
3575          */
3576         if (smmu->sid_bits <= STRTAB_SPLIT)
3577                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3578
3579         /* IDR3 */
3580         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3581         if (FIELD_GET(IDR3_RIL, reg))
3582                 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3583
3584         /* IDR5 */
3585         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3586
3587         /* Maximum number of outstanding stalls */
3588         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3589
3590         /* Page sizes */
3591         if (reg & IDR5_GRAN64K)
3592                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3593         if (reg & IDR5_GRAN16K)
3594                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3595         if (reg & IDR5_GRAN4K)
3596                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3597
3598         /* Input address size */
3599         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3600                 smmu->features |= ARM_SMMU_FEAT_VAX;
3601
3602         /* Output address size */
3603         switch (FIELD_GET(IDR5_OAS, reg)) {
3604         case IDR5_OAS_32_BIT:
3605                 smmu->oas = 32;
3606                 break;
3607         case IDR5_OAS_36_BIT:
3608                 smmu->oas = 36;
3609                 break;
3610         case IDR5_OAS_40_BIT:
3611                 smmu->oas = 40;
3612                 break;
3613         case IDR5_OAS_42_BIT:
3614                 smmu->oas = 42;
3615                 break;
3616         case IDR5_OAS_44_BIT:
3617                 smmu->oas = 44;
3618                 break;
3619         case IDR5_OAS_52_BIT:
3620                 smmu->oas = 52;
3621                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3622                 break;
3623         default:
3624                 dev_info(smmu->dev,
3625                         "unknown output address size. Truncating to 48-bit\n");
3626                 fallthrough;
3627         case IDR5_OAS_48_BIT:
3628                 smmu->oas = 48;
3629         }
3630
3631         if (arm_smmu_ops.pgsize_bitmap == -1UL)
3632                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3633         else
3634                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3635
3636         /* Set the DMA mask for our table walker */
3637         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3638                 dev_warn(smmu->dev,
3639                          "failed to set DMA mask for table walker\n");
3640
3641         smmu->ias = max(smmu->ias, smmu->oas);
3642
3643         if (arm_smmu_sva_supported(smmu))
3644                 smmu->features |= ARM_SMMU_FEAT_SVA;
3645
3646         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3647                  smmu->ias, smmu->oas, smmu->features);
3648         return 0;
3649 }
3650
3651 #ifdef CONFIG_ACPI
3652 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3653 {
3654         switch (model) {
3655         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3656                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3657                 break;
3658         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3659                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3660                 break;
3661         }
3662
3663         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3664 }
3665
3666 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3667                                       struct arm_smmu_device *smmu)
3668 {
3669         struct acpi_iort_smmu_v3 *iort_smmu;
3670         struct device *dev = smmu->dev;
3671         struct acpi_iort_node *node;
3672
3673         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3674
3675         /* Retrieve SMMUv3 specific data */
3676         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3677
3678         acpi_smmu_get_options(iort_smmu->model, smmu);
3679
3680         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3681                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3682
3683         return 0;
3684 }
3685 #else
3686 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3687                                              struct arm_smmu_device *smmu)
3688 {
3689         return -ENODEV;
3690 }
3691 #endif
3692
3693 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3694                                     struct arm_smmu_device *smmu)
3695 {
3696         struct device *dev = &pdev->dev;
3697         u32 cells;
3698         int ret = -EINVAL;
3699
3700         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3701                 dev_err(dev, "missing #iommu-cells property\n");
3702         else if (cells != 1)
3703                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3704         else
3705                 ret = 0;
3706
3707         parse_driver_options(smmu);
3708
3709         if (of_dma_is_coherent(dev->of_node))
3710                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3711
3712         return ret;
3713 }
3714
3715 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3716 {
3717         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3718                 return SZ_64K;
3719         else
3720                 return SZ_128K;
3721 }
3722
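/*
 * Install ops as the IOMMU ops for the PCI, AMBA and platform bus types
 * (or remove them when ops is NULL), unwinding earlier registrations on
 * failure.
 */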
3723 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3724 {
3725         int err;
3726
3727 #ifdef CONFIG_PCI
3728         if (pci_bus_type.iommu_ops != ops) {
3729                 err = bus_set_iommu(&pci_bus_type, ops);
3730                 if (err)
3731                         return err;
3732         }
3733 #endif
3734 #ifdef CONFIG_ARM_AMBA
3735         if (amba_bustype.iommu_ops != ops) {
3736                 err = bus_set_iommu(&amba_bustype, ops);
3737                 if (err)
3738                         goto err_reset_pci_ops;
3739         }
3740 #endif
3741         if (platform_bus_type.iommu_ops != ops) {
3742                 err = bus_set_iommu(&platform_bus_type, ops);
3743                 if (err)
3744                         goto err_reset_amba_ops;
3745         }
3746
3747         return 0;
3748
3749 err_reset_amba_ops:
3750 #ifdef CONFIG_ARM_AMBA
3751         bus_set_iommu(&amba_bustype, NULL);
3752 #endif
3753 err_reset_pci_ops: __maybe_unused;
3754 #ifdef CONFIG_PCI
3755         bus_set_iommu(&pci_bus_type, NULL);
3756 #endif
3757         return err;
3758 }
3759
3760 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3761                                       resource_size_t size)
3762 {
3763         struct resource res = DEFINE_RES_MEM(start, size);
3764
3765         return devm_ioremap_resource(dev, &res);
3766 }
3767
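/*
 * Platform probe: parse firmware configuration (devicetree or ACPI/IORT),
 * map the register pages, probe the hardware features, initialise the
 * in-memory structures, reset the device and register with the IOMMU
 * core and the relevant bus types.
 */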
3768 static int arm_smmu_device_probe(struct platform_device *pdev)
3769 {
3770         int irq, ret;
3771         struct resource *res;
3772         resource_size_t ioaddr;
3773         struct arm_smmu_device *smmu;
3774         struct device *dev = &pdev->dev;
3775         bool bypass;
3776
3777         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3778         if (!smmu)
3779                 return -ENOMEM;
3780         smmu->dev = dev;
3781
3782         if (dev->of_node) {
3783                 ret = arm_smmu_device_dt_probe(pdev, smmu);
3784         } else {
3785                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3786                 if (ret == -ENODEV)
3787                         return ret;
3788         }
3789
3790         /* Set bypass mode according to firmware probing result */
3791         bypass = !!ret;
3792
3793         /* Base address */
3794         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3795         if (!res || resource_size(res) < arm_smmu_resource_size(smmu)) {
3796                 dev_err(dev, "MMIO region too small (%pr)\n", res);
3797                 return -EINVAL;
3798         }
3799         ioaddr = res->start;
3800
3801         /*
3802          * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3803          * the PMCG registers which are reserved by the PMU driver.
3804          */
3805         smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3806         if (IS_ERR(smmu->base))
3807                 return PTR_ERR(smmu->base);
3808
3809         if (arm_smmu_resource_size(smmu) > SZ_64K) {
3810                 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3811                                                ARM_SMMU_REG_SZ);
3812                 if (IS_ERR(smmu->page1))
3813                         return PTR_ERR(smmu->page1);
3814         } else {
3815                 smmu->page1 = smmu->base;
3816         }
3817
3818         /* Interrupt lines */
3819
3820         irq = platform_get_irq_byname_optional(pdev, "combined");
3821         if (irq > 0)
3822                 smmu->combined_irq = irq;
3823         else {
3824                 irq = platform_get_irq_byname_optional(pdev, "eventq");
3825                 if (irq > 0)
3826                         smmu->evtq.q.irq = irq;
3827
3828                 irq = platform_get_irq_byname_optional(pdev, "priq");
3829                 if (irq > 0)
3830                         smmu->priq.q.irq = irq;
3831
3832                 irq = platform_get_irq_byname_optional(pdev, "gerror");
3833                 if (irq > 0)
3834                         smmu->gerr_irq = irq;
3835         }
3836         /* Probe the h/w */
3837         ret = arm_smmu_device_hw_probe(smmu);
3838         if (ret)
3839                 return ret;
3840
3841         /* Initialise in-memory data structures */
3842         ret = arm_smmu_init_structures(smmu);
3843         if (ret)
3844                 return ret;
3845
3846         /* Record our private device structure */
3847         platform_set_drvdata(pdev, smmu);
3848
3849         /* Reset the device */
3850         ret = arm_smmu_device_reset(smmu, bypass);
3851         if (ret)
3852                 return ret;
3853
3854         /* And we're up. Go go go! */
3855         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3856                                      "smmu3.%pa", &ioaddr);
3857         if (ret)
3858                 return ret;
3859
3860         ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3861         if (ret) {
3862                 dev_err(dev, "Failed to register iommu\n");
3863                 goto err_sysfs_remove;
3864         }
3865
3866         ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3867         if (ret)
3868                 goto err_unregister_device;
3869
3870         return 0;
3871
3872 err_unregister_device:
3873         iommu_device_unregister(&smmu->iommu);
3874 err_sysfs_remove:
3875         iommu_device_sysfs_remove(&smmu->iommu);
3876         return ret;
3877 }
3878
3879 static int arm_smmu_device_remove(struct platform_device *pdev)
3880 {
3881         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3882
3883         arm_smmu_set_bus_ops(NULL);
3884         iommu_device_unregister(&smmu->iommu);
3885         iommu_device_sysfs_remove(&smmu->iommu);
3886         arm_smmu_device_disable(smmu);
3887         iopf_queue_free(smmu->evtq.iopf);
3888
3889         return 0;
3890 }
3891
3892 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3893 {
3894         arm_smmu_device_remove(pdev);
3895 }
3896
3897 static const struct of_device_id arm_smmu_of_match[] = {
3898         { .compatible = "arm,smmu-v3", },
3899         { },
3900 };
3901 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3902
3903 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3904 {
3905         arm_smmu_sva_notifier_synchronize();
3906         platform_driver_unregister(drv);
3907 }
3908
3909 static struct platform_driver arm_smmu_driver = {
3910         .driver = {
3911                 .name                   = "arm-smmu-v3",
3912                 .of_match_table         = arm_smmu_of_match,
3913                 .suppress_bind_attrs    = true,
3914         },
3915         .probe  = arm_smmu_device_probe,
3916         .remove = arm_smmu_device_remove,
3917         .shutdown = arm_smmu_device_shutdown,
3918 };
3919 module_driver(arm_smmu_driver, platform_driver_register,
3920               arm_smmu_driver_unregister);
3921
3922 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3923 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3924 MODULE_ALIAS("platform:arm-smmu-v3");
3925 MODULE_LICENSE("GPL v2");