drivers/iommu/arm-smmu-v3.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/iopoll.h>
24 #include <linux/init.h>
25 #include <linux/moduleparam.h>
26 #include <linux/msi.h>
27 #include <linux/of.h>
28 #include <linux/of_address.h>
29 #include <linux/of_iommu.h>
30 #include <linux/of_platform.h>
31 #include <linux/pci.h>
32 #include <linux/platform_device.h>
33
34 #include <linux/amba/bus.h>
35
36 /* MMIO registers */
37 #define ARM_SMMU_IDR0                   0x0
38 #define IDR0_ST_LVL                     GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL                1
40 #define IDR0_STALL_MODEL                GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL          0
42 #define IDR0_STALL_MODEL_FORCE          2
43 #define IDR0_TTENDIAN                   GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED             0
45 #define IDR0_TTENDIAN_LE                2
46 #define IDR0_TTENDIAN_BE                3
47 #define IDR0_CD2L                       (1 << 19)
48 #define IDR0_VMID16                     (1 << 18)
49 #define IDR0_PRI                        (1 << 16)
50 #define IDR0_SEV                        (1 << 14)
51 #define IDR0_MSI                        (1 << 13)
52 #define IDR0_ASID16                     (1 << 12)
53 #define IDR0_ATS                        (1 << 10)
54 #define IDR0_HYP                        (1 << 9)
55 #define IDR0_COHACC                     (1 << 4)
56 #define IDR0_TTF                        GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64                2
58 #define IDR0_TTF_AARCH32_64             3
59 #define IDR0_S1P                        (1 << 1)
60 #define IDR0_S2P                        (1 << 0)
61
62 #define ARM_SMMU_IDR1                   0x4
63 #define IDR1_TABLES_PRESET              (1 << 30)
64 #define IDR1_QUEUES_PRESET              (1 << 29)
65 #define IDR1_REL                        (1 << 28)
66 #define IDR1_CMDQS                      GENMASK(25, 21)
67 #define IDR1_EVTQS                      GENMASK(20, 16)
68 #define IDR1_PRIQS                      GENMASK(15, 11)
69 #define IDR1_SSIDSIZE                   GENMASK(10, 6)
70 #define IDR1_SIDSIZE                    GENMASK(5, 0)
71
72 #define ARM_SMMU_IDR5                   0x14
73 #define IDR5_STALL_MAX                  GENMASK(31, 16)
74 #define IDR5_GRAN64K                    (1 << 6)
75 #define IDR5_GRAN16K                    (1 << 5)
76 #define IDR5_GRAN4K                     (1 << 4)
77 #define IDR5_OAS                        GENMASK(2, 0)
78 #define IDR5_OAS_32_BIT                 0
79 #define IDR5_OAS_36_BIT                 1
80 #define IDR5_OAS_40_BIT                 2
81 #define IDR5_OAS_42_BIT                 3
82 #define IDR5_OAS_44_BIT                 4
83 #define IDR5_OAS_48_BIT                 5
84 #define IDR5_OAS_52_BIT                 6
85 #define IDR5_VAX                        GENMASK(11, 10)
86 #define IDR5_VAX_52_BIT                 1
87
88 #define ARM_SMMU_CR0                    0x20
89 #define CR0_CMDQEN                      (1 << 3)
90 #define CR0_EVTQEN                      (1 << 2)
91 #define CR0_PRIQEN                      (1 << 1)
92 #define CR0_SMMUEN                      (1 << 0)
93
94 #define ARM_SMMU_CR0ACK                 0x24
95
96 #define ARM_SMMU_CR1                    0x28
97 #define CR1_TABLE_SH                    GENMASK(11, 10)
98 #define CR1_TABLE_OC                    GENMASK(9, 8)
99 #define CR1_TABLE_IC                    GENMASK(7, 6)
100 #define CR1_QUEUE_SH                    GENMASK(5, 4)
101 #define CR1_QUEUE_OC                    GENMASK(3, 2)
102 #define CR1_QUEUE_IC                    GENMASK(1, 0)
103 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
104 #define CR1_CACHE_NC                    0
105 #define CR1_CACHE_WB                    1
106 #define CR1_CACHE_WT                    2
107
108 #define ARM_SMMU_CR2                    0x2c
109 #define CR2_PTM                         (1 << 2)
110 #define CR2_RECINVSID                   (1 << 1)
111 #define CR2_E2H                         (1 << 0)
112
113 #define ARM_SMMU_GBPA                   0x44
114 #define GBPA_UPDATE                     (1 << 31)
115 #define GBPA_ABORT                      (1 << 20)
116
117 #define ARM_SMMU_IRQ_CTRL               0x50
118 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
119 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
120 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
121
122 #define ARM_SMMU_IRQ_CTRLACK            0x54
123
124 #define ARM_SMMU_GERROR                 0x60
125 #define GERROR_SFM_ERR                  (1 << 8)
126 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
127 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
128 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
129 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
130 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
131 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
132 #define GERROR_CMDQ_ERR                 (1 << 0)
133 #define GERROR_ERR_MASK                 0xfd
134
135 #define ARM_SMMU_GERRORN                0x64
136
137 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
138 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
139 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
140
141 #define ARM_SMMU_STRTAB_BASE            0x80
142 #define STRTAB_BASE_RA                  (1UL << 62)
143 #define STRTAB_BASE_ADDR_MASK           GENMASK_ULL(51, 6)
144
145 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
146 #define STRTAB_BASE_CFG_FMT             GENMASK(17, 16)
147 #define STRTAB_BASE_CFG_FMT_LINEAR      0
148 #define STRTAB_BASE_CFG_FMT_2LVL        1
149 #define STRTAB_BASE_CFG_SPLIT           GENMASK(10, 6)
150 #define STRTAB_BASE_CFG_LOG2SIZE        GENMASK(5, 0)
151
152 #define ARM_SMMU_CMDQ_BASE              0x90
153 #define ARM_SMMU_CMDQ_PROD              0x98
154 #define ARM_SMMU_CMDQ_CONS              0x9c
155
156 #define ARM_SMMU_EVTQ_BASE              0xa0
157 #define ARM_SMMU_EVTQ_PROD              0x100a8
158 #define ARM_SMMU_EVTQ_CONS              0x100ac
159 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
160 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
161 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
162
163 #define ARM_SMMU_PRIQ_BASE              0xc0
164 #define ARM_SMMU_PRIQ_PROD              0x100c8
165 #define ARM_SMMU_PRIQ_CONS              0x100cc
166 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
167 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
168 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
169
170 /* Common MSI config fields */
171 #define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
172 #define MSI_CFG2_SH                     GENMASK(5, 4)
173 #define MSI_CFG2_MEMATTR                GENMASK(3, 0)
174
175 /* Common memory attribute values */
176 #define ARM_SMMU_SH_NSH                 0
177 #define ARM_SMMU_SH_OSH                 2
178 #define ARM_SMMU_SH_ISH                 3
179 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE   0x1
180 #define ARM_SMMU_MEMATTR_OIWB           0xf
181
182 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
183 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
184 #define Q_OVERFLOW_FLAG                 (1 << 31)
185 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
186 #define Q_ENT(q, p)                     ((q)->base +                    \
187                                          Q_IDX(q, p) * (q)->ent_dwords)
188
189 #define Q_BASE_RWA                      (1UL << 62)
190 #define Q_BASE_ADDR_MASK                GENMASK_ULL(51, 5)
191 #define Q_BASE_LOG2SIZE                 GENMASK(4, 0)
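/*
 * Queue producer/consumer pointers pack an index into the low max_n_shift
 * bits, a single wrap bit immediately above it, and the overflow flag at
 * bit 31. The wrap bit is what distinguishes "full" from "empty" when the
 * indexes match: equal indexes with equal wrap bits means empty, equal
 * indexes with differing wrap bits means full (see queue_full() and
 * queue_empty() below). As an illustration, for a queue of 2^3 entries,
 * prod == 0x9 and cons == 0x1 both point at slot 1, but the differing
 * wrap bits indicate that the queue is full.
 */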
192
193 /*
194  * Stream table.
195  *
196  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
197  * 2lvl: 128k L1 entries,
198  *       256 lazy entries per table (each table covers a PCI bus)
199  */
200 #define STRTAB_L1_SZ_SHIFT              20
201 #define STRTAB_SPLIT                    8
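/*
 * With STRTAB_SPLIT == 8, SID[7:0] indexes a second-level table of 256
 * STEs (one PCI bus worth of RequesterIDs) and the remaining SID bits
 * index the first-level descriptor array, which is why the L2 tables can
 * be allocated lazily per bus in arm_smmu_init_l2_strtab().
 */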
202
203 #define STRTAB_L1_DESC_DWORDS           1
204 #define STRTAB_L1_DESC_SPAN             GENMASK_ULL(4, 0)
205 #define STRTAB_L1_DESC_L2PTR_MASK       GENMASK_ULL(51, 6)
206
207 #define STRTAB_STE_DWORDS               8
208 #define STRTAB_STE_0_V                  (1UL << 0)
209 #define STRTAB_STE_0_CFG                GENMASK_ULL(3, 1)
210 #define STRTAB_STE_0_CFG_ABORT          0
211 #define STRTAB_STE_0_CFG_BYPASS         4
212 #define STRTAB_STE_0_CFG_S1_TRANS       5
213 #define STRTAB_STE_0_CFG_S2_TRANS       6
214
215 #define STRTAB_STE_0_S1FMT              GENMASK_ULL(5, 4)
216 #define STRTAB_STE_0_S1FMT_LINEAR       0
217 #define STRTAB_STE_0_S1CTXPTR_MASK      GENMASK_ULL(51, 6)
218 #define STRTAB_STE_0_S1CDMAX            GENMASK_ULL(63, 59)
219
220 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
221 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
222 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
223 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
224 #define STRTAB_STE_1_S1CIR              GENMASK_ULL(3, 2)
225 #define STRTAB_STE_1_S1COR              GENMASK_ULL(5, 4)
226 #define STRTAB_STE_1_S1CSH              GENMASK_ULL(7, 6)
227
228 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
229
230 #define STRTAB_STE_1_EATS               GENMASK_ULL(29, 28)
231 #define STRTAB_STE_1_EATS_ABT           0UL
232 #define STRTAB_STE_1_EATS_TRANS         1UL
233 #define STRTAB_STE_1_EATS_S1CHK         2UL
234
235 #define STRTAB_STE_1_STRW               GENMASK_ULL(31, 30)
236 #define STRTAB_STE_1_STRW_NSEL1         0UL
237 #define STRTAB_STE_1_STRW_EL2           2UL
238
239 #define STRTAB_STE_1_SHCFG              GENMASK_ULL(45, 44)
240 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
241
242 #define STRTAB_STE_2_S2VMID             GENMASK_ULL(15, 0)
243 #define STRTAB_STE_2_VTCR               GENMASK_ULL(50, 32)
244 #define STRTAB_STE_2_S2AA64             (1UL << 51)
245 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
246 #define STRTAB_STE_2_S2PTW              (1UL << 54)
247 #define STRTAB_STE_2_S2R                (1UL << 58)
248
249 #define STRTAB_STE_3_S2TTB_MASK         GENMASK_ULL(51, 4)
250
251 /* Context descriptor (stage-1 only) */
252 #define CTXDESC_CD_DWORDS               8
253 #define CTXDESC_CD_0_TCR_T0SZ           GENMASK_ULL(5, 0)
254 #define ARM64_TCR_T0SZ                  GENMASK_ULL(5, 0)
255 #define CTXDESC_CD_0_TCR_TG0            GENMASK_ULL(7, 6)
256 #define ARM64_TCR_TG0                   GENMASK_ULL(15, 14)
257 #define CTXDESC_CD_0_TCR_IRGN0          GENMASK_ULL(9, 8)
258 #define ARM64_TCR_IRGN0                 GENMASK_ULL(9, 8)
259 #define CTXDESC_CD_0_TCR_ORGN0          GENMASK_ULL(11, 10)
260 #define ARM64_TCR_ORGN0                 GENMASK_ULL(11, 10)
261 #define CTXDESC_CD_0_TCR_SH0            GENMASK_ULL(13, 12)
262 #define ARM64_TCR_SH0                   GENMASK_ULL(13, 12)
263 #define CTXDESC_CD_0_TCR_EPD0           (1ULL << 14)
264 #define ARM64_TCR_EPD0                  (1ULL << 7)
265 #define CTXDESC_CD_0_TCR_EPD1           (1ULL << 30)
266 #define ARM64_TCR_EPD1                  (1ULL << 23)
267
268 #define CTXDESC_CD_0_ENDI               (1UL << 15)
269 #define CTXDESC_CD_0_V                  (1UL << 31)
270
271 #define CTXDESC_CD_0_TCR_IPS            GENMASK_ULL(34, 32)
272 #define ARM64_TCR_IPS                   GENMASK_ULL(34, 32)
273 #define CTXDESC_CD_0_TCR_TBI0           (1ULL << 38)
274 #define ARM64_TCR_TBI0                  (1ULL << 37)
275
276 #define CTXDESC_CD_0_AA64               (1UL << 41)
277 #define CTXDESC_CD_0_S                  (1UL << 44)
278 #define CTXDESC_CD_0_R                  (1UL << 45)
279 #define CTXDESC_CD_0_A                  (1UL << 46)
280 #define CTXDESC_CD_0_ASET               (1UL << 47)
281 #define CTXDESC_CD_0_ASID               GENMASK_ULL(63, 48)
282
283 #define CTXDESC_CD_1_TTB0_MASK          GENMASK_ULL(51, 4)
284
285 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
286 #define ARM_SMMU_TCR2CD(tcr, fld)       FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
287                                         FIELD_GET(ARM64_TCR_##fld, tcr))
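/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) pulls TG0 out of bits [15:14] of
 * the CPU-format TCR value and re-packs it into bits [7:6] of CD word 0,
 * matching the CTXDESC_CD_0_TCR_TG0 / ARM64_TCR_TG0 pairs defined above.
 */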
288
289 /* Command queue */
290 #define CMDQ_ENT_DWORDS                 2
291 #define CMDQ_MAX_SZ_SHIFT               8
292
293 #define CMDQ_CONS_ERR                   GENMASK(30, 24)
294 #define CMDQ_ERR_CERROR_NONE_IDX        0
295 #define CMDQ_ERR_CERROR_ILL_IDX         1
296 #define CMDQ_ERR_CERROR_ABT_IDX         2
297
298 #define CMDQ_0_OP                       GENMASK_ULL(7, 0)
299 #define CMDQ_0_SSV                      (1UL << 11)
300
301 #define CMDQ_PREFETCH_0_SID             GENMASK_ULL(63, 32)
302 #define CMDQ_PREFETCH_1_SIZE            GENMASK_ULL(4, 0)
303 #define CMDQ_PREFETCH_1_ADDR_MASK       GENMASK_ULL(63, 12)
304
305 #define CMDQ_CFGI_0_SID                 GENMASK_ULL(63, 32)
306 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
307 #define CMDQ_CFGI_1_RANGE               GENMASK_ULL(4, 0)
308
309 #define CMDQ_TLBI_0_VMID                GENMASK_ULL(47, 32)
310 #define CMDQ_TLBI_0_ASID                GENMASK_ULL(63, 48)
311 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
312 #define CMDQ_TLBI_1_VA_MASK             GENMASK_ULL(63, 12)
313 #define CMDQ_TLBI_1_IPA_MASK            GENMASK_ULL(51, 12)
314
315 #define CMDQ_PRI_0_SSID                 GENMASK_ULL(31, 12)
316 #define CMDQ_PRI_0_SID                  GENMASK_ULL(63, 32)
317 #define CMDQ_PRI_1_GRPID                GENMASK_ULL(8, 0)
318 #define CMDQ_PRI_1_RESP                 GENMASK_ULL(13, 12)
319
320 #define CMDQ_SYNC_0_CS                  GENMASK_ULL(13, 12)
321 #define CMDQ_SYNC_0_CS_NONE             0
322 #define CMDQ_SYNC_0_CS_IRQ              1
323 #define CMDQ_SYNC_0_CS_SEV              2
324 #define CMDQ_SYNC_0_MSH                 GENMASK_ULL(23, 22)
325 #define CMDQ_SYNC_0_MSIATTR             GENMASK_ULL(27, 24)
326 #define CMDQ_SYNC_0_MSIDATA             GENMASK_ULL(63, 32)
327 #define CMDQ_SYNC_1_MSIADDR_MASK        GENMASK_ULL(51, 2)
328
329 /* Event queue */
330 #define EVTQ_ENT_DWORDS                 4
331 #define EVTQ_MAX_SZ_SHIFT               7
332
333 #define EVTQ_0_ID                       GENMASK_ULL(7, 0)
334
335 /* PRI queue */
336 #define PRIQ_ENT_DWORDS                 2
337 #define PRIQ_MAX_SZ_SHIFT               8
338
339 #define PRIQ_0_SID                      GENMASK_ULL(31, 0)
340 #define PRIQ_0_SSID                     GENMASK_ULL(51, 32)
341 #define PRIQ_0_PERM_PRIV                (1UL << 58)
342 #define PRIQ_0_PERM_EXEC                (1UL << 59)
343 #define PRIQ_0_PERM_READ                (1UL << 60)
344 #define PRIQ_0_PERM_WRITE               (1UL << 61)
345 #define PRIQ_0_PRG_LAST                 (1UL << 62)
346 #define PRIQ_0_SSID_V                   (1UL << 63)
347
348 #define PRIQ_1_PRG_IDX                  GENMASK_ULL(8, 0)
349 #define PRIQ_1_ADDR_MASK                GENMASK_ULL(63, 12)
350
351 /* High-level queue structures */
352 #define ARM_SMMU_POLL_TIMEOUT_US        100
353 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US   1000000 /* 1s! */
354 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT   10
355
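/*
 * Software-chosen IOVA window advertised to the IOMMU core as a reserved
 * region for mapping PCI MSI doorbells behind the SMMU.
 */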
356 #define MSI_IOVA_BASE                   0x8000000
357 #define MSI_IOVA_LENGTH                 0x100000
358
359 /*
360  * not really modular, but the easiest way to keep compat with existing
361  * bootargs behaviour is to continue using module_param_named here.
362  */
363 static bool disable_bypass = true;
364 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
365 MODULE_PARM_DESC(disable_bypass,
366         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
367
368 enum pri_resp {
369         PRI_RESP_DENY = 0,
370         PRI_RESP_FAIL = 1,
371         PRI_RESP_SUCC = 2,
372 };
373
374 enum arm_smmu_msi_index {
375         EVTQ_MSI_INDEX,
376         GERROR_MSI_INDEX,
377         PRIQ_MSI_INDEX,
378         ARM_SMMU_MAX_MSIS,
379 };
380
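/*
 * When the SMMU's own interrupts are delivered as MSIs, each source has a
 * trio of registers: CFG0 holds the doorbell address, CFG1 the payload and
 * CFG2 the shareability/memory attributes. This table maps each MSI index
 * to its register offsets so the MSI setup code can program them.
 */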
381 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
382         [EVTQ_MSI_INDEX] = {
383                 ARM_SMMU_EVTQ_IRQ_CFG0,
384                 ARM_SMMU_EVTQ_IRQ_CFG1,
385                 ARM_SMMU_EVTQ_IRQ_CFG2,
386         },
387         [GERROR_MSI_INDEX] = {
388                 ARM_SMMU_GERROR_IRQ_CFG0,
389                 ARM_SMMU_GERROR_IRQ_CFG1,
390                 ARM_SMMU_GERROR_IRQ_CFG2,
391         },
392         [PRIQ_MSI_INDEX] = {
393                 ARM_SMMU_PRIQ_IRQ_CFG0,
394                 ARM_SMMU_PRIQ_IRQ_CFG1,
395                 ARM_SMMU_PRIQ_IRQ_CFG2,
396         },
397 };
398
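/*
 * In-memory representation of a command before it is packed into the two
 * 64-bit words that actually go on the command queue; see
 * arm_smmu_cmdq_build_cmd() for the encoding.
 */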
399 struct arm_smmu_cmdq_ent {
400         /* Common fields */
401         u8                              opcode;
402         bool                            substream_valid;
403
404         /* Command-specific fields */
405         union {
406                 #define CMDQ_OP_PREFETCH_CFG    0x1
407                 struct {
408                         u32                     sid;
409                         u8                      size;
410                         u64                     addr;
411                 } prefetch;
412
413                 #define CMDQ_OP_CFGI_STE        0x3
414                 #define CMDQ_OP_CFGI_ALL        0x4
415                 struct {
416                         u32                     sid;
417                         union {
418                                 bool            leaf;
419                                 u8              span;
420                         };
421                 } cfgi;
422
423                 #define CMDQ_OP_TLBI_NH_ASID    0x11
424                 #define CMDQ_OP_TLBI_NH_VA      0x12
425                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
426                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
427                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
428                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
429                 struct {
430                         u16                     asid;
431                         u16                     vmid;
432                         bool                    leaf;
433                         u64                     addr;
434                 } tlbi;
435
436                 #define CMDQ_OP_PRI_RESP        0x41
437                 struct {
438                         u32                     sid;
439                         u32                     ssid;
440                         u16                     grpid;
441                         enum pri_resp           resp;
442                 } pri;
443
444                 #define CMDQ_OP_CMD_SYNC        0x46
445                 struct {
446                         u32                     msidata;
447                         u64                     msiaddr;
448                 } sync;
449         };
450 };
451
452 struct arm_smmu_queue {
453         int                             irq; /* Wired interrupt */
454
455         __le64                          *base;
456         dma_addr_t                      base_dma;
457         u64                             q_base;
458
459         size_t                          ent_dwords;
460         u32                             max_n_shift;
461         u32                             prod;
462         u32                             cons;
463
464         u32 __iomem                     *prod_reg;
465         u32 __iomem                     *cons_reg;
466 };
467
468 struct arm_smmu_cmdq {
469         struct arm_smmu_queue           q;
470         spinlock_t                      lock;
471 };
472
473 struct arm_smmu_evtq {
474         struct arm_smmu_queue           q;
475         u32                             max_stalls;
476 };
477
478 struct arm_smmu_priq {
479         struct arm_smmu_queue           q;
480 };
481
482 /* High-level stream table and context descriptor structures */
483 struct arm_smmu_strtab_l1_desc {
484         u8                              span;
485
486         __le64                          *l2ptr;
487         dma_addr_t                      l2ptr_dma;
488 };
489
490 struct arm_smmu_s1_cfg {
491         __le64                          *cdptr;
492         dma_addr_t                      cdptr_dma;
493
494         struct arm_smmu_ctx_desc {
495                 u16     asid;
496                 u64     ttbr;
497                 u64     tcr;
498                 u64     mair;
499         }                               cd;
500 };
501
502 struct arm_smmu_s2_cfg {
503         u16                             vmid;
504         u64                             vttbr;
505         u64                             vtcr;
506 };
507
508 struct arm_smmu_strtab_ent {
509         /*
510          * An STE is "assigned" if the master emitting the corresponding SID
511          * is attached to a domain. The behaviour of an unassigned STE is
512          * determined by the disable_bypass parameter, whereas an assigned
513          * STE behaves according to s1_cfg/s2_cfg, which themselves are
514          * configured according to the domain type.
515          */
516         bool                            assigned;
517         struct arm_smmu_s1_cfg          *s1_cfg;
518         struct arm_smmu_s2_cfg          *s2_cfg;
519 };
520
521 struct arm_smmu_strtab_cfg {
522         __le64                          *strtab;
523         dma_addr_t                      strtab_dma;
524         struct arm_smmu_strtab_l1_desc  *l1_desc;
525         unsigned int                    num_l1_ents;
526
527         u64                             strtab_base;
528         u32                             strtab_base_cfg;
529 };
530
531 /* An SMMUv3 instance */
532 struct arm_smmu_device {
533         struct device                   *dev;
534         void __iomem                    *base;
535
536 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
537 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
538 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
539 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
540 #define ARM_SMMU_FEAT_PRI               (1 << 4)
541 #define ARM_SMMU_FEAT_ATS               (1 << 5)
542 #define ARM_SMMU_FEAT_SEV               (1 << 6)
543 #define ARM_SMMU_FEAT_MSI               (1 << 7)
544 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
545 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
546 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
547 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
548 #define ARM_SMMU_FEAT_HYP               (1 << 12)
549 #define ARM_SMMU_FEAT_STALL_FORCE       (1 << 13)
550 #define ARM_SMMU_FEAT_VAX               (1 << 14)
551         u32                             features;
552
553 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
554 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY    (1 << 1)
555         u32                             options;
556
557         struct arm_smmu_cmdq            cmdq;
558         struct arm_smmu_evtq            evtq;
559         struct arm_smmu_priq            priq;
560
561         int                             gerr_irq;
562         int                             combined_irq;
563         u32                             sync_nr;
564         u8                              prev_cmd_opcode;
565
566         unsigned long                   ias; /* IPA */
567         unsigned long                   oas; /* PA */
568         unsigned long                   pgsize_bitmap;
569
570 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
571         unsigned int                    asid_bits;
572         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
573
574 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
575         unsigned int                    vmid_bits;
576         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
577
578         unsigned int                    ssid_bits;
579         unsigned int                    sid_bits;
580
581         struct arm_smmu_strtab_cfg      strtab_cfg;
582
583         /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
584         union {
585                 u32                     sync_count;
586                 u64                     padding;
587         };
588
589         /* IOMMU core code handle */
590         struct iommu_device             iommu;
591 };
592
593 /* SMMU private data for each master */
594 struct arm_smmu_master_data {
595         struct arm_smmu_device          *smmu;
596         struct arm_smmu_strtab_ent      ste;
597 };
598
599 /* SMMU private data for an IOMMU domain */
600 enum arm_smmu_domain_stage {
601         ARM_SMMU_DOMAIN_S1 = 0,
602         ARM_SMMU_DOMAIN_S2,
603         ARM_SMMU_DOMAIN_NESTED,
604         ARM_SMMU_DOMAIN_BYPASS,
605 };
606
607 struct arm_smmu_domain {
608         struct arm_smmu_device          *smmu;
609         struct mutex                    init_mutex; /* Protects smmu pointer */
610
611         struct io_pgtable_ops           *pgtbl_ops;
612         bool                            non_strict;
613
614         enum arm_smmu_domain_stage      stage;
615         union {
616                 struct arm_smmu_s1_cfg  s1_cfg;
617                 struct arm_smmu_s2_cfg  s2_cfg;
618         };
619
620         struct iommu_domain             domain;
621 };
622
623 struct arm_smmu_option_prop {
624         u32 opt;
625         const char *prop;
626 };
627
628 static struct arm_smmu_option_prop arm_smmu_options[] = {
629         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
630         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
631         { 0, NULL},
632 };
633
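/*
 * On implementations with the ARM_SMMU_OPT_PAGE0_REGS_ONLY quirk, the
 * registers that the architecture places in page 1 (64K above page 0,
 * e.g. the EVTQ and PRIQ PROD/CONS pointers) are exposed in page 0
 * instead, so fold the offset back down before adding it to the base.
 */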
634 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
635                                                  struct arm_smmu_device *smmu)
636 {
637         if ((offset > SZ_64K) &&
638             (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
639                 offset -= SZ_64K;
640
641         return smmu->base + offset;
642 }
643
644 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
645 {
646         return container_of(dom, struct arm_smmu_domain, domain);
647 }
648
649 static void parse_driver_options(struct arm_smmu_device *smmu)
650 {
651         int i = 0;
652
653         do {
654                 if (of_property_read_bool(smmu->dev->of_node,
655                                                 arm_smmu_options[i].prop)) {
656                         smmu->options |= arm_smmu_options[i].opt;
657                         dev_notice(smmu->dev, "option %s\n",
658                                 arm_smmu_options[i].prop);
659                 }
660         } while (arm_smmu_options[++i].opt);
661 }
662
663 /* Low-level queue manipulation functions */
664 static bool queue_full(struct arm_smmu_queue *q)
665 {
666         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
667                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
668 }
669
670 static bool queue_empty(struct arm_smmu_queue *q)
671 {
672         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
674 }
675
676 static void queue_sync_cons(struct arm_smmu_queue *q)
677 {
678         q->cons = readl_relaxed(q->cons_reg);
679 }
680
681 static void queue_inc_cons(struct arm_smmu_queue *q)
682 {
683         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
684
685         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
686
687         /*
688          * Ensure that all CPU accesses (reads and writes) to the queue
689          * are complete before we update the cons pointer.
690          */
691         mb();
692         writel_relaxed(q->cons, q->cons_reg);
693 }
694
695 static int queue_sync_prod(struct arm_smmu_queue *q)
696 {
697         int ret = 0;
698         u32 prod = readl_relaxed(q->prod_reg);
699
700         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
701                 ret = -EOVERFLOW;
702
703         q->prod = prod;
704         return ret;
705 }
706
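/*
 * The non-relaxed writel() below orders the queue entry writes (done via
 * queue_write()) before the new producer pointer becomes visible to the
 * SMMU, so the hardware never observes a slot it can't yet consume.
 */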
707 static void queue_inc_prod(struct arm_smmu_queue *q)
708 {
709         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
710
711         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
712         writel(q->prod, q->prod_reg);
713 }
714
715 /*
716  * Wait for the SMMU to consume items. If sync is true, wait until the queue
717  * is empty. Otherwise, wait until there is at least one free slot.
718  */
719 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
720 {
721         ktime_t timeout;
722         unsigned int delay = 1, spin_cnt = 0;
723
724         /* Wait longer if it's a CMD_SYNC */
725         timeout = ktime_add_us(ktime_get(), sync ?
726                                             ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
727                                             ARM_SMMU_POLL_TIMEOUT_US);
728
729         while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
730                 if (ktime_compare(ktime_get(), timeout) > 0)
731                         return -ETIMEDOUT;
732
733                 if (wfe) {
734                         wfe();
735                 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
736                         cpu_relax();
737                         continue;
738                 } else {
739                         udelay(delay);
740                         delay *= 2;
741                         spin_cnt = 0;
742                 }
743         }
744
745         return 0;
746 }
747
748 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
749 {
750         int i;
751
752         for (i = 0; i < n_dwords; ++i)
753                 *dst++ = cpu_to_le64(*src++);
754 }
755
756 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
757 {
758         if (queue_full(q))
759                 return -ENOSPC;
760
761         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
762         queue_inc_prod(q);
763         return 0;
764 }
765
766 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
767 {
768         int i;
769
770         for (i = 0; i < n_dwords; ++i)
771                 *dst++ = le64_to_cpu(*src++);
772 }
773
774 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
775 {
776         if (queue_empty(q))
777                 return -EAGAIN;
778
779         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
780         queue_inc_cons(q);
781         return 0;
782 }
783
784 /* High-level queue accessors */
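/*
 * Commands are 16 bytes (CMDQ_ENT_DWORDS == 2) with the opcode in bits
 * [7:0] of the first doubleword. As a rough illustration, a CFGI_STE for
 * SID 5 with the leaf flag set ends up as cmd[0] = 0x0000000500000003
 * and cmd[1] = 0x1.
 */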
785 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
786 {
787         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
788         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
789
790         switch (ent->opcode) {
791         case CMDQ_OP_TLBI_EL2_ALL:
792         case CMDQ_OP_TLBI_NSNH_ALL:
793                 break;
794         case CMDQ_OP_PREFETCH_CFG:
795                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
796                 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
797                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
798                 break;
799         case CMDQ_OP_CFGI_STE:
800                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
801                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
802                 break;
803         case CMDQ_OP_CFGI_ALL:
804                 /* Cover the entire SID range */
805                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
806                 break;
807         case CMDQ_OP_TLBI_NH_VA:
808                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
809                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
810                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
811                 break;
812         case CMDQ_OP_TLBI_S2_IPA:
813                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
814                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
815                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
816                 break;
817         case CMDQ_OP_TLBI_NH_ASID:
818                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
819                 /* Fallthrough */
820         case CMDQ_OP_TLBI_S12_VMALL:
821                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
822                 break;
823         case CMDQ_OP_PRI_RESP:
824                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
825                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
826                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
827                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
828                 switch (ent->pri.resp) {
829                 case PRI_RESP_DENY:
830                 case PRI_RESP_FAIL:
831                 case PRI_RESP_SUCC:
832                         break;
833                 default:
834                         return -EINVAL;
835                 }
836                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
837                 break;
838         case CMDQ_OP_CMD_SYNC:
839                 if (ent->sync.msiaddr)
840                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
841                 else
842                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
843                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
844                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
845                 /*
846                  * Commands are written little-endian, but we want the SMMU to
847                  * receive MSIData, and thus write it back to memory, in CPU
848                  * byte order, so big-endian needs an extra byteswap here.
849                  */
850                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
851                                      cpu_to_le32(ent->sync.msidata));
852                 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
853                 break;
854         default:
855                 return -ENOENT;
856         }
857
858         return 0;
859 }
860
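/*
 * CMDQ error handling: when GERROR.CMDQ_ERR is raised, the CONS register
 * reports the error class and the queue stops at the offending command.
 * Recovery here is to log the command and overwrite it in place with a
 * harmless CMD_SYNC so that consumption can resume.
 */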
861 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
862 {
863         static const char *cerror_str[] = {
864                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
865                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
866                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
867         };
868
869         int i;
870         u64 cmd[CMDQ_ENT_DWORDS];
871         struct arm_smmu_queue *q = &smmu->cmdq.q;
872         u32 cons = readl_relaxed(q->cons_reg);
873         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
874         struct arm_smmu_cmdq_ent cmd_sync = {
875                 .opcode = CMDQ_OP_CMD_SYNC,
876         };
877
878         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
879                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
880
881         switch (idx) {
882         case CMDQ_ERR_CERROR_ABT_IDX:
883                 dev_err(smmu->dev, "retrying command fetch\n");
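		/* Fall through */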
884         case CMDQ_ERR_CERROR_NONE_IDX:
885                 return;
886         case CMDQ_ERR_CERROR_ILL_IDX:
887                 /* Fallthrough */
888         default:
889                 break;
890         }
891
892         /*
893          * We may have concurrent producers, so we need to be careful
894          * not to touch any of the shadow cmdq state.
895          */
896         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
897         dev_err(smmu->dev, "skipping command in error state:\n");
898         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
899                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
900
901         /* Convert the erroneous command into a CMD_SYNC */
902         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
903                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
904                 return;
905         }
906
907         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
908 }
909
910 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
911 {
912         struct arm_smmu_queue *q = &smmu->cmdq.q;
913         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
914
915         smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
916
917         while (queue_insert_raw(q, cmd) == -ENOSPC) {
918                 if (queue_poll_cons(q, false, wfe))
919                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
920         }
921 }
922
923 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
924                                     struct arm_smmu_cmdq_ent *ent)
925 {
926         u64 cmd[CMDQ_ENT_DWORDS];
927         unsigned long flags;
928
929         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
930                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
931                          ent->opcode);
932                 return;
933         }
934
935         spin_lock_irqsave(&smmu->cmdq.lock, flags);
936         arm_smmu_cmdq_insert_cmd(smmu, cmd);
937         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
938 }
939
940 /*
941  * The difference between val and sync_idx is bounded by the maximum size of
942  * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
943  */
944 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
945 {
946         ktime_t timeout;
947         u32 val;
948
949         timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
950         val = smp_cond_load_acquire(&smmu->sync_count,
951                                     (int)(VAL - sync_idx) >= 0 ||
952                                     !ktime_before(ktime_get(), timeout));
953
954         return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
955 }
956
957 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
958 {
959         u64 cmd[CMDQ_ENT_DWORDS];
960         unsigned long flags;
961         struct arm_smmu_cmdq_ent ent = {
962                 .opcode = CMDQ_OP_CMD_SYNC,
963                 .sync   = {
964                         .msiaddr = virt_to_phys(&smmu->sync_count),
965                 },
966         };
967
968         spin_lock_irqsave(&smmu->cmdq.lock, flags);
969
970         /* Piggy-back on the previous command if it's a SYNC */
971         if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
972                 ent.sync.msidata = smmu->sync_nr;
973         } else {
974                 ent.sync.msidata = ++smmu->sync_nr;
975                 arm_smmu_cmdq_build_cmd(cmd, &ent);
976                 arm_smmu_cmdq_insert_cmd(smmu, cmd);
977         }
978
979         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
980
981         return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
982 }
983
984 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
985 {
986         u64 cmd[CMDQ_ENT_DWORDS];
987         unsigned long flags;
988         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
989         struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
990         int ret;
991
992         arm_smmu_cmdq_build_cmd(cmd, &ent);
993
994         spin_lock_irqsave(&smmu->cmdq.lock, flags);
995         arm_smmu_cmdq_insert_cmd(smmu, cmd);
996         ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
997         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
998
999         return ret;
1000 }
1001
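/*
 * CMD_SYNC completion can be detected two ways: if the SMMU supports MSIs
 * and is cache-coherent, the sync is told to write an incrementing value
 * into smmu->sync_count and we wait for it with smp_cond_load_acquire();
 * otherwise we fall back to polling (or WFE-waiting on) the consumer
 * pointer until the queue drains.
 */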
1002 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1003 {
1004         int ret;
1005         bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1006                    (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1007
1008         ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1009                   : __arm_smmu_cmdq_issue_sync(smmu);
1010         if (ret)
1011                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1012 }
1013
1014 /* Context descriptor manipulation functions */
1015 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1016 {
1017         u64 val = 0;
1018
1019         /* Repack the TCR. Just care about TTBR0 for now */
1020         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1021         val |= ARM_SMMU_TCR2CD(tcr, TG0);
1022         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1023         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1024         val |= ARM_SMMU_TCR2CD(tcr, SH0);
1025         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1026         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1027         val |= ARM_SMMU_TCR2CD(tcr, IPS);
1028         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1029
1030         return val;
1031 }
1032
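/*
 * The context descriptor written here is what the STE's S1ContextPtr
 * points at: word 0 carries the repacked TCR fields, the ASID and the
 * valid bit, word 1 the TTB0 pointer and word 3 the MAIR, mirroring the
 * CTXDESC_CD_* layout above.
 */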
1033 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1034                                     struct arm_smmu_s1_cfg *cfg)
1035 {
1036         u64 val;
1037
1038         /*
1039          * We don't need to issue any invalidation here, as we'll invalidate
1040          * the STE when installing the new entry anyway.
1041          */
1042         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1043 #ifdef __BIG_ENDIAN
1044               CTXDESC_CD_0_ENDI |
1045 #endif
1046               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1047               CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1048               CTXDESC_CD_0_V;
1049
1050         /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1051         if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1052                 val |= CTXDESC_CD_0_S;
1053
1054         cfg->cdptr[0] = cpu_to_le64(val);
1055
1056         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1057         cfg->cdptr[1] = cpu_to_le64(val);
1058
1059         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1060 }
1061
1062 /* Stream table manipulation functions */
1063 static void
1064 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1065 {
1066         u64 val = 0;
1067
1068         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1069         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1070
1071         *dst = cpu_to_le64(val);
1072 }
1073
1074 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1075 {
1076         struct arm_smmu_cmdq_ent cmd = {
1077                 .opcode = CMDQ_OP_CFGI_STE,
1078                 .cfgi   = {
1079                         .sid    = sid,
1080                         .leaf   = true,
1081                 },
1082         };
1083
1084         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1085         arm_smmu_cmdq_issue_sync(smmu);
1086 }
1087
1088 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1089                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
1090 {
1091         /*
1092          * This is hideously complicated, but we only really care about
1093          * three cases at the moment:
1094          *
1095          * 1. Invalid (all zero) -> bypass/fault (init)
1096          * 2. Bypass/fault -> translation/bypass (attach)
1097          * 3. Translation/bypass -> bypass/fault (detach)
1098          *
1099          * Given that we can't update the STE atomically and the SMMU
1100          * doesn't read the thing in a defined order, that leaves us
1101          * with the following maintenance requirements:
1102          *
1103          * 1. Update Config, return (init time STEs aren't live)
1104          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1105          * 3. Update Config, sync
1106          */
1107         u64 val = le64_to_cpu(dst[0]);
1108         bool ste_live = false;
1109         struct arm_smmu_cmdq_ent prefetch_cmd = {
1110                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1111                 .prefetch       = {
1112                         .sid    = sid,
1113                 },
1114         };
1115
1116         if (val & STRTAB_STE_0_V) {
1117                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1118                 case STRTAB_STE_0_CFG_BYPASS:
1119                         break;
1120                 case STRTAB_STE_0_CFG_S1_TRANS:
1121                 case STRTAB_STE_0_CFG_S2_TRANS:
1122                         ste_live = true;
1123                         break;
1124                 case STRTAB_STE_0_CFG_ABORT:
1125                         if (disable_bypass)
1126                                 break;
1127                 default:
1128                         BUG(); /* STE corruption */
1129                 }
1130         }
1131
1132         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1133         val = STRTAB_STE_0_V;
1134
1135         /* Bypass/fault */
1136         if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1137                 if (!ste->assigned && disable_bypass)
1138                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1139                 else
1140                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1141
1142                 dst[0] = cpu_to_le64(val);
1143                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1144                                                 STRTAB_STE_1_SHCFG_INCOMING));
1145                 dst[2] = 0; /* Nuke the VMID */
1146                 /*
1147                  * The SMMU can perform negative caching, so we must sync
1148                  * the STE regardless of whether the old value was live.
1149                  */
1150                 if (smmu)
1151                         arm_smmu_sync_ste_for_sid(smmu, sid);
1152                 return;
1153         }
1154
1155         if (ste->s1_cfg) {
1156                 BUG_ON(ste_live);
1157                 dst[1] = cpu_to_le64(
1158                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1159                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1160                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1161 #ifdef CONFIG_PCI_ATS
1162                          FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1163 #endif
1164                          FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1165
1166                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1167                    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1168                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1169
1170                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1171                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1172         }
1173
1174         if (ste->s2_cfg) {
1175                 BUG_ON(ste_live);
1176                 dst[2] = cpu_to_le64(
1177                          FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1178                          FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1179 #ifdef __BIG_ENDIAN
1180                          STRTAB_STE_2_S2ENDI |
1181 #endif
1182                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1183                          STRTAB_STE_2_S2R);
1184
1185                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1186
1187                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1188         }
1189
1190         arm_smmu_sync_ste_for_sid(smmu, sid);
1191         dst[0] = cpu_to_le64(val);
1192         arm_smmu_sync_ste_for_sid(smmu, sid);
1193
1194         /* It's likely that we'll want to use the new STE soon */
1195         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1196                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1197 }
1198
1199 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1200 {
1201         unsigned int i;
1202         struct arm_smmu_strtab_ent ste = { .assigned = false };
1203
1204         for (i = 0; i < nent; ++i) {
1205                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1206                 strtab += STRTAB_STE_DWORDS;
1207         }
1208 }
1209
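/*
 * Second-level stream tables are allocated on demand: the first time a
 * SID in a given STRTAB_SPLIT-sized block is seen, allocate a table of
 * 1 << STRTAB_SPLIT bypass STEs and point the L1 descriptor at it.
 */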
1210 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1211 {
1212         size_t size;
1213         void *strtab;
1214         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1215         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1216
1217         if (desc->l2ptr)
1218                 return 0;
1219
1220         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1221         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1222
1223         desc->span = STRTAB_SPLIT + 1;
1224         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1225                                           GFP_KERNEL | __GFP_ZERO);
1226         if (!desc->l2ptr) {
1227                 dev_err(smmu->dev,
1228                         "failed to allocate l2 stream table for SID %u\n",
1229                         sid);
1230                 return -ENOMEM;
1231         }
1232
1233         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1234         arm_smmu_write_strtab_l1_desc(strtab, desc);
1235         return 0;
1236 }
1237
1238 /* IRQ and event handlers */
1239 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1240 {
1241         int i;
1242         struct arm_smmu_device *smmu = dev;
1243         struct arm_smmu_queue *q = &smmu->evtq.q;
1244         u64 evt[EVTQ_ENT_DWORDS];
1245
1246         do {
1247                 while (!queue_remove_raw(q, evt)) {
1248                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1249
1250                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1251                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1252                                 dev_info(smmu->dev, "\t0x%016llx\n",
1253                                          (unsigned long long)evt[i]);
1254
1255                 }
1256
1257                 /*
1258                  * Not much we can do on overflow, so scream and pretend we're
1259                  * trying harder.
1260                  */
1261                 if (queue_sync_prod(q) == -EOVERFLOW)
1262                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1263         } while (!queue_empty(q));
1264
1265         /* Sync our overflow flag, as we believe we're up to speed */
1266         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1267         return IRQ_HANDLED;
1268 }
1269
1270 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1271 {
1272         u32 sid, ssid;
1273         u16 grpid;
1274         bool ssv, last;
1275
1276         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1277         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1278         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1279         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1280         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1281
1282         dev_info(smmu->dev, "unexpected PRI request received:\n");
1283         dev_info(smmu->dev,
1284                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1285                  sid, ssid, grpid, last ? "L" : "",
1286                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1287                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1288                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1289                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1290                  evt[1] & PRIQ_1_ADDR_MASK);
1291
1292         if (last) {
1293                 struct arm_smmu_cmdq_ent cmd = {
1294                         .opcode                 = CMDQ_OP_PRI_RESP,
1295                         .substream_valid        = ssv,
1296                         .pri                    = {
1297                                 .sid    = sid,
1298                                 .ssid   = ssid,
1299                                 .grpid  = grpid,
1300                                 .resp   = PRI_RESP_DENY,
1301                         },
1302                 };
1303
1304                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1305         }
1306 }
1307
1308 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1309 {
1310         struct arm_smmu_device *smmu = dev;
1311         struct arm_smmu_queue *q = &smmu->priq.q;
1312         u64 evt[PRIQ_ENT_DWORDS];
1313
1314         do {
1315                 while (!queue_remove_raw(q, evt))
1316                         arm_smmu_handle_ppr(smmu, evt);
1317
1318                 if (queue_sync_prod(q) == -EOVERFLOW)
1319                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1320         } while (!queue_empty(q));
1321
1322         /* Sync our overflow flag, as we believe we're up to speed */
1323         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1324         writel(q->cons, q->cons_reg);
1325         return IRQ_HANDLED;
1326 }
1327
1328 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1329
1330 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1331 {
1332         u32 gerror, gerrorn, active;
1333         struct arm_smmu_device *smmu = dev;
1334
1335         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1336         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1337
1338         active = gerror ^ gerrorn;
1339         if (!(active & GERROR_ERR_MASK))
1340                 return IRQ_NONE; /* No errors pending */
1341
1342         dev_warn(smmu->dev,
1343                  "unexpected global error reported (0x%08x), this could be serious\n",
1344                  active);
1345
1346         if (active & GERROR_SFM_ERR) {
1347                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1348                 arm_smmu_device_disable(smmu);
1349         }
1350
1351         if (active & GERROR_MSI_GERROR_ABT_ERR)
1352                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1353
1354         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1355                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1356
1357         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1358                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1359
1360         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1361                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1362
1363         if (active & GERROR_PRIQ_ABT_ERR)
1364                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1365
1366         if (active & GERROR_EVTQ_ABT_ERR)
1367                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1368
1369         if (active & GERROR_CMDQ_ERR)
1370                 arm_smmu_cmdq_skip_err(smmu);
1371
1372         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1373         return IRQ_HANDLED;
1374 }
1375
1376 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1377 {
1378         struct arm_smmu_device *smmu = dev;
1379
1380         arm_smmu_evtq_thread(irq, dev);
1381         if (smmu->features & ARM_SMMU_FEAT_PRI)
1382                 arm_smmu_priq_thread(irq, dev);
1383
1384         return IRQ_HANDLED;
1385 }
1386
1387 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1388 {
1389         arm_smmu_gerror_handler(irq, dev);
1390         return IRQ_WAKE_THREAD;
1391 }
1392
1393 /* IO_PGTABLE API */
1394 static void arm_smmu_tlb_sync(void *cookie)
1395 {
1396         struct arm_smmu_domain *smmu_domain = cookie;
1397
1398         arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1399 }
1400
1401 static void arm_smmu_tlb_inv_context(void *cookie)
1402 {
1403         struct arm_smmu_domain *smmu_domain = cookie;
1404         struct arm_smmu_device *smmu = smmu_domain->smmu;
1405         struct arm_smmu_cmdq_ent cmd;
1406
1407         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1408                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1409                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1410                 cmd.tlbi.vmid   = 0;
1411         } else {
1412                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1413                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1414         }
1415
1416         /*
1417          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1418          * PTEs previously cleared by unmaps on the current CPU not yet visible
1419          * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
1420          * to guarantee those are observed before the TLBI. Do be careful, 007.
1421          */
1422         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423         arm_smmu_cmdq_issue_sync(smmu);
1424 }
1425
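/*
 * Invalidate a range one granule at a time without syncing: the io-pgtable
 * code batches these and issues a single CMD_SYNC via ->tlb_sync() once it
 * has finished unmapping.
 */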
1426 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1427                                           size_t granule, bool leaf, void *cookie)
1428 {
1429         struct arm_smmu_domain *smmu_domain = cookie;
1430         struct arm_smmu_device *smmu = smmu_domain->smmu;
1431         struct arm_smmu_cmdq_ent cmd = {
1432                 .tlbi = {
1433                         .leaf   = leaf,
1434                         .addr   = iova,
1435                 },
1436         };
1437
1438         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1439                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1440                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1441         } else {
1442                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1443                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1444         }
1445
1446         do {
1447                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1448                 cmd.tlbi.addr += granule;
1449         } while (size -= granule);
1450 }
1451
1452 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1453         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1454         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1455         .tlb_sync       = arm_smmu_tlb_sync,
1456 };
1457
1458 /* IOMMU API */
1459 static bool arm_smmu_capable(enum iommu_cap cap)
1460 {
1461         switch (cap) {
1462         case IOMMU_CAP_CACHE_COHERENCY:
1463                 return true;
1464         case IOMMU_CAP_NOEXEC:
1465                 return true;
1466         default:
1467                 return false;
1468         }
1469 }
1470
1471 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1472 {
1473         struct arm_smmu_domain *smmu_domain;
1474
1475         if (type != IOMMU_DOMAIN_UNMANAGED &&
1476             type != IOMMU_DOMAIN_DMA &&
1477             type != IOMMU_DOMAIN_IDENTITY)
1478                 return NULL;
1479
1480         /*
1481          * Allocate the domain and initialise some of its data structures.
1482          * We can't really do anything meaningful until we've added a
1483          * master.
1484          */
1485         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1486         if (!smmu_domain)
1487                 return NULL;
1488
1489         if (type == IOMMU_DOMAIN_DMA &&
1490             iommu_get_dma_cookie(&smmu_domain->domain)) {
1491                 kfree(smmu_domain);
1492                 return NULL;
1493         }
1494
1495         mutex_init(&smmu_domain->init_mutex);
1496         return &smmu_domain->domain;
1497 }
1498
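/*
 * Trivial ASID/VMID allocator: find a clear bit and claim it with
 * test_and_set_bit(), retrying if another caller races us. A span of
 * 16 bits, for example, hands out values in [0, 65535].
 */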
1499 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1500 {
1501         int idx, size = 1 << span;
1502
1503         do {
1504                 idx = find_first_zero_bit(map, size);
1505                 if (idx == size)
1506                         return -ENOSPC;
1507         } while (test_and_set_bit(idx, map));
1508
1509         return idx;
1510 }
1511
1512 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1513 {
1514         clear_bit(idx, map);
1515 }
1516
1517 static void arm_smmu_domain_free(struct iommu_domain *domain)
1518 {
1519         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1520         struct arm_smmu_device *smmu = smmu_domain->smmu;
1521
1522         iommu_put_dma_cookie(domain);
1523         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1524
1525         /* Free the CD and ASID, if we allocated them */
1526         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1527                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1528
1529                 if (cfg->cdptr) {
1530                         dmam_free_coherent(smmu_domain->smmu->dev,
1531                                            CTXDESC_CD_DWORDS << 3,
1532                                            cfg->cdptr,
1533                                            cfg->cdptr_dma);
1534
1535                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1536                 }
1537         } else {
1538                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1539                 if (cfg->vmid)
1540                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1541         }
1542
1543         kfree(smmu_domain);
1544 }
1545
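/*
 * Stage-1 finalisation: claim an ASID, allocate a single context
 * descriptor and mirror the io-pgtable TTBR/TCR/MAIR values into it.
 */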
1546 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1547                                        struct io_pgtable_cfg *pgtbl_cfg)
1548 {
1549         int ret;
1550         int asid;
1551         struct arm_smmu_device *smmu = smmu_domain->smmu;
1552         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1553
1554         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1555         if (asid < 0)
1556                 return asid;
1557
1558         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1559                                          &cfg->cdptr_dma,
1560                                          GFP_KERNEL | __GFP_ZERO);
1561         if (!cfg->cdptr) {
1562                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1563                 ret = -ENOMEM;
1564                 goto out_free_asid;
1565         }
1566
1567         cfg->cd.asid    = (u16)asid;
1568         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1569         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1570         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1571         return 0;
1572
1573 out_free_asid:
1574         arm_smmu_bitmap_free(smmu->asid_map, asid);
1575         return ret;
1576 }
1577
1578 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1579                                        struct io_pgtable_cfg *pgtbl_cfg)
1580 {
1581         int vmid;
1582         struct arm_smmu_device *smmu = smmu_domain->smmu;
1583         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1584
1585         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1586         if (vmid < 0)
1587                 return vmid;
1588
1589         cfg->vmid       = (u16)vmid;
1590         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1591         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1592         return 0;
1593 }
1594
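/*
 * Pick a translation stage the hardware actually supports, size the
 * input/output address spaces accordingly and allocate the io-pgtable
 * backing this domain.
 */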
1595 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1596 {
1597         int ret;
1598         unsigned long ias, oas;
1599         enum io_pgtable_fmt fmt;
1600         struct io_pgtable_cfg pgtbl_cfg;
1601         struct io_pgtable_ops *pgtbl_ops;
1602         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1603                                  struct io_pgtable_cfg *);
1604         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1605         struct arm_smmu_device *smmu = smmu_domain->smmu;
1606
1607         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1608                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1609                 return 0;
1610         }
1611
1612         /* Restrict the stage to what we can actually support */
1613         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1614                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1615         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1616                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1617
1618         switch (smmu_domain->stage) {
1619         case ARM_SMMU_DOMAIN_S1:
1620                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1621                 ias = min_t(unsigned long, ias, VA_BITS);
1622                 oas = smmu->ias;
1623                 fmt = ARM_64_LPAE_S1;
1624                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1625                 break;
1626         case ARM_SMMU_DOMAIN_NESTED:
1627         case ARM_SMMU_DOMAIN_S2:
1628                 ias = smmu->ias;
1629                 oas = smmu->oas;
1630                 fmt = ARM_64_LPAE_S2;
1631                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1632                 break;
1633         default:
1634                 return -EINVAL;
1635         }
1636
1637         pgtbl_cfg = (struct io_pgtable_cfg) {
1638                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1639                 .ias            = ias,
1640                 .oas            = oas,
1641                 .tlb            = &arm_smmu_gather_ops,
1642                 .iommu_dev      = smmu->dev,
1643         };
1644
1645         if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1646                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1647
1648         if (smmu_domain->non_strict)
1649                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1650
1651         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1652         if (!pgtbl_ops)
1653                 return -ENOMEM;
1654
1655         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1656         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1657         domain->geometry.force_aperture = true;
1658
1659         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1660         if (ret < 0) {
1661                 free_io_pgtable_ops(pgtbl_ops);
1662                 return ret;
1663         }
1664
1665         smmu_domain->pgtbl_ops = pgtbl_ops;
1666         return 0;
1667 }
1668
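/* Return a pointer to the STE for @sid, walking the L1 table if needed. */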
1669 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1670 {
1671         __le64 *step;
1672         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1673
1674         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1675                 struct arm_smmu_strtab_l1_desc *l1_desc;
1676                 int idx;
1677
1678                 /* Two-level walk */
1679                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1680                 l1_desc = &cfg->l1_desc[idx];
1681                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1682                 step = &l1_desc->l2ptr[idx];
1683         } else {
1684                 /* Simple linear lookup */
1685                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1686         }
1687
1688         return step;
1689 }
1690
1691 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1692 {
1693         int i, j;
1694         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1695         struct arm_smmu_device *smmu = master->smmu;
1696
1697         for (i = 0; i < fwspec->num_ids; ++i) {
1698                 u32 sid = fwspec->ids[i];
1699                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1700
1701                 /* Bridged PCI devices may end up with duplicated IDs */
1702                 for (j = 0; j < i; j++)
1703                         if (fwspec->ids[j] == sid)
1704                                 break;
1705                 if (j < i)
1706                         continue;
1707
1708                 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1709         }
1710 }
1711
1712 static void arm_smmu_detach_dev(struct device *dev)
1713 {
1714         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1715         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1716
1717         master->ste.assigned = false;
1718         arm_smmu_install_ste_for_dev(fwspec);
1719 }
1720
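/*
 * Attach a master to a domain: finalise the domain against this SMMU on
 * first use, point the master's STE at the stage-1 CD or stage-2 config
 * (or neither, for bypass) and rewrite its stream table entries.
 */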
1721 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1722 {
1723         int ret = 0;
1724         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1725         struct arm_smmu_device *smmu;
1726         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1727         struct arm_smmu_master_data *master;
1728         struct arm_smmu_strtab_ent *ste;
1729
1730         if (!fwspec)
1731                 return -ENOENT;
1732
1733         master = fwspec->iommu_priv;
1734         smmu = master->smmu;
1735         ste = &master->ste;
1736
1737         /* Already attached to a different domain? */
1738         if (ste->assigned)
1739                 arm_smmu_detach_dev(dev);
1740
1741         mutex_lock(&smmu_domain->init_mutex);
1742
1743         if (!smmu_domain->smmu) {
1744                 smmu_domain->smmu = smmu;
1745                 ret = arm_smmu_domain_finalise(domain);
1746                 if (ret) {
1747                         smmu_domain->smmu = NULL;
1748                         goto out_unlock;
1749                 }
1750         } else if (smmu_domain->smmu != smmu) {
1751                 dev_err(dev,
1752                         "cannot attach to SMMU %s (upstream of %s)\n",
1753                         dev_name(smmu_domain->smmu->dev),
1754                         dev_name(smmu->dev));
1755                 ret = -ENXIO;
1756                 goto out_unlock;
1757         }
1758
1759         ste->assigned = true;
1760
1761         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1762                 ste->s1_cfg = NULL;
1763                 ste->s2_cfg = NULL;
1764         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1765                 ste->s1_cfg = &smmu_domain->s1_cfg;
1766                 ste->s2_cfg = NULL;
1767                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1768         } else {
1769                 ste->s1_cfg = NULL;
1770                 ste->s2_cfg = &smmu_domain->s2_cfg;
1771         }
1772
1773         arm_smmu_install_ste_for_dev(fwspec);
1774 out_unlock:
1775         mutex_unlock(&smmu_domain->init_mutex);
1776         return ret;
1777 }
1778
1779 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1780                         phys_addr_t paddr, size_t size, int prot)
1781 {
1782         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1783
1784         if (!ops)
1785                 return -ENODEV;
1786
1787         return ops->map(ops, iova, paddr, size, prot);
1788 }
1789
1790 static size_t
1791 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1792 {
1793         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1794
1795         if (!ops)
1796                 return 0;
1797
1798         return ops->unmap(ops, iova, size);
1799 }
1800
1801 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1802 {
1803         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1804
1805         if (smmu_domain->smmu)
1806                 arm_smmu_tlb_inv_context(smmu_domain);
1807 }
1808
1809 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1810 {
1811         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1812
1813         if (smmu)
1814                 arm_smmu_cmdq_issue_sync(smmu);
1815 }
1816
1817 static phys_addr_t
1818 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1819 {
1820         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1821
1822         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1823                 return iova;
1824
1825         if (!ops)
1826                 return 0;
1827
1828         return ops->iova_to_phys(ops, iova);
1829 }
1830
1831 static struct platform_driver arm_smmu_driver;
1832
1833 static int arm_smmu_match_node(struct device *dev, void *data)
1834 {
1835         return dev->fwnode == data;
1836 }
1837
1838 static
1839 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1840 {
1841         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1842                                                 fwnode, arm_smmu_match_node);
1843         put_device(dev);
1844         return dev ? dev_get_drvdata(dev) : NULL;
1845 }
1846
1847 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1848 {
1849         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1850
1851         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1852                 limit *= 1UL << STRTAB_SPLIT;
1853
1854         return sid < limit;
1855 }
1856
1857 static struct iommu_ops arm_smmu_ops;
1858
1859 static int arm_smmu_add_device(struct device *dev)
1860 {
1861         int i, ret;
1862         struct arm_smmu_device *smmu;
1863         struct arm_smmu_master_data *master;
1864         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1865         struct iommu_group *group;
1866
1867         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1868                 return -ENODEV;
1869         /*
1870          * We _can_ actually withstand dodgy bus code re-calling add_device()
1871          * without an intervening remove_device()/of_xlate() sequence, but
1872          * we're not going to do so quietly...
1873          */
1874         if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1875                 master = fwspec->iommu_priv;
1876                 smmu = master->smmu;
1877         } else {
1878                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1879                 if (!smmu)
1880                         return -ENODEV;
1881                 master = kzalloc(sizeof(*master), GFP_KERNEL);
1882                 if (!master)
1883                         return -ENOMEM;
1884
1885                 master->smmu = smmu;
1886                 fwspec->iommu_priv = master;
1887         }
1888
1889         /* Check the SIDs are in range of the SMMU and our stream table */
1890         for (i = 0; i < fwspec->num_ids; i++) {
1891                 u32 sid = fwspec->ids[i];
1892
1893                 if (!arm_smmu_sid_in_range(smmu, sid))
1894                         return -ERANGE;
1895
1896                 /* Ensure l2 strtab is initialised */
1897                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1898                         ret = arm_smmu_init_l2_strtab(smmu, sid);
1899                         if (ret)
1900                                 return ret;
1901                 }
1902         }
1903
1904         group = iommu_group_get_for_dev(dev);
1905         if (!IS_ERR(group)) {
1906                 iommu_group_put(group);
1907                 iommu_device_link(&smmu->iommu, dev);
1908         }
1909
1910         return PTR_ERR_OR_ZERO(group);
1911 }
1912
1913 static void arm_smmu_remove_device(struct device *dev)
1914 {
1915         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1916         struct arm_smmu_master_data *master;
1917         struct arm_smmu_device *smmu;
1918
1919         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1920                 return;
1921
1922         master = fwspec->iommu_priv;
1923         smmu = master->smmu;
1924         if (master->ste.assigned)
1925                 arm_smmu_detach_dev(dev);
1926         iommu_group_remove_device(dev);
1927         iommu_device_unlink(&smmu->iommu, dev);
1928         kfree(master);
1929         iommu_fwspec_free(dev);
1930 }
1931
1932 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1933 {
1934         struct iommu_group *group;
1935
1936         /*
1937          * We don't support devices sharing stream IDs other than PCI RID
1938          * aliases, since the necessary ID-to-device lookup becomes rather
1939          * impractical given a potential sparse 32-bit stream ID space.
1940          */
1941         if (dev_is_pci(dev))
1942                 group = pci_device_group(dev);
1943         else
1944                 group = generic_device_group(dev);
1945
1946         return group;
1947 }
1948
1949 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1950                                     enum iommu_attr attr, void *data)
1951 {
1952         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1953
1954         switch (domain->type) {
1955         case IOMMU_DOMAIN_UNMANAGED:
1956                 switch (attr) {
1957                 case DOMAIN_ATTR_NESTING:
1958                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1959                         return 0;
1960                 default:
1961                         return -ENODEV;
1962                 }
1963                 break;
1964         case IOMMU_DOMAIN_DMA:
1965                 switch (attr) {
1966                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1967                         *(int *)data = smmu_domain->non_strict;
1968                         return 0;
1969                 default:
1970                         return -ENODEV;
1971                 }
1972                 break;
1973         default:
1974                 return -EINVAL;
1975         }
1976 }
1977
1978 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1979                                     enum iommu_attr attr, void *data)
1980 {
1981         int ret = 0;
1982         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1983
1984         mutex_lock(&smmu_domain->init_mutex);
1985
1986         switch (domain->type) {
1987         case IOMMU_DOMAIN_UNMANAGED:
1988                 switch (attr) {
1989                 case DOMAIN_ATTR_NESTING:
1990                         if (smmu_domain->smmu) {
1991                                 ret = -EPERM;
1992                                 goto out_unlock;
1993                         }
1994
1995                         if (*(int *)data)
1996                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1997                         else
1998                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1999                         break;
2000                 default:
2001                         ret = -ENODEV;
2002                 }
2003                 break;
2004         case IOMMU_DOMAIN_DMA:
2005                 switch(attr) {
2006                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2007                         smmu_domain->non_strict = *(int *)data;
2008                         break;
2009                 default:
2010                         ret = -ENODEV;
2011                 }
2012                 break;
2013         default:
2014                 ret = -EINVAL;
2015         }
2016
2017 out_unlock:
2018         mutex_unlock(&smmu_domain->init_mutex);
2019         return ret;
2020 }
2021
2022 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2023 {
2024         return iommu_fwspec_add_ids(dev, args->args, 1);
2025 }
2026
2027 static void arm_smmu_get_resv_regions(struct device *dev,
2028                                       struct list_head *head)
2029 {
2030         struct iommu_resv_region *region;
2031         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2032
2033         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2034                                          prot, IOMMU_RESV_SW_MSI);
2035         if (!region)
2036                 return;
2037
2038         list_add_tail(&region->list, head);
2039
2040         iommu_dma_get_resv_regions(dev, head);
2041 }
2042
2043 static void arm_smmu_put_resv_regions(struct device *dev,
2044                                       struct list_head *head)
2045 {
2046         struct iommu_resv_region *entry, *next;
2047
2048         list_for_each_entry_safe(entry, next, head, list)
2049                 kfree(entry);
2050 }
2051
2052 static struct iommu_ops arm_smmu_ops = {
2053         .capable                = arm_smmu_capable,
2054         .domain_alloc           = arm_smmu_domain_alloc,
2055         .domain_free            = arm_smmu_domain_free,
2056         .attach_dev             = arm_smmu_attach_dev,
2057         .map                    = arm_smmu_map,
2058         .unmap                  = arm_smmu_unmap,
2059         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2060         .iotlb_sync             = arm_smmu_iotlb_sync,
2061         .iova_to_phys           = arm_smmu_iova_to_phys,
2062         .add_device             = arm_smmu_add_device,
2063         .remove_device          = arm_smmu_remove_device,
2064         .device_group           = arm_smmu_device_group,
2065         .domain_get_attr        = arm_smmu_domain_get_attr,
2066         .domain_set_attr        = arm_smmu_domain_set_attr,
2067         .of_xlate               = arm_smmu_of_xlate,
2068         .get_resv_regions       = arm_smmu_get_resv_regions,
2069         .put_resv_regions       = arm_smmu_put_resv_regions,
2070         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2071 };
2072
2073 /* Probing and initialisation functions */
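/*
 * Allocate one queue: (1 << max_n_shift) entries of @dwords 64-bit words
 * each. For example, a 2-dword command entry with max_n_shift == 8 gives
 * 256 * 16 bytes = 4KiB of DMA-coherent memory.
 */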
2074 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2075                                    struct arm_smmu_queue *q,
2076                                    unsigned long prod_off,
2077                                    unsigned long cons_off,
2078                                    size_t dwords)
2079 {
2080         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2081
2082         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2083         if (!q->base) {
2084                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2085                         qsz);
2086                 return -ENOMEM;
2087         }
2088
2089         q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
2090         q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
2091         q->ent_dwords   = dwords;
2092
2093         q->q_base  = Q_BASE_RWA;
2094         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2095         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2096
2097         q->prod = q->cons = 0;
2098         return 0;
2099 }
2100
2101 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2102 {
2103         int ret;
2104
2105         /* cmdq */
2106         spin_lock_init(&smmu->cmdq.lock);
2107         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2108                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2109         if (ret)
2110                 return ret;
2111
2112         /* evtq */
2113         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2114                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2115         if (ret)
2116                 return ret;
2117
2118         /* priq */
2119         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2120                 return 0;
2121
2122         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2123                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2124 }
2125
2126 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2127 {
2128         unsigned int i;
2129         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2130         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2131         void *strtab = smmu->strtab_cfg.strtab;
2132
2133         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2134         if (!cfg->l1_desc) {
2135                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2136                 return -ENOMEM;
2137         }
2138
2139         for (i = 0; i < cfg->num_l1_ents; ++i) {
2140                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2141                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2142         }
2143
2144         return 0;
2145 }
2146
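/*
 * Two-level stream table: the L1 covers the top (sid_bits - STRTAB_SPLIT)
 * SID bits, capped by STRTAB_L1_SZ_SHIFT, and each L1 descriptor points at
 * an L2 table of STEs that is allocated lazily when a matching device is
 * added (see arm_smmu_init_l2_strtab()).
 */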
2147 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2148 {
2149         void *strtab;
2150         u64 reg;
2151         u32 size, l1size;
2152         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2153
2154         /* Calculate the L1 size, capped to the SIDSIZE. */
2155         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2156         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2157         cfg->num_l1_ents = 1 << size;
2158
2159         size += STRTAB_SPLIT;
2160         if (size < smmu->sid_bits)
2161                 dev_warn(smmu->dev,
2162                          "2-level strtab only covers %u/%u bits of SID\n",
2163                          size, smmu->sid_bits);
2164
2165         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2166         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2167                                      GFP_KERNEL | __GFP_ZERO);
2168         if (!strtab) {
2169                 dev_err(smmu->dev,
2170                         "failed to allocate l1 stream table (%u bytes)\n",
2171                         l1size);
2172                 return -ENOMEM;
2173         }
2174         cfg->strtab = strtab;
2175
2176         /* Configure strtab_base_cfg for 2 levels */
2177         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2178         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2179         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2180         cfg->strtab_base_cfg = reg;
2181
2182         return arm_smmu_init_l1_strtab(smmu);
2183 }
2184
2185 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2186 {
2187         void *strtab;
2188         u64 reg;
2189         u32 size;
2190         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2191
2192         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2193         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2194                                      GFP_KERNEL | __GFP_ZERO);
2195         if (!strtab) {
2196                 dev_err(smmu->dev,
2197                         "failed to allocate linear stream table (%u bytes)\n",
2198                         size);
2199                 return -ENOMEM;
2200         }
2201         cfg->strtab = strtab;
2202         cfg->num_l1_ents = 1 << smmu->sid_bits;
2203
2204         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2205         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2206         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2207         cfg->strtab_base_cfg = reg;
2208
2209         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2210         return 0;
2211 }
2212
2213 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2214 {
2215         u64 reg;
2216         int ret;
2217
2218         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2219                 ret = arm_smmu_init_strtab_2lvl(smmu);
2220         else
2221                 ret = arm_smmu_init_strtab_linear(smmu);
2222
2223         if (ret)
2224                 return ret;
2225
2226         /* Set the strtab base address */
2227         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2228         reg |= STRTAB_BASE_RA;
2229         smmu->strtab_cfg.strtab_base = reg;
2230
2231         /* Allocate the first VMID for stage-2 bypass STEs */
2232         set_bit(0, smmu->vmid_map);
2233         return 0;
2234 }
2235
2236 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2237 {
2238         int ret;
2239
2240         ret = arm_smmu_init_queues(smmu);
2241         if (ret)
2242                 return ret;
2243
2244         return arm_smmu_init_strtab(smmu);
2245 }
2246
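/*
 * Write a control register and poll (for up to ARM_SMMU_POLL_TIMEOUT_US)
 * until its ACK register reflects the new value.
 */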
2247 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2248                                    unsigned int reg_off, unsigned int ack_off)
2249 {
2250         u32 reg;
2251
2252         writel_relaxed(val, smmu->base + reg_off);
2253         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2254                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2255 }
2256
2257 /* GBPA is "special" */
2258 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2259 {
2260         int ret;
2261         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2262
2263         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2264                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2265         if (ret)
2266                 return ret;
2267
2268         reg &= ~clr;
2269         reg |= set;
2270         writel_relaxed(reg | GBPA_UPDATE, gbpa);
2271         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2272                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2273
2274         if (ret)
2275                 dev_err(smmu->dev, "GBPA not responding to update\n");
2276         return ret;
2277 }
2278
2279 static void arm_smmu_free_msis(void *data)
2280 {
2281         struct device *dev = data;
2282         platform_msi_domain_free_irqs(dev);
2283 }
2284
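/*
 * Program one MSI target as a register triplet: doorbell address, payload
 * data and memory attributes.
 */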
2285 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2286 {
2287         phys_addr_t doorbell;
2288         struct device *dev = msi_desc_to_dev(desc);
2289         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2290         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2291
2292         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2293         doorbell &= MSI_CFG0_ADDR_MASK;
2294
2295         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2296         writel_relaxed(msg->data, smmu->base + cfg[1]);
2297         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2298 }
2299
2300 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2301 {
2302         struct msi_desc *desc;
2303         int ret, nvec = ARM_SMMU_MAX_MSIS;
2304         struct device *dev = smmu->dev;
2305
2306         /* Clear the MSI address regs */
2307         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2308         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2309
2310         if (smmu->features & ARM_SMMU_FEAT_PRI)
2311                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2312         else
2313                 nvec--;
2314
2315         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2316                 return;
2317
2318         if (!dev->msi_domain) {
2319                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2320                 return;
2321         }
2322
2323         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2324         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2325         if (ret) {
2326                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2327                 return;
2328         }
2329
2330         for_each_msi_entry(desc, dev) {
2331                 switch (desc->platform.msi_index) {
2332                 case EVTQ_MSI_INDEX:
2333                         smmu->evtq.q.irq = desc->irq;
2334                         break;
2335                 case GERROR_MSI_INDEX:
2336                         smmu->gerr_irq = desc->irq;
2337                         break;
2338                 case PRIQ_MSI_INDEX:
2339                         smmu->priq.q.irq = desc->irq;
2340                         break;
2341                 default:        /* Unknown */
2342                         continue;
2343                 }
2344         }
2345
2346         /* Add callback to free MSIs on teardown */
2347         devm_add_action(dev, arm_smmu_free_msis, dev);
2348 }
2349
2350 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2351 {
2352         int irq, ret;
2353
2354         arm_smmu_setup_msis(smmu);
2355
2356         /* Request interrupt lines */
2357         irq = smmu->evtq.q.irq;
2358         if (irq) {
2359                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2360                                                 arm_smmu_evtq_thread,
2361                                                 IRQF_ONESHOT,
2362                                                 "arm-smmu-v3-evtq", smmu);
2363                 if (ret < 0)
2364                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2365         } else {
2366                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2367         }
2368
2369         irq = smmu->gerr_irq;
2370         if (irq) {
2371                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2372                                        0, "arm-smmu-v3-gerror", smmu);
2373                 if (ret < 0)
2374                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2375         } else {
2376                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2377         }
2378
2379         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2380                 irq = smmu->priq.q.irq;
2381                 if (irq) {
2382                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2383                                                         arm_smmu_priq_thread,
2384                                                         IRQF_ONESHOT,
2385                                                         "arm-smmu-v3-priq",
2386                                                         smmu);
2387                         if (ret < 0)
2388                                 dev_warn(smmu->dev,
2389                                          "failed to enable priq irq\n");
2390                 } else {
2391                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2392                 }
2393         }
2394 }
2395
2396 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2397 {
2398         int ret, irq;
2399         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2400
2401         /* Disable IRQs first */
2402         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2403                                       ARM_SMMU_IRQ_CTRLACK);
2404         if (ret) {
2405                 dev_err(smmu->dev, "failed to disable irqs\n");
2406                 return ret;
2407         }
2408
2409         irq = smmu->combined_irq;
2410         if (irq) {
2411                 /*
2412                  * Cavium ThunderX2 implementation doesn't support unique irq
2413                  * lines. Use a single irq line for all the SMMUv3 interrupts.
2414                  */
2415                 ret = devm_request_threaded_irq(smmu->dev, irq,
2416                                         arm_smmu_combined_irq_handler,
2417                                         arm_smmu_combined_irq_thread,
2418                                         IRQF_ONESHOT,
2419                                         "arm-smmu-v3-combined-irq", smmu);
2420                 if (ret < 0)
2421                         dev_warn(smmu->dev, "failed to enable combined irq\n");
2422         } else
2423                 arm_smmu_setup_unique_irqs(smmu);
2424
2425         if (smmu->features & ARM_SMMU_FEAT_PRI)
2426                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2427
2428         /* Enable interrupt generation on the SMMU */
2429         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2430                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2431         if (ret)
2432                 dev_warn(smmu->dev, "failed to enable irqs\n");
2433
2434         return 0;
2435 }
2436
2437 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2438 {
2439         int ret;
2440
2441         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2442         if (ret)
2443                 dev_err(smmu->dev, "failed to clear cr0\n");
2444
2445         return ret;
2446 }
2447
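/*
 * Bring the SMMU up from a clean state: disable it, program the table and
 * queue attributes and base registers, invalidate any cached configuration
 * and TLB entries, set up IRQs and finally enable translation (or
 * bypass/abort, depending on the firmware probing result).
 */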
2448 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2449 {
2450         int ret;
2451         u32 reg, enables;
2452         struct arm_smmu_cmdq_ent cmd;
2453
2454         /* Clear CR0 and sync (disables SMMU and queue processing) */
2455         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2456         if (reg & CR0_SMMUEN) {
2457                 if (is_kdump_kernel()) {
2458                         arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2459                         arm_smmu_device_disable(smmu);
2460                         return -EBUSY;
2461                 }
2462
2463                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2464         }
2465
2466         ret = arm_smmu_device_disable(smmu);
2467         if (ret)
2468                 return ret;
2469
2470         /* CR1 (table and queue memory attributes) */
2471         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2472               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2473               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2474               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2475               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2476               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2477         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2478
2479         /* CR2 (private TLB maintenance, record invalid SIDs, E2H) */
2480         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2481         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2482
2483         /* Stream table */
2484         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2485                        smmu->base + ARM_SMMU_STRTAB_BASE);
2486         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2487                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2488
2489         /* Command queue */
2490         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2491         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2492         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2493
2494         enables = CR0_CMDQEN;
2495         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2496                                       ARM_SMMU_CR0ACK);
2497         if (ret) {
2498                 dev_err(smmu->dev, "failed to enable command queue\n");
2499                 return ret;
2500         }
2501
2502         /* Invalidate any cached configuration */
2503         cmd.opcode = CMDQ_OP_CFGI_ALL;
2504         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2505         arm_smmu_cmdq_issue_sync(smmu);
2506
2507         /* Invalidate any stale TLB entries */
2508         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2509                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2510                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2511         }
2512
2513         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2514         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2515         arm_smmu_cmdq_issue_sync(smmu);
2516
2517         /* Event queue */
2518         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2519         writel_relaxed(smmu->evtq.q.prod,
2520                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2521         writel_relaxed(smmu->evtq.q.cons,
2522                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2523
2524         enables |= CR0_EVTQEN;
2525         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2526                                       ARM_SMMU_CR0ACK);
2527         if (ret) {
2528                 dev_err(smmu->dev, "failed to enable event queue\n");
2529                 return ret;
2530         }
2531
2532         /* PRI queue */
2533         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2534                 writeq_relaxed(smmu->priq.q.q_base,
2535                                smmu->base + ARM_SMMU_PRIQ_BASE);
2536                 writel_relaxed(smmu->priq.q.prod,
2537                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2538                 writel_relaxed(smmu->priq.q.cons,
2539                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2540
2541                 enables |= CR0_PRIQEN;
2542                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2543                                               ARM_SMMU_CR0ACK);
2544                 if (ret) {
2545                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2546                         return ret;
2547                 }
2548         }
2549
2550         ret = arm_smmu_setup_irqs(smmu);
2551         if (ret) {
2552                 dev_err(smmu->dev, "failed to setup irqs\n");
2553                 return ret;
2554         }
2555
2557         /* Enable the SMMU interface, or ensure bypass */
2558         if (!bypass || disable_bypass) {
2559                 enables |= CR0_SMMUEN;
2560         } else {
2561                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2562                 if (ret)
2563                         return ret;
2564         }
2565         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2566                                       ARM_SMMU_CR0ACK);
2567         if (ret) {
2568                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2569                 return ret;
2570         }
2571
2572         return 0;
2573 }
2574
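/*
 * Read the ID registers and derive the feature set, queue and stream-table
 * sizes, supported page sizes and address-space limits used by the rest of
 * the driver.
 */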
2575 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2576 {
2577         u32 reg;
2578         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2579
2580         /* IDR0 */
2581         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2582
2583         /* 2-level structures */
2584         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2585                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2586
2587         if (reg & IDR0_CD2L)
2588                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2589
2590         /*
2591          * Translation table endianness.
2592          * We currently require the same endianness as the CPU, but this
2593          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2594          */
2595         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2596         case IDR0_TTENDIAN_MIXED:
2597                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2598                 break;
2599 #ifdef __BIG_ENDIAN
2600         case IDR0_TTENDIAN_BE:
2601                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2602                 break;
2603 #else
2604         case IDR0_TTENDIAN_LE:
2605                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2606                 break;
2607 #endif
2608         default:
2609                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2610                 return -ENXIO;
2611         }
2612
2613         /* Boolean feature flags */
2614         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2615                 smmu->features |= ARM_SMMU_FEAT_PRI;
2616
2617         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2618                 smmu->features |= ARM_SMMU_FEAT_ATS;
2619
2620         if (reg & IDR0_SEV)
2621                 smmu->features |= ARM_SMMU_FEAT_SEV;
2622
2623         if (reg & IDR0_MSI)
2624                 smmu->features |= ARM_SMMU_FEAT_MSI;
2625
2626         if (reg & IDR0_HYP)
2627                 smmu->features |= ARM_SMMU_FEAT_HYP;
2628
2629         /*
2630          * The coherency feature as set by FW is used in preference to the ID
2631          * register, but warn on mismatch.
2632          */
2633         if (!!(reg & IDR0_COHACC) != coherent)
2634                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2635                          coherent ? "true" : "false");
2636
2637         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2638         case IDR0_STALL_MODEL_FORCE:
2639                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2640                 /* Fallthrough */
2641         case IDR0_STALL_MODEL_STALL:
2642                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2643         }
2644
2645         if (reg & IDR0_S1P)
2646                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2647
2648         if (reg & IDR0_S2P)
2649                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2650
2651         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2652                 dev_err(smmu->dev, "no translation support!\n");
2653                 return -ENXIO;
2654         }
2655
2656         /* We only support the AArch64 table format at present */
2657         switch (FIELD_GET(IDR0_TTF, reg)) {
2658         case IDR0_TTF_AARCH32_64:
2659                 smmu->ias = 40;
2660                 /* Fallthrough */
2661         case IDR0_TTF_AARCH64:
2662                 break;
2663         default:
2664                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2665                 return -ENXIO;
2666         }
2667
2668         /* ASID/VMID sizes */
2669         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2670         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2671
2672         /* IDR1 */
2673         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2674         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2675                 dev_err(smmu->dev, "embedded implementation not supported\n");
2676                 return -ENXIO;
2677         }
2678
2679         /* Queue sizes, capped at 4k */
2680         smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2681                                          FIELD_GET(IDR1_CMDQS, reg));
2682         if (!smmu->cmdq.q.max_n_shift) {
2683                 /* Odd alignment restrictions on the base, so ignore for now */
2684                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2685                 return -ENXIO;
2686         }
2687
2688         smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2689                                          FIELD_GET(IDR1_EVTQS, reg));
2690         smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2691                                          FIELD_GET(IDR1_PRIQS, reg));
2692
2693         /* SID/SSID sizes */
2694         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2695         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2696
2697         /*
2698          * If the SMMU supports fewer bits than would fill a single L2 stream
2699          * table, use a linear table instead.
2700          */
2701         if (smmu->sid_bits <= STRTAB_SPLIT)
2702                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2703
2704         /* IDR5 */
2705         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2706
2707         /* Maximum number of outstanding stalls */
2708         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2709
2710         /* Page sizes */
2711         if (reg & IDR5_GRAN64K)
2712                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2713         if (reg & IDR5_GRAN16K)
2714                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2715         if (reg & IDR5_GRAN4K)
2716                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2717
2718         /* Input address size */
2719         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2720                 smmu->features |= ARM_SMMU_FEAT_VAX;
2721
2722         /* Output address size */
2723         switch (FIELD_GET(IDR5_OAS, reg)) {
2724         case IDR5_OAS_32_BIT:
2725                 smmu->oas = 32;
2726                 break;
2727         case IDR5_OAS_36_BIT:
2728                 smmu->oas = 36;
2729                 break;
2730         case IDR5_OAS_40_BIT:
2731                 smmu->oas = 40;
2732                 break;
2733         case IDR5_OAS_42_BIT:
2734                 smmu->oas = 42;
2735                 break;
2736         case IDR5_OAS_44_BIT:
2737                 smmu->oas = 44;
2738                 break;
2739         case IDR5_OAS_52_BIT:
2740                 smmu->oas = 52;
2741                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2742                 break;
2743         default:
2744                 dev_info(smmu->dev,
2745                         "unknown output address size. Truncating to 48-bit\n");
2746                 /* Fallthrough */
2747         case IDR5_OAS_48_BIT:
2748                 smmu->oas = 48;
2749         }
2750
2751         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2752                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2753         else
2754                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2755
2756         /* Set the DMA mask for our table walker */
2757         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2758                 dev_warn(smmu->dev,
2759                          "failed to set DMA mask for table walker\n");
2760
2761         smmu->ias = max(smmu->ias, smmu->oas);
2762
2763         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2764                  smmu->ias, smmu->oas, smmu->features);
2765         return 0;
2766 }
2767
2768 #ifdef CONFIG_ACPI
2769 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2770 {
2771         switch (model) {
2772         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2773                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2774                 break;
2775         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2776                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2777                 break;
2778         }
2779
2780         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2781 }
2782
2783 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2784                                       struct arm_smmu_device *smmu)
2785 {
2786         struct acpi_iort_smmu_v3 *iort_smmu;
2787         struct device *dev = smmu->dev;
2788         struct acpi_iort_node *node;
2789
2790         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2791
2792         /* Retrieve SMMUv3 specific data */
2793         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2794
2795         acpi_smmu_get_options(iort_smmu->model, smmu);
2796
2797         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2798                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2799
2800         return 0;
2801 }
2802 #else
2803 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2804                                              struct arm_smmu_device *smmu)
2805 {
2806         return -ENODEV;
2807 }
2808 #endif
2809
2810 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2811                                     struct arm_smmu_device *smmu)
2812 {
2813         struct device *dev = &pdev->dev;
2814         u32 cells;
2815         int ret = -EINVAL;
2816
2817         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2818                 dev_err(dev, "missing #iommu-cells property\n");
2819         else if (cells != 1)
2820                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2821         else
2822                 ret = 0;
2823
2824         parse_driver_options(smmu);
2825
2826         if (of_dma_is_coherent(dev->of_node))
2827                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2828
2829         return ret;
2830 }
2831
2832 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2833 {
2834         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2835                 return SZ_64K;
2836         else
2837                 return SZ_128K;
2838 }
2839
2840 static int arm_smmu_device_probe(struct platform_device *pdev)
2841 {
2842         int irq, ret;
2843         struct resource *res;
2844         resource_size_t ioaddr;
2845         struct arm_smmu_device *smmu;
2846         struct device *dev = &pdev->dev;
2847         bool bypass;
2848
2849         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2850         if (!smmu) {
2851                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2852                 return -ENOMEM;
2853         }
2854         smmu->dev = dev;
2855
2856         if (dev->of_node) {
2857                 ret = arm_smmu_device_dt_probe(pdev, smmu);
2858         } else {
2859                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2860                 if (ret == -ENODEV)
2861                         return ret;
2862         }
2863
2864         /* Set bypass mode according to firmware probing result */
2865         bypass = !!ret;
2866
2867         /* Base address */
2868         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2869         if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2870                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2871                 return -EINVAL;
2872         }
2873         ioaddr = res->start;
2874
2875         smmu->base = devm_ioremap_resource(dev, res);
2876         if (IS_ERR(smmu->base))
2877                 return PTR_ERR(smmu->base);
2878
2879         /* Interrupt lines */
2880
2881         irq = platform_get_irq_byname(pdev, "combined");
2882         if (irq > 0)
2883                 smmu->combined_irq = irq;
2884         else {
2885                 irq = platform_get_irq_byname(pdev, "eventq");
2886                 if (irq > 0)
2887                         smmu->evtq.q.irq = irq;
2888
2889                 irq = platform_get_irq_byname(pdev, "priq");
2890                 if (irq > 0)
2891                         smmu->priq.q.irq = irq;
2892
2893                 irq = platform_get_irq_byname(pdev, "gerror");
2894                 if (irq > 0)
2895                         smmu->gerr_irq = irq;
2896         }
2897         /* Probe the h/w */
2898         ret = arm_smmu_device_hw_probe(smmu);
2899         if (ret)
2900                 return ret;
2901
2902         /* Initialise in-memory data structures */
2903         ret = arm_smmu_init_structures(smmu);
2904         if (ret)
2905                 return ret;
2906
2907         /* Record our private device structure */
2908         platform_set_drvdata(pdev, smmu);
2909
2910         /* Reset the device */
2911         ret = arm_smmu_device_reset(smmu, bypass);
2912         if (ret)
2913                 return ret;
2914
2915         /* And we're up. Go go go! */
2916         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2917                                      "smmu3.%pa", &ioaddr);
2918         if (ret)
2919                 return ret;
2920
2921         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2922         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2923
2924         ret = iommu_device_register(&smmu->iommu);
2925         if (ret) {
2926                 dev_err(dev, "Failed to register iommu\n");
2927                 return ret;
2928         }
2929
2930 #ifdef CONFIG_PCI
2931         if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2932                 pci_request_acs();
2933                 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2934                 if (ret)
2935                         return ret;
2936         }
2937 #endif
2938 #ifdef CONFIG_ARM_AMBA
2939         if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2940                 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2941                 if (ret)
2942                         return ret;
2943         }
2944 #endif
2945         if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2946                 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2947                 if (ret)
2948                         return ret;
2949         }
2950         return 0;
2951 }
2952
2953 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2954 {
2955         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2956
2957         arm_smmu_device_disable(smmu);
2958 }
2959
2960 static const struct of_device_id arm_smmu_of_match[] = {
2961         { .compatible = "arm,smmu-v3", },
2962         { },
2963 };
2964
2965 static struct platform_driver arm_smmu_driver = {
2966         .driver = {
2967                 .name           = "arm-smmu-v3",
2968                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2969                 .suppress_bind_attrs = true,
2970         },
2971         .probe  = arm_smmu_device_probe,
2972         .shutdown = arm_smmu_device_shutdown,
2973 };
2974 builtin_platform_driver(arm_smmu_driver);