Merge branch 'linus' into perf/urgent, to pick up dependent commits
[sfrench/cifs-2.6.git] / drivers / iommu / arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
63
64 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
65 #define TLB_SPIN_COUNT                  10
66
67 /* Maximum number of context banks per SMMU */
68 #define ARM_SMMU_MAX_CBS                128
69
70 /* SMMU global address space */
71 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
72 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
73
74 /*
75  * SMMU global address space with conditional offset to access secure
76  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
77  * nsGFSYNR0: 0x450)
78  */
79 #define ARM_SMMU_GR0_NS(smmu)                                           \
80         ((smmu)->base +                                                 \
81                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
82                         ? 0x400 : 0))
83
84 /*
85  * Some 64-bit registers only make sense to write atomically, but in such
86  * cases all the data relevant to AArch32 formats lies within the lower word,
87  * therefore this actually makes more sense than it might first appear.
88  */
89 #ifdef CONFIG_64BIT
90 #define smmu_write_atomic_lq            writeq_relaxed
91 #else
92 #define smmu_write_atomic_lq            writel_relaxed
93 #endif
94
95 /* Translation context bank */
96 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
97
98 #define MSI_IOVA_BASE                   0x8000000
99 #define MSI_IOVA_LENGTH                 0x100000
100
101 static int force_stage;
102 module_param(force_stage, int, S_IRUGO);
103 MODULE_PARM_DESC(force_stage,
104         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
105 static bool disable_bypass;
106 module_param(disable_bypass, bool, S_IRUGO);
107 MODULE_PARM_DESC(disable_bypass,
108         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
109
110 enum arm_smmu_arch_version {
111         ARM_SMMU_V1,
112         ARM_SMMU_V1_64K,
113         ARM_SMMU_V2,
114 };
115
116 enum arm_smmu_implementation {
117         GENERIC_SMMU,
118         ARM_MMU500,
119         CAVIUM_SMMUV2,
120 };
121
122 /* Until ACPICA headers cover IORT rev. C */
123 #ifndef ACPI_IORT_SMMU_CORELINK_MMU401
124 #define ACPI_IORT_SMMU_CORELINK_MMU401  0x4
125 #endif
126 #ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
127 #define ACPI_IORT_SMMU_CAVIUM_THUNDERX  0x5
128 #endif
129
130 struct arm_smmu_s2cr {
131         struct iommu_group              *group;
132         int                             count;
133         enum arm_smmu_s2cr_type         type;
134         enum arm_smmu_s2cr_privcfg      privcfg;
135         u8                              cbndx;
136 };
137
138 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
139         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
140 }
141
142 struct arm_smmu_smr {
143         u16                             mask;
144         u16                             id;
145         bool                            valid;
146 };
147
148 struct arm_smmu_cb {
149         u64                             ttbr[2];
150         u32                             tcr[2];
151         u32                             mair[2];
152         struct arm_smmu_cfg             *cfg;
153 };
154
155 struct arm_smmu_master_cfg {
156         struct arm_smmu_device          *smmu;
157         s16                             smendx[];
158 };
159 #define INVALID_SMENDX                  -1
160 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
161 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
162 #define fwspec_smendx(fw, i) \
163         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
164 #define for_each_cfg_sme(fw, i, idx) \
165         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
166
167 struct arm_smmu_device {
168         struct device                   *dev;
169
170         void __iomem                    *base;
171         void __iomem                    *cb_base;
172         unsigned long                   pgshift;
173
174 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
175 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
176 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
177 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
178 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
179 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
180 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
181 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
182 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
183 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
184 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
185 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
186 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
187         u32                             features;
188
189 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
190         u32                             options;
191         enum arm_smmu_arch_version      version;
192         enum arm_smmu_implementation    model;
193
194         u32                             num_context_banks;
195         u32                             num_s2_context_banks;
196         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
197         struct arm_smmu_cb              *cbs;
198         atomic_t                        irptndx;
199
200         u32                             num_mapping_groups;
201         u16                             streamid_mask;
202         u16                             smr_mask_mask;
203         struct arm_smmu_smr             *smrs;
204         struct arm_smmu_s2cr            *s2crs;
205         struct mutex                    stream_map_mutex;
206
207         unsigned long                   va_size;
208         unsigned long                   ipa_size;
209         unsigned long                   pa_size;
210         unsigned long                   pgsize_bitmap;
211
212         u32                             num_global_irqs;
213         u32                             num_context_irqs;
214         unsigned int                    *irqs;
215
216         u32                             cavium_id_base; /* Specific to Cavium */
217
218         spinlock_t                      global_sync_lock;
219
220         /* IOMMU core code handle */
221         struct iommu_device             iommu;
222 };
223
224 enum arm_smmu_context_fmt {
225         ARM_SMMU_CTX_FMT_NONE,
226         ARM_SMMU_CTX_FMT_AARCH64,
227         ARM_SMMU_CTX_FMT_AARCH32_L,
228         ARM_SMMU_CTX_FMT_AARCH32_S,
229 };
230
231 struct arm_smmu_cfg {
232         u8                              cbndx;
233         u8                              irptndx;
234         union {
235                 u16                     asid;
236                 u16                     vmid;
237         };
238         u32                             cbar;
239         enum arm_smmu_context_fmt       fmt;
240 };
241 #define INVALID_IRPTNDX                 0xff
242
243 enum arm_smmu_domain_stage {
244         ARM_SMMU_DOMAIN_S1 = 0,
245         ARM_SMMU_DOMAIN_S2,
246         ARM_SMMU_DOMAIN_NESTED,
247         ARM_SMMU_DOMAIN_BYPASS,
248 };
249
250 struct arm_smmu_domain {
251         struct arm_smmu_device          *smmu;
252         struct io_pgtable_ops           *pgtbl_ops;
253         struct arm_smmu_cfg             cfg;
254         enum arm_smmu_domain_stage      stage;
255         struct mutex                    init_mutex; /* Protects smmu pointer */
256         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
257         struct iommu_domain             domain;
258 };
259
260 struct arm_smmu_option_prop {
261         u32 opt;
262         const char *prop;
263 };
264
265 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
266
267 static bool using_legacy_binding, using_generic_binding;
268
269 static struct arm_smmu_option_prop arm_smmu_options[] = {
270         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
271         { 0, NULL},
272 };
273
274 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
275 {
276         return container_of(dom, struct arm_smmu_domain, domain);
277 }
278
279 static void parse_driver_options(struct arm_smmu_device *smmu)
280 {
281         int i = 0;
282
283         do {
284                 if (of_property_read_bool(smmu->dev->of_node,
285                                                 arm_smmu_options[i].prop)) {
286                         smmu->options |= arm_smmu_options[i].opt;
287                         dev_notice(smmu->dev, "option %s\n",
288                                 arm_smmu_options[i].prop);
289                 }
290         } while (arm_smmu_options[++i].opt);
291 }
292
293 static struct device_node *dev_get_dev_node(struct device *dev)
294 {
295         if (dev_is_pci(dev)) {
296                 struct pci_bus *bus = to_pci_dev(dev)->bus;
297
298                 while (!pci_is_root_bus(bus))
299                         bus = bus->parent;
300                 return of_node_get(bus->bridge->parent->of_node);
301         }
302
303         return of_node_get(dev->of_node);
304 }
305
306 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
307 {
308         *((__be32 *)data) = cpu_to_be32(alias);
309         return 0; /* Continue walking */
310 }
311
312 static int __find_legacy_master_phandle(struct device *dev, void *data)
313 {
314         struct of_phandle_iterator *it = *(void **)data;
315         struct device_node *np = it->node;
316         int err;
317
318         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
319                             "#stream-id-cells", 0)
320                 if (it->node == np) {
321                         *(void **)data = dev;
322                         return 1;
323                 }
324         it->node = np;
325         return err == -ENOENT ? 0 : err;
326 }
327
328 static struct platform_driver arm_smmu_driver;
329 static struct iommu_ops arm_smmu_ops;
330
331 static int arm_smmu_register_legacy_master(struct device *dev,
332                                            struct arm_smmu_device **smmu)
333 {
334         struct device *smmu_dev;
335         struct device_node *np;
336         struct of_phandle_iterator it;
337         void *data = &it;
338         u32 *sids;
339         __be32 pci_sid;
340         int err;
341
342         np = dev_get_dev_node(dev);
343         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
344                 of_node_put(np);
345                 return -ENODEV;
346         }
347
348         it.node = np;
349         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
350                                      __find_legacy_master_phandle);
351         smmu_dev = data;
352         of_node_put(np);
353         if (err == 0)
354                 return -ENODEV;
355         if (err < 0)
356                 return err;
357
358         if (dev_is_pci(dev)) {
359                 /* "mmu-masters" assumes Stream ID == Requester ID */
360                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
361                                        &pci_sid);
362                 it.cur = &pci_sid;
363                 it.cur_count = 1;
364         }
365
366         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
367                                 &arm_smmu_ops);
368         if (err)
369                 return err;
370
371         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
372         if (!sids)
373                 return -ENOMEM;
374
375         *smmu = dev_get_drvdata(smmu_dev);
376         of_phandle_iterator_args(&it, sids, it.cur_count);
377         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
378         kfree(sids);
379         return err;
380 }
381
382 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
383 {
384         int idx;
385
386         do {
387                 idx = find_next_zero_bit(map, end, start);
388                 if (idx == end)
389                         return -ENOSPC;
390         } while (test_and_set_bit(idx, map));
391
392         return idx;
393 }
394
395 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
396 {
397         clear_bit(idx, map);
398 }
399
400 /* Wait for any pending TLB invalidations to complete */
401 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
402                                 void __iomem *sync, void __iomem *status)
403 {
404         unsigned int spin_cnt, delay;
405
406         writel_relaxed(0, sync);
407         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
408                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
409                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
410                                 return;
411                         cpu_relax();
412                 }
413                 udelay(delay);
414         }
415         dev_err_ratelimited(smmu->dev,
416                             "TLB sync timed out -- SMMU may be deadlocked\n");
417 }
418
419 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
420 {
421         void __iomem *base = ARM_SMMU_GR0(smmu);
422         unsigned long flags;
423
424         spin_lock_irqsave(&smmu->global_sync_lock, flags);
425         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
426                             base + ARM_SMMU_GR0_sTLBGSTATUS);
427         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
428 }
429
430 static void arm_smmu_tlb_sync_context(void *cookie)
431 {
432         struct arm_smmu_domain *smmu_domain = cookie;
433         struct arm_smmu_device *smmu = smmu_domain->smmu;
434         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
435         unsigned long flags;
436
437         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
438         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
439                             base + ARM_SMMU_CB_TLBSTATUS);
440         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
441 }
442
443 static void arm_smmu_tlb_sync_vmid(void *cookie)
444 {
445         struct arm_smmu_domain *smmu_domain = cookie;
446
447         arm_smmu_tlb_sync_global(smmu_domain->smmu);
448 }
449
450 static void arm_smmu_tlb_inv_context_s1(void *cookie)
451 {
452         struct arm_smmu_domain *smmu_domain = cookie;
453         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
454         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
455
456         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
457         arm_smmu_tlb_sync_context(cookie);
458 }
459
460 static void arm_smmu_tlb_inv_context_s2(void *cookie)
461 {
462         struct arm_smmu_domain *smmu_domain = cookie;
463         struct arm_smmu_device *smmu = smmu_domain->smmu;
464         void __iomem *base = ARM_SMMU_GR0(smmu);
465
466         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
467         arm_smmu_tlb_sync_global(smmu);
468 }
469
470 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
471                                           size_t granule, bool leaf, void *cookie)
472 {
473         struct arm_smmu_domain *smmu_domain = cookie;
474         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
475         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
476         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
477
478         if (stage1) {
479                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
480
481                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
482                         iova &= ~12UL;
483                         iova |= cfg->asid;
484                         do {
485                                 writel_relaxed(iova, reg);
486                                 iova += granule;
487                         } while (size -= granule);
488                 } else {
489                         iova >>= 12;
490                         iova |= (u64)cfg->asid << 48;
491                         do {
492                                 writeq_relaxed(iova, reg);
493                                 iova += granule >> 12;
494                         } while (size -= granule);
495                 }
496         } else {
497                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
498                               ARM_SMMU_CB_S2_TLBIIPAS2;
499                 iova >>= 12;
500                 do {
501                         smmu_write_atomic_lq(iova, reg);
502                         iova += granule >> 12;
503                 } while (size -= granule);
504         }
505 }
506
507 /*
508  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
509  * almost negligible, but the benefit of getting the first one in as far ahead
510  * of the sync as possible is significant, hence we don't just make this a
511  * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
512  */
513 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
514                                          size_t granule, bool leaf, void *cookie)
515 {
516         struct arm_smmu_domain *smmu_domain = cookie;
517         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
518
519         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
520 }
521
522 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
523         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
524         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
525         .tlb_sync       = arm_smmu_tlb_sync_context,
526 };
527
528 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
529         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
530         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
531         .tlb_sync       = arm_smmu_tlb_sync_context,
532 };
533
534 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
535         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
536         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
537         .tlb_sync       = arm_smmu_tlb_sync_vmid,
538 };
539
540 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
541 {
542         u32 fsr, fsynr;
543         unsigned long iova;
544         struct iommu_domain *domain = dev;
545         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
546         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
547         struct arm_smmu_device *smmu = smmu_domain->smmu;
548         void __iomem *cb_base;
549
550         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
551         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
552
553         if (!(fsr & FSR_FAULT))
554                 return IRQ_NONE;
555
556         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
557         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
558
559         dev_err_ratelimited(smmu->dev,
560         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
561                             fsr, iova, fsynr, cfg->cbndx);
562
563         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
564         return IRQ_HANDLED;
565 }
566
567 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
568 {
569         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
570         struct arm_smmu_device *smmu = dev;
571         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
572
573         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
574         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
575         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
576         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
577
578         if (!gfsr)
579                 return IRQ_NONE;
580
581         dev_err_ratelimited(smmu->dev,
582                 "Unexpected global fault, this could be serious\n");
583         dev_err_ratelimited(smmu->dev,
584                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
585                 gfsr, gfsynr0, gfsynr1, gfsynr2);
586
587         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
588         return IRQ_HANDLED;
589 }
590
591 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
592                                        struct io_pgtable_cfg *pgtbl_cfg)
593 {
594         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
595         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
596         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
597
598         cb->cfg = cfg;
599
600         /* TTBCR */
601         if (stage1) {
602                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
603                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
604                 } else {
605                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
606                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
607                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
608                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
609                                 cb->tcr[1] |= TTBCR2_AS;
610                 }
611         } else {
612                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
613         }
614
615         /* TTBRs */
616         if (stage1) {
617                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
618                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
619                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
620                 } else {
621                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
622                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
623                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
624                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
625                 }
626         } else {
627                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
628         }
629
630         /* MAIRs (stage-1 only) */
631         if (stage1) {
632                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
633                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
634                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
635                 } else {
636                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
637                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
638                 }
639         }
640 }
641
642 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
643 {
644         u32 reg;
645         bool stage1;
646         struct arm_smmu_cb *cb = &smmu->cbs[idx];
647         struct arm_smmu_cfg *cfg = cb->cfg;
648         void __iomem *cb_base, *gr1_base;
649
650         cb_base = ARM_SMMU_CB(smmu, idx);
651
652         /* Unassigned context banks only need disabling */
653         if (!cfg) {
654                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
655                 return;
656         }
657
658         gr1_base = ARM_SMMU_GR1(smmu);
659         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
660
661         /* CBA2R */
662         if (smmu->version > ARM_SMMU_V1) {
663                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
664                         reg = CBA2R_RW64_64BIT;
665                 else
666                         reg = CBA2R_RW64_32BIT;
667                 /* 16-bit VMIDs live in CBA2R */
668                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
669                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
670
671                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
672         }
673
674         /* CBAR */
675         reg = cfg->cbar;
676         if (smmu->version < ARM_SMMU_V2)
677                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
678
679         /*
680          * Use the weakest shareability/memory types, so they are
681          * overridden by the ttbcr/pte.
682          */
683         if (stage1) {
684                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
685                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
686         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
687                 /* 8-bit VMIDs live in CBAR */
688                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
689         }
690         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
691
692         /*
693          * TTBCR
694          * We must write this before the TTBRs, since it determines the
695          * access behaviour of some fields (in particular, ASID[15:8]).
696          */
697         if (stage1 && smmu->version > ARM_SMMU_V1)
698                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
699         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
700
701         /* TTBRs */
702         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
703                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
704                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
705                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
706         } else {
707                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
708                 if (stage1)
709                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
710         }
711
712         /* MAIRs (stage-1 only) */
713         if (stage1) {
714                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
715                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
716         }
717
718         /* SCTLR */
719         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
720         if (stage1)
721                 reg |= SCTLR_S1_ASIDPNE;
722         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
723                 reg |= SCTLR_E;
724
725         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
726 }
727
728 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
729                                         struct arm_smmu_device *smmu)
730 {
731         int irq, start, ret = 0;
732         unsigned long ias, oas;
733         struct io_pgtable_ops *pgtbl_ops;
734         struct io_pgtable_cfg pgtbl_cfg;
735         enum io_pgtable_fmt fmt;
736         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
737         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
738         const struct iommu_gather_ops *tlb_ops;
739
740         mutex_lock(&smmu_domain->init_mutex);
741         if (smmu_domain->smmu)
742                 goto out_unlock;
743
744         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
745                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
746                 smmu_domain->smmu = smmu;
747                 goto out_unlock;
748         }
749
750         /*
751          * Mapping the requested stage onto what we support is surprisingly
752          * complicated, mainly because the spec allows S1+S2 SMMUs without
753          * support for nested translation. That means we end up with the
754          * following table:
755          *
756          * Requested        Supported        Actual
757          *     S1               N              S1
758          *     S1             S1+S2            S1
759          *     S1               S2             S2
760          *     S1               S1             S1
761          *     N                N              N
762          *     N              S1+S2            S2
763          *     N                S2             S2
764          *     N                S1             S1
765          *
766          * Note that you can't actually request stage-2 mappings.
767          */
768         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
769                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
770         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
771                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
772
773         /*
774          * Choosing a suitable context format is even more fiddly. Until we
775          * grow some way for the caller to express a preference, and/or move
776          * the decision into the io-pgtable code where it arguably belongs,
777          * just aim for the closest thing to the rest of the system, and hope
778          * that the hardware isn't esoteric enough that we can't assume AArch64
779          * support to be a superset of AArch32 support...
780          */
781         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
782                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
783         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
784             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
785             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
786             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
787                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
788         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
789             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
790                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
791                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
792                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
793
794         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
795                 ret = -EINVAL;
796                 goto out_unlock;
797         }
798
799         switch (smmu_domain->stage) {
800         case ARM_SMMU_DOMAIN_S1:
801                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
802                 start = smmu->num_s2_context_banks;
803                 ias = smmu->va_size;
804                 oas = smmu->ipa_size;
805                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
806                         fmt = ARM_64_LPAE_S1;
807                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
808                         fmt = ARM_32_LPAE_S1;
809                         ias = min(ias, 32UL);
810                         oas = min(oas, 40UL);
811                 } else {
812                         fmt = ARM_V7S;
813                         ias = min(ias, 32UL);
814                         oas = min(oas, 32UL);
815                 }
816                 tlb_ops = &arm_smmu_s1_tlb_ops;
817                 break;
818         case ARM_SMMU_DOMAIN_NESTED:
819                 /*
820                  * We will likely want to change this if/when KVM gets
821                  * involved.
822                  */
823         case ARM_SMMU_DOMAIN_S2:
824                 cfg->cbar = CBAR_TYPE_S2_TRANS;
825                 start = 0;
826                 ias = smmu->ipa_size;
827                 oas = smmu->pa_size;
828                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
829                         fmt = ARM_64_LPAE_S2;
830                 } else {
831                         fmt = ARM_32_LPAE_S2;
832                         ias = min(ias, 40UL);
833                         oas = min(oas, 40UL);
834                 }
835                 if (smmu->version == ARM_SMMU_V2)
836                         tlb_ops = &arm_smmu_s2_tlb_ops_v2;
837                 else
838                         tlb_ops = &arm_smmu_s2_tlb_ops_v1;
839                 break;
840         default:
841                 ret = -EINVAL;
842                 goto out_unlock;
843         }
844         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
845                                       smmu->num_context_banks);
846         if (ret < 0)
847                 goto out_unlock;
848
849         cfg->cbndx = ret;
850         if (smmu->version < ARM_SMMU_V2) {
851                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
852                 cfg->irptndx %= smmu->num_context_irqs;
853         } else {
854                 cfg->irptndx = cfg->cbndx;
855         }
856
857         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
858                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
859         else
860                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
861
862         pgtbl_cfg = (struct io_pgtable_cfg) {
863                 .pgsize_bitmap  = smmu->pgsize_bitmap,
864                 .ias            = ias,
865                 .oas            = oas,
866                 .tlb            = tlb_ops,
867                 .iommu_dev      = smmu->dev,
868         };
869
870         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
871                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
872
873         smmu_domain->smmu = smmu;
874         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
875         if (!pgtbl_ops) {
876                 ret = -ENOMEM;
877                 goto out_clear_smmu;
878         }
879
880         /* Update the domain's page sizes to reflect the page table format */
881         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
882         domain->geometry.aperture_end = (1UL << ias) - 1;
883         domain->geometry.force_aperture = true;
884
885         /* Initialise the context bank with our page table cfg */
886         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
887         arm_smmu_write_context_bank(smmu, cfg->cbndx);
888
889         /*
890          * Request context fault interrupt. Do this last to avoid the
891          * handler seeing a half-initialised domain state.
892          */
893         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
894         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
895                                IRQF_SHARED, "arm-smmu-context-fault", domain);
896         if (ret < 0) {
897                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
898                         cfg->irptndx, irq);
899                 cfg->irptndx = INVALID_IRPTNDX;
900         }
901
902         mutex_unlock(&smmu_domain->init_mutex);
903
904         /* Publish page table ops for map/unmap */
905         smmu_domain->pgtbl_ops = pgtbl_ops;
906         return 0;
907
908 out_clear_smmu:
909         smmu_domain->smmu = NULL;
910 out_unlock:
911         mutex_unlock(&smmu_domain->init_mutex);
912         return ret;
913 }
914
915 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
916 {
917         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
918         struct arm_smmu_device *smmu = smmu_domain->smmu;
919         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
920         int irq;
921
922         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
923                 return;
924
925         /*
926          * Disable the context bank and free the page tables before freeing
927          * it.
928          */
929         smmu->cbs[cfg->cbndx].cfg = NULL;
930         arm_smmu_write_context_bank(smmu, cfg->cbndx);
931
932         if (cfg->irptndx != INVALID_IRPTNDX) {
933                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
934                 devm_free_irq(smmu->dev, irq, domain);
935         }
936
937         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
938         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
939 }
940
941 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
942 {
943         struct arm_smmu_domain *smmu_domain;
944
945         if (type != IOMMU_DOMAIN_UNMANAGED &&
946             type != IOMMU_DOMAIN_DMA &&
947             type != IOMMU_DOMAIN_IDENTITY)
948                 return NULL;
949         /*
950          * Allocate the domain and initialise some of its data structures.
951          * We can't really do anything meaningful until we've added a
952          * master.
953          */
954         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
955         if (!smmu_domain)
956                 return NULL;
957
958         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
959             iommu_get_dma_cookie(&smmu_domain->domain))) {
960                 kfree(smmu_domain);
961                 return NULL;
962         }
963
964         mutex_init(&smmu_domain->init_mutex);
965         spin_lock_init(&smmu_domain->cb_lock);
966
967         return &smmu_domain->domain;
968 }
969
970 static void arm_smmu_domain_free(struct iommu_domain *domain)
971 {
972         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
973
974         /*
975          * Free the domain resources. We assume that all devices have
976          * already been detached.
977          */
978         iommu_put_dma_cookie(domain);
979         arm_smmu_destroy_domain_context(domain);
980         kfree(smmu_domain);
981 }
982
983 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
984 {
985         struct arm_smmu_smr *smr = smmu->smrs + idx;
986         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
987
988         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
989                 reg |= SMR_VALID;
990         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
991 }
992
993 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
994 {
995         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
996         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
997                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
998                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
999
1000         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1001             smmu->smrs[idx].valid)
1002                 reg |= S2CR_EXIDVALID;
1003         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1004 }
1005
1006 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1007 {
1008         arm_smmu_write_s2cr(smmu, idx);
1009         if (smmu->smrs)
1010                 arm_smmu_write_smr(smmu, idx);
1011 }
1012
1013 /*
1014  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1015  * should be called after sCR0 is written.
1016  */
1017 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1018 {
1019         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1020         u32 smr;
1021
1022         if (!smmu->smrs)
1023                 return;
1024
1025         /*
1026          * SMR.ID bits may not be preserved if the corresponding MASK
1027          * bits are set, so check each one separately. We can reject
1028          * masters later if they try to claim IDs outside these masks.
1029          */
1030         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1031         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1032         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1033         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1034
1035         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1036         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1037         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1038         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1039 }
1040
1041 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1042 {
1043         struct arm_smmu_smr *smrs = smmu->smrs;
1044         int i, free_idx = -ENOSPC;
1045
1046         /* Stream indexing is blissfully easy */
1047         if (!smrs)
1048                 return id;
1049
1050         /* Validating SMRs is... less so */
1051         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1052                 if (!smrs[i].valid) {
1053                         /*
1054                          * Note the first free entry we come across, which
1055                          * we'll claim in the end if nothing else matches.
1056                          */
1057                         if (free_idx < 0)
1058                                 free_idx = i;
1059                         continue;
1060                 }
1061                 /*
1062                  * If the new entry is _entirely_ matched by an existing entry,
1063                  * then reuse that, with the guarantee that there also cannot
1064                  * be any subsequent conflicting entries. In normal use we'd
1065                  * expect simply identical entries for this case, but there's
1066                  * no harm in accommodating the generalisation.
1067                  */
1068                 if ((mask & smrs[i].mask) == mask &&
1069                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1070                         return i;
1071                 /*
1072                  * If the new entry has any other overlap with an existing one,
1073                  * though, then there always exists at least one stream ID
1074                  * which would cause a conflict, and we can't allow that risk.
1075                  */
1076                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1077                         return -EINVAL;
1078         }
1079
1080         return free_idx;
1081 }
1082
1083 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1084 {
1085         if (--smmu->s2crs[idx].count)
1086                 return false;
1087
1088         smmu->s2crs[idx] = s2cr_init_val;
1089         if (smmu->smrs)
1090                 smmu->smrs[idx].valid = false;
1091
1092         return true;
1093 }
1094
1095 static int arm_smmu_master_alloc_smes(struct device *dev)
1096 {
1097         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1098         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1099         struct arm_smmu_device *smmu = cfg->smmu;
1100         struct arm_smmu_smr *smrs = smmu->smrs;
1101         struct iommu_group *group;
1102         int i, idx, ret;
1103
1104         mutex_lock(&smmu->stream_map_mutex);
1105         /* Figure out a viable stream map entry allocation */
1106         for_each_cfg_sme(fwspec, i, idx) {
1107                 u16 sid = fwspec->ids[i];
1108                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1109
1110                 if (idx != INVALID_SMENDX) {
1111                         ret = -EEXIST;
1112                         goto out_err;
1113                 }
1114
1115                 ret = arm_smmu_find_sme(smmu, sid, mask);
1116                 if (ret < 0)
1117                         goto out_err;
1118
1119                 idx = ret;
1120                 if (smrs && smmu->s2crs[idx].count == 0) {
1121                         smrs[idx].id = sid;
1122                         smrs[idx].mask = mask;
1123                         smrs[idx].valid = true;
1124                 }
1125                 smmu->s2crs[idx].count++;
1126                 cfg->smendx[i] = (s16)idx;
1127         }
1128
1129         group = iommu_group_get_for_dev(dev);
1130         if (!group)
1131                 group = ERR_PTR(-ENOMEM);
1132         if (IS_ERR(group)) {
1133                 ret = PTR_ERR(group);
1134                 goto out_err;
1135         }
1136         iommu_group_put(group);
1137
1138         /* It worked! Now, poke the actual hardware */
1139         for_each_cfg_sme(fwspec, i, idx) {
1140                 arm_smmu_write_sme(smmu, idx);
1141                 smmu->s2crs[idx].group = group;
1142         }
1143
1144         mutex_unlock(&smmu->stream_map_mutex);
1145         return 0;
1146
1147 out_err:
1148         while (i--) {
1149                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1150                 cfg->smendx[i] = INVALID_SMENDX;
1151         }
1152         mutex_unlock(&smmu->stream_map_mutex);
1153         return ret;
1154 }
1155
1156 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1157 {
1158         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1159         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1160         int i, idx;
1161
1162         mutex_lock(&smmu->stream_map_mutex);
1163         for_each_cfg_sme(fwspec, i, idx) {
1164                 if (arm_smmu_free_sme(smmu, idx))
1165                         arm_smmu_write_sme(smmu, idx);
1166                 cfg->smendx[i] = INVALID_SMENDX;
1167         }
1168         mutex_unlock(&smmu->stream_map_mutex);
1169 }
1170
1171 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1172                                       struct iommu_fwspec *fwspec)
1173 {
1174         struct arm_smmu_device *smmu = smmu_domain->smmu;
1175         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1176         u8 cbndx = smmu_domain->cfg.cbndx;
1177         enum arm_smmu_s2cr_type type;
1178         int i, idx;
1179
1180         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1181                 type = S2CR_TYPE_BYPASS;
1182         else
1183                 type = S2CR_TYPE_TRANS;
1184
1185         for_each_cfg_sme(fwspec, i, idx) {
1186                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1187                         continue;
1188
1189                 s2cr[idx].type = type;
1190                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1191                 s2cr[idx].cbndx = cbndx;
1192                 arm_smmu_write_s2cr(smmu, idx);
1193         }
1194         return 0;
1195 }
1196
1197 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1198 {
1199         int ret;
1200         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1201         struct arm_smmu_device *smmu;
1202         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1203
1204         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1205                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1206                 return -ENXIO;
1207         }
1208
1209         /*
1210          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1211          * domains between of_xlate() and add_device() - we have no way to cope
1212          * with that, so until ARM gets converted to rely on groups and default
1213          * domains, just say no (but more politely than by dereferencing NULL).
1214          * This should be at least a WARN_ON once that's sorted.
1215          */
1216         if (!fwspec->iommu_priv)
1217                 return -ENODEV;
1218
1219         smmu = fwspec_smmu(fwspec);
1220         /* Ensure that the domain is finalised */
1221         ret = arm_smmu_init_domain_context(domain, smmu);
1222         if (ret < 0)
1223                 return ret;
1224
1225         /*
1226          * Sanity check the domain. We don't support domains across
1227          * different SMMUs.
1228          */
1229         if (smmu_domain->smmu != smmu) {
1230                 dev_err(dev,
1231                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1232                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1233                 return -EINVAL;
1234         }
1235
1236         /* Looks ok, so add the device to the domain */
1237         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1238 }
1239
1240 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1241                         phys_addr_t paddr, size_t size, int prot)
1242 {
1243         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1244
1245         if (!ops)
1246                 return -ENODEV;
1247
1248         return ops->map(ops, iova, paddr, size, prot);
1249 }
1250
1251 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1252                              size_t size)
1253 {
1254         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1255
1256         if (!ops)
1257                 return 0;
1258
1259         return ops->unmap(ops, iova, size);
1260 }
1261
1262 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1263                                               dma_addr_t iova)
1264 {
1265         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1266         struct arm_smmu_device *smmu = smmu_domain->smmu;
1267         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1268         struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1269         struct device *dev = smmu->dev;
1270         void __iomem *cb_base;
1271         u32 tmp;
1272         u64 phys;
1273         unsigned long va, flags;
1274
1275         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1276
1277         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1278         /* ATS1 registers can only be written atomically */
1279         va = iova & ~0xfffUL;
1280         if (smmu->version == ARM_SMMU_V2)
1281                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1282         else /* Register is only 32-bit in v1 */
1283                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1284
1285         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1286                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1287                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1288                 dev_err(dev,
1289                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1290                         &iova);
1291                 return ops->iova_to_phys(ops, iova);
1292         }
1293
1294         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1295         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1296         if (phys & CB_PAR_F) {
1297                 dev_err(dev, "translation fault!\n");
1298                 dev_err(dev, "PAR = 0x%llx\n", phys);
1299                 return 0;
1300         }
1301
1302         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1303 }
1304
1305 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1306                                         dma_addr_t iova)
1307 {
1308         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1309         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1310
1311         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1312                 return iova;
1313
1314         if (!ops)
1315                 return 0;
1316
1317         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1318                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1319                 return arm_smmu_iova_to_phys_hard(domain, iova);
1320
1321         return ops->iova_to_phys(ops, iova);
1322 }
1323
1324 static bool arm_smmu_capable(enum iommu_cap cap)
1325 {
1326         switch (cap) {
1327         case IOMMU_CAP_CACHE_COHERENCY:
1328                 /*
1329                  * Return true here as the SMMU can always send out coherent
1330                  * requests.
1331                  */
1332                 return true;
1333         case IOMMU_CAP_NOEXEC:
1334                 return true;
1335         default:
1336                 return false;
1337         }
1338 }
1339
1340 static int arm_smmu_match_node(struct device *dev, void *data)
1341 {
1342         return dev->fwnode == data;
1343 }
1344
1345 static
1346 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1347 {
1348         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1349                                                 fwnode, arm_smmu_match_node);
1350         put_device(dev);
1351         return dev ? dev_get_drvdata(dev) : NULL;
1352 }
1353
1354 static int arm_smmu_add_device(struct device *dev)
1355 {
1356         struct arm_smmu_device *smmu;
1357         struct arm_smmu_master_cfg *cfg;
1358         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1359         int i, ret;
1360
1361         if (using_legacy_binding) {
1362                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1363
1364                 /*
1365                  * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1366                  * will allocate/initialise a new one. Thus we need to update fwspec for
1367                  * later use.
1368                  */
1369                 fwspec = dev->iommu_fwspec;
1370                 if (ret)
1371                         goto out_free;
1372         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1373                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1374         } else {
1375                 return -ENODEV;
1376         }
1377
1378         ret = -EINVAL;
1379         for (i = 0; i < fwspec->num_ids; i++) {
1380                 u16 sid = fwspec->ids[i];
1381                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1382
1383                 if (sid & ~smmu->streamid_mask) {
1384                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1385                                 sid, smmu->streamid_mask);
1386                         goto out_free;
1387                 }
1388                 if (mask & ~smmu->smr_mask_mask) {
1389                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1390                                 mask, smmu->smr_mask_mask);
1391                         goto out_free;
1392                 }
1393         }
1394
1395         ret = -ENOMEM;
1396         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1397                       GFP_KERNEL);
1398         if (!cfg)
1399                 goto out_free;
1400
1401         cfg->smmu = smmu;
1402         fwspec->iommu_priv = cfg;
1403         while (i--)
1404                 cfg->smendx[i] = INVALID_SMENDX;
1405
1406         ret = arm_smmu_master_alloc_smes(dev);
1407         if (ret)
1408                 goto out_cfg_free;
1409
1410         iommu_device_link(&smmu->iommu, dev);
1411
1412         return 0;
1413
1414 out_cfg_free:
1415         kfree(cfg);
1416 out_free:
1417         iommu_fwspec_free(dev);
1418         return ret;
1419 }
1420
1421 static void arm_smmu_remove_device(struct device *dev)
1422 {
1423         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1424         struct arm_smmu_master_cfg *cfg;
1425         struct arm_smmu_device *smmu;
1426
1427
1428         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1429                 return;
1430
1431         cfg  = fwspec->iommu_priv;
1432         smmu = cfg->smmu;
1433
1434         iommu_device_unlink(&smmu->iommu, dev);
1435         arm_smmu_master_free_smes(fwspec);
1436         iommu_group_remove_device(dev);
1437         kfree(fwspec->iommu_priv);
1438         iommu_fwspec_free(dev);
1439 }
1440
1441 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1442 {
1443         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1444         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1445         struct iommu_group *group = NULL;
1446         int i, idx;
1447
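             /*
              * All stream mapping entries used by this device must belong to
              * the same IOMMU group: reuse an existing S2CR group if there is
              * one, and reject devices whose SMEs span different groups.
              */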
1448         for_each_cfg_sme(fwspec, i, idx) {
1449                 if (group && smmu->s2crs[idx].group &&
1450                     group != smmu->s2crs[idx].group)
1451                         return ERR_PTR(-EINVAL);
1452
1453                 group = smmu->s2crs[idx].group;
1454         }
1455
1456         if (group)
1457                 return iommu_group_ref_get(group);
1458
1459         if (dev_is_pci(dev))
1460                 group = pci_device_group(dev);
1461         else
1462                 group = generic_device_group(dev);
1463
1464         return group;
1465 }
1466
1467 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1468                                     enum iommu_attr attr, void *data)
1469 {
1470         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1471
1472         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1473                 return -EINVAL;
1474
1475         switch (attr) {
1476         case DOMAIN_ATTR_NESTING:
1477                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1478                 return 0;
1479         default:
1480                 return -ENODEV;
1481         }
1482 }
1483
1484 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1485                                     enum iommu_attr attr, void *data)
1486 {
1487         int ret = 0;
1488         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1489
1490         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1491                 return -EINVAL;
1492
1493         mutex_lock(&smmu_domain->init_mutex);
1494
1495         switch (attr) {
1496         case DOMAIN_ATTR_NESTING:
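                     /* The stage cannot be changed once the domain is attached to an SMMU */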
1497                 if (smmu_domain->smmu) {
1498                         ret = -EPERM;
1499                         goto out_unlock;
1500                 }
1501
1502                 if (*(int *)data)
1503                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1504                 else
1505                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1506
1507                 break;
1508         default:
1509                 ret = -ENODEV;
1510         }
1511
1512 out_unlock:
1513         mutex_unlock(&smmu_domain->init_mutex);
1514         return ret;
1515 }
1516
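     /*
      * Translate a generic-binding "iommus" specifier into a single fwspec
      * ID: the first cell provides the Stream ID, and the SMR mask (taken
      * from a second cell if present, otherwise from the optional
      * "stream-match-mask" property) is placed above SMR_MASK_SHIFT.
      */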
1517 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1518 {
1519         u32 mask, fwid = 0;
1520
1521         if (args->args_count > 0)
1522                 fwid |= (u16)args->args[0];
1523
1524         if (args->args_count > 1)
1525                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1526         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1527                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1528
1529         return iommu_fwspec_add_ids(dev, &fwid, 1);
1530 }
1531
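     /*
      * Report the regions that must stay reserved in a domain's IOVA space:
      * a software-managed MSI window at MSI_IOVA_BASE for mapping MSI
      * doorbells, plus whatever iommu_dma_get_resv_regions() adds for this
      * device.
      */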
1532 static void arm_smmu_get_resv_regions(struct device *dev,
1533                                       struct list_head *head)
1534 {
1535         struct iommu_resv_region *region;
1536         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1537
1538         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1539                                          prot, IOMMU_RESV_SW_MSI);
1540         if (!region)
1541                 return;
1542
1543         list_add_tail(&region->list, head);
1544
1545         iommu_dma_get_resv_regions(dev, head);
1546 }
1547
1548 static void arm_smmu_put_resv_regions(struct device *dev,
1549                                       struct list_head *head)
1550 {
1551         struct iommu_resv_region *entry, *next;
1552
1553         list_for_each_entry_safe(entry, next, head, list)
1554                 kfree(entry);
1555 }
1556
1557 static struct iommu_ops arm_smmu_ops = {
1558         .capable                = arm_smmu_capable,
1559         .domain_alloc           = arm_smmu_domain_alloc,
1560         .domain_free            = arm_smmu_domain_free,
1561         .attach_dev             = arm_smmu_attach_dev,
1562         .map                    = arm_smmu_map,
1563         .unmap                  = arm_smmu_unmap,
1564         .map_sg                 = default_iommu_map_sg,
1565         .iova_to_phys           = arm_smmu_iova_to_phys,
1566         .add_device             = arm_smmu_add_device,
1567         .remove_device          = arm_smmu_remove_device,
1568         .device_group           = arm_smmu_device_group,
1569         .domain_get_attr        = arm_smmu_domain_get_attr,
1570         .domain_set_attr        = arm_smmu_domain_set_attr,
1571         .of_xlate               = arm_smmu_of_xlate,
1572         .get_resv_regions       = arm_smmu_get_resv_regions,
1573         .put_resv_regions       = arm_smmu_put_resv_regions,
1574         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1575 };
1576
1577 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1578 {
1579         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1580         int i;
1581         u32 reg, major;
1582
1583         /* clear global FSR */
1584         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1585         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1586
1587         /*
1588          * Reset stream mapping groups: Initial values mark all SMRn as
1589          * invalid and all S2CRn as bypass unless overridden.
1590          */
1591         for (i = 0; i < smmu->num_mapping_groups; ++i)
1592                 arm_smmu_write_sme(smmu, i);
1593
1594         if (smmu->model == ARM_MMU500) {
1595                 /*
1596                  * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK
1597                  * bit of sACR must be cleared first; CACHE_LOCK is only
1598                  * present in MMU-500r2 onwards.
1599                  */
1600                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1601                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1602                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1603                 if (major >= 2)
1604                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1605                 /*
1606                  * Allow unmatched Stream IDs to allocate bypass
1607                  * TLB entries for reduced latency.
1608                  */
1609                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1610                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1611         }
1612
1613         /* Make sure all context banks are disabled and clear CB_FSR  */
1614         for (i = 0; i < smmu->num_context_banks; ++i) {
1615                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1616
1617                 arm_smmu_write_context_bank(smmu, i);
1618                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1619                 /*
1620                  * Disable MMU-500's not-particularly-beneficial next-page
1621                  * prefetcher for the sake of errata #841119 and #826419.
1622                  */
1623                 if (smmu->model == ARM_MMU500) {
1624                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1625                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1626                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1627                 }
1628         }
1629
1630         /* Invalidate the TLB, just in case */
1631         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1632         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1633
1634         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1635
1636         /* Enable fault reporting */
1637         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1638
1639         /* Disable TLB broadcasting. */
1640         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1641
1642         /* Enable client access, handling unmatched streams as appropriate */
1643         reg &= ~sCR0_CLIENTPD;
1644         if (disable_bypass)
1645                 reg |= sCR0_USFCFG;
1646         else
1647                 reg &= ~sCR0_USFCFG;
1648
1649         /* Disable forced broadcasting */
1650         reg &= ~sCR0_FB;
1651
1652         /* Don't upgrade barriers */
1653         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1654
1655         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1656                 reg |= sCR0_VMID16EN;
1657
1658         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1659                 reg |= sCR0_EXIDENABLE;
1660
1661         /* Push the button: finish the TLB invalidation, then enable the SMMU */
1662         arm_smmu_tlb_sync_global(smmu);
1663         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1664 }
1665
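     /* Decode the IAS/OAS/UBS size-field encodings of ID2 into address bits */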
1666 static int arm_smmu_id_size_to_bits(int size)
1667 {
1668         switch (size) {
1669         case 0:
1670                 return 32;
1671         case 1:
1672                 return 36;
1673         case 2:
1674                 return 40;
1675         case 3:
1676                 return 42;
1677         case 4:
1678                 return 44;
1679         case 5:
1680         default:
1681                 return 48;
1682         }
1683 }
1684
1685 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1686 {
1687         unsigned long size;
1688         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1689         u32 id;
1690         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1691         int i;
1692
1693         dev_notice(smmu->dev, "probing hardware configuration...\n");
1694         dev_notice(smmu->dev, "SMMUv%d with:\n",
1695                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1696
1697         /* ID0 */
1698         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1699
1700         /* Restrict available stages based on module parameter */
1701         if (force_stage == 1)
1702                 id &= ~(ID0_S2TS | ID0_NTS);
1703         else if (force_stage == 2)
1704                 id &= ~(ID0_S1TS | ID0_NTS);
1705
1706         if (id & ID0_S1TS) {
1707                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1708                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1709         }
1710
1711         if (id & ID0_S2TS) {
1712                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1713                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1714         }
1715
1716         if (id & ID0_NTS) {
1717                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1718                 dev_notice(smmu->dev, "\tnested translation\n");
1719         }
1720
1721         if (!(smmu->features &
1722                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1723                 dev_err(smmu->dev, "\tno translation support!\n");
1724                 return -ENODEV;
1725         }
1726
1727         if ((id & ID0_S1TS) &&
1728                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1729                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1730                 dev_notice(smmu->dev, "\taddress translation ops\n");
1731         }
1732
1733         /*
1734          * In order for DMA API calls to work properly, we must defer to what
1735          * the FW says about coherency, regardless of what the hardware claims.
1736          * Fortunately, this also opens up a workaround for systems where the
1737          * ID register value has ended up configured incorrectly.
1738          */
1739         cttw_reg = !!(id & ID0_CTTW);
1740         if (cttw_fw || cttw_reg)
1741                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1742                            cttw_fw ? "" : "non-");
1743         if (cttw_fw != cttw_reg)
1744                 dev_notice(smmu->dev,
1745                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1746
1747         /* Max. number of entries we have for stream matching/indexing */
1748         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1749                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1750                 size = 1 << 16;
1751         } else {
1752                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1753         }
1754         smmu->streamid_mask = size - 1;
1755         if (id & ID0_SMS) {
1756                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1757                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1758                 if (size == 0) {
1759                         dev_err(smmu->dev,
1760                                 "stream-matching supported, but no SMRs present!\n");
1761                         return -ENODEV;
1762                 }
1763
1764                 /* Zero-initialised to mark as invalid */
1765                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1766                                           GFP_KERNEL);
1767                 if (!smmu->smrs)
1768                         return -ENOMEM;
1769
1770                 dev_notice(smmu->dev,
1771                            "\tstream matching with %lu register groups\n", size);
1772         }
1773         /* s2cr->type == 0 means translation, so initialise explicitly */
1774         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1775                                          GFP_KERNEL);
1776         if (!smmu->s2crs)
1777                 return -ENOMEM;
1778         for (i = 0; i < size; i++)
1779                 smmu->s2crs[i] = s2cr_init_val;
1780
1781         smmu->num_mapping_groups = size;
1782         mutex_init(&smmu->stream_map_mutex);
1783         spin_lock_init(&smmu->global_sync_lock);
1784
1785         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1786                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1787                 if (!(id & ID0_PTFS_NO_AARCH32S))
1788                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1789         }
1790
1791         /* ID1 */
1792         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1793         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1794
1795         /* Check for size mismatch of SMMU address space from mapped region */
1796         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1797         size <<= smmu->pgshift;
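             /*
              * The global register space occupies the bottom half of the SMMU
              * and the context banks the top half, hence the factor of two
              * below.
              */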
1798         if (smmu->cb_base != gr0_base + size)
1799                 dev_warn(smmu->dev,
1800                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1801                         size * 2, (smmu->cb_base - gr0_base) * 2);
1802
1803         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1804         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1805         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1806                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1807                 return -ENODEV;
1808         }
1809         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1810                    smmu->num_context_banks, smmu->num_s2_context_banks);
1811         /*
1812          * Cavium CN88xx erratum #27704.
1813          * Ensure ASID and VMID allocation is unique across all SMMUs in
1814          * the system.
1815          */
1816         if (smmu->model == CAVIUM_SMMUV2) {
1817                 smmu->cavium_id_base =
1818                         atomic_add_return(smmu->num_context_banks,
1819                                           &cavium_smmu_context_count);
1820                 smmu->cavium_id_base -= smmu->num_context_banks;
1821                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1822         }
1823         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1824                                  sizeof(*smmu->cbs), GFP_KERNEL);
1825         if (!smmu->cbs)
1826                 return -ENOMEM;
1827
1828         /* ID2 */
1829         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1830         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1831         smmu->ipa_size = size;
1832
1833         /* The output mask is also applied for bypass */
1834         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1835         smmu->pa_size = size;
1836
1837         if (id & ID2_VMID16)
1838                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1839
1840         /*
1841          * What the page table walker can address actually depends on which
1842          * descriptor format is in use, but since a) we don't know that yet,
1843          * and b) it can vary per context bank, this will have to do...
1844          */
1845         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1846                 dev_warn(smmu->dev,
1847                          "failed to set DMA mask for table walker\n");
1848
1849         if (smmu->version < ARM_SMMU_V2) {
1850                 smmu->va_size = smmu->ipa_size;
1851                 if (smmu->version == ARM_SMMU_V1_64K)
1852                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1853         } else {
1854                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1855                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1856                 if (id & ID2_PTFS_4K)
1857                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1858                 if (id & ID2_PTFS_16K)
1859                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1860                 if (id & ID2_PTFS_64K)
1861                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1862         }
1863
1864         /* Now we've corralled the various formats, what'll it do? */
1865         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1866                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1867         if (smmu->features &
1868             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1869                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1870         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1871                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1872         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1873                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1874
1875         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1876                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1877         else
1878                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1879         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1880                    smmu->pgsize_bitmap);
1881
1882
1883         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1884                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1885                            smmu->va_size, smmu->ipa_size);
1886
1887         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1888                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1889                            smmu->ipa_size, smmu->pa_size);
1890
1891         return 0;
1892 }
1893
1894 struct arm_smmu_match_data {
1895         enum arm_smmu_arch_version version;
1896         enum arm_smmu_implementation model;
1897 };
1898
1899 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1900 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1901
1902 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1903 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1904 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1905 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1906 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1907
1908 static const struct of_device_id arm_smmu_of_match[] = {
1909         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1910         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1911         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1912         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1913         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1914         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1915         { },
1916 };
1917 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1918
1919 #ifdef CONFIG_ACPI
1920 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1921 {
1922         int ret = 0;
1923
1924         switch (model) {
1925         case ACPI_IORT_SMMU_V1:
1926         case ACPI_IORT_SMMU_CORELINK_MMU400:
1927                 smmu->version = ARM_SMMU_V1;
1928                 smmu->model = GENERIC_SMMU;
1929                 break;
1930         case ACPI_IORT_SMMU_CORELINK_MMU401:
1931                 smmu->version = ARM_SMMU_V1_64K;
1932                 smmu->model = GENERIC_SMMU;
1933                 break;
1934         case ACPI_IORT_SMMU_V2:
1935                 smmu->version = ARM_SMMU_V2;
1936                 smmu->model = GENERIC_SMMU;
1937                 break;
1938         case ACPI_IORT_SMMU_CORELINK_MMU500:
1939                 smmu->version = ARM_SMMU_V2;
1940                 smmu->model = ARM_MMU500;
1941                 break;
1942         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1943                 smmu->version = ARM_SMMU_V2;
1944                 smmu->model = CAVIUM_SMMUV2;
1945                 break;
1946         default:
1947                 ret = -ENODEV;
1948         }
1949
1950         return ret;
1951 }
1952
1953 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1954                                       struct arm_smmu_device *smmu)
1955 {
1956         struct device *dev = smmu->dev;
1957         struct acpi_iort_node *node =
1958                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1959         struct acpi_iort_smmu *iort_smmu;
1960         int ret;
1961
1962         /* Retrieve SMMU1/2 specific data */
1963         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1964
1965         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1966         if (ret < 0)
1967                 return ret;
1968
1969         /* Ignore the configuration access interrupt */
1970         smmu->num_global_irqs = 1;
1971
1972         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1973                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1974
1975         return 0;
1976 }
1977 #else
1978 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1979                                              struct arm_smmu_device *smmu)
1980 {
1981         return -ENODEV;
1982 }
1983 #endif
1984
1985 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1986                                     struct arm_smmu_device *smmu)
1987 {
1988         const struct arm_smmu_match_data *data;
1989         struct device *dev = &pdev->dev;
1990         bool legacy_binding;
1991
1992         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1993                                  &smmu->num_global_irqs)) {
1994                 dev_err(dev, "missing #global-interrupts property\n");
1995                 return -ENODEV;
1996         }
1997
1998         data = of_device_get_match_data(dev);
1999         smmu->version = data->version;
2000         smmu->model = data->model;
2001
2002         parse_driver_options(smmu);
2003
2004         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2005         if (legacy_binding && !using_generic_binding) {
2006                 if (!using_legacy_binding)
2007                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2008                 using_legacy_binding = true;
2009         } else if (!legacy_binding && !using_legacy_binding) {
2010                 using_generic_binding = true;
2011         } else {
2012                 dev_err(dev, "not probing due to mismatched DT properties\n");
2013                 return -ENODEV;
2014         }
2015
2016         if (of_dma_is_coherent(dev->of_node))
2017                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2018
2019         return 0;
2020 }
2021
2022 static void arm_smmu_bus_init(void)
2023 {
2024         /* Oh, for a proper bus abstraction */
2025         if (!iommu_present(&platform_bus_type))
2026                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2027 #ifdef CONFIG_ARM_AMBA
2028         if (!iommu_present(&amba_bustype))
2029                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2030 #endif
2031 #ifdef CONFIG_PCI
2032         if (!iommu_present(&pci_bus_type)) {
2033                 pci_request_acs();
2034                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2035         }
2036 #endif
2037 }
2038
2039 static int arm_smmu_device_probe(struct platform_device *pdev)
2040 {
2041         struct resource *res;
2042         resource_size_t ioaddr;
2043         struct arm_smmu_device *smmu;
2044         struct device *dev = &pdev->dev;
2045         int num_irqs, i, err;
2046
2047         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2048         if (!smmu) {
2049                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2050                 return -ENOMEM;
2051         }
2052         smmu->dev = dev;
2053
2054         if (dev->of_node)
2055                 err = arm_smmu_device_dt_probe(pdev, smmu);
2056         else
2057                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2058
2059         if (err)
2060                 return err;
2061
2062         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2063         smmu->base = devm_ioremap_resource(dev, res);
2064         if (IS_ERR(smmu->base))
2065                 return PTR_ERR(smmu->base);
2066         ioaddr = res->start;
2067         smmu->cb_base = smmu->base + resource_size(res) / 2;
2068
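             /*
              * The first num_global_irqs interrupts are global; anything
              * beyond that is counted as a context bank interrupt and
              * checked against the number of context banks later on.
              */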
2069         num_irqs = 0;
2070         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2071                 num_irqs++;
2072                 if (num_irqs > smmu->num_global_irqs)
2073                         smmu->num_context_irqs++;
2074         }
2075
2076         if (!smmu->num_context_irqs) {
2077                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2078                         num_irqs, smmu->num_global_irqs + 1);
2079                 return -ENODEV;
2080         }
2081
2082         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2083                                   GFP_KERNEL);
2084         if (!smmu->irqs) {
2085                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2086                 return -ENOMEM;
2087         }
2088
2089         for (i = 0; i < num_irqs; ++i) {
2090                 int irq = platform_get_irq(pdev, i);
2091
2092                 if (irq < 0) {
2093                         dev_err(dev, "failed to get irq index %d\n", i);
2094                         return -ENODEV;
2095                 }
2096                 smmu->irqs[i] = irq;
2097         }
2098
2099         err = arm_smmu_device_cfg_probe(smmu);
2100         if (err)
2101                 return err;
2102
2103         if (smmu->version == ARM_SMMU_V2 &&
2104             smmu->num_context_banks != smmu->num_context_irqs) {
2105                 dev_err(dev,
2106                         "found only %d context interrupt(s) but %d required\n",
2107                         smmu->num_context_irqs, smmu->num_context_banks);
2108                 return -ENODEV;
2109         }
2110
2111         for (i = 0; i < smmu->num_global_irqs; ++i) {
2112                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2113                                        arm_smmu_global_fault,
2114                                        IRQF_SHARED,
2115                                        "arm-smmu global fault",
2116                                        smmu);
2117                 if (err) {
2118                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2119                                 i, smmu->irqs[i]);
2120                         return err;
2121                 }
2122         }
2123
2124         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2125                                      "smmu.%pa", &ioaddr);
2126         if (err) {
2127                 dev_err(dev, "Failed to register iommu in sysfs\n");
2128                 return err;
2129         }
2130
2131         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2132         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2133
2134         err = iommu_device_register(&smmu->iommu);
2135         if (err) {
2136                 dev_err(dev, "Failed to register iommu\n");
2137                 return err;
2138         }
2139
2140         platform_set_drvdata(pdev, smmu);
2141         arm_smmu_device_reset(smmu);
2142         arm_smmu_test_smr_masks(smmu);
2143
2144         /*
2145          * For ACPI and generic DT bindings, an SMMU will be probed before
2146          * any device which might need it, so we want the bus ops in place
2147          * ready to handle default domain setup as soon as any SMMU exists.
2148          */
2149         if (!using_legacy_binding)
2150                 arm_smmu_bus_init();
2151
2152         return 0;
2153 }
2154
2155 /*
2156  * With the legacy DT binding in play, though, we have no guarantees about
2157  * probe order, but then we're also not doing default domains, so we can
2158  * delay setting bus ops until we're sure every possible SMMU is ready,
2159  * and that way ensure that no add_device() calls get missed.
2160  */
2161 static int arm_smmu_legacy_bus_init(void)
2162 {
2163         if (using_legacy_binding)
2164                 arm_smmu_bus_init();
2165         return 0;
2166 }
2167 device_initcall_sync(arm_smmu_legacy_bus_init);
2168
2169 static int arm_smmu_device_remove(struct platform_device *pdev)
2170 {
2171         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2172
2173         if (!smmu)
2174                 return -ENODEV;
2175
2176         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2177                 dev_err(&pdev->dev, "removing device with active domains!\n");
2178
2179         /* Turn the thing off */
2180         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2181         return 0;
2182 }
2183
2184 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2185 {
2186         arm_smmu_device_remove(pdev);
2187 }
2188
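     /*
      * Resume simply re-runs the reset sequence: arm_smmu_device_reset()
      * rewrites the stream mapping table and context bank registers from
      * the driver's shadow state, so nothing needs to be saved on suspend.
      */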
2189 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2190 {
2191         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2192
2193         arm_smmu_device_reset(smmu);
2194         return 0;
2195 }
2196
2197 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2198
2199 static struct platform_driver arm_smmu_driver = {
2200         .driver = {
2201                 .name           = "arm-smmu",
2202                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2203                 .pm             = &arm_smmu_pm_ops,
2204         },
2205         .probe  = arm_smmu_device_probe,
2206         .remove = arm_smmu_device_remove,
2207         .shutdown = arm_smmu_device_shutdown,
2208 };
2209 module_platform_driver(arm_smmu_driver);
2210
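     /*
      * Advertise the supported compatibles to the core OF-IOMMU code so
      * that master devices can defer probing until the SMMU they reference
      * has come up.
      */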
2211 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2212 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2213 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2214 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2215 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2216 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2217
2218 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2219 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2220 MODULE_LICENSE("GPL v2");