drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
58
59 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
60
61 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
62 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
63 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
64
65 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
66 #define TLB_SPIN_COUNT                  10
67
68 /* Maximum number of context banks per SMMU */
69 #define ARM_SMMU_MAX_CBS                128
70
71 /* SMMU global address space */
72 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
73 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
74
75 /*
76  * SMMU global address space with conditional offset to access secure
77  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
78  * nsGFSYNR0: 0x450)
79  */
80 #define ARM_SMMU_GR0_NS(smmu)                                           \
81         ((smmu)->base +                                                 \
82                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
83                         ? 0x400 : 0))
84
85 /*
86  * Some 64-bit registers only make sense to write atomically, but in such
87  * cases all the data relevant to AArch32 formats lies within the lower word,
88  * therefore this actually makes more sense than it might first appear.
89  */
90 #ifdef CONFIG_64BIT
91 #define smmu_write_atomic_lq            writeq_relaxed
92 #else
93 #define smmu_write_atomic_lq            writel_relaxed
94 #endif
95
96 /* Translation context bank */
97 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
98
99 #define MSI_IOVA_BASE                   0x8000000
100 #define MSI_IOVA_LENGTH                 0x100000
101
102 static int force_stage;
103 module_param(force_stage, int, S_IRUGO);
104 MODULE_PARM_DESC(force_stage,
105         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
106 static bool disable_bypass;
107 module_param(disable_bypass, bool, S_IRUGO);
108 MODULE_PARM_DESC(disable_bypass,
109         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
110
111 enum arm_smmu_arch_version {
112         ARM_SMMU_V1,
113         ARM_SMMU_V1_64K,
114         ARM_SMMU_V2,
115 };
116
117 enum arm_smmu_implementation {
118         GENERIC_SMMU,
119         ARM_MMU500,
120         CAVIUM_SMMUV2,
121 };
122
123 struct arm_smmu_s2cr {
124         struct iommu_group              *group;
125         int                             count;
126         enum arm_smmu_s2cr_type         type;
127         enum arm_smmu_s2cr_privcfg      privcfg;
128         u8                              cbndx;
129 };
130
131 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
132         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
133 }
134
135 struct arm_smmu_smr {
136         u16                             mask;
137         u16                             id;
138         bool                            valid;
139 };
140
141 struct arm_smmu_cb {
142         u64                             ttbr[2];
143         u32                             tcr[2];
144         u32                             mair[2];
145         struct arm_smmu_cfg             *cfg;
146 };
147
148 struct arm_smmu_master_cfg {
149         struct arm_smmu_device          *smmu;
150         s16                             smendx[];
151 };
152 #define INVALID_SMENDX                  -1
153 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
154 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
155 #define fwspec_smendx(fw, i) \
156         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
157 #define for_each_cfg_sme(fw, i, idx) \
158         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
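
/*
 * Illustrative usage (hypothetical caller, not part of the driver): the
 * helpers above walk a master's stream-map entries, and slots that have not
 * been claimed yet read back as INVALID_SMENDX. The real users are
 * arm_smmu_master_alloc_smes() and arm_smmu_master_free_smes() further down:
 *
 *	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;
 *		arm_smmu_write_sme(smmu, idx);
 *	}
 */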
159
160 struct arm_smmu_device {
161         struct device                   *dev;
162
163         void __iomem                    *base;
164         void __iomem                    *cb_base;
165         unsigned long                   pgshift;
166
167 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
168 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
169 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
170 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
171 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
172 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
173 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
174 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
177 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
179 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
180         u32                             features;
181
182 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
183         u32                             options;
184         enum arm_smmu_arch_version      version;
185         enum arm_smmu_implementation    model;
186
187         u32                             num_context_banks;
188         u32                             num_s2_context_banks;
189         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
190         struct arm_smmu_cb              *cbs;
191         atomic_t                        irptndx;
192
193         u32                             num_mapping_groups;
194         u16                             streamid_mask;
195         u16                             smr_mask_mask;
196         struct arm_smmu_smr             *smrs;
197         struct arm_smmu_s2cr            *s2crs;
198         struct mutex                    stream_map_mutex;
199
200         unsigned long                   va_size;
201         unsigned long                   ipa_size;
202         unsigned long                   pa_size;
203         unsigned long                   pgsize_bitmap;
204
205         u32                             num_global_irqs;
206         u32                             num_context_irqs;
207         unsigned int                    *irqs;
208
209         u32                             cavium_id_base; /* Specific to Cavium */
210
211         spinlock_t                      global_sync_lock;
212
213         /* IOMMU core code handle */
214         struct iommu_device             iommu;
215 };
216
217 enum arm_smmu_context_fmt {
218         ARM_SMMU_CTX_FMT_NONE,
219         ARM_SMMU_CTX_FMT_AARCH64,
220         ARM_SMMU_CTX_FMT_AARCH32_L,
221         ARM_SMMU_CTX_FMT_AARCH32_S,
222 };
223
224 struct arm_smmu_cfg {
225         u8                              cbndx;
226         u8                              irptndx;
227         union {
228                 u16                     asid;
229                 u16                     vmid;
230         };
231         u32                             cbar;
232         enum arm_smmu_context_fmt       fmt;
233 };
234 #define INVALID_IRPTNDX                 0xff
235
236 enum arm_smmu_domain_stage {
237         ARM_SMMU_DOMAIN_S1 = 0,
238         ARM_SMMU_DOMAIN_S2,
239         ARM_SMMU_DOMAIN_NESTED,
240         ARM_SMMU_DOMAIN_BYPASS,
241 };
242
243 struct arm_smmu_domain {
244         struct arm_smmu_device          *smmu;
245         struct io_pgtable_ops           *pgtbl_ops;
246         const struct iommu_gather_ops   *tlb_ops;
247         struct arm_smmu_cfg             cfg;
248         enum arm_smmu_domain_stage      stage;
249         struct mutex                    init_mutex; /* Protects smmu pointer */
250         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
251         struct iommu_domain             domain;
252 };
253
254 struct arm_smmu_option_prop {
255         u32 opt;
256         const char *prop;
257 };
258
259 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
260
261 static bool using_legacy_binding, using_generic_binding;
262
263 static struct arm_smmu_option_prop arm_smmu_options[] = {
264         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
265         { 0, NULL},
266 };
267
268 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
269 {
270         return container_of(dom, struct arm_smmu_domain, domain);
271 }
272
273 static void parse_driver_options(struct arm_smmu_device *smmu)
274 {
275         int i = 0;
276
277         do {
278                 if (of_property_read_bool(smmu->dev->of_node,
279                                                 arm_smmu_options[i].prop)) {
280                         smmu->options |= arm_smmu_options[i].opt;
281                         dev_notice(smmu->dev, "option %s\n",
282                                 arm_smmu_options[i].prop);
283                 }
284         } while (arm_smmu_options[++i].opt);
285 }
286
287 static struct device_node *dev_get_dev_node(struct device *dev)
288 {
289         if (dev_is_pci(dev)) {
290                 struct pci_bus *bus = to_pci_dev(dev)->bus;
291
292                 while (!pci_is_root_bus(bus))
293                         bus = bus->parent;
294                 return of_node_get(bus->bridge->parent->of_node);
295         }
296
297         return of_node_get(dev->of_node);
298 }
299
300 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
301 {
302         *((__be32 *)data) = cpu_to_be32(alias);
303         return 0; /* Continue walking */
304 }
305
306 static int __find_legacy_master_phandle(struct device *dev, void *data)
307 {
308         struct of_phandle_iterator *it = *(void **)data;
309         struct device_node *np = it->node;
310         int err;
311
312         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
313                             "#stream-id-cells", 0)
314                 if (it->node == np) {
315                         *(void **)data = dev;
316                         return 1;
317                 }
318         it->node = np;
319         return err == -ENOENT ? 0 : err;
320 }
321
322 static struct platform_driver arm_smmu_driver;
323 static struct iommu_ops arm_smmu_ops;
324
325 static int arm_smmu_register_legacy_master(struct device *dev,
326                                            struct arm_smmu_device **smmu)
327 {
328         struct device *smmu_dev;
329         struct device_node *np;
330         struct of_phandle_iterator it;
331         void *data = &it;
332         u32 *sids;
333         __be32 pci_sid;
334         int err;
335
336         np = dev_get_dev_node(dev);
337         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
338                 of_node_put(np);
339                 return -ENODEV;
340         }
341
342         it.node = np;
343         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
344                                      __find_legacy_master_phandle);
345         smmu_dev = data;
346         of_node_put(np);
347         if (err == 0)
348                 return -ENODEV;
349         if (err < 0)
350                 return err;
351
352         if (dev_is_pci(dev)) {
353                 /* "mmu-masters" assumes Stream ID == Requester ID */
354                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
355                                        &pci_sid);
356                 it.cur = &pci_sid;
357                 it.cur_count = 1;
358         }
359
360         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
361                                 &arm_smmu_ops);
362         if (err)
363                 return err;
364
365         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
366         if (!sids)
367                 return -ENOMEM;
368
369         *smmu = dev_get_drvdata(smmu_dev);
370         of_phandle_iterator_args(&it, sids, it.cur_count);
371         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
372         kfree(sids);
373         return err;
374 }
375
376 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
377 {
378         int idx;
379
380         do {
381                 idx = find_next_zero_bit(map, end, start);
382                 if (idx == end)
383                         return -ENOSPC;
384         } while (test_and_set_bit(idx, map));
385
386         return idx;
387 }
388
389 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
390 {
391         clear_bit(idx, map);
392 }
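
/*
 * Illustrative usage (hypothetical caller, not part of the driver): the two
 * helpers above hand out context bank indices from smmu->context_map.
 * Stage-1 domains pass start = smmu->num_s2_context_banks so that the
 * low-numbered banks stay available for stage-2, mirroring
 * arm_smmu_init_domain_context() below:
 *
 *	int cbndx = __arm_smmu_alloc_bitmap(smmu->context_map,
 *					    smmu->num_s2_context_banks,
 *					    smmu->num_context_banks);
 *	if (cbndx < 0)
 *		return cbndx;
 *
 * A negative return (-ENOSPC) means every bank is already claimed; the index
 * is eventually released with __arm_smmu_free_bitmap(smmu->context_map, cbndx).
 */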
393
394 /* Wait for any pending TLB invalidations to complete */
395 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
396                                 void __iomem *sync, void __iomem *status)
397 {
398         unsigned int spin_cnt, delay;
399
400         writel_relaxed(0, sync);
401         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
402                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
403                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
404                                 return;
405                         cpu_relax();
406                 }
407                 udelay(delay);
408         }
409         dev_err_ratelimited(smmu->dev,
410                             "TLB sync timed out -- SMMU may be deadlocked\n");
411 }
412
413 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
414 {
415         void __iomem *base = ARM_SMMU_GR0(smmu);
416         unsigned long flags;
417
418         spin_lock_irqsave(&smmu->global_sync_lock, flags);
419         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
420                             base + ARM_SMMU_GR0_sTLBGSTATUS);
421         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
422 }
423
424 static void arm_smmu_tlb_sync_context(void *cookie)
425 {
426         struct arm_smmu_domain *smmu_domain = cookie;
427         struct arm_smmu_device *smmu = smmu_domain->smmu;
428         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
429         unsigned long flags;
430
431         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
432         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
433                             base + ARM_SMMU_CB_TLBSTATUS);
434         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
435 }
436
437 static void arm_smmu_tlb_sync_vmid(void *cookie)
438 {
439         struct arm_smmu_domain *smmu_domain = cookie;
440
441         arm_smmu_tlb_sync_global(smmu_domain->smmu);
442 }
443
444 static void arm_smmu_tlb_inv_context_s1(void *cookie)
445 {
446         struct arm_smmu_domain *smmu_domain = cookie;
447         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
448         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
449
450         writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
451         arm_smmu_tlb_sync_context(cookie);
452 }
453
454 static void arm_smmu_tlb_inv_context_s2(void *cookie)
455 {
456         struct arm_smmu_domain *smmu_domain = cookie;
457         struct arm_smmu_device *smmu = smmu_domain->smmu;
458         void __iomem *base = ARM_SMMU_GR0(smmu);
459
460         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
461         arm_smmu_tlb_sync_global(smmu);
462 }
463
464 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
465                                           size_t granule, bool leaf, void *cookie)
466 {
467         struct arm_smmu_domain *smmu_domain = cookie;
468         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
469         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
470         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
471
472         if (stage1) {
473                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
474
475                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
476                         iova &= ~0xfffUL;
477                         iova |= cfg->asid;
478                         do {
479                                 writel_relaxed(iova, reg);
480                                 iova += granule;
481                         } while (size -= granule);
482                 } else {
483                         iova >>= 12;
484                         iova |= (u64)cfg->asid << 48;
485                         do {
486                                 writeq_relaxed(iova, reg);
487                                 iova += granule >> 12;
488                         } while (size -= granule);
489                 }
490         } else {
491                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
492                               ARM_SMMU_CB_S2_TLBIIPAS2;
493                 iova >>= 12;
494                 do {
495                         smmu_write_atomic_lq(iova, reg);
496                         iova += granule >> 12;
497                 } while (size -= granule);
498         }
499 }
500
501 /*
502  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
503  * almost negligible, but the benefit of getting the first one in as far ahead
504  * of the sync as possible is significant, hence we don't just make this a
505  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
506  */
507 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
508                                          size_t granule, bool leaf, void *cookie)
509 {
510         struct arm_smmu_domain *smmu_domain = cookie;
511         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
512
513         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
514 }
515
516 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
517         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
518         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
519         .tlb_sync       = arm_smmu_tlb_sync_context,
520 };
521
522 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
523         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
524         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
525         .tlb_sync       = arm_smmu_tlb_sync_context,
526 };
527
528 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
529         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
530         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
531         .tlb_sync       = arm_smmu_tlb_sync_vmid,
532 };
533
534 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
535 {
536         u32 fsr, fsynr;
537         unsigned long iova;
538         struct iommu_domain *domain = dev;
539         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
540         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
541         struct arm_smmu_device *smmu = smmu_domain->smmu;
542         void __iomem *cb_base;
543
544         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
545         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
546
547         if (!(fsr & FSR_FAULT))
548                 return IRQ_NONE;
549
550         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
551         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
552
553         dev_err_ratelimited(smmu->dev,
554         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
555                             fsr, iova, fsynr, cfg->cbndx);
556
557         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
558         return IRQ_HANDLED;
559 }
560
561 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
562 {
563         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
564         struct arm_smmu_device *smmu = dev;
565         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
566
567         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
568         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
569         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
570         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
571
572         if (!gfsr)
573                 return IRQ_NONE;
574
575         dev_err_ratelimited(smmu->dev,
576                 "Unexpected global fault, this could be serious\n");
577         dev_err_ratelimited(smmu->dev,
578                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
579                 gfsr, gfsynr0, gfsynr1, gfsynr2);
580
581         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
582         return IRQ_HANDLED;
583 }
584
585 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
586                                        struct io_pgtable_cfg *pgtbl_cfg)
587 {
588         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
589         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
590         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
591
592         cb->cfg = cfg;
593
594         /* TTBCR */
595         if (stage1) {
596                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
597                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
598                 } else {
599                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
600                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
601                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
602                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
603                                 cb->tcr[1] |= TTBCR2_AS;
604                 }
605         } else {
606                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
607         }
608
609         /* TTBRs */
610         if (stage1) {
611                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
612                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
613                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
614                 } else {
615                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
616                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
617                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
618                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
619                 }
620         } else {
621                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
622         }
623
624         /* MAIRs (stage-1 only) */
625         if (stage1) {
626                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
627                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
628                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
629                 } else {
630                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
631                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
632                 }
633         }
634 }
635
636 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
637 {
638         u32 reg;
639         bool stage1;
640         struct arm_smmu_cb *cb = &smmu->cbs[idx];
641         struct arm_smmu_cfg *cfg = cb->cfg;
642         void __iomem *cb_base, *gr1_base;
643
644         cb_base = ARM_SMMU_CB(smmu, idx);
645
646         /* Unassigned context banks only need disabling */
647         if (!cfg) {
648                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
649                 return;
650         }
651
652         gr1_base = ARM_SMMU_GR1(smmu);
653         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
654
655         /* CBA2R */
656         if (smmu->version > ARM_SMMU_V1) {
657                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
658                         reg = CBA2R_RW64_64BIT;
659                 else
660                         reg = CBA2R_RW64_32BIT;
661                 /* 16-bit VMIDs live in CBA2R */
662                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
663                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
664
665                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
666         }
667
668         /* CBAR */
669         reg = cfg->cbar;
670         if (smmu->version < ARM_SMMU_V2)
671                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
672
673         /*
674          * Use the weakest shareability/memory types, so they are
675          * overridden by the ttbcr/pte.
676          */
677         if (stage1) {
678                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
679                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
680         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
681                 /* 8-bit VMIDs live in CBAR */
682                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
683         }
684         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
685
686         /*
687          * TTBCR
688          * We must write this before the TTBRs, since it determines the
689          * access behaviour of some fields (in particular, ASID[15:8]).
690          */
691         if (stage1 && smmu->version > ARM_SMMU_V1)
692                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
693         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
694
695         /* TTBRs */
696         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
697                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
698                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
699                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
700         } else {
701                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
702                 if (stage1)
703                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
704         }
705
706         /* MAIRs (stage-1 only) */
707         if (stage1) {
708                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
709                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
710         }
711
712         /* SCTLR */
713         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
714         if (stage1)
715                 reg |= SCTLR_S1_ASIDPNE;
716         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
717                 reg |= SCTLR_E;
718
719         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
720 }
721
722 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
723                                         struct arm_smmu_device *smmu)
724 {
725         int irq, start, ret = 0;
726         unsigned long ias, oas;
727         struct io_pgtable_ops *pgtbl_ops;
728         struct io_pgtable_cfg pgtbl_cfg;
729         enum io_pgtable_fmt fmt;
730         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
731         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
732
733         mutex_lock(&smmu_domain->init_mutex);
734         if (smmu_domain->smmu)
735                 goto out_unlock;
736
737         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
738                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
739                 smmu_domain->smmu = smmu;
740                 goto out_unlock;
741         }
742
743         /*
744          * Mapping the requested stage onto what we support is surprisingly
745          * complicated, mainly because the spec allows S1+S2 SMMUs without
746          * support for nested translation. That means we end up with the
747          * following table:
748          *
749          * Requested        Supported        Actual
750          *     S1               N              S1
751          *     S1             S1+S2            S1
752          *     S1               S2             S2
753          *     S1               S1             S1
754          *     N                N              N
755          *     N              S1+S2            S2
756          *     N                S2             S2
757          *     N                S1             S1
758          *
759          * Note that you can't actually request stage-2 mappings.
760          */
761         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
762                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
763         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
764                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
765
766         /*
767          * Choosing a suitable context format is even more fiddly. Until we
768          * grow some way for the caller to express a preference, and/or move
769          * the decision into the io-pgtable code where it arguably belongs,
770          * just aim for the closest thing to the rest of the system, and hope
771          * that the hardware isn't esoteric enough that we can't assume AArch64
772          * support to be a superset of AArch32 support...
773          */
774         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
775                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
776         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
777             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
778             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
779             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
780                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
781         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
782             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
783                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
784                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
785                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
786
787         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
788                 ret = -EINVAL;
789                 goto out_unlock;
790         }
791
792         switch (smmu_domain->stage) {
793         case ARM_SMMU_DOMAIN_S1:
794                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
795                 start = smmu->num_s2_context_banks;
796                 ias = smmu->va_size;
797                 oas = smmu->ipa_size;
798                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
799                         fmt = ARM_64_LPAE_S1;
800                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
801                         fmt = ARM_32_LPAE_S1;
802                         ias = min(ias, 32UL);
803                         oas = min(oas, 40UL);
804                 } else {
805                         fmt = ARM_V7S;
806                         ias = min(ias, 32UL);
807                         oas = min(oas, 32UL);
808                 }
809                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
810                 break;
811         case ARM_SMMU_DOMAIN_NESTED:
812                 /*
813                  * We will likely want to change this if/when KVM gets
814                  * involved.
815                  */
816         case ARM_SMMU_DOMAIN_S2:
817                 cfg->cbar = CBAR_TYPE_S2_TRANS;
818                 start = 0;
819                 ias = smmu->ipa_size;
820                 oas = smmu->pa_size;
821                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
822                         fmt = ARM_64_LPAE_S2;
823                 } else {
824                         fmt = ARM_32_LPAE_S2;
825                         ias = min(ias, 40UL);
826                         oas = min(oas, 40UL);
827                 }
828                 if (smmu->version == ARM_SMMU_V2)
829                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
830                 else
831                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
832                 break;
833         default:
834                 ret = -EINVAL;
835                 goto out_unlock;
836         }
837         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
838                                       smmu->num_context_banks);
839         if (ret < 0)
840                 goto out_unlock;
841
842         cfg->cbndx = ret;
843         if (smmu->version < ARM_SMMU_V2) {
844                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
845                 cfg->irptndx %= smmu->num_context_irqs;
846         } else {
847                 cfg->irptndx = cfg->cbndx;
848         }
849
850         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
851                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
852         else
853                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
854
855         pgtbl_cfg = (struct io_pgtable_cfg) {
856                 .pgsize_bitmap  = smmu->pgsize_bitmap,
857                 .ias            = ias,
858                 .oas            = oas,
859                 .tlb            = smmu_domain->tlb_ops,
860                 .iommu_dev      = smmu->dev,
861         };
862
863         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
864                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
865
866         smmu_domain->smmu = smmu;
867         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
868         if (!pgtbl_ops) {
869                 ret = -ENOMEM;
870                 goto out_clear_smmu;
871         }
872
873         /* Update the domain's page sizes to reflect the page table format */
874         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
875         domain->geometry.aperture_end = (1UL << ias) - 1;
876         domain->geometry.force_aperture = true;
877
878         /* Initialise the context bank with our page table cfg */
879         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
880         arm_smmu_write_context_bank(smmu, cfg->cbndx);
881
882         /*
883          * Request context fault interrupt. Do this last to avoid the
884          * handler seeing a half-initialised domain state.
885          */
886         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
887         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
888                                IRQF_SHARED, "arm-smmu-context-fault", domain);
889         if (ret < 0) {
890                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
891                         cfg->irptndx, irq);
892                 cfg->irptndx = INVALID_IRPTNDX;
893         }
894
895         mutex_unlock(&smmu_domain->init_mutex);
896
897         /* Publish page table ops for map/unmap */
898         smmu_domain->pgtbl_ops = pgtbl_ops;
899         return 0;
900
901 out_clear_smmu:
902         smmu_domain->smmu = NULL;
903 out_unlock:
904         mutex_unlock(&smmu_domain->init_mutex);
905         return ret;
906 }
907
908 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
909 {
910         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
911         struct arm_smmu_device *smmu = smmu_domain->smmu;
912         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
913         int irq;
914
915         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
916                 return;
917
918         /*
919          * Disable the context bank and free the page tables before freeing
920          * it.
921          */
922         smmu->cbs[cfg->cbndx].cfg = NULL;
923         arm_smmu_write_context_bank(smmu, cfg->cbndx);
924
925         if (cfg->irptndx != INVALID_IRPTNDX) {
926                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
927                 devm_free_irq(smmu->dev, irq, domain);
928         }
929
930         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
931         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
932 }
933
934 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
935 {
936         struct arm_smmu_domain *smmu_domain;
937
938         if (type != IOMMU_DOMAIN_UNMANAGED &&
939             type != IOMMU_DOMAIN_DMA &&
940             type != IOMMU_DOMAIN_IDENTITY)
941                 return NULL;
942         /*
943          * Allocate the domain and initialise some of its data structures.
944          * We can't really do anything meaningful until we've added a
945          * master.
946          */
947         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
948         if (!smmu_domain)
949                 return NULL;
950
951         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
952             iommu_get_dma_cookie(&smmu_domain->domain))) {
953                 kfree(smmu_domain);
954                 return NULL;
955         }
956
957         mutex_init(&smmu_domain->init_mutex);
958         spin_lock_init(&smmu_domain->cb_lock);
959
960         return &smmu_domain->domain;
961 }
962
963 static void arm_smmu_domain_free(struct iommu_domain *domain)
964 {
965         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
966
967         /*
968          * Free the domain resources. We assume that all devices have
969          * already been detached.
970          */
971         iommu_put_dma_cookie(domain);
972         arm_smmu_destroy_domain_context(domain);
973         kfree(smmu_domain);
974 }
975
976 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
977 {
978         struct arm_smmu_smr *smr = smmu->smrs + idx;
979         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
980
981         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
982                 reg |= SMR_VALID;
983         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
984 }
985
986 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
987 {
988         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
989         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
990                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
991                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
992
993         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
994             smmu->smrs[idx].valid)
995                 reg |= S2CR_EXIDVALID;
996         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
997 }
998
999 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1000 {
1001         arm_smmu_write_s2cr(smmu, idx);
1002         if (smmu->smrs)
1003                 arm_smmu_write_smr(smmu, idx);
1004 }
1005
1006 /*
1007  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1008  * should be called after sCR0 is written.
1009  */
1010 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1011 {
1012         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1013         u32 smr;
1014
1015         if (!smmu->smrs)
1016                 return;
1017
1018         /*
1019          * SMR.ID bits may not be preserved if the corresponding MASK
1020          * bits are set, so check each one separately. We can reject
1021          * masters later if they try to claim IDs outside these masks.
1022          */
1023         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1024         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1025         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1026         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1027
1028         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1029         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1030         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1031         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1032 }
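
/*
 * Worked example (hypothetical hardware, for illustration only): if an
 * implementation only wires up bits [7:0] of the SMR mask field, the probe
 * above writes smmu->streamid_mask << SMR_MASK_SHIFT to SMR(0) but reads
 * back only 0xff of those bits, so smmu->smr_mask_mask ends up as 0xff and
 * arm_smmu_add_device() will reject any master whose requested mask has bits
 * outside that range.
 */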
1033
1034 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1035 {
1036         struct arm_smmu_smr *smrs = smmu->smrs;
1037         int i, free_idx = -ENOSPC;
1038
1039         /* Stream indexing is blissfully easy */
1040         if (!smrs)
1041                 return id;
1042
1043         /* Validating SMRs is... less so */
1044         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1045                 if (!smrs[i].valid) {
1046                         /*
1047                          * Note the first free entry we come across, which
1048                          * we'll claim in the end if nothing else matches.
1049                          */
1050                         if (free_idx < 0)
1051                                 free_idx = i;
1052                         continue;
1053                 }
1054                 /*
1055                  * If the new entry is _entirely_ matched by an existing entry,
1056                  * then reuse that, with the guarantee that there also cannot
1057                  * be any subsequent conflicting entries. In normal use we'd
1058                  * expect simply identical entries for this case, but there's
1059                  * no harm in accommodating the generalisation.
1060                  */
1061                 if ((mask & smrs[i].mask) == mask &&
1062                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1063                         return i;
1064                 /*
1065                  * If the new entry has any other overlap with an existing one,
1066                  * though, then there always exists at least one stream ID
1067                  * which would cause a conflict, and we can't allow that risk.
1068                  */
1069                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1070                         return -EINVAL;
1071         }
1072
1073         return free_idx;
1074 }
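
/*
 * Worked example (hypothetical values, for illustration only): suppose SMR 0
 * already holds id = 0x400, mask = 0xff, i.e. it matches stream IDs
 * 0x400-0x4ff. Then:
 *
 *	arm_smmu_find_sme(smmu, 0x410, 0x0f)  -> 0: entirely contained in
 *						    SMR 0, so reuse it
 *	arm_smmu_find_sme(smmu, 0x4f0, 0xf00) -> -EINVAL: the ranges overlap
 *						    at 0x4f0, but neither
 *						    contains the other
 *	arm_smmu_find_sme(smmu, 0x800, 0)     -> the first unused index, or
 *						    -ENOSPC if there is none
 */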
1075
1076 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1077 {
1078         if (--smmu->s2crs[idx].count)
1079                 return false;
1080
1081         smmu->s2crs[idx] = s2cr_init_val;
1082         if (smmu->smrs)
1083                 smmu->smrs[idx].valid = false;
1084
1085         return true;
1086 }
1087
1088 static int arm_smmu_master_alloc_smes(struct device *dev)
1089 {
1090         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1091         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1092         struct arm_smmu_device *smmu = cfg->smmu;
1093         struct arm_smmu_smr *smrs = smmu->smrs;
1094         struct iommu_group *group;
1095         int i, idx, ret;
1096
1097         mutex_lock(&smmu->stream_map_mutex);
1098         /* Figure out a viable stream map entry allocation */
1099         for_each_cfg_sme(fwspec, i, idx) {
1100                 u16 sid = fwspec->ids[i];
1101                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1102
1103                 if (idx != INVALID_SMENDX) {
1104                         ret = -EEXIST;
1105                         goto out_err;
1106                 }
1107
1108                 ret = arm_smmu_find_sme(smmu, sid, mask);
1109                 if (ret < 0)
1110                         goto out_err;
1111
1112                 idx = ret;
1113                 if (smrs && smmu->s2crs[idx].count == 0) {
1114                         smrs[idx].id = sid;
1115                         smrs[idx].mask = mask;
1116                         smrs[idx].valid = true;
1117                 }
1118                 smmu->s2crs[idx].count++;
1119                 cfg->smendx[i] = (s16)idx;
1120         }
1121
1122         group = iommu_group_get_for_dev(dev);
1123         if (!group)
1124                 group = ERR_PTR(-ENOMEM);
1125         if (IS_ERR(group)) {
1126                 ret = PTR_ERR(group);
1127                 goto out_err;
1128         }
1129         iommu_group_put(group);
1130
1131         /* It worked! Now, poke the actual hardware */
1132         for_each_cfg_sme(fwspec, i, idx) {
1133                 arm_smmu_write_sme(smmu, idx);
1134                 smmu->s2crs[idx].group = group;
1135         }
1136
1137         mutex_unlock(&smmu->stream_map_mutex);
1138         return 0;
1139
1140 out_err:
1141         while (i--) {
1142                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1143                 cfg->smendx[i] = INVALID_SMENDX;
1144         }
1145         mutex_unlock(&smmu->stream_map_mutex);
1146         return ret;
1147 }
1148
1149 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1150 {
1151         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1152         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1153         int i, idx;
1154
1155         mutex_lock(&smmu->stream_map_mutex);
1156         for_each_cfg_sme(fwspec, i, idx) {
1157                 if (arm_smmu_free_sme(smmu, idx))
1158                         arm_smmu_write_sme(smmu, idx);
1159                 cfg->smendx[i] = INVALID_SMENDX;
1160         }
1161         mutex_unlock(&smmu->stream_map_mutex);
1162 }
1163
1164 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1165                                       struct iommu_fwspec *fwspec)
1166 {
1167         struct arm_smmu_device *smmu = smmu_domain->smmu;
1168         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1169         u8 cbndx = smmu_domain->cfg.cbndx;
1170         enum arm_smmu_s2cr_type type;
1171         int i, idx;
1172
1173         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1174                 type = S2CR_TYPE_BYPASS;
1175         else
1176                 type = S2CR_TYPE_TRANS;
1177
1178         for_each_cfg_sme(fwspec, i, idx) {
1179                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1180                         continue;
1181
1182                 s2cr[idx].type = type;
1183                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1184                 s2cr[idx].cbndx = cbndx;
1185                 arm_smmu_write_s2cr(smmu, idx);
1186         }
1187         return 0;
1188 }
1189
1190 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1191 {
1192         int ret;
1193         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1194         struct arm_smmu_device *smmu;
1195         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1196
1197         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1198                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1199                 return -ENXIO;
1200         }
1201
1202         /*
1203          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1204          * domains between of_xlate() and add_device() - we have no way to cope
1205          * with that, so until ARM gets converted to rely on groups and default
1206          * domains, just say no (but more politely than by dereferencing NULL).
1207          * This should be at least a WARN_ON once that's sorted.
1208          */
1209         if (!fwspec->iommu_priv)
1210                 return -ENODEV;
1211
1212         smmu = fwspec_smmu(fwspec);
1213         /* Ensure that the domain is finalised */
1214         ret = arm_smmu_init_domain_context(domain, smmu);
1215         if (ret < 0)
1216                 return ret;
1217
1218         /*
1219          * Sanity check the domain. We don't support domains across
1220          * different SMMUs.
1221          */
1222         if (smmu_domain->smmu != smmu) {
1223                 dev_err(dev,
1224                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1225                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1226                 return -EINVAL;
1227         }
1228
1229         /* Looks ok, so add the device to the domain */
1230         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1231 }
1232
1233 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1234                         phys_addr_t paddr, size_t size, int prot)
1235 {
1236         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1237
1238         if (!ops)
1239                 return -ENODEV;
1240
1241         return ops->map(ops, iova, paddr, size, prot);
1242 }
1243
1244 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1245                              size_t size)
1246 {
1247         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1248
1249         if (!ops)
1250                 return 0;
1251
1252         return ops->unmap(ops, iova, size);
1253 }
1254
1255 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1256 {
1257         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1258
1259         if (smmu_domain->tlb_ops)
1260                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1261 }
1262
1263 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1264                                               dma_addr_t iova)
1265 {
1266         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1267         struct arm_smmu_device *smmu = smmu_domain->smmu;
1268         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1269         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1270         struct device *dev = smmu->dev;
1271         void __iomem *cb_base;
1272         u32 tmp;
1273         u64 phys;
1274         unsigned long va, flags;
1275
1276         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1277
1278         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1279         /* ATS1 registers can only be written atomically */
1280         va = iova & ~0xfffUL;
1281         if (smmu->version == ARM_SMMU_V2)
1282                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1283         else /* Register is only 32-bit in v1 */
1284                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1285
1286         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1287                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1288                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1289                 dev_err(dev,
1290                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1291                         &iova);
1292                 return ops->iova_to_phys(ops, iova);
1293         }
1294
1295         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1296         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1297         if (phys & CB_PAR_F) {
1298                 dev_err(dev, "translation fault!\n");
1299                 dev_err(dev, "PAR = 0x%llx\n", phys);
1300                 return 0;
1301         }
1302
1303         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1304 }
1305
1306 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1307                                         dma_addr_t iova)
1308 {
1309         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1310         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1311
1312         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1313                 return iova;
1314
1315         if (!ops)
1316                 return 0;
1317
1318         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1319                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1320                 return arm_smmu_iova_to_phys_hard(domain, iova);
1321
1322         return ops->iova_to_phys(ops, iova);
1323 }
1324
1325 static bool arm_smmu_capable(enum iommu_cap cap)
1326 {
1327         switch (cap) {
1328         case IOMMU_CAP_CACHE_COHERENCY:
1329                 /*
1330                  * Return true here as the SMMU can always send out coherent
1331                  * requests.
1332                  */
1333                 return true;
1334         case IOMMU_CAP_NOEXEC:
1335                 return true;
1336         default:
1337                 return false;
1338         }
1339 }
1340
1341 static int arm_smmu_match_node(struct device *dev, void *data)
1342 {
1343         return dev->fwnode == data;
1344 }
1345
1346 static
1347 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1348 {
1349         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1350                                                 fwnode, arm_smmu_match_node);
1351         put_device(dev);
1352         return dev ? dev_get_drvdata(dev) : NULL;
1353 }
1354
1355 static int arm_smmu_add_device(struct device *dev)
1356 {
1357         struct arm_smmu_device *smmu;
1358         struct arm_smmu_master_cfg *cfg;
1359         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1360         int i, ret;
1361
1362         if (using_legacy_binding) {
1363                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1364
1365                 /*
1366                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1367                  * will allocate/initialise a new one. Thus we need to update fwspec for
1368                  * later use.
1369                  */
1370                 fwspec = dev->iommu_fwspec;
1371                 if (ret)
1372                         goto out_free;
1373         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1374                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1375         } else {
1376                 return -ENODEV;
1377         }
1378
1379         ret = -EINVAL;
1380         for (i = 0; i < fwspec->num_ids; i++) {
1381                 u16 sid = fwspec->ids[i];
1382                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1383
1384                 if (sid & ~smmu->streamid_mask) {
1385                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1386                                 sid, smmu->streamid_mask);
1387                         goto out_free;
1388                 }
1389                 if (mask & ~smmu->smr_mask_mask) {
1390                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1391                                 mask, smmu->smr_mask_mask);
1392                         goto out_free;
1393                 }
1394         }
1395
1396         ret = -ENOMEM;
1397         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1398                       GFP_KERNEL);
1399         if (!cfg)
1400                 goto out_free;
1401
1402         cfg->smmu = smmu;
1403         fwspec->iommu_priv = cfg;
1404         while (i--)
1405                 cfg->smendx[i] = INVALID_SMENDX;
1406
1407         ret = arm_smmu_master_alloc_smes(dev);
1408         if (ret)
1409                 goto out_cfg_free;
1410
1411         iommu_device_link(&smmu->iommu, dev);
1412
1413         return 0;
1414
1415 out_cfg_free:
1416         kfree(cfg);
1417 out_free:
1418         iommu_fwspec_free(dev);
1419         return ret;
1420 }
1421
1422 static void arm_smmu_remove_device(struct device *dev)
1423 {
1424         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1425         struct arm_smmu_master_cfg *cfg;
1426         struct arm_smmu_device *smmu;
1427
1428
1429         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1430                 return;
1431
1432         cfg  = fwspec->iommu_priv;
1433         smmu = cfg->smmu;
1434
1435         iommu_device_unlink(&smmu->iommu, dev);
1436         arm_smmu_master_free_smes(fwspec);
1437         iommu_group_remove_device(dev);
1438         kfree(fwspec->iommu_priv);
1439         iommu_fwspec_free(dev);
1440 }
1441
1442 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1443 {
1444         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1445         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1446         struct iommu_group *group = NULL;
1447         int i, idx;
1448
1449         for_each_cfg_sme(fwspec, i, idx) {
1450                 if (group && smmu->s2crs[idx].group &&
1451                     group != smmu->s2crs[idx].group)
1452                         return ERR_PTR(-EINVAL);
1453
1454                 group = smmu->s2crs[idx].group;
1455         }
1456
1457         if (group)
1458                 return iommu_group_ref_get(group);
1459
1460         if (dev_is_pci(dev))
1461                 group = pci_device_group(dev);
1462         else
1463                 group = generic_device_group(dev);
1464
1465         return group;
1466 }
1467
1468 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1469                                     enum iommu_attr attr, void *data)
1470 {
1471         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1472
1473         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1474                 return -EINVAL;
1475
1476         switch (attr) {
1477         case DOMAIN_ATTR_NESTING:
1478                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1479                 return 0;
1480         default:
1481                 return -ENODEV;
1482         }
1483 }
1484
1485 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1486                                     enum iommu_attr attr, void *data)
1487 {
1488         int ret = 0;
1489         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1490
1491         if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1492                 return -EINVAL;
1493
1494         mutex_lock(&smmu_domain->init_mutex);
1495
1496         switch (attr) {
1497         case DOMAIN_ATTR_NESTING:
1498                 if (smmu_domain->smmu) {
1499                         ret = -EPERM;
1500                         goto out_unlock;
1501                 }
1502
1503                 if (*(int *)data)
1504                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1505                 else
1506                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1507
1508                 break;
1509         default:
1510                 ret = -ENODEV;
1511         }
1512
1513 out_unlock:
1514         mutex_unlock(&smmu_domain->init_mutex);
1515         return ret;
1516 }
1517
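/*
 * The fwid handed to iommu_fwspec_add_ids() packs the 16-bit stream ID into
 * the low half of the word and the (optional) SMR mask into the high half
 * via SMR_MASK_SHIFT. Purely as an illustration, DT args of <0x400 0x7f80>
 * would give fwid = 0x400 | (0x7f80 << 16) = 0x7f800400, which
 * arm_smmu_add_device() unpacks back into sid and mask.
 */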
1518 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1519 {
1520         u32 mask, fwid = 0;
1521
1522         if (args->args_count > 0)
1523                 fwid |= (u16)args->args[0];
1524
1525         if (args->args_count > 1)
1526                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1527         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1528                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1529
1530         return iommu_fwspec_add_ids(dev, &fwid, 1);
1531 }
1532
1533 static void arm_smmu_get_resv_regions(struct device *dev,
1534                                       struct list_head *head)
1535 {
1536         struct iommu_resv_region *region;
1537         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1538
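        /*
         * Reserve a software-managed MSI window (MSI_IOVA_BASE/
         * MSI_IOVA_LENGTH) so that the DMA layer never allocates IOVAs from
         * it and MSI doorbells can be mapped there instead.
         */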
1539         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1540                                          prot, IOMMU_RESV_SW_MSI);
1541         if (!region)
1542                 return;
1543
1544         list_add_tail(&region->list, head);
1545
1546         iommu_dma_get_resv_regions(dev, head);
1547 }
1548
1549 static void arm_smmu_put_resv_regions(struct device *dev,
1550                                       struct list_head *head)
1551 {
1552         struct iommu_resv_region *entry, *next;
1553
1554         list_for_each_entry_safe(entry, next, head, list)
1555                 kfree(entry);
1556 }
1557
1558 static struct iommu_ops arm_smmu_ops = {
1559         .capable                = arm_smmu_capable,
1560         .domain_alloc           = arm_smmu_domain_alloc,
1561         .domain_free            = arm_smmu_domain_free,
1562         .attach_dev             = arm_smmu_attach_dev,
1563         .map                    = arm_smmu_map,
1564         .unmap                  = arm_smmu_unmap,
1565         .map_sg                 = default_iommu_map_sg,
1566         .flush_iotlb_all        = arm_smmu_iotlb_sync,
1567         .iotlb_sync             = arm_smmu_iotlb_sync,
1568         .iova_to_phys           = arm_smmu_iova_to_phys,
1569         .add_device             = arm_smmu_add_device,
1570         .remove_device          = arm_smmu_remove_device,
1571         .device_group           = arm_smmu_device_group,
1572         .domain_get_attr        = arm_smmu_domain_get_attr,
1573         .domain_set_attr        = arm_smmu_domain_set_attr,
1574         .of_xlate               = arm_smmu_of_xlate,
1575         .get_resv_regions       = arm_smmu_get_resv_regions,
1576         .put_resv_regions       = arm_smmu_put_resv_regions,
1577         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1578 };
1579
1580 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1581 {
1582         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1583         int i;
1584         u32 reg, major;
1585
1586         /* clear global FSR */
1587         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1588         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1589
1590         /*
1591          * Reset stream mapping groups: Initial values mark all SMRn as
1592          * invalid and all S2CRn as bypass unless overridden.
1593          */
1594         for (i = 0; i < smmu->num_mapping_groups; ++i)
1595                 arm_smmu_write_sme(smmu, i);
1596
1597         if (smmu->model == ARM_MMU500) {
1598                 /*
1599                  * Before clearing ARM_MMU500_ACTLR_CPRE, we need to
1600                  * clear the CACHE_LOCK bit of ACR first. Note that the
1601                  * CACHE_LOCK bit is only present in MMU-500 r2 onwards.
1602                  */
1603                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1604                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1605                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1606                 if (major >= 2)
1607                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1608                 /*
1609                  * Allow unmatched Stream IDs to allocate bypass
1610                  * TLB entries for reduced latency.
1611                  */
1612                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1613                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1614         }
1615
1616         /* Make sure all context banks are disabled and clear CB_FSR */
1617         for (i = 0; i < smmu->num_context_banks; ++i) {
1618                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1619
1620                 arm_smmu_write_context_bank(smmu, i);
1621                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1622                 /*
1623                  * Disable MMU-500's not-particularly-beneficial next-page
1624                  * prefetcher for the sake of errata #841119 and #826419.
1625                  */
1626                 if (smmu->model == ARM_MMU500) {
1627                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1628                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1629                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1630                 }
1631         }
1632
1633         /* Invalidate the TLB, just in case */
1634         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1635         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1636
1637         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1638
1639         /* Enable fault reporting */
1640         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1641
1642         /* Disable TLB broadcasting. */
1643         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1644
1645         /* Enable client access, handling unmatched streams as appropriate */
1646         reg &= ~sCR0_CLIENTPD;
1647         if (disable_bypass)
1648                 reg |= sCR0_USFCFG;
1649         else
1650                 reg &= ~sCR0_USFCFG;
1651
1652         /* Disable forced broadcasting */
1653         reg &= ~sCR0_FB;
1654
1655         /* Don't upgrade barriers */
1656         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1657
1658         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1659                 reg |= sCR0_VMID16EN;
1660
1661         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1662                 reg |= sCR0_EXIDENABLE;
1663
1664         /* Push the button */
1665         arm_smmu_tlb_sync_global(smmu);
1666         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1667 }
1668
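/*
 * Translate the address-size field encodings used by the ID registers
 * (e.g. ID2.IAS/OAS/UBS) into an address width in bits; any encoding beyond
 * 4 is treated as 48 bits.
 */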
1669 static int arm_smmu_id_size_to_bits(int size)
1670 {
1671         switch (size) {
1672         case 0:
1673                 return 32;
1674         case 1:
1675                 return 36;
1676         case 2:
1677                 return 40;
1678         case 3:
1679                 return 42;
1680         case 4:
1681                 return 44;
1682         case 5:
1683         default:
1684                 return 48;
1685         }
1686 }
1687
1688 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1689 {
1690         unsigned long size;
1691         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1692         u32 id;
1693         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1694         int i;
1695
1696         dev_notice(smmu->dev, "probing hardware configuration...\n");
1697         dev_notice(smmu->dev, "SMMUv%d with:\n",
1698                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1699
1700         /* ID0 */
1701         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1702
1703         /* Restrict available stages based on module parameter */
1704         if (force_stage == 1)
1705                 id &= ~(ID0_S2TS | ID0_NTS);
1706         else if (force_stage == 2)
1707                 id &= ~(ID0_S1TS | ID0_NTS);
1708
1709         if (id & ID0_S1TS) {
1710                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1711                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1712         }
1713
1714         if (id & ID0_S2TS) {
1715                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1716                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1717         }
1718
1719         if (id & ID0_NTS) {
1720                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1721                 dev_notice(smmu->dev, "\tnested translation\n");
1722         }
1723
1724         if (!(smmu->features &
1725                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1726                 dev_err(smmu->dev, "\tno translation support!\n");
1727                 return -ENODEV;
1728         }
1729
1730         if ((id & ID0_S1TS) &&
1731                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1732                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1733                 dev_notice(smmu->dev, "\taddress translation ops\n");
1734         }
1735
1736         /*
1737          * In order for DMA API calls to work properly, we must defer to what
1738          * the FW says about coherency, regardless of what the hardware claims.
1739          * Fortunately, this also opens up a workaround for systems where the
1740          * ID register value has ended up configured incorrectly.
1741          */
1742         cttw_reg = !!(id & ID0_CTTW);
1743         if (cttw_fw || cttw_reg)
1744                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1745                            cttw_fw ? "" : "non-");
1746         if (cttw_fw != cttw_reg)
1747                 dev_notice(smmu->dev,
1748                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1749
1750         /* Max. number of entries we have for stream matching/indexing */
1751         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1752                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1753                 size = 1 << 16;
1754         } else {
1755                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1756         }
1757         smmu->streamid_mask = size - 1;
1758         if (id & ID0_SMS) {
1759                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1760                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1761                 if (size == 0) {
1762                         dev_err(smmu->dev,
1763                                 "stream-matching supported, but no SMRs present!\n");
1764                         return -ENODEV;
1765                 }
1766
1767                 /* Zero-initialised to mark as invalid */
1768                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1769                                           GFP_KERNEL);
1770                 if (!smmu->smrs)
1771                         return -ENOMEM;
1772
1773                 dev_notice(smmu->dev,
1774                            "\tstream matching with %lu register groups\n", size);
1775         }
1776         /* s2cr->type == 0 means translation, so initialise explicitly */
1777         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1778                                          GFP_KERNEL);
1779         if (!smmu->s2crs)
1780                 return -ENOMEM;
1781         for (i = 0; i < size; i++)
1782                 smmu->s2crs[i] = s2cr_init_val;
1783
1784         smmu->num_mapping_groups = size;
1785         mutex_init(&smmu->stream_map_mutex);
1786         spin_lock_init(&smmu->global_sync_lock);
1787
1788         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1789                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1790                 if (!(id & ID0_PTFS_NO_AARCH32S))
1791                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1792         }
1793
1794         /* ID1 */
1795         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1796         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1797
1798         /* Check for size mismatch of SMMU address space from mapped region */
1799         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1800         size <<= smmu->pgshift;
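        /*
         * 'size' now covers half of the SMMU address space: the second half
         * holds the translation context banks, which is why probe placed
         * cb_base at base + resource_size / 2, and why the warning below
         * reports both sizes doubled.
         */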
1801         if (smmu->cb_base != gr0_base + size)
1802                 dev_warn(smmu->dev,
1803                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1804                         size * 2, (smmu->cb_base - gr0_base) * 2);
1805
1806         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1807         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1808         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1809                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1810                 return -ENODEV;
1811         }
1812         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1813                    smmu->num_context_banks, smmu->num_s2_context_banks);
1814         /*
1815          * Cavium CN88xx erratum #27704.
1816          * Ensure ASID and VMID allocation is unique across all SMMUs in
1817          * the system.
1818          */
1819         if (smmu->model == CAVIUM_SMMUV2) {
1820                 smmu->cavium_id_base =
1821                         atomic_add_return(smmu->num_context_banks,
1822                                           &cavium_smmu_context_count);
1823                 smmu->cavium_id_base -= smmu->num_context_banks;
1824                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1825         }
1826         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1827                                  sizeof(*smmu->cbs), GFP_KERNEL);
1828         if (!smmu->cbs)
1829                 return -ENOMEM;
1830
1831         /* ID2 */
1832         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1833         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1834         smmu->ipa_size = size;
1835
1836         /* The output mask is also applied for bypass */
1837         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1838         smmu->pa_size = size;
1839
1840         if (id & ID2_VMID16)
1841                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1842
1843         /*
1844          * What the page table walker can address actually depends on which
1845          * descriptor format is in use, but since a) we don't know that yet,
1846          * and b) it can vary per context bank, this will have to do...
1847          */
1848         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1849                 dev_warn(smmu->dev,
1850                          "failed to set DMA mask for table walker\n");
1851
1852         if (smmu->version < ARM_SMMU_V2) {
1853                 smmu->va_size = smmu->ipa_size;
1854                 if (smmu->version == ARM_SMMU_V1_64K)
1855                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1856         } else {
1857                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1858                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1859                 if (id & ID2_PTFS_4K)
1860                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1861                 if (id & ID2_PTFS_16K)
1862                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1863                 if (id & ID2_PTFS_64K)
1864                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1865         }
1866
1867         /* Now we've corralled the various formats, what'll it do? */
1868         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1869                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1870         if (smmu->features &
1871             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1872                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1873         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1874                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1875         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1876                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1877
1878         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1879                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1880         else
1881                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1882         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1883                    smmu->pgsize_bitmap);
1884
1886         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1887                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1888                            smmu->va_size, smmu->ipa_size);
1889
1890         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1891                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1892                            smmu->ipa_size, smmu->pa_size);
1893
1894         return 0;
1895 }
1896
1897 struct arm_smmu_match_data {
1898         enum arm_smmu_arch_version version;
1899         enum arm_smmu_implementation model;
1900 };
1901
1902 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1903 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1904
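/*
 * Each instantiation below expands to a static match-data struct, e.g.
 * ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500) becomes:
 *
 *     static struct arm_smmu_match_data arm_mmu500 = {
 *             .version = ARM_SMMU_V2, .model = ARM_MMU500,
 *     };
 *
 * and is referenced as .data from the of_device_id table.
 */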
1905 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1906 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1907 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1908 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1909 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1910
1911 static const struct of_device_id arm_smmu_of_match[] = {
1912         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1913         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1914         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1915         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1916         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1917         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1918         { },
1919 };
1920 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1921
1922 #ifdef CONFIG_ACPI
1923 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1924 {
1925         int ret = 0;
1926
1927         switch (model) {
1928         case ACPI_IORT_SMMU_V1:
1929         case ACPI_IORT_SMMU_CORELINK_MMU400:
1930                 smmu->version = ARM_SMMU_V1;
1931                 smmu->model = GENERIC_SMMU;
1932                 break;
1933         case ACPI_IORT_SMMU_CORELINK_MMU401:
1934                 smmu->version = ARM_SMMU_V1_64K;
1935                 smmu->model = GENERIC_SMMU;
1936                 break;
1937         case ACPI_IORT_SMMU_V2:
1938                 smmu->version = ARM_SMMU_V2;
1939                 smmu->model = GENERIC_SMMU;
1940                 break;
1941         case ACPI_IORT_SMMU_CORELINK_MMU500:
1942                 smmu->version = ARM_SMMU_V2;
1943                 smmu->model = ARM_MMU500;
1944                 break;
1945         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1946                 smmu->version = ARM_SMMU_V2;
1947                 smmu->model = CAVIUM_SMMUV2;
1948                 break;
1949         default:
1950                 ret = -ENODEV;
1951         }
1952
1953         return ret;
1954 }
1955
1956 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1957                                       struct arm_smmu_device *smmu)
1958 {
1959         struct device *dev = smmu->dev;
1960         struct acpi_iort_node *node =
1961                 *(struct acpi_iort_node **)dev_get_platdata(dev);
1962         struct acpi_iort_smmu *iort_smmu;
1963         int ret;
1964
1965         /* Retrieve SMMU1/2 specific data */
1966         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1967
1968         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1969         if (ret < 0)
1970                 return ret;
1971
1972         /* Ignore the configuration access interrupt */
1973         smmu->num_global_irqs = 1;
1974
1975         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1976                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1977
1978         return 0;
1979 }
1980 #else
1981 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1982                                              struct arm_smmu_device *smmu)
1983 {
1984         return -ENODEV;
1985 }
1986 #endif
1987
1988 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1989                                     struct arm_smmu_device *smmu)
1990 {
1991         const struct arm_smmu_match_data *data;
1992         struct device *dev = &pdev->dev;
1993         bool legacy_binding;
1994
1995         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1996                                  &smmu->num_global_irqs)) {
1997                 dev_err(dev, "missing #global-interrupts property\n");
1998                 return -ENODEV;
1999         }
2000
2001         data = of_device_get_match_data(dev);
2002         smmu->version = data->version;
2003         smmu->model = data->model;
2004
2005         parse_driver_options(smmu);
2006
2007         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2008         if (legacy_binding && !using_generic_binding) {
2009                 if (!using_legacy_binding)
2010                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2011                 using_legacy_binding = true;
2012         } else if (!legacy_binding && !using_legacy_binding) {
2013                 using_generic_binding = true;
2014         } else {
2015                 dev_err(dev, "not probing due to mismatched DT properties\n");
2016                 return -ENODEV;
2017         }
2018
2019         if (of_dma_is_coherent(dev->of_node))
2020                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2021
2022         return 0;
2023 }
2024
2025 static void arm_smmu_bus_init(void)
2026 {
2027         /* Oh, for a proper bus abstraction */
2028         if (!iommu_present(&platform_bus_type))
2029                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2030 #ifdef CONFIG_ARM_AMBA
2031         if (!iommu_present(&amba_bustype))
2032                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2033 #endif
2034 #ifdef CONFIG_PCI
2035         if (!iommu_present(&pci_bus_type)) {
2036                 pci_request_acs();
2037                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2038         }
2039 #endif
2040 }
2041
2042 static int arm_smmu_device_probe(struct platform_device *pdev)
2043 {
2044         struct resource *res;
2045         resource_size_t ioaddr;
2046         struct arm_smmu_device *smmu;
2047         struct device *dev = &pdev->dev;
2048         int num_irqs, i, err;
2049
2050         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2051         if (!smmu) {
2052                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2053                 return -ENOMEM;
2054         }
2055         smmu->dev = dev;
2056
2057         if (dev->of_node)
2058                 err = arm_smmu_device_dt_probe(pdev, smmu);
2059         else
2060                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2061
2062         if (err)
2063                 return err;
2064
2065         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2066         ioaddr = res->start;
2067         smmu->base = devm_ioremap_resource(dev, res);
2068         if (IS_ERR(smmu->base))
2069                 return PTR_ERR(smmu->base);
2070         smmu->cb_base = smmu->base + resource_size(res) / 2;
2071
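        /*
         * Count the IRQ resources: the first num_global_irqs of them are
         * treated as global fault interrupts (requested further down), and
         * any remaining ones are assumed to be per-context-bank interrupts.
         */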
2072         num_irqs = 0;
2073         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2074                 num_irqs++;
2075                 if (num_irqs > smmu->num_global_irqs)
2076                         smmu->num_context_irqs++;
2077         }
2078
2079         if (!smmu->num_context_irqs) {
2080                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2081                         num_irqs, smmu->num_global_irqs + 1);
2082                 return -ENODEV;
2083         }
2084
2085         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2086                                   GFP_KERNEL);
2087         if (!smmu->irqs) {
2088                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2089                 return -ENOMEM;
2090         }
2091
2092         for (i = 0; i < num_irqs; ++i) {
2093                 int irq = platform_get_irq(pdev, i);
2094
2095                 if (irq < 0) {
2096                         dev_err(dev, "failed to get irq index %d\n", i);
2097                         return -ENODEV;
2098                 }
2099                 smmu->irqs[i] = irq;
2100         }
2101
2102         err = arm_smmu_device_cfg_probe(smmu);
2103         if (err)
2104                 return err;
2105
2106         if (smmu->version == ARM_SMMU_V2 &&
2107             smmu->num_context_banks != smmu->num_context_irqs) {
2108                 dev_err(dev,
2109                         "found only %d context interrupt(s) but %d required\n",
2110                         smmu->num_context_irqs, smmu->num_context_banks);
2111                 return -ENODEV;
2112         }
2113
2114         for (i = 0; i < smmu->num_global_irqs; ++i) {
2115                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2116                                        arm_smmu_global_fault,
2117                                        IRQF_SHARED,
2118                                        "arm-smmu global fault",
2119                                        smmu);
2120                 if (err) {
2121                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2122                                 i, smmu->irqs[i]);
2123                         return err;
2124                 }
2125         }
2126
2127         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2128                                      "smmu.%pa", &ioaddr);
2129         if (err) {
2130                 dev_err(dev, "Failed to register iommu in sysfs\n");
2131                 return err;
2132         }
2133
2134         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2135         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2136
2137         err = iommu_device_register(&smmu->iommu);
2138         if (err) {
2139                 dev_err(dev, "Failed to register iommu\n");
2140                 return err;
2141         }
2142
2143         platform_set_drvdata(pdev, smmu);
2144         arm_smmu_device_reset(smmu);
2145         arm_smmu_test_smr_masks(smmu);
2146
2147         /*
2148          * For ACPI and generic DT bindings, an SMMU will be probed before
2149          * any device which might need it, so we want the bus ops in place
2150          * ready to handle default domain setup as soon as any SMMU exists.
2151          */
2152         if (!using_legacy_binding)
2153                 arm_smmu_bus_init();
2154
2155         return 0;
2156 }
2157
2158 /*
2159  * With the legacy DT binding in play, though, we have no guarantees about
2160  * probe order, but then we're also not doing default domains, so we can
2161  * delay setting bus ops until we're sure every possible SMMU is ready,
2162  * and that way ensure that no add_device() calls get missed.
2163  */
2164 static int arm_smmu_legacy_bus_init(void)
2165 {
2166         if (using_legacy_binding)
2167                 arm_smmu_bus_init();
2168         return 0;
2169 }
2170 device_initcall_sync(arm_smmu_legacy_bus_init);
2171
2172 static int arm_smmu_device_remove(struct platform_device *pdev)
2173 {
2174         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2175
2176         if (!smmu)
2177                 return -ENODEV;
2178
2179         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2180                 dev_err(&pdev->dev, "removing device with active domains!\n");
2181
2182         /* Turn the thing off */
2183         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2184         return 0;
2185 }
2186
2187 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2188 {
2189         arm_smmu_device_remove(pdev);
2190 }
2191
2192 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2193 {
2194         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2195
2196         arm_smmu_device_reset(smmu);
2197         return 0;
2198 }
2199
2200 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2201
2202 static struct platform_driver arm_smmu_driver = {
2203         .driver = {
2204                 .name           = "arm-smmu",
2205                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2206                 .pm             = &arm_smmu_pm_ops,
2207         },
2208         .probe  = arm_smmu_device_probe,
2209         .remove = arm_smmu_device_remove,
2210         .shutdown = arm_smmu_device_shutdown,
2211 };
2212 module_platform_driver(arm_smmu_driver);
2213
2214 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2215 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2216 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2217 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2218 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2219 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2220
2221 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2222 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2223 MODULE_LICENSE("GPL v2");