[linux-2.6-block.git] drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  *      - Extended Stream ID (16 bit)
28  */
29
30 #define pr_fmt(fmt) "arm-smmu: " fmt
31
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
53
54 #include <linux/amba/bus.h>
55 #include <linux/fsl/mc.h>
56
57 #include "io-pgtable.h"
58 #include "arm-smmu-regs.h"
59
60 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
61
62 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
63 #define ARM_MMU500_ACR_S2CRB_TLBEN      (1 << 10)
64 #define ARM_MMU500_ACR_SMTNMB_TLBEN     (1 << 8)
65
66 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
67 #define TLB_SPIN_COUNT                  10
68
69 /* Maximum number of context banks per SMMU */
70 #define ARM_SMMU_MAX_CBS                128
71
72 /* SMMU global address space */
73 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
74 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
75
76 /*
77  * SMMU global address space with conditional offset to access secure
78  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
79  * nsGFSYNR0: 0x450)
80  */
81 #define ARM_SMMU_GR0_NS(smmu)                                           \
82         ((smmu)->base +                                                 \
83                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
84                         ? 0x400 : 0))
85
86 /*
87  * Some 64-bit registers only make sense to write atomically, but in such
88  * cases all the data relevant to AArch32 formats lies within the lower word,
89  * therefore this actually makes more sense than it might first appear.
90  */
91 #ifdef CONFIG_64BIT
92 #define smmu_write_atomic_lq            writeq_relaxed
93 #else
94 #define smmu_write_atomic_lq            writel_relaxed
95 #endif
96
97 /* Translation context bank */
98 #define ARM_SMMU_CB(smmu, n)    ((smmu)->cb_base + ((n) << (smmu)->pgshift))
99
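/* Fixed IOVA window reserved for mapping MSI doorbells (software MSI region) */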
100 #define MSI_IOVA_BASE                   0x8000000
101 #define MSI_IOVA_LENGTH                 0x100000
102
103 static int force_stage;
104 module_param(force_stage, int, S_IRUGO);
105 MODULE_PARM_DESC(force_stage,
106         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
107 static bool disable_bypass;
108 module_param(disable_bypass, bool, S_IRUGO);
109 MODULE_PARM_DESC(disable_bypass,
110         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
111
112 enum arm_smmu_arch_version {
113         ARM_SMMU_V1,
114         ARM_SMMU_V1_64K,
115         ARM_SMMU_V2,
116 };
117
118 enum arm_smmu_implementation {
119         GENERIC_SMMU,
120         ARM_MMU500,
121         CAVIUM_SMMUV2,
122 };
123
124 struct arm_smmu_s2cr {
125         struct iommu_group              *group;
126         int                             count;
127         enum arm_smmu_s2cr_type         type;
128         enum arm_smmu_s2cr_privcfg      privcfg;
129         u8                              cbndx;
130 };
131
132 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
133         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
134 }
135
136 struct arm_smmu_smr {
137         u16                             mask;
138         u16                             id;
139         bool                            valid;
140 };
141
142 struct arm_smmu_cb {
143         u64                             ttbr[2];
144         u32                             tcr[2];
145         u32                             mair[2];
146         struct arm_smmu_cfg             *cfg;
147 };
148
149 struct arm_smmu_master_cfg {
150         struct arm_smmu_device          *smmu;
151         s16                             smendx[];
152 };
153 #define INVALID_SMENDX                  -1
154 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
155 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
156 #define fwspec_smendx(fw, i) \
157         (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
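/*
 * Iterate over a master's stream IDs together with their stream map entry
 * indices, e.g. "for_each_cfg_sme(fwspec, i, idx) arm_smmu_write_sme(smmu, idx);"
 * Entries that have not been allocated yet read back as INVALID_SMENDX.
 */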
158 #define for_each_cfg_sme(fw, i, idx) \
159         for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
160
161 struct arm_smmu_device {
162         struct device                   *dev;
163
164         void __iomem                    *base;
165         void __iomem                    *cb_base;
166         unsigned long                   pgshift;
167
168 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
169 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
170 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
171 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
172 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
173 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
174 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
175 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
176 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
177 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
178 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
179 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
180 #define ARM_SMMU_FEAT_EXIDS             (1 << 12)
181         u32                             features;
182
183 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
184         u32                             options;
185         enum arm_smmu_arch_version      version;
186         enum arm_smmu_implementation    model;
187
188         u32                             num_context_banks;
189         u32                             num_s2_context_banks;
190         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
191         struct arm_smmu_cb              *cbs;
192         atomic_t                        irptndx;
193
194         u32                             num_mapping_groups;
195         u16                             streamid_mask;
196         u16                             smr_mask_mask;
197         struct arm_smmu_smr             *smrs;
198         struct arm_smmu_s2cr            *s2crs;
199         struct mutex                    stream_map_mutex;
200
201         unsigned long                   va_size;
202         unsigned long                   ipa_size;
203         unsigned long                   pa_size;
204         unsigned long                   pgsize_bitmap;
205
206         u32                             num_global_irqs;
207         u32                             num_context_irqs;
208         unsigned int                    *irqs;
209
210         u32                             cavium_id_base; /* Specific to Cavium */
211
212         spinlock_t                      global_sync_lock;
213
214         /* IOMMU core code handle */
215         struct iommu_device             iommu;
216 };
217
218 enum arm_smmu_context_fmt {
219         ARM_SMMU_CTX_FMT_NONE,
220         ARM_SMMU_CTX_FMT_AARCH64,
221         ARM_SMMU_CTX_FMT_AARCH32_L,
222         ARM_SMMU_CTX_FMT_AARCH32_S,
223 };
224
225 struct arm_smmu_cfg {
226         u8                              cbndx;
227         u8                              irptndx;
228         union {
229                 u16                     asid;
230                 u16                     vmid;
231         };
232         u32                             cbar;
233         enum arm_smmu_context_fmt       fmt;
234 };
235 #define INVALID_IRPTNDX                 0xff
236
237 enum arm_smmu_domain_stage {
238         ARM_SMMU_DOMAIN_S1 = 0,
239         ARM_SMMU_DOMAIN_S2,
240         ARM_SMMU_DOMAIN_NESTED,
241         ARM_SMMU_DOMAIN_BYPASS,
242 };
243
244 struct arm_smmu_domain {
245         struct arm_smmu_device          *smmu;
246         struct io_pgtable_ops           *pgtbl_ops;
247         const struct iommu_gather_ops   *tlb_ops;
248         struct arm_smmu_cfg             cfg;
249         enum arm_smmu_domain_stage      stage;
250         bool                            non_strict;
251         struct mutex                    init_mutex; /* Protects smmu pointer */
252         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
253         struct iommu_domain             domain;
254 };
255
256 struct arm_smmu_option_prop {
257         u32 opt;
258         const char *prop;
259 };
260
261 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
262
263 static bool using_legacy_binding, using_generic_binding;
264
265 static struct arm_smmu_option_prop arm_smmu_options[] = {
266         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
267         { 0, NULL},
268 };
269
270 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
271 {
272         return container_of(dom, struct arm_smmu_domain, domain);
273 }
274
275 static void parse_driver_options(struct arm_smmu_device *smmu)
276 {
277         int i = 0;
278
279         do {
280                 if (of_property_read_bool(smmu->dev->of_node,
281                                                 arm_smmu_options[i].prop)) {
282                         smmu->options |= arm_smmu_options[i].opt;
283                         dev_notice(smmu->dev, "option %s\n",
284                                 arm_smmu_options[i].prop);
285                 }
286         } while (arm_smmu_options[++i].opt);
287 }
288
289 static struct device_node *dev_get_dev_node(struct device *dev)
290 {
291         if (dev_is_pci(dev)) {
292                 struct pci_bus *bus = to_pci_dev(dev)->bus;
293
294                 while (!pci_is_root_bus(bus))
295                         bus = bus->parent;
296                 return of_node_get(bus->bridge->parent->of_node);
297         }
298
299         return of_node_get(dev->of_node);
300 }
301
302 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
303 {
304         *((__be32 *)data) = cpu_to_be32(alias);
305         return 0; /* Continue walking */
306 }
307
308 static int __find_legacy_master_phandle(struct device *dev, void *data)
309 {
310         struct of_phandle_iterator *it = *(void **)data;
311         struct device_node *np = it->node;
312         int err;
313
314         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
315                             "#stream-id-cells", 0)
316                 if (it->node == np) {
317                         *(void **)data = dev;
318                         return 1;
319                 }
320         it->node = np;
321         return err == -ENOENT ? 0 : err;
322 }
323
324 static struct platform_driver arm_smmu_driver;
325 static struct iommu_ops arm_smmu_ops;
326
327 static int arm_smmu_register_legacy_master(struct device *dev,
328                                            struct arm_smmu_device **smmu)
329 {
330         struct device *smmu_dev;
331         struct device_node *np;
332         struct of_phandle_iterator it;
333         void *data = &it;
334         u32 *sids;
335         __be32 pci_sid;
336         int err;
337
338         np = dev_get_dev_node(dev);
339         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
340                 of_node_put(np);
341                 return -ENODEV;
342         }
343
344         it.node = np;
345         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
346                                      __find_legacy_master_phandle);
347         smmu_dev = data;
348         of_node_put(np);
349         if (err == 0)
350                 return -ENODEV;
351         if (err < 0)
352                 return err;
353
354         if (dev_is_pci(dev)) {
355                 /* "mmu-masters" assumes Stream ID == Requester ID */
356                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
357                                        &pci_sid);
358                 it.cur = &pci_sid;
359                 it.cur_count = 1;
360         }
361
362         err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
363                                 &arm_smmu_ops);
364         if (err)
365                 return err;
366
367         sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
368         if (!sids)
369                 return -ENOMEM;
370
371         *smmu = dev_get_drvdata(smmu_dev);
372         of_phandle_iterator_args(&it, sids, it.cur_count);
373         err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
374         kfree(sids);
375         return err;
376 }
377
378 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
379 {
380         int idx;
381
382         do {
383                 idx = find_next_zero_bit(map, end, start);
384                 if (idx == end)
385                         return -ENOSPC;
386         } while (test_and_set_bit(idx, map));
387
388         return idx;
389 }
390
391 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
392 {
393         clear_bit(idx, map);
394 }
395
396 /* Wait for any pending TLB invalidations to complete */
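/* Spin a few times, then back off exponentially up to TLB_LOOP_TIMEOUT. */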
397 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
398                                 void __iomem *sync, void __iomem *status)
399 {
400         unsigned int spin_cnt, delay;
401
402         writel_relaxed(0, sync);
403         for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
404                 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
405                         if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
406                                 return;
407                         cpu_relax();
408                 }
409                 udelay(delay);
410         }
411         dev_err_ratelimited(smmu->dev,
412                             "TLB sync timed out -- SMMU may be deadlocked\n");
413 }
414
415 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
416 {
417         void __iomem *base = ARM_SMMU_GR0(smmu);
418         unsigned long flags;
419
420         spin_lock_irqsave(&smmu->global_sync_lock, flags);
421         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
422                             base + ARM_SMMU_GR0_sTLBGSTATUS);
423         spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
424 }
425
426 static void arm_smmu_tlb_sync_context(void *cookie)
427 {
428         struct arm_smmu_domain *smmu_domain = cookie;
429         struct arm_smmu_device *smmu = smmu_domain->smmu;
430         void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
431         unsigned long flags;
432
433         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
434         __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
435                             base + ARM_SMMU_CB_TLBSTATUS);
436         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
437 }
438
439 static void arm_smmu_tlb_sync_vmid(void *cookie)
440 {
441         struct arm_smmu_domain *smmu_domain = cookie;
442
443         arm_smmu_tlb_sync_global(smmu_domain->smmu);
444 }
445
446 static void arm_smmu_tlb_inv_context_s1(void *cookie)
447 {
448         struct arm_smmu_domain *smmu_domain = cookie;
449         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
450         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
451
452         /*
453          * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
454          * cleared by the current CPU are visible to the SMMU before the TLBI.
455          */
456         writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
457         arm_smmu_tlb_sync_context(cookie);
458 }
459
460 static void arm_smmu_tlb_inv_context_s2(void *cookie)
461 {
462         struct arm_smmu_domain *smmu_domain = cookie;
463         struct arm_smmu_device *smmu = smmu_domain->smmu;
464         void __iomem *base = ARM_SMMU_GR0(smmu);
465
466         /* NOTE: see above */
467         writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
468         arm_smmu_tlb_sync_global(smmu);
469 }
470
471 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
472                                           size_t granule, bool leaf, void *cookie)
473 {
474         struct arm_smmu_domain *smmu_domain = cookie;
475         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
476         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
477         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
478
479         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
480                 wmb();
481
482         if (stage1) {
483                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
484
485                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
486                         iova &= ~12UL;
487                         iova |= cfg->asid;
488                         do {
489                                 writel_relaxed(iova, reg);
490                                 iova += granule;
491                         } while (size -= granule);
492                 } else {
493                         iova >>= 12;
494                         iova |= (u64)cfg->asid << 48;
495                         do {
496                                 writeq_relaxed(iova, reg);
497                                 iova += granule >> 12;
498                         } while (size -= granule);
499                 }
500         } else {
501                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
502                               ARM_SMMU_CB_S2_TLBIIPAS2;
503                 iova >>= 12;
504                 do {
505                         smmu_write_atomic_lq(iova, reg);
506                         iova += granule >> 12;
507                 } while (size -= granule);
508         }
509 }
510
511 /*
512  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
513  * almost negligible, but the benefit of getting the first one in as far ahead
514  * of the sync as possible is significant, hence we don't just make this a
515  * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
516  */
517 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
518                                          size_t granule, bool leaf, void *cookie)
519 {
520         struct arm_smmu_domain *smmu_domain = cookie;
521         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
522
523         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
524                 wmb();
525
526         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
527 }
528
529 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
530         .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
531         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
532         .tlb_sync       = arm_smmu_tlb_sync_context,
533 };
534
535 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
536         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
537         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
538         .tlb_sync       = arm_smmu_tlb_sync_context,
539 };
540
541 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
542         .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
543         .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
544         .tlb_sync       = arm_smmu_tlb_sync_vmid,
545 };
546
547 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
548 {
549         u32 fsr, fsynr;
550         unsigned long iova;
551         struct iommu_domain *domain = dev;
552         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
553         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
554         struct arm_smmu_device *smmu = smmu_domain->smmu;
555         void __iomem *cb_base;
556
557         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
558         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
559
560         if (!(fsr & FSR_FAULT))
561                 return IRQ_NONE;
562
563         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
564         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
565
566         dev_err_ratelimited(smmu->dev,
567         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
568                             fsr, iova, fsynr, cfg->cbndx);
569
570         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
571         return IRQ_HANDLED;
572 }
573
574 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
575 {
576         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
577         struct arm_smmu_device *smmu = dev;
578         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
579
580         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
581         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
582         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
583         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
584
585         if (!gfsr)
586                 return IRQ_NONE;
587
588         dev_err_ratelimited(smmu->dev,
589                 "Unexpected global fault, this could be serious\n");
590         dev_err_ratelimited(smmu->dev,
591                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
592                 gfsr, gfsynr0, gfsynr1, gfsynr2);
593
594         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
595         return IRQ_HANDLED;
596 }
597
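/*
 * Stage the TTBCR/TTBR/MAIR values for a context bank in its software copy;
 * the hardware registers are only written by arm_smmu_write_context_bank().
 */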
598 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
599                                        struct io_pgtable_cfg *pgtbl_cfg)
600 {
601         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
602         struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
603         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
604
605         cb->cfg = cfg;
606
607         /* TTBCR */
608         if (stage1) {
609                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
610                         cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
611                 } else {
612                         cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
613                         cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
614                         cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
615                         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
616                                 cb->tcr[1] |= TTBCR2_AS;
617                 }
618         } else {
619                 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
620         }
621
622         /* TTBRs */
623         if (stage1) {
624                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
625                         cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
626                         cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
627                 } else {
628                         cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
629                         cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
630                         cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
631                         cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
632                 }
633         } else {
634                 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
635         }
636
637         /* MAIRs (stage-1 only) */
638         if (stage1) {
639                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
640                         cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
641                         cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
642                 } else {
643                         cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
644                         cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
645                 }
646         }
647 }
648
649 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
650 {
651         u32 reg;
652         bool stage1;
653         struct arm_smmu_cb *cb = &smmu->cbs[idx];
654         struct arm_smmu_cfg *cfg = cb->cfg;
655         void __iomem *cb_base, *gr1_base;
656
657         cb_base = ARM_SMMU_CB(smmu, idx);
658
659         /* Unassigned context banks only need disabling */
660         if (!cfg) {
661                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
662                 return;
663         }
664
665         gr1_base = ARM_SMMU_GR1(smmu);
666         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
667
668         /* CBA2R */
669         if (smmu->version > ARM_SMMU_V1) {
670                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
671                         reg = CBA2R_RW64_64BIT;
672                 else
673                         reg = CBA2R_RW64_32BIT;
674                 /* 16-bit VMIDs live in CBA2R */
675                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
676                         reg |= cfg->vmid << CBA2R_VMID_SHIFT;
677
678                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
679         }
680
681         /* CBAR */
682         reg = cfg->cbar;
683         if (smmu->version < ARM_SMMU_V2)
684                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
685
686         /*
687          * Use the weakest shareability/memory types, so they are
688          * overridden by the ttbcr/pte.
689          */
690         if (stage1) {
691                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
692                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
693         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
694                 /* 8-bit VMIDs live in CBAR */
695                 reg |= cfg->vmid << CBAR_VMID_SHIFT;
696         }
697         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
698
699         /*
700          * TTBCR
701          * We must write this before the TTBRs, since it determines the
702          * access behaviour of some fields (in particular, ASID[15:8]).
703          */
704         if (stage1 && smmu->version > ARM_SMMU_V1)
705                 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
706         writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
707
708         /* TTBRs */
709         if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
710                 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
711                 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
712                 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
713         } else {
714                 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
715                 if (stage1)
716                         writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
717         }
718
719         /* MAIRs (stage-1 only) */
720         if (stage1) {
721                 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
722                 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
723         }
724
725         /* SCTLR */
726         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
727         if (stage1)
728                 reg |= SCTLR_S1_ASIDPNE;
729         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
730                 reg |= SCTLR_E;
731
732         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
733 }
734
735 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
736                                         struct arm_smmu_device *smmu)
737 {
738         int irq, start, ret = 0;
739         unsigned long ias, oas;
740         struct io_pgtable_ops *pgtbl_ops;
741         struct io_pgtable_cfg pgtbl_cfg;
742         enum io_pgtable_fmt fmt;
743         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
744         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
745
746         mutex_lock(&smmu_domain->init_mutex);
747         if (smmu_domain->smmu)
748                 goto out_unlock;
749
750         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
751                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
752                 smmu_domain->smmu = smmu;
753                 goto out_unlock;
754         }
755
756         /*
757          * Mapping the requested stage onto what we support is surprisingly
758          * complicated, mainly because the spec allows S1+S2 SMMUs without
759          * support for nested translation. That means we end up with the
760          * following table:
761          *
762          * Requested        Supported        Actual
763          *     S1               N              S1
764          *     S1             S1+S2            S1
765          *     S1               S2             S2
766          *     S1               S1             S1
767          *     N                N              N
768          *     N              S1+S2            S2
769          *     N                S2             S2
770          *     N                S1             S1
771          *
772          * Note that you can't actually request stage-2 mappings.
773          */
774         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
775                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
776         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
777                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
778
779         /*
780          * Choosing a suitable context format is even more fiddly. Until we
781          * grow some way for the caller to express a preference, and/or move
782          * the decision into the io-pgtable code where it arguably belongs,
783          * just aim for the closest thing to the rest of the system, and hope
784          * that the hardware isn't esoteric enough that we can't assume AArch64
785          * support to be a superset of AArch32 support...
786          */
787         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
788                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
789         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
790             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
791             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
792             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
793                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
794         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
795             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
796                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
797                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
798                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
799
800         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
801                 ret = -EINVAL;
802                 goto out_unlock;
803         }
804
805         switch (smmu_domain->stage) {
806         case ARM_SMMU_DOMAIN_S1:
807                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
808                 start = smmu->num_s2_context_banks;
809                 ias = smmu->va_size;
810                 oas = smmu->ipa_size;
811                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
812                         fmt = ARM_64_LPAE_S1;
813                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
814                         fmt = ARM_32_LPAE_S1;
815                         ias = min(ias, 32UL);
816                         oas = min(oas, 40UL);
817                 } else {
818                         fmt = ARM_V7S;
819                         ias = min(ias, 32UL);
820                         oas = min(oas, 32UL);
821                 }
822                 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
823                 break;
824         case ARM_SMMU_DOMAIN_NESTED:
825                 /*
826                  * We will likely want to change this if/when KVM gets
827                  * involved.
828                  */
829         case ARM_SMMU_DOMAIN_S2:
830                 cfg->cbar = CBAR_TYPE_S2_TRANS;
831                 start = 0;
832                 ias = smmu->ipa_size;
833                 oas = smmu->pa_size;
834                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
835                         fmt = ARM_64_LPAE_S2;
836                 } else {
837                         fmt = ARM_32_LPAE_S2;
838                         ias = min(ias, 40UL);
839                         oas = min(oas, 40UL);
840                 }
841                 if (smmu->version == ARM_SMMU_V2)
842                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
843                 else
844                         smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
845                 break;
846         default:
847                 ret = -EINVAL;
848                 goto out_unlock;
849         }
850         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
851                                       smmu->num_context_banks);
852         if (ret < 0)
853                 goto out_unlock;
854
855         cfg->cbndx = ret;
856         if (smmu->version < ARM_SMMU_V2) {
857                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
858                 cfg->irptndx %= smmu->num_context_irqs;
859         } else {
860                 cfg->irptndx = cfg->cbndx;
861         }
862
863         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
864                 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
865         else
866                 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
867
868         pgtbl_cfg = (struct io_pgtable_cfg) {
869                 .pgsize_bitmap  = smmu->pgsize_bitmap,
870                 .ias            = ias,
871                 .oas            = oas,
872                 .tlb            = smmu_domain->tlb_ops,
873                 .iommu_dev      = smmu->dev,
874         };
875
876         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
877                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
878
879         if (smmu_domain->non_strict)
880                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
881
882         smmu_domain->smmu = smmu;
883         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
884         if (!pgtbl_ops) {
885                 ret = -ENOMEM;
886                 goto out_clear_smmu;
887         }
888
889         /* Update the domain's page sizes to reflect the page table format */
890         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
891         domain->geometry.aperture_end = (1UL << ias) - 1;
892         domain->geometry.force_aperture = true;
893
894         /* Initialise the context bank with our page table cfg */
895         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
896         arm_smmu_write_context_bank(smmu, cfg->cbndx);
897
898         /*
899          * Request context fault interrupt. Do this last to avoid the
900          * handler seeing a half-initialised domain state.
901          */
902         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
903         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
904                                IRQF_SHARED, "arm-smmu-context-fault", domain);
905         if (ret < 0) {
906                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
907                         cfg->irptndx, irq);
908                 cfg->irptndx = INVALID_IRPTNDX;
909         }
910
911         mutex_unlock(&smmu_domain->init_mutex);
912
913         /* Publish page table ops for map/unmap */
914         smmu_domain->pgtbl_ops = pgtbl_ops;
915         return 0;
916
917 out_clear_smmu:
918         smmu_domain->smmu = NULL;
919 out_unlock:
920         mutex_unlock(&smmu_domain->init_mutex);
921         return ret;
922 }
923
924 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
925 {
926         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
927         struct arm_smmu_device *smmu = smmu_domain->smmu;
928         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
929         int irq;
930
931         if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
932                 return;
933
934         /*
935          * Disable the context bank and free the page tables before freeing
936          * it.
937          */
938         smmu->cbs[cfg->cbndx].cfg = NULL;
939         arm_smmu_write_context_bank(smmu, cfg->cbndx);
940
941         if (cfg->irptndx != INVALID_IRPTNDX) {
942                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
943                 devm_free_irq(smmu->dev, irq, domain);
944         }
945
946         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
947         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
948 }
949
950 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
951 {
952         struct arm_smmu_domain *smmu_domain;
953
954         if (type != IOMMU_DOMAIN_UNMANAGED &&
955             type != IOMMU_DOMAIN_DMA &&
956             type != IOMMU_DOMAIN_IDENTITY)
957                 return NULL;
958         /*
959          * Allocate the domain and initialise some of its data structures.
960          * We can't really do anything meaningful until we've added a
961          * master.
962          */
963         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
964         if (!smmu_domain)
965                 return NULL;
966
967         if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
968             iommu_get_dma_cookie(&smmu_domain->domain))) {
969                 kfree(smmu_domain);
970                 return NULL;
971         }
972
973         mutex_init(&smmu_domain->init_mutex);
974         spin_lock_init(&smmu_domain->cb_lock);
975
976         return &smmu_domain->domain;
977 }
978
979 static void arm_smmu_domain_free(struct iommu_domain *domain)
980 {
981         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
982
983         /*
984          * Free the domain resources. We assume that all devices have
985          * already been detached.
986          */
987         iommu_put_dma_cookie(domain);
988         arm_smmu_destroy_domain_context(domain);
989         kfree(smmu_domain);
990 }
991
992 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
993 {
994         struct arm_smmu_smr *smr = smmu->smrs + idx;
995         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
996
997         if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
998                 reg |= SMR_VALID;
999         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1000 }
1001
1002 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1003 {
1004         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1005         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1006                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1007                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1008
1009         if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1010             smmu->smrs[idx].valid)
1011                 reg |= S2CR_EXIDVALID;
1012         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1013 }
1014
1015 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1016 {
1017         arm_smmu_write_s2cr(smmu, idx);
1018         if (smmu->smrs)
1019                 arm_smmu_write_smr(smmu, idx);
1020 }
1021
1022 /*
1023  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1024  * should be called after sCR0 is written.
1025  */
1026 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1027 {
1028         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1029         u32 smr;
1030
1031         if (!smmu->smrs)
1032                 return;
1033
1034         /*
1035          * SMR.ID bits may not be preserved if the corresponding MASK
1036          * bits are set, so check each one separately. We can reject
1037          * masters later if they try to claim IDs outside these masks.
1038          */
1039         smr = smmu->streamid_mask << SMR_ID_SHIFT;
1040         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1041         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1042         smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1043
1044         smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1045         writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1046         smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1047         smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1048 }
1049
1050 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1051 {
1052         struct arm_smmu_smr *smrs = smmu->smrs;
1053         int i, free_idx = -ENOSPC;
1054
1055         /* Stream indexing is blissfully easy */
1056         if (!smrs)
1057                 return id;
1058
1059         /* Validating SMRs is... less so */
1060         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1061                 if (!smrs[i].valid) {
1062                         /*
1063                          * Note the first free entry we come across, which
1064                          * we'll claim in the end if nothing else matches.
1065                          */
1066                         if (free_idx < 0)
1067                                 free_idx = i;
1068                         continue;
1069                 }
1070                 /*
1071                  * If the new entry is _entirely_ matched by an existing entry,
1072                  * then reuse that, with the guarantee that there also cannot
1073                  * be any subsequent conflicting entries. In normal use we'd
1074                  * expect simply identical entries for this case, but there's
1075                  * no harm in accommodating the generalisation.
1076                  */
1077                 if ((mask & smrs[i].mask) == mask &&
1078                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1079                         return i;
1080                 /*
1081                  * If the new entry has any other overlap with an existing one,
1082                  * though, then there always exists at least one stream ID
1083                  * which would cause a conflict, and we can't allow that risk.
1084                  */
1085                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1086                         return -EINVAL;
1087         }
1088
1089         return free_idx;
1090 }
1091
1092 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1093 {
1094         if (--smmu->s2crs[idx].count)
1095                 return false;
1096
1097         smmu->s2crs[idx] = s2cr_init_val;
1098         if (smmu->smrs)
1099                 smmu->smrs[idx].valid = false;
1100
1101         return true;
1102 }
1103
1104 static int arm_smmu_master_alloc_smes(struct device *dev)
1105 {
1106         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1107         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1108         struct arm_smmu_device *smmu = cfg->smmu;
1109         struct arm_smmu_smr *smrs = smmu->smrs;
1110         struct iommu_group *group;
1111         int i, idx, ret;
1112
1113         mutex_lock(&smmu->stream_map_mutex);
1114         /* Figure out a viable stream map entry allocation */
1115         for_each_cfg_sme(fwspec, i, idx) {
1116                 u16 sid = fwspec->ids[i];
1117                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1118
1119                 if (idx != INVALID_SMENDX) {
1120                         ret = -EEXIST;
1121                         goto out_err;
1122                 }
1123
1124                 ret = arm_smmu_find_sme(smmu, sid, mask);
1125                 if (ret < 0)
1126                         goto out_err;
1127
1128                 idx = ret;
1129                 if (smrs && smmu->s2crs[idx].count == 0) {
1130                         smrs[idx].id = sid;
1131                         smrs[idx].mask = mask;
1132                         smrs[idx].valid = true;
1133                 }
1134                 smmu->s2crs[idx].count++;
1135                 cfg->smendx[i] = (s16)idx;
1136         }
1137
1138         group = iommu_group_get_for_dev(dev);
1139         if (!group)
1140                 group = ERR_PTR(-ENOMEM);
1141         if (IS_ERR(group)) {
1142                 ret = PTR_ERR(group);
1143                 goto out_err;
1144         }
1145         iommu_group_put(group);
1146
1147         /* It worked! Now, poke the actual hardware */
1148         for_each_cfg_sme(fwspec, i, idx) {
1149                 arm_smmu_write_sme(smmu, idx);
1150                 smmu->s2crs[idx].group = group;
1151         }
1152
1153         mutex_unlock(&smmu->stream_map_mutex);
1154         return 0;
1155
1156 out_err:
1157         while (i--) {
1158                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1159                 cfg->smendx[i] = INVALID_SMENDX;
1160         }
1161         mutex_unlock(&smmu->stream_map_mutex);
1162         return ret;
1163 }
1164
1165 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1166 {
1167         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1168         struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1169         int i, idx;
1170
1171         mutex_lock(&smmu->stream_map_mutex);
1172         for_each_cfg_sme(fwspec, i, idx) {
1173                 if (arm_smmu_free_sme(smmu, idx))
1174                         arm_smmu_write_sme(smmu, idx);
1175                 cfg->smendx[i] = INVALID_SMENDX;
1176         }
1177         mutex_unlock(&smmu->stream_map_mutex);
1178 }
1179
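/*
 * Point each of the master's stream map entries at the domain's context
 * bank (or at bypass, for an identity domain).
 */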
1180 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1181                                       struct iommu_fwspec *fwspec)
1182 {
1183         struct arm_smmu_device *smmu = smmu_domain->smmu;
1184         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1185         u8 cbndx = smmu_domain->cfg.cbndx;
1186         enum arm_smmu_s2cr_type type;
1187         int i, idx;
1188
1189         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1190                 type = S2CR_TYPE_BYPASS;
1191         else
1192                 type = S2CR_TYPE_TRANS;
1193
1194         for_each_cfg_sme(fwspec, i, idx) {
1195                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1196                         continue;
1197
1198                 s2cr[idx].type = type;
1199                 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1200                 s2cr[idx].cbndx = cbndx;
1201                 arm_smmu_write_s2cr(smmu, idx);
1202         }
1203         return 0;
1204 }
1205
1206 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1207 {
1208         int ret;
1209         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1210         struct arm_smmu_device *smmu;
1211         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1212
1213         if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1214                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1215                 return -ENXIO;
1216         }
1217
1218         /*
1219          * FIXME: The arch/arm DMA API code tries to attach devices to its own
1220          * domains between of_xlate() and add_device() - we have no way to cope
1221          * with that, so until ARM gets converted to rely on groups and default
1222          * domains, just say no (but more politely than by dereferencing NULL).
1223          * This should be at least a WARN_ON once that's sorted.
1224          */
1225         if (!fwspec->iommu_priv)
1226                 return -ENODEV;
1227
1228         smmu = fwspec_smmu(fwspec);
1229         /* Ensure that the domain is finalised */
1230         ret = arm_smmu_init_domain_context(domain, smmu);
1231         if (ret < 0)
1232                 return ret;
1233
1234         /*
1235          * Sanity check the domain. We don't support domains across
1236          * different SMMUs.
1237          */
1238         if (smmu_domain->smmu != smmu) {
1239                 dev_err(dev,
1240                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1241                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1242                 return -EINVAL;
1243         }
1244
1245         /* Looks ok, so add the device to the domain */
1246         return arm_smmu_domain_add_master(smmu_domain, fwspec);
1247 }
1248
1249 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1250                         phys_addr_t paddr, size_t size, int prot)
1251 {
1252         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1253
1254         if (!ops)
1255                 return -ENODEV;
1256
1257         return ops->map(ops, iova, paddr, size, prot);
1258 }
1259
1260 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1261                              size_t size)
1262 {
1263         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1264
1265         if (!ops)
1266                 return 0;
1267
1268         return ops->unmap(ops, iova, size);
1269 }
1270
1271 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1272 {
1273         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1274
1275         if (smmu_domain->tlb_ops)
1276                 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1277 }
1278
1279 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1280 {
1281         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1282
1283         if (smmu_domain->tlb_ops)
1284                 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1285 }
1286
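/*
 * Resolve an IOVA via the hardware ATS1PR translation port, falling back to
 * a software table walk if the translation times out.
 */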
1287 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1288                                               dma_addr_t iova)
1289 {
1290         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1291         struct arm_smmu_device *smmu = smmu_domain->smmu;
1292         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1293         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1294         struct device *dev = smmu->dev;
1295         void __iomem *cb_base;
1296         u32 tmp;
1297         u64 phys;
1298         unsigned long va, flags;
1299
1300         cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1301
1302         spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1303         /* ATS1 registers can only be written atomically */
1304         va = iova & ~0xfffUL;
1305         if (smmu->version == ARM_SMMU_V2)
1306                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1307         else /* Register is only 32-bit in v1 */
1308                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1309
1310         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1311                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1312                 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313                 dev_err(dev,
1314                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1315                         &iova);
1316                 return ops->iova_to_phys(ops, iova);
1317         }
1318
1319         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1320         spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1321         if (phys & CB_PAR_F) {
1322                 dev_err(dev, "translation fault!\n");
1323                 dev_err(dev, "PAR = 0x%llx\n", phys);
1324                 return 0;
1325         }
1326
1327         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1328 }
1329
1330 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1331                                         dma_addr_t iova)
1332 {
1333         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1334         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1335
1336         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1337                 return iova;
1338
1339         if (!ops)
1340                 return 0;
1341
1342         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1343                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1344                 return arm_smmu_iova_to_phys_hard(domain, iova);
1345
1346         return ops->iova_to_phys(ops, iova);
1347 }
1348
1349 static bool arm_smmu_capable(enum iommu_cap cap)
1350 {
1351         switch (cap) {
1352         case IOMMU_CAP_CACHE_COHERENCY:
1353                 /*
1354                  * Return true here as the SMMU can always send out coherent
1355                  * requests.
1356                  */
1357                 return true;
1358         case IOMMU_CAP_NOEXEC:
1359                 return true;
1360         default:
1361                 return false;
1362         }
1363 }
1364
1365 static int arm_smmu_match_node(struct device *dev, void *data)
1366 {
1367         return dev->fwnode == data;
1368 }
1369
1370 static
1371 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1372 {
1373         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1374                                                 fwnode, arm_smmu_match_node);
1375         put_device(dev);
1376         return dev ? dev_get_drvdata(dev) : NULL;
1377 }
1378
1379 static int arm_smmu_add_device(struct device *dev)
1380 {
1381         struct arm_smmu_device *smmu;
1382         struct arm_smmu_master_cfg *cfg;
1383         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1384         int i, ret;
1385
1386         if (using_legacy_binding) {
1387                 ret = arm_smmu_register_legacy_master(dev, &smmu);
1388
1389                 /*
1390                  * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1391                  * will allocate/initialise a new one. Thus we need to update fwspec for
1392                  * later use.
1393                  */
1394                 fwspec = dev->iommu_fwspec;
1395                 if (ret)
1396                         goto out_free;
1397         } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1398                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1399         } else {
1400                 return -ENODEV;
1401         }
1402
1403         ret = -EINVAL;
1404         for (i = 0; i < fwspec->num_ids; i++) {
1405                 u16 sid = fwspec->ids[i];
1406                 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1407
1408                 if (sid & ~smmu->streamid_mask) {
1409                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1410                                 sid, smmu->streamid_mask);
1411                         goto out_free;
1412                 }
1413                 if (mask & ~smmu->smr_mask_mask) {
1414                         dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1415                                 mask, smmu->smr_mask_mask);
1416                         goto out_free;
1417                 }
1418         }
1419
1420         ret = -ENOMEM;
1421         cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1422                       GFP_KERNEL);
1423         if (!cfg)
1424                 goto out_free;
1425
1426         cfg->smmu = smmu;
1427         fwspec->iommu_priv = cfg;
1428         while (i--)
1429                 cfg->smendx[i] = INVALID_SMENDX;
1430
1431         ret = arm_smmu_master_alloc_smes(dev);
1432         if (ret)
1433                 goto out_cfg_free;
1434
1435         iommu_device_link(&smmu->iommu, dev);
1436
1437         return 0;
1438
1439 out_cfg_free:
1440         kfree(cfg);
1441 out_free:
1442         iommu_fwspec_free(dev);
1443         return ret;
1444 }
1445
1446 static void arm_smmu_remove_device(struct device *dev)
1447 {
1448         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1449         struct arm_smmu_master_cfg *cfg;
1450         struct arm_smmu_device *smmu;
1451
1453         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1454                 return;
1455
1456         cfg  = fwspec->iommu_priv;
1457         smmu = cfg->smmu;
1458
1459         iommu_device_unlink(&smmu->iommu, dev);
1460         arm_smmu_master_free_smes(fwspec);
1461         iommu_group_remove_device(dev);
1462         kfree(fwspec->iommu_priv);
1463         iommu_fwspec_free(dev);
1464 }
1465
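/*
 * Group allocation must honour stream ID aliasing: if any of this
 * device's stream mapping entries already belongs to a group (because
 * an SMR alias overlaps another master), reuse that group so aliased
 * masters share a domain. Two different pre-existing groups for one
 * device is an unresolvable conflict.
 */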
1466 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1467 {
1468         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1469         struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1470         struct iommu_group *group = NULL;
1471         int i, idx;
1472
1473         for_each_cfg_sme(fwspec, i, idx) {
1474                 if (group && smmu->s2crs[idx].group &&
1475                     group != smmu->s2crs[idx].group)
1476                         return ERR_PTR(-EINVAL);
1477
1478                 group = smmu->s2crs[idx].group;
1479         }
1480
1481         if (group)
1482                 return iommu_group_ref_get(group);
1483
1484         if (dev_is_pci(dev))
1485                 group = pci_device_group(dev);
1486         else if (dev_is_fsl_mc(dev))
1487                 group = fsl_mc_device_group(dev);
1488         else
1489                 group = generic_device_group(dev);
1490
1491         return group;
1492 }
1493
1494 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1495                                     enum iommu_attr attr, void *data)
1496 {
1497         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1498
1499         switch (domain->type) {
1500         case IOMMU_DOMAIN_UNMANAGED:
1501                 switch (attr) {
1502                 case DOMAIN_ATTR_NESTING:
1503                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1504                         return 0;
1505                 default:
1506                         return -ENODEV;
1507                 }
1508                 break;
1509         case IOMMU_DOMAIN_DMA:
1510                 switch (attr) {
1511                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1512                         *(int *)data = smmu_domain->non_strict;
1513                         return 0;
1514                 default:
1515                         return -ENODEV;
1516                 }
1517                 break;
1518         default:
1519                 return -EINVAL;
1520         }
1521 }
1522
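/*
 * DOMAIN_ATTR_NESTING can only be changed before the domain has been
 * attached to an SMMU (hence the -EPERM once smmu_domain->smmu is set),
 * since it decides which translation stage the context bank uses.
 * A caller wanting nested translation would do roughly the following
 * (sketch only, error handling omitted):
 *
 *	int nesting = 1;
 *	struct iommu_domain *domain = iommu_domain_alloc(dev->bus);
 *
 *	iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
 *	iommu_attach_device(domain, dev);
 */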
1523 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1524                                     enum iommu_attr attr, void *data)
1525 {
1526         int ret = 0;
1527         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1528
1529         mutex_lock(&smmu_domain->init_mutex);
1530
1531         switch (domain->type) {
1532         case IOMMU_DOMAIN_UNMANAGED:
1533                 switch (attr) {
1534                 case DOMAIN_ATTR_NESTING:
1535                         if (smmu_domain->smmu) {
1536                                 ret = -EPERM;
1537                                 goto out_unlock;
1538                         }
1539
1540                         if (*(int *)data)
1541                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1542                         else
1543                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1544                         break;
1545                 default:
1546                         ret = -ENODEV;
1547                 }
1548                 break;
1549         case IOMMU_DOMAIN_DMA:
1550                 switch (attr) {
1551                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1552                         smmu_domain->non_strict = *(int *)data;
1553                         break;
1554                 default:
1555                         ret = -ENODEV;
1556                 }
1557                 break;
1558         default:
1559                 ret = -EINVAL;
1560         }
1561 out_unlock:
1562         mutex_unlock(&smmu_domain->init_mutex);
1563         return ret;
1564 }
1565
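/*
 * Translate a generic-binding "iommus" specifier into a 32-bit firmware
 * ID: cell 0 carries the stream ID and the optional cell 1 (or the SMMU
 * node's "stream-match-mask" property) carries the SMR mask in the
 * upper 16 bits. Illustrative DT fragment (addresses made up):
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,mmu-500";
 *		#iommu-cells = <1>;
 *		...
 *	};
 *
 *	master {
 *		...
 *		iommus = <&smmu 0x400>;
 *	};
 */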
1566 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1567 {
1568         u32 mask, fwid = 0;
1569
1570         if (args->args_count > 0)
1571                 fwid |= (u16)args->args[0];
1572
1573         if (args->args_count > 1)
1574                 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1575         else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1576                 fwid |= (u16)mask << SMR_MASK_SHIFT;
1577
1578         return iommu_fwspec_add_ids(dev, &fwid, 1);
1579 }
1580
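/*
 * Reserved regions reported to the core: a software-managed MSI window
 * (MSI_IOVA_BASE/MSI_IOVA_LENGTH) in which the DMA layer can map MSI
 * doorbells, plus any regions the firmware describes via
 * iommu_dma_get_resv_regions().
 */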
1581 static void arm_smmu_get_resv_regions(struct device *dev,
1582                                       struct list_head *head)
1583 {
1584         struct iommu_resv_region *region;
1585         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1586
1587         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1588                                          prot, IOMMU_RESV_SW_MSI);
1589         if (!region)
1590                 return;
1591
1592         list_add_tail(&region->list, head);
1593
1594         iommu_dma_get_resv_regions(dev, head);
1595 }
1596
1597 static void arm_smmu_put_resv_regions(struct device *dev,
1598                                       struct list_head *head)
1599 {
1600         struct iommu_resv_region *entry, *next;
1601
1602         list_for_each_entry_safe(entry, next, head, list)
1603                 kfree(entry);
1604 }
1605
1606 static struct iommu_ops arm_smmu_ops = {
1607         .capable                = arm_smmu_capable,
1608         .domain_alloc           = arm_smmu_domain_alloc,
1609         .domain_free            = arm_smmu_domain_free,
1610         .attach_dev             = arm_smmu_attach_dev,
1611         .map                    = arm_smmu_map,
1612         .unmap                  = arm_smmu_unmap,
1613         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
1614         .iotlb_sync             = arm_smmu_iotlb_sync,
1615         .iova_to_phys           = arm_smmu_iova_to_phys,
1616         .add_device             = arm_smmu_add_device,
1617         .remove_device          = arm_smmu_remove_device,
1618         .device_group           = arm_smmu_device_group,
1619         .domain_get_attr        = arm_smmu_domain_get_attr,
1620         .domain_set_attr        = arm_smmu_domain_set_attr,
1621         .of_xlate               = arm_smmu_of_xlate,
1622         .get_resv_regions       = arm_smmu_get_resv_regions,
1623         .put_resv_regions       = arm_smmu_put_resv_regions,
1624         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1625 };
1626
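/*
 * Bring the SMMU to a known state: clear any recorded global faults,
 * reset every stream mapping entry and context bank, apply the MMU-500
 * workarounds, invalidate the TLBs and finally enable the SMMU with
 * fault reporting on, with unmatched streams either faulting or
 * bypassing according to the disable_bypass parameter.
 */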
1627 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1628 {
1629         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1630         int i;
1631         u32 reg, major;
1632
1633         /* clear global FSR */
1634         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1635         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1636
1637         /*
1638          * Reset stream mapping groups: Initial values mark all SMRn as
1639          * invalid and all S2CRn as bypass unless overridden.
1640          */
1641         for (i = 0; i < smmu->num_mapping_groups; ++i)
1642                 arm_smmu_write_sme(smmu, i);
1643
1644         if (smmu->model == ARM_MMU500) {
1645                 /*
1646                  * Before clearing ARM_MMU500_ACTLR_CPRE, we must first
1647                  * clear the CACHE_LOCK bit of ACR; note that CACHE_LOCK
1648                  * is only present in MMU-500 r2 onwards.
1649                  */
1650                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1651                 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1652                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1653                 if (major >= 2)
1654                         reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1655                 /*
1656                  * Allow unmatched Stream IDs to allocate bypass
1657                  * TLB entries for reduced latency.
1658                  */
1659                 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1660                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1661         }
1662
1663         /* Make sure all context banks are disabled and clear CB_FSR  */
1664         for (i = 0; i < smmu->num_context_banks; ++i) {
1665                 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1666
1667                 arm_smmu_write_context_bank(smmu, i);
1668                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1669                 /*
1670                  * Disable MMU-500's not-particularly-beneficial next-page
1671                  * prefetcher for the sake of errata #841119 and #826419.
1672                  */
1673                 if (smmu->model == ARM_MMU500) {
1674                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1675                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1676                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1677                 }
1678         }
1679
1680         /* Invalidate the TLB, just in case */
1681         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1682         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1683
1684         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1685
1686         /* Enable fault reporting */
1687         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1688
1689         /* Disable TLB broadcasting. */
1690         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1691
1692         /* Enable client access, handling unmatched streams as appropriate */
1693         reg &= ~sCR0_CLIENTPD;
1694         if (disable_bypass)
1695                 reg |= sCR0_USFCFG;
1696         else
1697                 reg &= ~sCR0_USFCFG;
1698
1699         /* Disable forced broadcasting */
1700         reg &= ~sCR0_FB;
1701
1702         /* Don't upgrade barriers */
1703         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1704
1705         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1706                 reg |= sCR0_VMID16EN;
1707
1708         if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1709                 reg |= sCR0_EXIDENABLE;
1710
1711         /* Push the button */
1712         arm_smmu_tlb_sync_global(smmu);
1713         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1714 }
1715
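/*
 * Decode the 3-bit address size fields of IDR2 (IAS, OAS, UBS) into a
 * width in bits; anything beyond the architected encodings is clamped
 * to the 48-bit maximum.
 */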
1716 static int arm_smmu_id_size_to_bits(int size)
1717 {
1718         switch (size) {
1719         case 0:
1720                 return 32;
1721         case 1:
1722                 return 36;
1723         case 2:
1724                 return 40;
1725         case 3:
1726                 return 42;
1727         case 4:
1728                 return 44;
1729         case 5:
1730         default:
1731                 return 48;
1732         }
1733 }
1734
1735 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1736 {
1737         unsigned long size;
1738         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1739         u32 id;
1740         bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1741         int i;
1742
1743         dev_notice(smmu->dev, "probing hardware configuration...\n");
1744         dev_notice(smmu->dev, "SMMUv%d with:\n",
1745                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1746
1747         /* ID0 */
1748         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1749
1750         /* Restrict available stages based on module parameter */
1751         if (force_stage == 1)
1752                 id &= ~(ID0_S2TS | ID0_NTS);
1753         else if (force_stage == 2)
1754                 id &= ~(ID0_S1TS | ID0_NTS);
1755
1756         if (id & ID0_S1TS) {
1757                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1758                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1759         }
1760
1761         if (id & ID0_S2TS) {
1762                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1763                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1764         }
1765
1766         if (id & ID0_NTS) {
1767                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1768                 dev_notice(smmu->dev, "\tnested translation\n");
1769         }
1770
1771         if (!(smmu->features &
1772                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1773                 dev_err(smmu->dev, "\tno translation support!\n");
1774                 return -ENODEV;
1775         }
1776
1777         if ((id & ID0_S1TS) &&
1778                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1779                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1780                 dev_notice(smmu->dev, "\taddress translation ops\n");
1781         }
1782
1783         /*
1784          * In order for DMA API calls to work properly, we must defer to what
1785          * the FW says about coherency, regardless of what the hardware claims.
1786          * Fortunately, this also opens up a workaround for systems where the
1787          * ID register value has ended up configured incorrectly.
1788          */
1789         cttw_reg = !!(id & ID0_CTTW);
1790         if (cttw_fw || cttw_reg)
1791                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1792                            cttw_fw ? "" : "non-");
1793         if (cttw_fw != cttw_reg)
1794                 dev_notice(smmu->dev,
1795                            "\t(IDR0.CTTW overridden by FW configuration)\n");
1796
1797         /* Max. number of entries we have for stream matching/indexing */
1798         if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1799                 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1800                 size = 1 << 16;
1801         } else {
1802                 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1803         }
1804         smmu->streamid_mask = size - 1;
1805         if (id & ID0_SMS) {
1806                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1807                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1808                 if (size == 0) {
1809                         dev_err(smmu->dev,
1810                                 "stream-matching supported, but no SMRs present!\n");
1811                         return -ENODEV;
1812                 }
1813
1814                 /* Zero-initialised to mark as invalid */
1815                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1816                                           GFP_KERNEL);
1817                 if (!smmu->smrs)
1818                         return -ENOMEM;
1819
1820                 dev_notice(smmu->dev,
1821                            "\tstream matching with %lu register groups\n", size);
1822         }
1823         /* s2cr->type == 0 means translation, so initialise explicitly */
1824         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1825                                          GFP_KERNEL);
1826         if (!smmu->s2crs)
1827                 return -ENOMEM;
1828         for (i = 0; i < size; i++)
1829                 smmu->s2crs[i] = s2cr_init_val;
1830
1831         smmu->num_mapping_groups = size;
1832         mutex_init(&smmu->stream_map_mutex);
1833         spin_lock_init(&smmu->global_sync_lock);
1834
1835         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1836                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1837                 if (!(id & ID0_PTFS_NO_AARCH32S))
1838                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1839         }
1840
1841         /* ID1 */
1842         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1843         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1844
1845         /* Check for size mismatch of SMMU address space from mapped region */
1846         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1847         size <<= smmu->pgshift;
1848         if (smmu->cb_base != gr0_base + size)
1849                 dev_warn(smmu->dev,
1850                         "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1851                         size * 2, (smmu->cb_base - gr0_base) * 2);
1852
1853         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1854         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1855         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1856                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1857                 return -ENODEV;
1858         }
1859         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1860                    smmu->num_context_banks, smmu->num_s2_context_banks);
1861         /*
1862          * Cavium CN88xx erratum #27704.
1863          * Ensure ASID and VMID allocation is unique across all SMMUs in
1864          * the system.
1865          */
1866         if (smmu->model == CAVIUM_SMMUV2) {
1867                 smmu->cavium_id_base =
1868                         atomic_add_return(smmu->num_context_banks,
1869                                           &cavium_smmu_context_count);
1870                 smmu->cavium_id_base -= smmu->num_context_banks;
1871                 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1872         }
1873         smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1874                                  sizeof(*smmu->cbs), GFP_KERNEL);
1875         if (!smmu->cbs)
1876                 return -ENOMEM;
1877
1878         /* ID2 */
1879         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1880         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1881         smmu->ipa_size = size;
1882
1883         /* The output mask is also applied for bypass */
1884         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1885         smmu->pa_size = size;
1886
1887         if (id & ID2_VMID16)
1888                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1889
1890         /*
1891          * What the page table walker can address actually depends on which
1892          * descriptor format is in use, but since a) we don't know that yet,
1893          * and b) it can vary per context bank, this will have to do...
1894          */
1895         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1896                 dev_warn(smmu->dev,
1897                          "failed to set DMA mask for table walker\n");
1898
1899         if (smmu->version < ARM_SMMU_V2) {
1900                 smmu->va_size = smmu->ipa_size;
1901                 if (smmu->version == ARM_SMMU_V1_64K)
1902                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1903         } else {
1904                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1905                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1906                 if (id & ID2_PTFS_4K)
1907                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1908                 if (id & ID2_PTFS_16K)
1909                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1910                 if (id & ID2_PTFS_64K)
1911                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1912         }
1913
1914         /* Now we've corralled the various formats, what'll it do? */
1915         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1916                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1917         if (smmu->features &
1918             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1919                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1920         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1921                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1922         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1923                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1924
1925         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1926                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1927         else
1928                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1929         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1930                    smmu->pgsize_bitmap);
1931
1932
1933         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1934                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1935                            smmu->va_size, smmu->ipa_size);
1936
1937         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1938                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1939                            smmu->ipa_size, smmu->pa_size);
1940
1941         return 0;
1942 }
1943
1944 struct arm_smmu_match_data {
1945         enum arm_smmu_arch_version version;
1946         enum arm_smmu_implementation model;
1947 };
1948
1949 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1950 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1951
1952 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1953 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1954 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1955 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1956 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1957
1958 static const struct of_device_id arm_smmu_of_match[] = {
1959         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1960         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1961         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1962         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1963         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1964         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1965         { },
1966 };
1967 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1968
1969 #ifdef CONFIG_ACPI
1970 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1971 {
1972         int ret = 0;
1973
1974         switch (model) {
1975         case ACPI_IORT_SMMU_V1:
1976         case ACPI_IORT_SMMU_CORELINK_MMU400:
1977                 smmu->version = ARM_SMMU_V1;
1978                 smmu->model = GENERIC_SMMU;
1979                 break;
1980         case ACPI_IORT_SMMU_CORELINK_MMU401:
1981                 smmu->version = ARM_SMMU_V1_64K;
1982                 smmu->model = GENERIC_SMMU;
1983                 break;
1984         case ACPI_IORT_SMMU_V2:
1985                 smmu->version = ARM_SMMU_V2;
1986                 smmu->model = GENERIC_SMMU;
1987                 break;
1988         case ACPI_IORT_SMMU_CORELINK_MMU500:
1989                 smmu->version = ARM_SMMU_V2;
1990                 smmu->model = ARM_MMU500;
1991                 break;
1992         case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1993                 smmu->version = ARM_SMMU_V2;
1994                 smmu->model = CAVIUM_SMMUV2;
1995                 break;
1996         default:
1997                 ret = -ENODEV;
1998         }
1999
2000         return ret;
2001 }
2002
2003 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2004                                       struct arm_smmu_device *smmu)
2005 {
2006         struct device *dev = smmu->dev;
2007         struct acpi_iort_node *node =
2008                 *(struct acpi_iort_node **)dev_get_platdata(dev);
2009         struct acpi_iort_smmu *iort_smmu;
2010         int ret;
2011
2012         /* Retrieve SMMU1/2 specific data */
2013         iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2014
2015         ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2016         if (ret < 0)
2017                 return ret;
2018
2019         /* Ignore the configuration access interrupt */
2020         smmu->num_global_irqs = 1;
2021
2022         if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2023                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2024
2025         return 0;
2026 }
2027 #else
2028 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2029                                              struct arm_smmu_device *smmu)
2030 {
2031         return -ENODEV;
2032 }
2033 #endif
2034
2035 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2036                                     struct arm_smmu_device *smmu)
2037 {
2038         const struct arm_smmu_match_data *data;
2039         struct device *dev = &pdev->dev;
2040         bool legacy_binding;
2041
2042         if (of_property_read_u32(dev->of_node, "#global-interrupts",
2043                                  &smmu->num_global_irqs)) {
2044                 dev_err(dev, "missing #global-interrupts property\n");
2045                 return -ENODEV;
2046         }
2047
2048         data = of_device_get_match_data(dev);
2049         smmu->version = data->version;
2050         smmu->model = data->model;
2051
2052         parse_driver_options(smmu);
2053
2054         legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2055         if (legacy_binding && !using_generic_binding) {
2056                 if (!using_legacy_binding)
2057                         pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2058                 using_legacy_binding = true;
2059         } else if (!legacy_binding && !using_legacy_binding) {
2060                 using_generic_binding = true;
2061         } else {
2062                 dev_err(dev, "not probing due to mismatched DT properties\n");
2063                 return -ENODEV;
2064         }
2065
2066         if (of_dma_is_coherent(dev->of_node))
2067                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2068
2069         return 0;
2070 }
2071
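/*
 * Wire arm_smmu_ops up to every bus type that masters may live on. The
 * iommu_present() checks make this idempotent, so only the first SMMU
 * to get here installs the ops. For PCI we also request ACS, which
 * helps pci_device_group() isolate devices where the topology allows.
 */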
2072 static void arm_smmu_bus_init(void)
2073 {
2074         /* Oh, for a proper bus abstraction */
2075         if (!iommu_present(&platform_bus_type))
2076                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2077 #ifdef CONFIG_ARM_AMBA
2078         if (!iommu_present(&amba_bustype))
2079                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2080 #endif
2081 #ifdef CONFIG_PCI
2082         if (!iommu_present(&pci_bus_type)) {
2083                 pci_request_acs();
2084                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2085         }
2086 #endif
2087 #ifdef CONFIG_FSL_MC_BUS
2088         if (!iommu_present(&fsl_mc_bus_type))
2089                 bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
2090 #endif
2091 }
2092
2093 static int arm_smmu_device_probe(struct platform_device *pdev)
2094 {
2095         struct resource *res;
2096         resource_size_t ioaddr;
2097         struct arm_smmu_device *smmu;
2098         struct device *dev = &pdev->dev;
2099         int num_irqs, i, err;
2100
2101         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2102         if (!smmu) {
2103                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2104                 return -ENOMEM;
2105         }
2106         smmu->dev = dev;
2107
2108         if (dev->of_node)
2109                 err = arm_smmu_device_dt_probe(pdev, smmu);
2110         else
2111                 err = arm_smmu_device_acpi_probe(pdev, smmu);
2112
2113         if (err)
2114                 return err;
2115
2116         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2117         smmu->base = devm_ioremap_resource(dev, res);
2118         if (IS_ERR(smmu->base))
2119                 return PTR_ERR(smmu->base);
2120         ioaddr = res->start;
2121         smmu->cb_base = smmu->base + resource_size(res) / 2;
2122
2123         num_irqs = 0;
2124         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2125                 num_irqs++;
2126                 if (num_irqs > smmu->num_global_irqs)
2127                         smmu->num_context_irqs++;
2128         }
2129
2130         if (!smmu->num_context_irqs) {
2131                 dev_err(dev, "found %d interrupts but expected at least %d\n",
2132                         num_irqs, smmu->num_global_irqs + 1);
2133                 return -ENODEV;
2134         }
2135
2136         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2137                                   GFP_KERNEL);
2138         if (!smmu->irqs) {
2139                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2140                 return -ENOMEM;
2141         }
2142
2143         for (i = 0; i < num_irqs; ++i) {
2144                 int irq = platform_get_irq(pdev, i);
2145
2146                 if (irq < 0) {
2147                         dev_err(dev, "failed to get irq index %d\n", i);
2148                         return -ENODEV;
2149                 }
2150                 smmu->irqs[i] = irq;
2151         }
2152
2153         err = arm_smmu_device_cfg_probe(smmu);
2154         if (err)
2155                 return err;
2156
2157         if (smmu->version == ARM_SMMU_V2) {
2158                 if (smmu->num_context_banks > smmu->num_context_irqs) {
2159                         dev_err(dev,
2160                               "found only %d context irq(s) but %d required\n",
2161                               smmu->num_context_irqs, smmu->num_context_banks);
2162                         return -ENODEV;
2163                 }
2164
2165                 /* Ignore superfluous interrupts */
2166                 smmu->num_context_irqs = smmu->num_context_banks;
2167         }
2168
2169         for (i = 0; i < smmu->num_global_irqs; ++i) {
2170                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2171                                        arm_smmu_global_fault,
2172                                        IRQF_SHARED,
2173                                        "arm-smmu global fault",
2174                                        smmu);
2175                 if (err) {
2176                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2177                                 i, smmu->irqs[i]);
2178                         return err;
2179                 }
2180         }
2181
2182         err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2183                                      "smmu.%pa", &ioaddr);
2184         if (err) {
2185                 dev_err(dev, "Failed to register iommu in sysfs\n");
2186                 return err;
2187         }
2188
2189         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2190         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2191
2192         err = iommu_device_register(&smmu->iommu);
2193         if (err) {
2194                 dev_err(dev, "Failed to register iommu\n");
2195                 return err;
2196         }
2197
2198         platform_set_drvdata(pdev, smmu);
2199         arm_smmu_device_reset(smmu);
2200         arm_smmu_test_smr_masks(smmu);
2201
2202         /*
2203          * For ACPI and generic DT bindings, an SMMU will be probed before
2204          * any device which might need it, so we want the bus ops in place
2205          * ready to handle default domain setup as soon as any SMMU exists.
2206          */
2207         if (!using_legacy_binding)
2208                 arm_smmu_bus_init();
2209
2210         return 0;
2211 }
2212
2213 /*
2214  * With the legacy DT binding in play, though, we have no guarantees about
2215  * probe order, but then we're also not doing default domains, so we can
2216  * delay setting bus ops until we're sure every possible SMMU is ready,
2217  * and that way ensure that no add_device() calls get missed.
2218  */
2219 static int arm_smmu_legacy_bus_init(void)
2220 {
2221         if (using_legacy_binding)
2222                 arm_smmu_bus_init();
2223         return 0;
2224 }
2225 device_initcall_sync(arm_smmu_legacy_bus_init);
2226
2227 static int arm_smmu_device_remove(struct platform_device *pdev)
2228 {
2229         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2230
2231         if (!smmu)
2232                 return -ENODEV;
2233
2234         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2235                 dev_err(&pdev->dev, "removing device with active domains!\n");
2236
2237         /* Turn the thing off */
2238         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2239         return 0;
2240 }
2241
2242 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2243 {
2244         arm_smmu_device_remove(pdev);
2245 }
2246
2247 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2248 {
2249         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2250
2251         arm_smmu_device_reset(smmu);
2252         return 0;
2253 }
2254
2255 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2256
2257 static struct platform_driver arm_smmu_driver = {
2258         .driver = {
2259                 .name           = "arm-smmu",
2260                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2261                 .pm             = &arm_smmu_pm_ops,
2262         },
2263         .probe  = arm_smmu_device_probe,
2264         .remove = arm_smmu_device_remove,
2265         .shutdown = arm_smmu_device_shutdown,
2266 };
2267 module_platform_driver(arm_smmu_driver);
2268
2269 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2270 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2271 MODULE_LICENSE("GPL v2");