iommu/arm-smmu: Clear cache lock bit of ACR
drivers/iommu/arm-smmu.c
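The change named in the subject concerns the MMU-500 Auxiliary Configuration Register: from r2 of MMU-500, the Auxiliary Control Registers can only be modified while ACR.CACHE_LOCK is clear, so the reset path has to clear that bit before it clears ACTLR.CPRE in each context bank. The definitions it relies on (ARM_SMMU_GR0_sACR, ID7_MAJOR_SHIFT/MASK and ARM_MMU500_ACR_CACHE_LOCK) appear in the listing below; the update itself lives in the device reset path, outside the portion of the file shown here. The helper below is only a minimal sketch of that read-modify-write sequence, and its name (arm_smmu_clear_acr_cache_lock) is illustrative rather than part of the driver.

        static void arm_smmu_clear_acr_cache_lock(struct arm_smmu_device *smmu)
        {
                void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
                u32 reg, major;

                /* Only MMU-500 r2 and later implement the ACR cache lock bit */
                reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
                major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
                if (smmu->model != ARM_MMU500 || major < 2)
                        return;

                /* Clear CACHE_LOCK so that later ACTLR updates (e.g. clearing
                 * ACTLR.CPRE) actually take effect.
                 */
                reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
                reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
        }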
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *      - SMMUv1 and v2 implementations
 *      - Stream-matching and stream-indexing
 *      - v7/v8 long-descriptor format
 *      - Non-secure access to the SMMU
 *      - Context fault reporting
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"

/* Maximum number of stream IDs assigned to a single device */
#define MAX_MASTER_STREAMIDS            MAX_PHANDLE_ARGS

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS                128

/* Maximum number of mapping groups per SMMU */
#define ARM_SMMU_MAX_SMRS               128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)              ((smmu)->base)
#define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)                                           \
        ((smmu)->base +                                                 \
                ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
                        ? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq            writeq_relaxed
#else
#define smmu_write_atomic_lq            writel_relaxed
#endif

/* Configuration registers */
#define ARM_SMMU_GR0_sCR0               0x0
#define sCR0_CLIENTPD                   (1 << 0)
#define sCR0_GFRE                       (1 << 1)
#define sCR0_GFIE                       (1 << 2)
#define sCR0_GCFGFRE                    (1 << 4)
#define sCR0_GCFGFIE                    (1 << 5)
#define sCR0_USFCFG                     (1 << 10)
#define sCR0_VMIDPNE                    (1 << 11)
#define sCR0_PTM                        (1 << 12)
#define sCR0_FB                         (1 << 13)
#define sCR0_VMID16EN                   (1 << 31)
#define sCR0_BSU_SHIFT                  14
#define sCR0_BSU_MASK                   0x3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR               0x10

/* Identification registers */
#define ARM_SMMU_GR0_ID0                0x20
#define ARM_SMMU_GR0_ID1                0x24
#define ARM_SMMU_GR0_ID2                0x28
#define ARM_SMMU_GR0_ID3                0x2c
#define ARM_SMMU_GR0_ID4                0x30
#define ARM_SMMU_GR0_ID5                0x34
#define ARM_SMMU_GR0_ID6                0x38
#define ARM_SMMU_GR0_ID7                0x3c
#define ARM_SMMU_GR0_sGFSR              0x48
#define ARM_SMMU_GR0_sGFSYNR0           0x50
#define ARM_SMMU_GR0_sGFSYNR1           0x54
#define ARM_SMMU_GR0_sGFSYNR2           0x58

#define ID0_S1TS                        (1 << 30)
#define ID0_S2TS                        (1 << 29)
#define ID0_NTS                         (1 << 28)
#define ID0_SMS                         (1 << 27)
#define ID0_ATOSNS                      (1 << 26)
#define ID0_PTFS_NO_AARCH32             (1 << 25)
#define ID0_PTFS_NO_AARCH32S            (1 << 24)
#define ID0_CTTW                        (1 << 14)
#define ID0_NUMIRPT_SHIFT               16
#define ID0_NUMIRPT_MASK                0xff
#define ID0_NUMSIDB_SHIFT               9
#define ID0_NUMSIDB_MASK                0xf
#define ID0_NUMSMRG_SHIFT               0
#define ID0_NUMSMRG_MASK                0xff

#define ID1_PAGESIZE                    (1 << 31)
#define ID1_NUMPAGENDXB_SHIFT           28
#define ID1_NUMPAGENDXB_MASK            7
#define ID1_NUMS2CB_SHIFT               16
#define ID1_NUMS2CB_MASK                0xff
#define ID1_NUMCB_SHIFT                 0
#define ID1_NUMCB_MASK                  0xff

#define ID2_OAS_SHIFT                   4
#define ID2_OAS_MASK                    0xf
#define ID2_IAS_SHIFT                   0
#define ID2_IAS_MASK                    0xf
#define ID2_UBS_SHIFT                   8
#define ID2_UBS_MASK                    0xf
#define ID2_PTFS_4K                     (1 << 12)
#define ID2_PTFS_16K                    (1 << 13)
#define ID2_PTFS_64K                    (1 << 14)
#define ID2_VMID16                      (1 << 15)

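/* ID7 reports the implementation revision (MAJOR is used to spot MMU-500 r2 onwards) */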
#define ID7_MAJOR_SHIFT                 4
#define ID7_MAJOR_MASK                  0xf

/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID           0x64
#define ARM_SMMU_GR0_TLBIALLNSNH        0x68
#define ARM_SMMU_GR0_TLBIALLH           0x6c
#define ARM_SMMU_GR0_sTLBGSYNC          0x70
#define ARM_SMMU_GR0_sTLBGSTATUS        0x74
#define sTLBGSTATUS_GSACTIVE            (1 << 0)
#define TLB_LOOP_TIMEOUT                1000000 /* 1s! */

/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
#define SMR_VALID                       (1 << 31)
#define SMR_MASK_SHIFT                  16
#define SMR_MASK_MASK                   0x7fff
#define SMR_ID_SHIFT                    0
#define SMR_ID_MASK                     0x7fff

#define ARM_SMMU_GR0_S2CR(n)            (0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT                0
#define S2CR_CBNDX_MASK                 0xff
#define S2CR_TYPE_SHIFT                 16
#define S2CR_TYPE_MASK                  0x3
#define S2CR_TYPE_TRANS                 (0 << S2CR_TYPE_SHIFT)
#define S2CR_TYPE_BYPASS                (1 << S2CR_TYPE_SHIFT)
#define S2CR_TYPE_FAULT                 (2 << S2CR_TYPE_SHIFT)

#define S2CR_PRIVCFG_SHIFT              24
#define S2CR_PRIVCFG_UNPRIV             (2 << S2CR_PRIVCFG_SHIFT)

/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n)            (0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT                 0
#define CBAR_VMID_MASK                  0xff
#define CBAR_S1_BPSHCFG_SHIFT           8
#define CBAR_S1_BPSHCFG_MASK            3
#define CBAR_S1_BPSHCFG_NSH             3
#define CBAR_S1_MEMATTR_SHIFT           12
#define CBAR_S1_MEMATTR_MASK            0xf
#define CBAR_S1_MEMATTR_WB              0xf
#define CBAR_TYPE_SHIFT                 16
#define CBAR_TYPE_MASK                  0x3
#define CBAR_TYPE_S2_TRANS              (0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS    (1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT     (2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS     (3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT              24
#define CBAR_IRPTNDX_MASK               0xff

#define ARM_SMMU_GR1_CBA2R(n)           (0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT                (0 << 0)
#define CBA2R_RW64_64BIT                (1 << 0)
#define CBA2R_VMID_SHIFT                16
#define CBA2R_VMID_MASK                 0xffff

/* Translation context bank */
#define ARM_SMMU_CB_BASE(smmu)          ((smmu)->base + ((smmu)->size >> 1))
#define ARM_SMMU_CB(smmu, n)            ((n) * (1 << (smmu)->pgshift))

#define ARM_SMMU_CB_SCTLR               0x0
#define ARM_SMMU_CB_ACTLR               0x4
#define ARM_SMMU_CB_RESUME              0x8
#define ARM_SMMU_CB_TTBCR2              0x10
#define ARM_SMMU_CB_TTBR0               0x20
#define ARM_SMMU_CB_TTBR1               0x28
#define ARM_SMMU_CB_TTBCR               0x30
#define ARM_SMMU_CB_S1_MAIR0            0x38
#define ARM_SMMU_CB_S1_MAIR1            0x3c
#define ARM_SMMU_CB_PAR                 0x50
#define ARM_SMMU_CB_FSR                 0x58
#define ARM_SMMU_CB_FAR                 0x60
#define ARM_SMMU_CB_FSYNR0              0x68
#define ARM_SMMU_CB_S1_TLBIVA           0x600
#define ARM_SMMU_CB_S1_TLBIASID         0x610
#define ARM_SMMU_CB_S1_TLBIVAL          0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2        0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L       0x638
#define ARM_SMMU_CB_ATS1PR              0x800
#define ARM_SMMU_CB_ATSR                0x8f0

#define SCTLR_S1_ASIDPNE                (1 << 12)
#define SCTLR_CFCFG                     (1 << 7)
#define SCTLR_CFIE                      (1 << 6)
#define SCTLR_CFRE                      (1 << 5)
#define SCTLR_E                         (1 << 4)
#define SCTLR_AFE                       (1 << 2)
#define SCTLR_TRE                       (1 << 1)
#define SCTLR_M                         (1 << 0)
#define SCTLR_EAE_SBOP                  (SCTLR_AFE | SCTLR_TRE)

#define ARM_MMU500_ACTLR_CPRE           (1 << 1)

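/* MMU-500 r2 onwards: ACTLR is only writable while the ACR cache lock bit is clear */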
#define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)

#define CB_PAR_F                        (1 << 0)

#define ATSR_ACTIVE                     (1 << 0)

#define RESUME_RETRY                    (0 << 0)
#define RESUME_TERMINATE                (1 << 0)

#define TTBCR2_SEP_SHIFT                15
#define TTBCR2_SEP_UPSTREAM             (0x7 << TTBCR2_SEP_SHIFT)

#define TTBRn_ASID_SHIFT                48

#define FSR_MULTI                       (1 << 31)
#define FSR_SS                          (1 << 30)
#define FSR_UUT                         (1 << 8)
#define FSR_ASF                         (1 << 7)
#define FSR_TLBLKF                      (1 << 6)
#define FSR_TLBMCF                      (1 << 5)
#define FSR_EF                          (1 << 4)
#define FSR_PF                          (1 << 3)
#define FSR_AFF                         (1 << 2)
#define FSR_TF                          (1 << 1)

#define FSR_IGN                         (FSR_AFF | FSR_ASF | \
                                         FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT                       (FSR_MULTI | FSR_SS | FSR_UUT | \
                                         FSR_EF | FSR_PF | FSR_TF | FSR_IGN)

#define FSYNR0_WNR                      (1 << 4)

static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
        "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
        ARM_SMMU_V1,
        ARM_SMMU_V1_64K,
        ARM_SMMU_V2,
};

enum arm_smmu_implementation {
        GENERIC_SMMU,
        ARM_MMU500,
        CAVIUM_SMMUV2,
};

struct arm_smmu_smr {
        u8                              idx;
        u16                             mask;
        u16                             id;
};

struct arm_smmu_master_cfg {
        int                             num_streamids;
        u16                             streamids[MAX_MASTER_STREAMIDS];
        struct arm_smmu_smr             *smrs;
};

struct arm_smmu_master {
        struct device_node              *of_node;
        struct rb_node                  node;
        struct arm_smmu_master_cfg      cfg;
};

struct arm_smmu_device {
        struct device                   *dev;

        void __iomem                    *base;
        unsigned long                   size;
        unsigned long                   pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
#define ARM_SMMU_FEAT_VMID16            (1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
        u32                             features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
        u32                             options;
        enum arm_smmu_arch_version      version;
        enum arm_smmu_implementation    model;

        u32                             num_context_banks;
        u32                             num_s2_context_banks;
        DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
        atomic_t                        irptndx;

        u32                             num_mapping_groups;
        DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);

        unsigned long                   va_size;
        unsigned long                   ipa_size;
        unsigned long                   pa_size;

        u32                             num_global_irqs;
        u32                             num_context_irqs;
        unsigned int                    *irqs;

        struct list_head                list;
        struct rb_root                  masters;

        u32                             cavium_id_base; /* Specific to Cavium */
};

enum arm_smmu_context_fmt {
        ARM_SMMU_CTX_FMT_NONE,
        ARM_SMMU_CTX_FMT_AARCH64,
        ARM_SMMU_CTX_FMT_AARCH32_L,
        ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
        u8                              cbndx;
        u8                              irptndx;
        u32                             cbar;
        enum arm_smmu_context_fmt       fmt;
};
#define INVALID_IRPTNDX                 0xff

#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)

enum arm_smmu_domain_stage {
        ARM_SMMU_DOMAIN_S1 = 0,
        ARM_SMMU_DOMAIN_S2,
        ARM_SMMU_DOMAIN_NESTED,
};

struct arm_smmu_domain {
        struct arm_smmu_device          *smmu;
        struct io_pgtable_ops           *pgtbl_ops;
        spinlock_t                      pgtbl_lock;
        struct arm_smmu_cfg             cfg;
        enum arm_smmu_domain_stage      stage;
        struct mutex                    init_mutex; /* Protects smmu pointer */
        struct iommu_domain             domain;
};

static struct iommu_ops arm_smmu_ops;

static DEFINE_SPINLOCK(arm_smmu_devices_lock);
static LIST_HEAD(arm_smmu_devices);

struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
        { 0, NULL},
};

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
        return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
        int i = 0;

        do {
                if (of_property_read_bool(smmu->dev->of_node,
                                                arm_smmu_options[i].prop)) {
                        smmu->options |= arm_smmu_options[i].opt;
                        dev_notice(smmu->dev, "option %s\n",
                                arm_smmu_options[i].prop);
                }
        } while (arm_smmu_options[++i].opt);
}

static struct device_node *dev_get_dev_node(struct device *dev)
{
        if (dev_is_pci(dev)) {
                struct pci_bus *bus = to_pci_dev(dev)->bus;

                while (!pci_is_root_bus(bus))
                        bus = bus->parent;
                return bus->bridge->parent->of_node;
        }

        return dev->of_node;
}

static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
                                                struct device_node *dev_node)
{
        struct rb_node *node = smmu->masters.rb_node;

        while (node) {
                struct arm_smmu_master *master;

                master = container_of(node, struct arm_smmu_master, node);

                if (dev_node < master->of_node)
                        node = node->rb_left;
                else if (dev_node > master->of_node)
                        node = node->rb_right;
                else
                        return master;
        }

        return NULL;
}

static struct arm_smmu_master_cfg *
find_smmu_master_cfg(struct device *dev)
{
        struct arm_smmu_master_cfg *cfg = NULL;
        struct iommu_group *group = iommu_group_get(dev);

        if (group) {
                cfg = iommu_group_get_iommudata(group);
                iommu_group_put(group);
        }

        return cfg;
}

static int insert_smmu_master(struct arm_smmu_device *smmu,
                              struct arm_smmu_master *master)
{
        struct rb_node **new, *parent;

        new = &smmu->masters.rb_node;
        parent = NULL;
        while (*new) {
                struct arm_smmu_master *this
                        = container_of(*new, struct arm_smmu_master, node);

                parent = *new;
                if (master->of_node < this->of_node)
                        new = &((*new)->rb_left);
                else if (master->of_node > this->of_node)
                        new = &((*new)->rb_right);
                else
                        return -EEXIST;
        }

        rb_link_node(&master->node, parent, new);
        rb_insert_color(&master->node, &smmu->masters);
        return 0;
}

static int register_smmu_master(struct arm_smmu_device *smmu,
                                struct device *dev,
                                struct of_phandle_args *masterspec)
{
        int i;
        struct arm_smmu_master *master;

        master = find_smmu_master(smmu, masterspec->np);
        if (master) {
                dev_err(dev,
                        "rejecting multiple registrations for master device %s\n",
                        masterspec->np->name);
                return -EBUSY;
        }

        if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
                dev_err(dev,
                        "reached maximum number (%d) of stream IDs for master device %s\n",
                        MAX_MASTER_STREAMIDS, masterspec->np->name);
                return -ENOSPC;
        }

        master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
        if (!master)
                return -ENOMEM;

        master->of_node                 = masterspec->np;
        master->cfg.num_streamids       = masterspec->args_count;

        for (i = 0; i < master->cfg.num_streamids; ++i) {
                u16 streamid = masterspec->args[i];

                if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) &&
                     (streamid >= smmu->num_mapping_groups)) {
                        dev_err(dev,
                                "stream ID for master device %s greater than maximum allowed (%d)\n",
                                masterspec->np->name, smmu->num_mapping_groups);
                        return -ERANGE;
                }
                master->cfg.streamids[i] = streamid;
        }
        return insert_smmu_master(smmu, master);
}

static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
{
        struct arm_smmu_device *smmu;
        struct arm_smmu_master *master = NULL;
        struct device_node *dev_node = dev_get_dev_node(dev);

        spin_lock(&arm_smmu_devices_lock);
        list_for_each_entry(smmu, &arm_smmu_devices, list) {
                master = find_smmu_master(smmu, dev_node);
                if (master)
                        break;
        }
        spin_unlock(&arm_smmu_devices_lock);

        return master ? smmu : NULL;
}

static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
        int idx;

        do {
                idx = find_next_zero_bit(map, end, start);
                if (idx == end)
                        return -ENOSPC;
        } while (test_and_set_bit(idx, map));

        return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
        clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
        int count = 0;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
        while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
               & sTLBGSTATUS_GSACTIVE) {
                cpu_relax();
                if (++count == TLB_LOOP_TIMEOUT) {
                        dev_err_ratelimited(smmu->dev,
                        "TLB sync timed out -- SMMU may be deadlocked\n");
                        return;
                }
                udelay(1);
        }
}

static void arm_smmu_tlb_sync(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        __arm_smmu_tlb_sync(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context(void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        void __iomem *base;

        if (stage1) {
                base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
                               base + ARM_SMMU_CB_S1_TLBIASID);
        } else {
                base = ARM_SMMU_GR0(smmu);
                writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
                               base + ARM_SMMU_GR0_TLBIVMID);
        }

        __arm_smmu_tlb_sync(smmu);
}

static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
                                          size_t granule, bool leaf, void *cookie)
{
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        void __iomem *reg;

        if (stage1) {
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

                if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
                        iova &= ~12UL;
                        iova |= ARM_SMMU_CB_ASID(smmu, cfg);
                        do {
                                writel_relaxed(iova, reg);
                                iova += granule;
                        } while (size -= granule);
                } else {
                        iova >>= 12;
                        iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
                        do {
                                writeq_relaxed(iova, reg);
                                iova += granule >> 12;
                        } while (size -= granule);
                }
        } else if (smmu->version == ARM_SMMU_V2) {
                reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
                reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
                              ARM_SMMU_CB_S2_TLBIIPAS2;
                iova >>= 12;
                do {
                        smmu_write_atomic_lq(iova, reg);
                        iova += granule >> 12;
                } while (size -= granule);
        } else {
                reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
                writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
        }
}

static struct iommu_gather_ops arm_smmu_gather_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
        .tlb_sync       = arm_smmu_tlb_sync,
};

static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
        int flags, ret;
        u32 fsr, fsynr, resume;
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *cb_base;

        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
        fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;

        if (fsr & FSR_IGN)
                dev_err_ratelimited(smmu->dev,
                                    "Unexpected context fault (fsr 0x%x)\n",
                                    fsr);

        fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
        flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;

        iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
        if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
                ret = IRQ_HANDLED;
                resume = RESUME_RETRY;
        } else {
                dev_err_ratelimited(smmu->dev,
                    "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
                    iova, fsynr, cfg->cbndx);
                ret = IRQ_NONE;
                resume = RESUME_TERMINATE;
        }

        /* Clear the faulting FSR */
        writel(fsr, cb_base + ARM_SMMU_CB_FSR);

        /* Retry or terminate any stalled transactions */
        if (fsr & FSR_SS)
                writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);

        return ret;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
        u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
        struct arm_smmu_device *smmu = dev;
        void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

        gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
        gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
        gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
        gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

        if (!gfsr)
                return IRQ_NONE;

        dev_err_ratelimited(smmu->dev,
                "Unexpected global fault, this could be serious\n");
        dev_err_ratelimited(smmu->dev,
                "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
                gfsr, gfsynr0, gfsynr1, gfsynr2);

        writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
        return IRQ_HANDLED;
}

static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
{
        u32 reg;
        u64 reg64;
        bool stage1;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *cb_base, *gr1_base;

        gr1_base = ARM_SMMU_GR1(smmu);
        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

        if (smmu->version > ARM_SMMU_V1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                        reg = CBA2R_RW64_64BIT;
                else
                        reg = CBA2R_RW64_32BIT;
                /* 16-bit VMIDs live in CBA2R */
                if (smmu->features & ARM_SMMU_FEAT_VMID16)
                        reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;

                writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
        }

        /* CBAR */
        reg = cfg->cbar;
        if (smmu->version < ARM_SMMU_V2)
                reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

        /*
         * Use the weakest shareability/memory types, so they are
         * overridden by the ttbcr/pte.
         */
        if (stage1) {
                reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
                        (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
        } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
                /* 8-bit VMIDs live in CBAR */
                reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
        }
        writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

        /* TTBRs */
        if (stage1) {
                reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];

                reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);

                reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
                reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
        } else {
                reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
        }

        /* TTBCR */
        if (stage1) {
                reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
                if (smmu->version > ARM_SMMU_V1) {
                        reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
                        reg |= TTBCR2_SEP_UPSTREAM;
                        writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
                }
        } else {
                reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
        }

        /* MAIRs (stage-1 only) */
        if (stage1) {
                reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
                reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1);
        }

        /* SCTLR */
        reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
#ifdef __BIG_ENDIAN
        reg |= SCTLR_E;
#endif
        writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                                        struct arm_smmu_device *smmu)
{
        int irq, start, ret = 0;
        unsigned long ias, oas;
        struct io_pgtable_ops *pgtbl_ops;
        struct io_pgtable_cfg pgtbl_cfg;
        enum io_pgtable_fmt fmt;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

        mutex_lock(&smmu_domain->init_mutex);
        if (smmu_domain->smmu)
                goto out_unlock;

        /*
         * Mapping the requested stage onto what we support is surprisingly
         * complicated, mainly because the spec allows S1+S2 SMMUs without
         * support for nested translation. That means we end up with the
         * following table:
         *
         * Requested        Supported        Actual
         *     S1               N              S1
         *     S1             S1+S2            S1
         *     S1               S2             S2
         *     S1               S1             S1
         *     N                N              N
         *     N              S1+S2            S2
         *     N                S2             S2
         *     N                S1             S1
         *
         * Note that you can't actually request stage-2 mappings.
         */
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
        if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
                smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

        /*
         * Choosing a suitable context format is even more fiddly. Until we
         * grow some way for the caller to express a preference, and/or move
         * the decision into the io-pgtable code where it arguably belongs,
         * just aim for the closest thing to the rest of the system, and hope
         * that the hardware isn't esoteric enough that we can't assume AArch64
         * support to be a superset of AArch32 support...
         */
        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
        if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
            (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
                               ARM_SMMU_FEAT_FMT_AARCH64_16K |
                               ARM_SMMU_FEAT_FMT_AARCH64_4K)))
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

        if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
                ret = -EINVAL;
                goto out_unlock;
        }

        switch (smmu_domain->stage) {
        case ARM_SMMU_DOMAIN_S1:
                cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
                start = smmu->num_s2_context_banks;
                ias = smmu->va_size;
                oas = smmu->ipa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
                } else {
                        fmt = ARM_32_LPAE_S1;
                        ias = min(ias, 32UL);
                        oas = min(oas, 40UL);
                }
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
                 * We will likely want to change this if/when KVM gets
                 * involved.
                 */
        case ARM_SMMU_DOMAIN_S2:
                cfg->cbar = CBAR_TYPE_S2_TRANS;
                start = 0;
                ias = smmu->ipa_size;
                oas = smmu->pa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S2;
                } else {
                        fmt = ARM_32_LPAE_S2;
                        ias = min(ias, 40UL);
                        oas = min(oas, 40UL);
                }
                break;
        default:
                ret = -EINVAL;
                goto out_unlock;
        }

        ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
                                      smmu->num_context_banks);
        if (IS_ERR_VALUE(ret))
                goto out_unlock;

        cfg->cbndx = ret;
        if (smmu->version < ARM_SMMU_V2) {
                cfg->irptndx = atomic_inc_return(&smmu->irptndx);
                cfg->irptndx %= smmu->num_context_irqs;
        } else {
                cfg->irptndx = cfg->cbndx;
        }

        pgtbl_cfg = (struct io_pgtable_cfg) {
                .pgsize_bitmap  = arm_smmu_ops.pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
                .tlb            = &arm_smmu_gather_ops,
                .iommu_dev      = smmu->dev,
        };

        smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
                ret = -ENOMEM;
                goto out_clear_smmu;
        }

        /* Update our support page sizes to reflect the page table format */
        arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

        /* Initialise the context bank with our page table cfg */
        arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);

        /*
         * Request context fault interrupt. Do this last to avoid the
         * handler seeing a half-initialised domain state.
         */
        irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
        ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
                          "arm-smmu-context-fault", domain);
        if (IS_ERR_VALUE(ret)) {
                dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
                        cfg->irptndx, irq);
                cfg->irptndx = INVALID_IRPTNDX;
        }

        mutex_unlock(&smmu_domain->init_mutex);

        /* Publish page table ops for map/unmap */
        smmu_domain->pgtbl_ops = pgtbl_ops;
        return 0;

out_clear_smmu:
        smmu_domain->smmu = NULL;
out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
        return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        void __iomem *cb_base;
        int irq;

        if (!smmu)
                return;

        /*
         * Disable the context bank and free the page tables before freeing
         * it.
         */
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
        writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);

        if (cfg->irptndx != INVALID_IRPTNDX) {
                irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
                free_irq(irq, domain);
        }

        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
        __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
        struct arm_smmu_domain *smmu_domain;

        if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
                return NULL;
        /*
         * Allocate the domain and initialise some of its data structures.
         * We can't really do anything meaningful until we've added a
         * master.
         */
        smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
        if (!smmu_domain)
                return NULL;

        if (type == IOMMU_DOMAIN_DMA &&
            iommu_get_dma_cookie(&smmu_domain->domain)) {
                kfree(smmu_domain);
                return NULL;
        }

        mutex_init(&smmu_domain->init_mutex);
        spin_lock_init(&smmu_domain->pgtbl_lock);

        return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        /*
         * Free the domain resources. We assume that all devices have
         * already been detached.
         */
        iommu_put_dma_cookie(domain);
        arm_smmu_destroy_domain_context(domain);
        kfree(smmu_domain);
}

static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
                                          struct arm_smmu_master_cfg *cfg)
{
        int i;
        struct arm_smmu_smr *smrs;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
                return 0;

        if (cfg->smrs)
                return -EEXIST;

        smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
        if (!smrs) {
                dev_err(smmu->dev, "failed to allocate %d SMRs\n",
                        cfg->num_streamids);
                return -ENOMEM;
        }

        /* Allocate the SMRs on the SMMU */
        for (i = 0; i < cfg->num_streamids; ++i) {
                int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
                                                  smmu->num_mapping_groups);
                if (IS_ERR_VALUE(idx)) {
                        dev_err(smmu->dev, "failed to allocate free SMR\n");
                        goto err_free_smrs;
                }

                smrs[i] = (struct arm_smmu_smr) {
                        .idx    = idx,
                        .mask   = 0, /* We don't currently share SMRs */
                        .id     = cfg->streamids[i],
                };
        }

        /* It worked! Now, poke the actual hardware */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
                          smrs[i].mask << SMR_MASK_SHIFT;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
        }

        cfg->smrs = smrs;
        return 0;

err_free_smrs:
        while (--i >= 0)
                __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
        kfree(smrs);
        return -ENOSPC;
}

static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
                                      struct arm_smmu_master_cfg *cfg)
{
        int i;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        struct arm_smmu_smr *smrs = cfg->smrs;

        if (!smrs)
                return;

        /* Invalidate the SMRs before freeing back to the allocator */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u8 idx = smrs[i].idx;

                writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
                __arm_smmu_free_bitmap(smmu->smr_map, idx);
        }

        cfg->smrs = NULL;
        kfree(smrs);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
                                      struct arm_smmu_master_cfg *cfg)
{
        int i, ret;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        /* Devices in an IOMMU group may already be configured */
        ret = arm_smmu_master_configure_smrs(smmu, cfg);
        if (ret)
                return ret == -EEXIST ? 0 : ret;

        /*
         * FIXME: This won't be needed once we have IOMMU-backed DMA ops
         * for all devices behind the SMMU.
         */
        if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
                return 0;

        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 idx, s2cr;

                idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
                s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
                       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
                writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
        }

        return 0;
}

static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
                                          struct arm_smmu_master_cfg *cfg)
{
        int i;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

        /* An IOMMU group is torn down by the first device to be removed */
        if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs)
                return;

        /*
         * We *must* clear the S2CR first, because freeing the SMR means
         * that it can be re-allocated immediately.
         */
        for (i = 0; i < cfg->num_streamids; ++i) {
                u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
                u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;

                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
        }

        arm_smmu_master_free_smrs(smmu, cfg);
}

static void arm_smmu_detach_dev(struct device *dev,
                                struct arm_smmu_master_cfg *cfg)
{
        struct iommu_domain *domain = dev->archdata.iommu;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

        dev->archdata.iommu = NULL;
        arm_smmu_domain_remove_master(smmu_domain, cfg);
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
        int ret;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu;
        struct arm_smmu_master_cfg *cfg;

        smmu = find_smmu_for_device(dev);
        if (!smmu) {
                dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
                return -ENXIO;
        }

        /* Ensure that the domain is finalised */
        ret = arm_smmu_init_domain_context(domain, smmu);
        if (IS_ERR_VALUE(ret))
                return ret;

        /*
         * Sanity check the domain. We don't support domains across
         * different SMMUs.
         */
        if (smmu_domain->smmu != smmu) {
                dev_err(dev,
                        "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
                        dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
                return -EINVAL;
        }

        /* Looks ok, so add the device to the domain */
        cfg = find_smmu_master_cfg(dev);
        if (!cfg)
                return -ENODEV;

        /* Detach the dev from its current domain */
        if (dev->archdata.iommu)
                arm_smmu_detach_dev(dev, cfg);

        ret = arm_smmu_domain_add_master(smmu_domain, cfg);
        if (!ret)
                dev->archdata.iommu = domain;
        return ret;
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                        phys_addr_t paddr, size_t size, int prot)
{
        int ret;
        unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (!ops)
                return -ENODEV;

        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        ret = ops->map(ops, iova, paddr, size, prot);
        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
        return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                             size_t size)
{
        size_t ret;
        unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (!ops)
                return 0;

        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        ret = ops->unmap(ops, iova, size);
        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
        return ret;
}

static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
                                              dma_addr_t iova)
{
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
        struct device *dev = smmu->dev;
        void __iomem *cb_base;
        u32 tmp;
        u64 phys;
        unsigned long va;

        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

        /* ATS1 registers can only be written atomically */
        va = iova & ~0xfffUL;
        if (smmu->version == ARM_SMMU_V2)
                smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
        else /* Register is only 32-bit in v1 */
                writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

        if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
                                      !(tmp & ATSR_ACTIVE), 5, 50)) {
                dev_err(dev,
                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
                return ops->iova_to_phys(ops, iova);
        }

        phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
        if (phys & CB_PAR_F) {
                dev_err(dev, "translation fault!\n");
                dev_err(dev, "PAR = 0x%llx\n", phys);
                return 0;
        }

        return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
                                        dma_addr_t iova)
{
        phys_addr_t ret;
        unsigned long flags;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

        if (!ops)
                return 0;

        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
        if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
                        smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
                ret = arm_smmu_iova_to_phys_hard(domain, iova);
        } else {
                ret = ops->iova_to_phys(ops, iova);
        }

        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);

        return ret;
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
        switch (cap) {
        case IOMMU_CAP_CACHE_COHERENCY:
                /*
                 * Return true here as the SMMU can always send out coherent
                 * requests.
                 */
                return true;
        case IOMMU_CAP_INTR_REMAP:
                return true; /* MSIs are just memory writes */
        case IOMMU_CAP_NOEXEC:
                return true;
        default:
                return false;
        }
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
        *((u16 *)data) = alias;
        return 0; /* Continue walking */
}

static void __arm_smmu_release_pci_iommudata(void *data)
{
        kfree(data);
}

static int arm_smmu_init_pci_device(struct pci_dev *pdev,
                                    struct iommu_group *group)
{
        struct arm_smmu_master_cfg *cfg;
        u16 sid;
        int i;

        cfg = iommu_group_get_iommudata(group);
        if (!cfg) {
                cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
                if (!cfg)
                        return -ENOMEM;

                iommu_group_set_iommudata(group, cfg,
                                          __arm_smmu_release_pci_iommudata);
        }

        if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
                return -ENOSPC;

        /*
         * Assume Stream ID == Requester ID for now.
         * We need a way to describe the ID mappings in FDT.
         */
        pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
        for (i = 0; i < cfg->num_streamids; ++i)
                if (cfg->streamids[i] == sid)
                        break;

        /* Avoid duplicate SIDs, as this can lead to SMR conflicts */
        if (i == cfg->num_streamids)
                cfg->streamids[cfg->num_streamids++] = sid;

        return 0;
}

1415 static int arm_smmu_init_platform_device(struct device *dev,
1416                                          struct iommu_group *group)
1417 {
1418         struct arm_smmu_device *smmu = find_smmu_for_device(dev);
1419         struct arm_smmu_master *master;
1420
1421         if (!smmu)
1422                 return -ENODEV;
1423
1424         master = find_smmu_master(smmu, dev->of_node);
1425         if (!master)
1426                 return -ENODEV;
1427
1428         iommu_group_set_iommudata(group, &master->cfg, NULL);
1429
1430         return 0;
1431 }
1432
1433 static int arm_smmu_add_device(struct device *dev)
1434 {
1435         struct iommu_group *group;
1436
1437         group = iommu_group_get_for_dev(dev);
1438         if (IS_ERR(group))
1439                 return PTR_ERR(group);
1440
1441         iommu_group_put(group);
1442         return 0;
1443 }
1444
1445 static void arm_smmu_remove_device(struct device *dev)
1446 {
1447         iommu_group_remove_device(dev);
1448 }
1449
1450 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1451 {
1452         struct iommu_group *group;
1453         int ret;
1454
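        /*
         * pci_device_group() places a device in the same group as any of
         * its DMA aliases (e.g. functions that cannot be isolated from one
         * another), whereas platform devices each get their own group from
         * generic_device_group().
         */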
1455         if (dev_is_pci(dev))
1456                 group = pci_device_group(dev);
1457         else
1458                 group = generic_device_group(dev);
1459
1460         if (IS_ERR(group))
1461                 return group;
1462
1463         if (dev_is_pci(dev))
1464                 ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
1465         else
1466                 ret = arm_smmu_init_platform_device(dev, group);
1467
1468         if (ret) {
1469                 iommu_group_put(group);
1470                 group = ERR_PTR(ret);
1471         }
1472
1473         return group;
1474 }
1475
1476 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1477                                     enum iommu_attr attr, void *data)
1478 {
1479         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1480
1481         switch (attr) {
1482         case DOMAIN_ATTR_NESTING:
1483                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1484                 return 0;
1485         default:
1486                 return -ENODEV;
1487         }
1488 }
1489
1490 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1491                                     enum iommu_attr attr, void *data)
1492 {
1493         int ret = 0;
1494         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1495
1496         mutex_lock(&smmu_domain->init_mutex);
1497
1498         switch (attr) {
1499         case DOMAIN_ATTR_NESTING:
1500                 if (smmu_domain->smmu) {
1501                         ret = -EPERM;
1502                         goto out_unlock;
1503                 }
1504
1505                 if (*(int *)data)
1506                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1507                 else
1508                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1509
1510                 break;
1511         default:
1512                 ret = -ENODEV;
1513         }
1514
1515 out_unlock:
1516         mutex_unlock(&smmu_domain->init_mutex);
1517         return ret;
1518 }
1519
1520 static struct iommu_ops arm_smmu_ops = {
1521         .capable                = arm_smmu_capable,
1522         .domain_alloc           = arm_smmu_domain_alloc,
1523         .domain_free            = arm_smmu_domain_free,
1524         .attach_dev             = arm_smmu_attach_dev,
1525         .map                    = arm_smmu_map,
1526         .unmap                  = arm_smmu_unmap,
1527         .map_sg                 = default_iommu_map_sg,
1528         .iova_to_phys           = arm_smmu_iova_to_phys,
1529         .add_device             = arm_smmu_add_device,
1530         .remove_device          = arm_smmu_remove_device,
1531         .device_group           = arm_smmu_device_group,
1532         .domain_get_attr        = arm_smmu_domain_get_attr,
1533         .domain_set_attr        = arm_smmu_domain_set_attr,
1534         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1535 };
1536
1537 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1538 {
1539         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1540         void __iomem *cb_base;
1541         int i = 0;
1542         u32 reg, major;
1543
1544         /* clear global FSR */
1545         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1546         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1547
1548         /* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
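        /*
         * Streams that match an SMR but have no domain attached are
         * governed by these S2CRs: with the disable_bypass module
         * parameter set they fault, otherwise they bypass translation.
         * Streams that match no SMR at all are handled separately by
         * sCR0_USFCFG below.
         */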
1549         reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
1550         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1551                 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
1552                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
1553         }
1554
1555         /*
1556          * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK bit of
1557          * ACR must be cleared first. Note that CACHE_LOCK is only
1558          * present in MMU-500 r2p0 onwards, hence the ID7 revision check.
1559          */
1560         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1561         major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1562         if ((smmu->model == ARM_MMU500) && (major >= 2)) {
1563                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1564                 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1565                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1566         }
1567
1568         /* Make sure all context banks are disabled and clear CB_FSR  */
1569         for (i = 0; i < smmu->num_context_banks; ++i) {
1570                 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
1571                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1572                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1573                 /*
1574                  * Disable MMU-500's not-particularly-beneficial next-page
1575                  * prefetcher for the sake of errata #841119 and #826419.
1576                  */
1577                 if (smmu->model == ARM_MMU500) {
1578                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1579                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1580                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1581                 }
1582         }
1583
1584         /* Invalidate the TLB, just in case */
1585         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1586         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1587
1588         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1589
1590         /* Enable fault reporting */
1591         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1592
1593         /* Disable TLB broadcasting. */
1594         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1595
1596         /* Enable client access, handling unmatched streams as appropriate */
1597         reg &= ~sCR0_CLIENTPD;
1598         if (disable_bypass)
1599                 reg |= sCR0_USFCFG;
1600         else
1601                 reg &= ~sCR0_USFCFG;
1602
1603         /* Disable forced broadcasting */
1604         reg &= ~sCR0_FB;
1605
1606         /* Don't upgrade barriers */
1607         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1608
1609         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1610                 reg |= sCR0_VMID16EN;
1611
1612         /* Push the button */
1613         __arm_smmu_tlb_sync(smmu);
1614         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1615 }
1616
1617 static int arm_smmu_id_size_to_bits(int size)
1618 {
1619         switch (size) {
1620         case 0:
1621                 return 32;
1622         case 1:
1623                 return 36;
1624         case 2:
1625                 return 40;
1626         case 3:
1627                 return 42;
1628         case 4:
1629                 return 44;
1630         case 5:
1631         default:
1632                 return 48;
1633         }
1634 }
1635
1636 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1637 {
1638         unsigned long size;
1639         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1640         u32 id;
1641         bool cttw_dt, cttw_reg;
1642
1643         dev_notice(smmu->dev, "probing hardware configuration...\n");
1644         dev_notice(smmu->dev, "SMMUv%d with:\n",
1645                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1646
1647         /* ID0 */
1648         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1649
1650         /* Restrict available stages based on module parameter */
1651         if (force_stage == 1)
1652                 id &= ~(ID0_S2TS | ID0_NTS);
1653         else if (force_stage == 2)
1654                 id &= ~(ID0_S1TS | ID0_NTS);
1655
1656         if (id & ID0_S1TS) {
1657                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1658                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1659         }
1660
1661         if (id & ID0_S2TS) {
1662                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1663                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1664         }
1665
1666         if (id & ID0_NTS) {
1667                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1668                 dev_notice(smmu->dev, "\tnested translation\n");
1669         }
1670
1671         if (!(smmu->features &
1672                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1673                 dev_err(smmu->dev, "\tno translation support!\n");
1674                 return -ENODEV;
1675         }
1676
1677         if ((id & ID0_S1TS) &&
1678                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1679                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1680                 dev_notice(smmu->dev, "\taddress translation ops\n");
1681         }
1682
1683         /*
1684          * In order for DMA API calls to work properly, we must defer to what
1685          * the DT says about coherency, regardless of what the hardware claims.
1686          * Fortunately, this also opens up a workaround for systems where the
1687          * ID register value has ended up configured incorrectly.
1688          */
1689         cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
1690         cttw_reg = !!(id & ID0_CTTW);
1691         if (cttw_dt)
1692                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1693         if (cttw_dt || cttw_reg)
1694                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1695                            cttw_dt ? "" : "non-");
1696         if (cttw_dt != cttw_reg)
1697                 dev_notice(smmu->dev,
1698                            "\t(IDR0.CTTW overridden by dma-coherent property)\n");
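        /*
         * of_dma_is_coherent() keys off the standard "dma-coherent" DT
         * property, so an illustrative coherent SMMU node (other required
         * properties omitted) would simply carry:
         *
         *      smmu: iommu@2b400000 {
         *              compatible = "arm,mmu-500";
         *              dma-coherent;
         *      };
         */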
1699
1700         if (id & ID0_SMS) {
1701                 u32 smr, sid, mask;
1702
1703                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1704                 smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) &
1705                                            ID0_NUMSMRG_MASK;
1706                 if (smmu->num_mapping_groups == 0) {
1707                         dev_err(smmu->dev,
1708                                 "stream-matching supported, but no SMRs present!\n");
1709                         return -ENODEV;
1710                 }
1711
1712                 smr = SMR_MASK_MASK << SMR_MASK_SHIFT;
1713                 smr |= (SMR_ID_MASK << SMR_ID_SHIFT);
1714                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1715                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1716
1717                 mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK;
1718                 sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK;
1719                 if ((mask & sid) != sid) {
1720                         dev_err(smmu->dev,
1721                                 "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
1722                                 mask, sid);
1723                         return -ENODEV;
1724                 }
1725
1726                 dev_notice(smmu->dev,
1727                            "\tstream matching with %u register groups, mask 0x%x",
1728                            smmu->num_mapping_groups, mask);
1729         } else {
1730                 smmu->num_mapping_groups = (id >> ID0_NUMSIDB_SHIFT) &
1731                                            ID0_NUMSIDB_MASK;
1732         }
1733
1734         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1735                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1736                 if (!(id & ID0_PTFS_NO_AARCH32S))
1737                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1738         }
1739
1740         /* ID1 */
1741         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1742         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1743
1744         /* Check for size mismatch of SMMU address space from mapped region */
1745         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1746         size *= 2 << smmu->pgshift;
1747         if (smmu->size != size)
1748                 dev_warn(smmu->dev,
1749                         "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
1750                         size, smmu->size);
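        /*
         * For example, with 4KB register pages (pgshift == 12) and a
         * hypothetical ID1.NUMPAGENDXB of 3, this works out as
         * (1 << 4) * (2 << 12) = 16 * 8KB = 128KB, which should match the
         * size of the "reg" region supplied in the device tree.
         */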
1751
1752         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1753         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1754         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1755                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1756                 return -ENODEV;
1757         }
1758         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1759                    smmu->num_context_banks, smmu->num_s2_context_banks);
1760         /*
1761          * Cavium CN88xx erratum #27704.
1762          * Ensure ASID and VMID allocation is unique across all SMMUs in
1763          * the system.
1764          */
1765         if (smmu->model == CAVIUM_SMMUV2) {
1766                 smmu->cavium_id_base =
1767                         atomic_add_return(smmu->num_context_banks,
1768                                           &cavium_smmu_context_count);
1769                 smmu->cavium_id_base -= smmu->num_context_banks;
1770         }
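        /*
         * As an illustration, two Cavium SMMUs with 8 and 16 context banks
         * probed in that order get cavium_id_base values of 0 and 8, so
         * the ASIDs/VMIDs derived from cavium_id_base plus the context
         * bank index never overlap between the two instances.
         */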
1771
1772         /* ID2 */
1773         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1774         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1775         smmu->ipa_size = size;
1776
1777         /* The output mask is also applied for bypass */
1778         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1779         smmu->pa_size = size;
1780
1781         if (id & ID2_VMID16)
1782                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1783
1784         /*
1785          * What the page table walker can address actually depends on which
1786          * descriptor format is in use, but since a) we don't know that yet,
1787          * and b) it can vary per context bank, this will have to do...
1788          */
1789         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1790                 dev_warn(smmu->dev,
1791                          "failed to set DMA mask for table walker\n");
1792
1793         if (smmu->version < ARM_SMMU_V2) {
1794                 smmu->va_size = smmu->ipa_size;
1795                 if (smmu->version == ARM_SMMU_V1_64K)
1796                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1797         } else {
1798                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1799                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1800                 if (id & ID2_PTFS_4K)
1801                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1802                 if (id & ID2_PTFS_16K)
1803                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1804                 if (id & ID2_PTFS_64K)
1805                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1806         }
1807
1808         /* Now we've corralled the various formats, what'll it do? */
1809         size = 0;
1810         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1811                 size |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1812         if (smmu->features &
1813             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1814                 size |= SZ_4K | SZ_2M | SZ_1G;
1815         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1816                 size |= SZ_16K | SZ_32M;
1817         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1818                 size |= SZ_64K | SZ_512M;
1819
1820         arm_smmu_ops.pgsize_bitmap &= size;
1821         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size);
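        /*
         * For instance, a hypothetical SMMU supporting the long-descriptor
         * AArch32/AArch64 4K format plus the AArch64 64K format reports
         * 0x60211000 here, i.e. 4K, 64K, 2M, 512M and 1G pages.
         */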
1822
1823         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1824                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1825                            smmu->va_size, smmu->ipa_size);
1826
1827         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1828                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1829                            smmu->ipa_size, smmu->pa_size);
1830
1831         return 0;
1832 }
1833
1834 struct arm_smmu_match_data {
1835         enum arm_smmu_arch_version version;
1836         enum arm_smmu_implementation model;
1837 };
1838
1839 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1840 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1841
1842 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1843 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1844 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1845 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1846 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1847
1848 static const struct of_device_id arm_smmu_of_match[] = {
1849         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1850         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1851         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1852         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1853         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1854         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1855         { },
1856 };
1857 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1858
1859 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
1860 {
1861         const struct of_device_id *of_id;
1862         const struct arm_smmu_match_data *data;
1863         struct resource *res;
1864         struct arm_smmu_device *smmu;
1865         struct device *dev = &pdev->dev;
1866         struct rb_node *node;
1867         struct of_phandle_args masterspec;
1868         int num_irqs, i, err;
1869
1870         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
1871         if (!smmu) {
1872                 dev_err(dev, "failed to allocate arm_smmu_device\n");
1873                 return -ENOMEM;
1874         }
1875         smmu->dev = dev;
1876
1877         of_id = of_match_node(arm_smmu_of_match, dev->of_node);
1878         data = of_id->data;
1879         smmu->version = data->version;
1880         smmu->model = data->model;
1881
1882         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1883         smmu->base = devm_ioremap_resource(dev, res);
1884         if (IS_ERR(smmu->base))
1885                 return PTR_ERR(smmu->base);
1886         smmu->size = resource_size(res);
1887
1888         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1889                                  &smmu->num_global_irqs)) {
1890                 dev_err(dev, "missing #global-interrupts property\n");
1891                 return -ENODEV;
1892         }
1893
1894         num_irqs = 0;
1895         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
1896                 num_irqs++;
1897                 if (num_irqs > smmu->num_global_irqs)
1898                         smmu->num_context_irqs++;
1899         }
1900
1901         if (!smmu->num_context_irqs) {
1902                 dev_err(dev, "found %d interrupts but expected at least %d\n",
1903                         num_irqs, smmu->num_global_irqs + 1);
1904                 return -ENODEV;
1905         }
1906
1907         smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
1908                                   GFP_KERNEL);
1909         if (!smmu->irqs) {
1910                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
1911                 return -ENOMEM;
1912         }
1913
1914         for (i = 0; i < num_irqs; ++i) {
1915                 int irq = platform_get_irq(pdev, i);
1916
1917                 if (irq < 0) {
1918                         dev_err(dev, "failed to get irq index %d\n", i);
1919                         return -ENODEV;
1920                 }
1921                 smmu->irqs[i] = irq;
1922         }
1923
1924         err = arm_smmu_device_cfg_probe(smmu);
1925         if (err)
1926                 return err;
1927
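        /*
         * "mmu-masters" follows the legacy binding: a list of
         * <master-phandle [StreamIDs...]> entries, with the number of IDs
         * per entry given by #stream-id-cells in the master's node. An
         * illustrative fragment (IDs are made up):
         *
         *      mmu-masters = <&dma0 0xd01d 0xd01e>,
         *                    <&gpu  0x100>;
         */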
1928         i = 0;
1929         smmu->masters = RB_ROOT;
1930         while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters",
1931                                            "#stream-id-cells", i,
1932                                            &masterspec)) {
1933                 err = register_smmu_master(smmu, dev, &masterspec);
1934                 if (err) {
1935                         dev_err(dev, "failed to add master %s\n",
1936                                 masterspec.np->name);
1937                         goto out_put_masters;
1938                 }
1939
1940                 i++;
1941         }
1942         dev_notice(dev, "registered %d master devices\n", i);
1943
1944         parse_driver_options(smmu);
1945
1946         if (smmu->version == ARM_SMMU_V2 &&
1947             smmu->num_context_banks != smmu->num_context_irqs) {
1948                 dev_err(dev,
1949                         "found only %d context interrupt(s) but %d required\n",
1950                         smmu->num_context_irqs, smmu->num_context_banks);
1951                 err = -ENODEV;
1952                 goto out_put_masters;
1953         }
1954
1955         for (i = 0; i < smmu->num_global_irqs; ++i) {
1956                 err = request_irq(smmu->irqs[i],
1957                                   arm_smmu_global_fault,
1958                                   IRQF_SHARED,
1959                                   "arm-smmu global fault",
1960                                   smmu);
1961                 if (err) {
1962                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
1963                                 i, smmu->irqs[i]);
1964                         goto out_free_irqs;
1965                 }
1966         }
1967
1968         INIT_LIST_HEAD(&smmu->list);
1969         spin_lock(&arm_smmu_devices_lock);
1970         list_add(&smmu->list, &arm_smmu_devices);
1971         spin_unlock(&arm_smmu_devices_lock);
1972
1973         arm_smmu_device_reset(smmu);
1974         return 0;
1975
1976 out_free_irqs:
1977         while (i--)
1978                 free_irq(smmu->irqs[i], smmu);
1979
1980 out_put_masters:
1981         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
1982                 struct arm_smmu_master *master
1983                         = container_of(node, struct arm_smmu_master, node);
1984                 of_node_put(master->of_node);
1985         }
1986
1987         return err;
1988 }
1989
1990 static int arm_smmu_device_remove(struct platform_device *pdev)
1991 {
1992         int i;
1993         struct device *dev = &pdev->dev;
1994         struct arm_smmu_device *curr, *smmu = NULL;
1995         struct rb_node *node;
1996
1997         spin_lock(&arm_smmu_devices_lock);
1998         list_for_each_entry(curr, &arm_smmu_devices, list) {
1999                 if (curr->dev == dev) {
2000                         smmu = curr;
2001                         list_del(&smmu->list);
2002                         break;
2003                 }
2004         }
2005         spin_unlock(&arm_smmu_devices_lock);
2006
2007         if (!smmu)
2008                 return -ENODEV;
2009
2010         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
2011                 struct arm_smmu_master *master
2012                         = container_of(node, struct arm_smmu_master, node);
2013                 of_node_put(master->of_node);
2014         }
2015
2016         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2017                 dev_err(dev, "removing device with active domains!\n");
2018
2019         for (i = 0; i < smmu->num_global_irqs; ++i)
2020                 free_irq(smmu->irqs[i], smmu);
2021
2022         /* Turn the thing off */
2023         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2024         return 0;
2025 }
2026
2027 static struct platform_driver arm_smmu_driver = {
2028         .driver = {
2029                 .name           = "arm-smmu",
2030                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2031         },
2032         .probe  = arm_smmu_device_dt_probe,
2033         .remove = arm_smmu_device_remove,
2034 };
2035
2036 static int __init arm_smmu_init(void)
2037 {
2038         struct device_node *np;
2039         int ret;
2040
2041         /*
2042          * Play nice with systems that don't have an ARM SMMU by checking that
2043          * an ARM SMMU exists in the system before proceeding with the driver
2044          * and IOMMU bus operation registration.
2045          */
2046         np = of_find_matching_node(NULL, arm_smmu_of_match);
2047         if (!np)
2048                 return 0;
2049
2050         of_node_put(np);
2051
2052         ret = platform_driver_register(&arm_smmu_driver);
2053         if (ret)
2054                 return ret;
2055
2056         /* Oh, for a proper bus abstraction */
2057         if (!iommu_present(&platform_bus_type))
2058                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2059
2060 #ifdef CONFIG_ARM_AMBA
2061         if (!iommu_present(&amba_bustype))
2062                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2063 #endif
2064
2065 #ifdef CONFIG_PCI
2066         if (!iommu_present(&pci_bus_type))
2067                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2068 #endif
2069
2070         return 0;
2071 }
2072
2073 static void __exit arm_smmu_exit(void)
2074 {
2075         return platform_driver_unregister(&arm_smmu_driver);
2076 }
2077
2078 subsys_initcall(arm_smmu_init);
2079 module_exit(arm_smmu_exit);
2080
2081 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2082 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2083 MODULE_LICENSE("GPL v2");