iommu/arm-smmu: Handle stream IDs more dynamically
drivers/iommu/arm-smmu.c
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  */
28
29 #define pr_fmt(fmt) "arm-smmu: " fmt
30
31 #include <linux/delay.h>
32 #include <linux/dma-iommu.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/err.h>
35 #include <linux/interrupt.h>
36 #include <linux/io.h>
37 #include <linux/io-64-nonatomic-hi-lo.h>
38 #include <linux/iommu.h>
39 #include <linux/iopoll.h>
40 #include <linux/module.h>
41 #include <linux/of.h>
42 #include <linux/of_address.h>
43 #include <linux/pci.h>
44 #include <linux/platform_device.h>
45 #include <linux/slab.h>
46 #include <linux/spinlock.h>
47
48 #include <linux/amba/bus.h>
49
50 #include "io-pgtable.h"
51
52 /* Maximum number of stream IDs assigned to a single device */
53 #define MAX_MASTER_STREAMIDS            128
54
55 /* Maximum number of context banks per SMMU */
56 #define ARM_SMMU_MAX_CBS                128
57
58 /* Maximum number of mapping groups per SMMU */
59 #define ARM_SMMU_MAX_SMRS               128
60
61 /* SMMU global address space */
62 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
63 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
64
65 /*
66  * SMMU global address space with conditional offset to access secure
67  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
68  * nsGFSYNR0: 0x450)
69  */
70 #define ARM_SMMU_GR0_NS(smmu)                                           \
71         ((smmu)->base +                                                 \
72                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
73                         ? 0x400 : 0))
74
75 /*
76  * Some 64-bit registers only make sense to write atomically, but in such
77  * cases all the data relevant to AArch32 formats lies within the lower word,
78  * therefore this actually makes more sense than it might first appear.
79  */
80 #ifdef CONFIG_64BIT
81 #define smmu_write_atomic_lq            writeq_relaxed
82 #else
83 #define smmu_write_atomic_lq            writel_relaxed
84 #endif
85
86 /* Configuration registers */
87 #define ARM_SMMU_GR0_sCR0               0x0
88 #define sCR0_CLIENTPD                   (1 << 0)
89 #define sCR0_GFRE                       (1 << 1)
90 #define sCR0_GFIE                       (1 << 2)
91 #define sCR0_GCFGFRE                    (1 << 4)
92 #define sCR0_GCFGFIE                    (1 << 5)
93 #define sCR0_USFCFG                     (1 << 10)
94 #define sCR0_VMIDPNE                    (1 << 11)
95 #define sCR0_PTM                        (1 << 12)
96 #define sCR0_FB                         (1 << 13)
97 #define sCR0_VMID16EN                   (1 << 31)
98 #define sCR0_BSU_SHIFT                  14
99 #define sCR0_BSU_MASK                   0x3
100
101 /* Auxiliary Configuration register */
102 #define ARM_SMMU_GR0_sACR               0x10
103
104 /* Identification registers */
105 #define ARM_SMMU_GR0_ID0                0x20
106 #define ARM_SMMU_GR0_ID1                0x24
107 #define ARM_SMMU_GR0_ID2                0x28
108 #define ARM_SMMU_GR0_ID3                0x2c
109 #define ARM_SMMU_GR0_ID4                0x30
110 #define ARM_SMMU_GR0_ID5                0x34
111 #define ARM_SMMU_GR0_ID6                0x38
112 #define ARM_SMMU_GR0_ID7                0x3c
113 #define ARM_SMMU_GR0_sGFSR              0x48
114 #define ARM_SMMU_GR0_sGFSYNR0           0x50
115 #define ARM_SMMU_GR0_sGFSYNR1           0x54
116 #define ARM_SMMU_GR0_sGFSYNR2           0x58
117
118 #define ID0_S1TS                        (1 << 30)
119 #define ID0_S2TS                        (1 << 29)
120 #define ID0_NTS                         (1 << 28)
121 #define ID0_SMS                         (1 << 27)
122 #define ID0_ATOSNS                      (1 << 26)
123 #define ID0_PTFS_NO_AARCH32             (1 << 25)
124 #define ID0_PTFS_NO_AARCH32S            (1 << 24)
125 #define ID0_CTTW                        (1 << 14)
126 #define ID0_NUMIRPT_SHIFT               16
127 #define ID0_NUMIRPT_MASK                0xff
128 #define ID0_NUMSIDB_SHIFT               9
129 #define ID0_NUMSIDB_MASK                0xf
130 #define ID0_NUMSMRG_SHIFT               0
131 #define ID0_NUMSMRG_MASK                0xff
132
133 #define ID1_PAGESIZE                    (1 << 31)
134 #define ID1_NUMPAGENDXB_SHIFT           28
135 #define ID1_NUMPAGENDXB_MASK            7
136 #define ID1_NUMS2CB_SHIFT               16
137 #define ID1_NUMS2CB_MASK                0xff
138 #define ID1_NUMCB_SHIFT                 0
139 #define ID1_NUMCB_MASK                  0xff
140
141 #define ID2_OAS_SHIFT                   4
142 #define ID2_OAS_MASK                    0xf
143 #define ID2_IAS_SHIFT                   0
144 #define ID2_IAS_MASK                    0xf
145 #define ID2_UBS_SHIFT                   8
146 #define ID2_UBS_MASK                    0xf
147 #define ID2_PTFS_4K                     (1 << 12)
148 #define ID2_PTFS_16K                    (1 << 13)
149 #define ID2_PTFS_64K                    (1 << 14)
150 #define ID2_VMID16                      (1 << 15)
151
152 #define ID7_MAJOR_SHIFT                 4
153 #define ID7_MAJOR_MASK                  0xf
154
155 /* Global TLB invalidation */
156 #define ARM_SMMU_GR0_TLBIVMID           0x64
157 #define ARM_SMMU_GR0_TLBIALLNSNH        0x68
158 #define ARM_SMMU_GR0_TLBIALLH           0x6c
159 #define ARM_SMMU_GR0_sTLBGSYNC          0x70
160 #define ARM_SMMU_GR0_sTLBGSTATUS        0x74
161 #define sTLBGSTATUS_GSACTIVE            (1 << 0)
162 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
163
164 /* Stream mapping registers */
165 #define ARM_SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
166 #define SMR_VALID                       (1 << 31)
167 #define SMR_MASK_SHIFT                  16
168 #define SMR_ID_SHIFT                    0
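/*
 * Per the SMMU architecture, each SMRn holds a VALID flag in bit 31, a
 * matching mask in the upper halfword and a stream ID in the lower halfword;
 * the implemented widths of the ID and mask fields are IMPLEMENTATION
 * DEFINED (hence streamid_mask/smr_mask_mask in struct arm_smmu_device).
 */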
169
170 #define ARM_SMMU_GR0_S2CR(n)            (0xc00 + ((n) << 2))
171 #define S2CR_CBNDX_SHIFT                0
172 #define S2CR_CBNDX_MASK                 0xff
173 #define S2CR_TYPE_SHIFT                 16
174 #define S2CR_TYPE_MASK                  0x3
175 #define S2CR_TYPE_TRANS                 (0 << S2CR_TYPE_SHIFT)
176 #define S2CR_TYPE_BYPASS                (1 << S2CR_TYPE_SHIFT)
177 #define S2CR_TYPE_FAULT                 (2 << S2CR_TYPE_SHIFT)
178
179 #define S2CR_PRIVCFG_SHIFT              24
180 #define S2CR_PRIVCFG_UNPRIV             (2 << S2CR_PRIVCFG_SHIFT)
181
182 /* Context bank attribute registers */
183 #define ARM_SMMU_GR1_CBAR(n)            (0x0 + ((n) << 2))
184 #define CBAR_VMID_SHIFT                 0
185 #define CBAR_VMID_MASK                  0xff
186 #define CBAR_S1_BPSHCFG_SHIFT           8
187 #define CBAR_S1_BPSHCFG_MASK            3
188 #define CBAR_S1_BPSHCFG_NSH             3
189 #define CBAR_S1_MEMATTR_SHIFT           12
190 #define CBAR_S1_MEMATTR_MASK            0xf
191 #define CBAR_S1_MEMATTR_WB              0xf
192 #define CBAR_TYPE_SHIFT                 16
193 #define CBAR_TYPE_MASK                  0x3
194 #define CBAR_TYPE_S2_TRANS              (0 << CBAR_TYPE_SHIFT)
195 #define CBAR_TYPE_S1_TRANS_S2_BYPASS    (1 << CBAR_TYPE_SHIFT)
196 #define CBAR_TYPE_S1_TRANS_S2_FAULT     (2 << CBAR_TYPE_SHIFT)
197 #define CBAR_TYPE_S1_TRANS_S2_TRANS     (3 << CBAR_TYPE_SHIFT)
198 #define CBAR_IRPTNDX_SHIFT              24
199 #define CBAR_IRPTNDX_MASK               0xff
200
201 #define ARM_SMMU_GR1_CBA2R(n)           (0x800 + ((n) << 2))
202 #define CBA2R_RW64_32BIT                (0 << 0)
203 #define CBA2R_RW64_64BIT                (1 << 0)
204 #define CBA2R_VMID_SHIFT                16
205 #define CBA2R_VMID_MASK                 0xffff
206
207 /* Translation context bank */
208 #define ARM_SMMU_CB_BASE(smmu)          ((smmu)->base + ((smmu)->size >> 1))
209 #define ARM_SMMU_CB(smmu, n)            ((n) * (1 << (smmu)->pgshift))
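/*
 * The translation context banks occupy the upper half of the SMMU register
 * space, one translation-unit-sized page (1 << pgshift) per bank.
 */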
210
211 #define ARM_SMMU_CB_SCTLR               0x0
212 #define ARM_SMMU_CB_ACTLR               0x4
213 #define ARM_SMMU_CB_RESUME              0x8
214 #define ARM_SMMU_CB_TTBCR2              0x10
215 #define ARM_SMMU_CB_TTBR0               0x20
216 #define ARM_SMMU_CB_TTBR1               0x28
217 #define ARM_SMMU_CB_TTBCR               0x30
218 #define ARM_SMMU_CB_CONTEXTIDR          0x34
219 #define ARM_SMMU_CB_S1_MAIR0            0x38
220 #define ARM_SMMU_CB_S1_MAIR1            0x3c
221 #define ARM_SMMU_CB_PAR                 0x50
222 #define ARM_SMMU_CB_FSR                 0x58
223 #define ARM_SMMU_CB_FAR                 0x60
224 #define ARM_SMMU_CB_FSYNR0              0x68
225 #define ARM_SMMU_CB_S1_TLBIVA           0x600
226 #define ARM_SMMU_CB_S1_TLBIASID         0x610
227 #define ARM_SMMU_CB_S1_TLBIVAL          0x620
228 #define ARM_SMMU_CB_S2_TLBIIPAS2        0x630
229 #define ARM_SMMU_CB_S2_TLBIIPAS2L       0x638
230 #define ARM_SMMU_CB_ATS1PR              0x800
231 #define ARM_SMMU_CB_ATSR                0x8f0
232
233 #define SCTLR_S1_ASIDPNE                (1 << 12)
234 #define SCTLR_CFCFG                     (1 << 7)
235 #define SCTLR_CFIE                      (1 << 6)
236 #define SCTLR_CFRE                      (1 << 5)
237 #define SCTLR_E                         (1 << 4)
238 #define SCTLR_AFE                       (1 << 2)
239 #define SCTLR_TRE                       (1 << 1)
240 #define SCTLR_M                         (1 << 0)
241
242 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
243
244 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
245
246 #define CB_PAR_F                        (1 << 0)
247
248 #define ATSR_ACTIVE                     (1 << 0)
249
250 #define RESUME_RETRY                    (0 << 0)
251 #define RESUME_TERMINATE                (1 << 0)
252
253 #define TTBCR2_SEP_SHIFT                15
254 #define TTBCR2_SEP_UPSTREAM             (0x7 << TTBCR2_SEP_SHIFT)
255
256 #define TTBRn_ASID_SHIFT                48
257
258 #define FSR_MULTI                       (1 << 31)
259 #define FSR_SS                          (1 << 30)
260 #define FSR_UUT                         (1 << 8)
261 #define FSR_ASF                         (1 << 7)
262 #define FSR_TLBLKF                      (1 << 6)
263 #define FSR_TLBMCF                      (1 << 5)
264 #define FSR_EF                          (1 << 4)
265 #define FSR_PF                          (1 << 3)
266 #define FSR_AFF                         (1 << 2)
267 #define FSR_TF                          (1 << 1)
268
269 #define FSR_IGN                         (FSR_AFF | FSR_ASF | \
270                                          FSR_TLBMCF | FSR_TLBLKF)
271 #define FSR_FAULT                       (FSR_MULTI | FSR_SS | FSR_UUT | \
272                                          FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
273
274 #define FSYNR0_WNR                      (1 << 4)
275
276 static int force_stage;
277 module_param(force_stage, int, S_IRUGO);
278 MODULE_PARM_DESC(force_stage,
279         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
280 static bool disable_bypass;
281 module_param(disable_bypass, bool, S_IRUGO);
282 MODULE_PARM_DESC(disable_bypass,
283         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
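/*
 * Example (assumes the usual module-name prefix; '-' and '_' are
 * interchangeable in parameter names): with the driver built in, stage 2
 * can be forced and bypass disabled from the kernel command line with
 * "arm-smmu.force_stage=2 arm-smmu.disable_bypass=1".
 */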
284
285 enum arm_smmu_arch_version {
286         ARM_SMMU_V1,
287         ARM_SMMU_V1_64K,
288         ARM_SMMU_V2,
289 };
290
291 enum arm_smmu_implementation {
292         GENERIC_SMMU,
293         ARM_MMU500,
294         CAVIUM_SMMUV2,
295 };
296
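/*
 * Software state for one Stream Match Register: the hardware SMR index we
 * allocated, plus the ID/mask pair programmed into it (see
 * arm_smmu_master_configure_smrs()).
 */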
297 struct arm_smmu_smr {
298         u8                              idx;
299         u16                             mask;
300         u16                             id;
301 };
302
303 struct arm_smmu_master_cfg {
304         int                             num_streamids;
305         u16                             streamids[MAX_MASTER_STREAMIDS];
306         struct arm_smmu_smr             *smrs;
307 };
308
309 struct arm_smmu_master {
310         struct device_node              *of_node;
311         struct rb_node                  node;
312         struct arm_smmu_master_cfg      cfg;
313 };
314
315 struct arm_smmu_device {
316         struct device                   *dev;
317
318         void __iomem                    *base;
319         unsigned long                   size;
320         unsigned long                   pgshift;
321
322 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
323 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
324 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
325 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
326 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
327 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
328 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
329 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
330 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
331 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
332 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
333 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
334         u32                             features;
335
336 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
337         u32                             options;
338         enum arm_smmu_arch_version      version;
339         enum arm_smmu_implementation    model;
340
341         u32                             num_context_banks;
342         u32                             num_s2_context_banks;
343         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
344         atomic_t                        irptndx;
345
346         u32                             num_mapping_groups;
347         u16                             streamid_mask;
348         u16                             smr_mask_mask;
349         DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);
350
351         unsigned long                   va_size;
352         unsigned long                   ipa_size;
353         unsigned long                   pa_size;
354         unsigned long                   pgsize_bitmap;
355
356         u32                             num_global_irqs;
357         u32                             num_context_irqs;
358         unsigned int                    *irqs;
359
360         struct list_head                list;
361         struct rb_root                  masters;
362
363         u32                             cavium_id_base; /* Specific to Cavium */
364 };
365
366 enum arm_smmu_context_fmt {
367         ARM_SMMU_CTX_FMT_NONE,
368         ARM_SMMU_CTX_FMT_AARCH64,
369         ARM_SMMU_CTX_FMT_AARCH32_L,
370         ARM_SMMU_CTX_FMT_AARCH32_S,
371 };
372
373 struct arm_smmu_cfg {
374         u8                              cbndx;
375         u8                              irptndx;
376         u32                             cbar;
377         enum arm_smmu_context_fmt       fmt;
378 };
379 #define INVALID_IRPTNDX                 0xff
380
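/*
 * ASIDs and VMIDs are derived from the context bank index. The
 * cavium_id_base offset keeps them unique across SMMU instances on Cavium
 * parts, which share a single TLB namespace; it is zero elsewhere.
 */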
381 #define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
382 #define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
383
384 enum arm_smmu_domain_stage {
385         ARM_SMMU_DOMAIN_S1 = 0,
386         ARM_SMMU_DOMAIN_S2,
387         ARM_SMMU_DOMAIN_NESTED,
388 };
389
390 struct arm_smmu_domain {
391         struct arm_smmu_device          *smmu;
392         struct io_pgtable_ops           *pgtbl_ops;
393         spinlock_t                      pgtbl_lock;
394         struct arm_smmu_cfg             cfg;
395         enum arm_smmu_domain_stage      stage;
396         struct mutex                    init_mutex; /* Protects smmu pointer */
397         struct iommu_domain             domain;
398 };
399
400 struct arm_smmu_phandle_args {
401         struct device_node *np;
402         int args_count;
403         uint32_t args[MAX_MASTER_STREAMIDS];
404 };
405
406 static DEFINE_SPINLOCK(arm_smmu_devices_lock);
407 static LIST_HEAD(arm_smmu_devices);
408
409 struct arm_smmu_option_prop {
410         u32 opt;
411         const char *prop;
412 };
413
414 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
415
416 static struct arm_smmu_option_prop arm_smmu_options[] = {
417         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
418         { 0, NULL},
419 };
420
421 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
422 {
423         return container_of(dom, struct arm_smmu_domain, domain);
424 }
425
426 static void parse_driver_options(struct arm_smmu_device *smmu)
427 {
428         int i = 0;
429
430         do {
431                 if (of_property_read_bool(smmu->dev->of_node,
432                                                 arm_smmu_options[i].prop)) {
433                         smmu->options |= arm_smmu_options[i].opt;
434                         dev_notice(smmu->dev, "option %s\n",
435                                 arm_smmu_options[i].prop);
436                 }
437         } while (arm_smmu_options[++i].opt);
438 }
439
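/*
 * For PCI devices, walk up to the root bus and use the host controller's
 * OF node, since that (rather than the endpoint itself) is the node
 * registered as an SMMU master.
 */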
440 static struct device_node *dev_get_dev_node(struct device *dev)
441 {
442         if (dev_is_pci(dev)) {
443                 struct pci_bus *bus = to_pci_dev(dev)->bus;
444
445                 while (!pci_is_root_bus(bus))
446                         bus = bus->parent;
447                 return bus->bridge->parent->of_node;
448         }
449
450         return dev->of_node;
451 }
452
453 static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
454                                                 struct device_node *dev_node)
455 {
456         struct rb_node *node = smmu->masters.rb_node;
457
458         while (node) {
459                 struct arm_smmu_master *master;
460
461                 master = container_of(node, struct arm_smmu_master, node);
462
463                 if (dev_node < master->of_node)
464                         node = node->rb_left;
465                 else if (dev_node > master->of_node)
466                         node = node->rb_right;
467                 else
468                         return master;
469         }
470
471         return NULL;
472 }
473
474 static struct arm_smmu_master_cfg *
475 find_smmu_master_cfg(struct device *dev)
476 {
477         struct arm_smmu_master_cfg *cfg = NULL;
478         struct iommu_group *group = iommu_group_get(dev);
479
480         if (group) {
481                 cfg = iommu_group_get_iommudata(group);
482                 iommu_group_put(group);
483         }
484
485         return cfg;
486 }
487
488 static int insert_smmu_master(struct arm_smmu_device *smmu,
489                               struct arm_smmu_master *master)
490 {
491         struct rb_node **new, *parent;
492
493         new = &smmu->masters.rb_node;
494         parent = NULL;
495         while (*new) {
496                 struct arm_smmu_master *this
497                         = container_of(*new, struct arm_smmu_master, node);
498
499                 parent = *new;
500                 if (master->of_node < this->of_node)
501                         new = &((*new)->rb_left);
502                 else if (master->of_node > this->of_node)
503                         new = &((*new)->rb_right);
504                 else
505                         return -EEXIST;
506         }
507
508         rb_link_node(&master->node, parent, new);
509         rb_insert_color(&master->node, &smmu->masters);
510         return 0;
511 }
512
513 static int register_smmu_master(struct arm_smmu_device *smmu,
514                                 struct device *dev,
515                                 struct arm_smmu_phandle_args *masterspec)
516 {
517         int i;
518         struct arm_smmu_master *master;
519
520         master = find_smmu_master(smmu, masterspec->np);
521         if (master) {
522                 dev_err(dev,
523                         "rejecting multiple registrations for master device %s\n",
524                         masterspec->np->name);
525                 return -EBUSY;
526         }
527
528         if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
529                 dev_err(dev,
530                         "reached maximum number (%d) of stream IDs for master device %s\n",
531                         MAX_MASTER_STREAMIDS, masterspec->np->name);
532                 return -ENOSPC;
533         }
534
535         master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
536         if (!master)
537                 return -ENOMEM;
538
539         master->of_node                 = masterspec->np;
540         master->cfg.num_streamids       = masterspec->args_count;
541
542         for (i = 0; i < master->cfg.num_streamids; ++i) {
543                 u16 streamid = masterspec->args[i];
544
545                 if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) &&
546                      (streamid >= smmu->num_mapping_groups)) {
547                         dev_err(dev,
548                                 "stream ID for master device %s greater than maximum allowed (%d)\n",
549                                 masterspec->np->name, smmu->num_mapping_groups);
550                         return -ERANGE;
551                 }
552                 master->cfg.streamids[i] = streamid;
553         }
554         return insert_smmu_master(smmu, master);
555 }
556
557 static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
558 {
559         struct arm_smmu_device *smmu;
560         struct arm_smmu_master *master = NULL;
561         struct device_node *dev_node = dev_get_dev_node(dev);
562
563         spin_lock(&arm_smmu_devices_lock);
564         list_for_each_entry(smmu, &arm_smmu_devices, list) {
565                 master = find_smmu_master(smmu, dev_node);
566                 if (master)
567                         break;
568         }
569         spin_unlock(&arm_smmu_devices_lock);
570
571         return master ? smmu : NULL;
572 }
573
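/* Atomically claim the first free bit in [start, end), retrying on races */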
574 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
575 {
576         int idx;
577
578         do {
579                 idx = find_next_zero_bit(map, end, start);
580                 if (idx == end)
581                         return -ENOSPC;
582         } while (test_and_set_bit(idx, map));
583
584         return idx;
585 }
586
587 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
588 {
589         clear_bit(idx, map);
590 }
591
592 /* Wait for any pending TLB invalidations to complete */
593 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
594 {
595         int count = 0;
596         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
597
598         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
599         while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
600                & sTLBGSTATUS_GSACTIVE) {
601                 cpu_relax();
602                 if (++count == TLB_LOOP_TIMEOUT) {
603                         dev_err_ratelimited(smmu->dev,
604                         "TLB sync timed out -- SMMU may be deadlocked\n");
605                         return;
606                 }
607                 udelay(1);
608         }
609 }
610
611 static void arm_smmu_tlb_sync(void *cookie)
612 {
613         struct arm_smmu_domain *smmu_domain = cookie;
614         __arm_smmu_tlb_sync(smmu_domain->smmu);
615 }
616
617 static void arm_smmu_tlb_inv_context(void *cookie)
618 {
619         struct arm_smmu_domain *smmu_domain = cookie;
620         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
621         struct arm_smmu_device *smmu = smmu_domain->smmu;
622         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
623         void __iomem *base;
624
625         if (stage1) {
626                 base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
627                 writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
628                                base + ARM_SMMU_CB_S1_TLBIASID);
629         } else {
630                 base = ARM_SMMU_GR0(smmu);
631                 writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
632                                base + ARM_SMMU_GR0_TLBIVMID);
633         }
634
635         __arm_smmu_tlb_sync(smmu);
636 }
637
638 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
639                                           size_t granule, bool leaf, void *cookie)
640 {
641         struct arm_smmu_domain *smmu_domain = cookie;
642         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
643         struct arm_smmu_device *smmu = smmu_domain->smmu;
644         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
645         void __iomem *reg;
646
647         if (stage1) {
648                 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
649                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
650
651                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
652                         iova &= ~12UL;
653                         iova |= ARM_SMMU_CB_ASID(smmu, cfg);
654                         do {
655                                 writel_relaxed(iova, reg);
656                                 iova += granule;
657                         } while (size -= granule);
658                 } else {
659                         iova >>= 12;
660                         iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
661                         do {
662                                 writeq_relaxed(iova, reg);
663                                 iova += granule >> 12;
664                         } while (size -= granule);
665                 }
666         } else if (smmu->version == ARM_SMMU_V2) {
667                 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
668                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
669                               ARM_SMMU_CB_S2_TLBIIPAS2;
670                 iova >>= 12;
671                 do {
672                         smmu_write_atomic_lq(iova, reg);
673                         iova += granule >> 12;
674                 } while (size -= granule);
675         } else {
676                 reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
677                 writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
678         }
679 }
680
681 static struct iommu_gather_ops arm_smmu_gather_ops = {
682         .tlb_flush_all  = arm_smmu_tlb_inv_context,
683         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
684         .tlb_sync       = arm_smmu_tlb_sync,
685 };
686
687 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
688 {
689         u32 fsr, fsynr;
690         unsigned long iova;
691         struct iommu_domain *domain = dev;
692         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
693         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
694         struct arm_smmu_device *smmu = smmu_domain->smmu;
695         void __iomem *cb_base;
696
697         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
698         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
699
700         if (!(fsr & FSR_FAULT))
701                 return IRQ_NONE;
702
703         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
704         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
705
706         dev_err_ratelimited(smmu->dev,
707         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
708                             fsr, iova, fsynr, cfg->cbndx);
709
710         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
711         return IRQ_HANDLED;
712 }
713
714 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
715 {
716         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
717         struct arm_smmu_device *smmu = dev;
718         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
719
720         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
721         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
722         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
723         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
724
725         if (!gfsr)
726                 return IRQ_NONE;
727
728         dev_err_ratelimited(smmu->dev,
729                 "Unexpected global fault, this could be serious\n");
730         dev_err_ratelimited(smmu->dev,
731                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
732                 gfsr, gfsynr0, gfsynr1, gfsynr2);
733
734         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
735         return IRQ_HANDLED;
736 }
737
738 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
739                                        struct io_pgtable_cfg *pgtbl_cfg)
740 {
741         u32 reg, reg2;
742         u64 reg64;
743         bool stage1;
744         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
745         struct arm_smmu_device *smmu = smmu_domain->smmu;
746         void __iomem *cb_base, *gr1_base;
747
748         gr1_base = ARM_SMMU_GR1(smmu);
749         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
750         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
751
752         if (smmu->version > ARM_SMMU_V1) {
753                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
754                         reg = CBA2R_RW64_64BIT;
755                 else
756                         reg = CBA2R_RW64_32BIT;
757                 /* 16-bit VMIDs live in CBA2R */
758                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
759                         reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;
760
761                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
762         }
763
764         /* CBAR */
765         reg = cfg->cbar;
766         if (smmu->version < ARM_SMMU_V2)
767                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
768
769         /*
770          * Use the weakest shareability/memory types, so they are
771          * overridden by the ttbcr/pte.
772          */
773         if (stage1) {
774                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
775                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
776         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
777                 /* 8-bit VMIDs live in CBAR */
778                 reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
779         }
780         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
781
782         /* TTBRs */
783         if (stage1) {
784                 u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
785
786                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
787                         reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
788                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
789                         reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
790                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
791                         writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
792                 } else {
793                         reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
794                         reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
795                         writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
796                         reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
797                         reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
798                         writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
799                 }
800         } else {
801                 reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
802                 writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
803         }
804
805         /* TTBCR */
806         if (stage1) {
807                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
808                         reg = pgtbl_cfg->arm_v7s_cfg.tcr;
809                         reg2 = 0;
810                 } else {
811                         reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
812                         reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
813                         reg2 |= TTBCR2_SEP_UPSTREAM;
814                 }
815                 if (smmu->version > ARM_SMMU_V1)
816                         writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
817         } else {
818                 reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
819         }
820         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
821
822         /* MAIRs (stage-1 only) */
823         if (stage1) {
824                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
825                         reg = pgtbl_cfg->arm_v7s_cfg.prrr;
826                         reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
827                 } else {
828                         reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
829                         reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
830                 }
831                 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
832                 writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
833         }
834
835         /* SCTLR */
836         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
837         if (stage1)
838                 reg |= SCTLR_S1_ASIDPNE;
839 #ifdef __BIG_ENDIAN
840         reg |= SCTLR_E;
841 #endif
842         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
843 }
844
845 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
846                                         struct arm_smmu_device *smmu)
847 {
848         int irq, start, ret = 0;
849         unsigned long ias, oas;
850         struct io_pgtable_ops *pgtbl_ops;
851         struct io_pgtable_cfg pgtbl_cfg;
852         enum io_pgtable_fmt fmt;
853         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
854         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
855
856         mutex_lock(&smmu_domain->init_mutex);
857         if (smmu_domain->smmu)
858                 goto out_unlock;
859
860         /* We're bypassing these SIDs, so don't allocate an actual context */
861         if (domain->type == IOMMU_DOMAIN_DMA) {
862                 smmu_domain->smmu = smmu;
863                 goto out_unlock;
864         }
865
866         /*
867          * Mapping the requested stage onto what we support is surprisingly
868          * complicated, mainly because the spec allows S1+S2 SMMUs without
869          * support for nested translation. That means we end up with the
870          * following table:
871          *
872          * Requested        Supported        Actual
873          *     S1               N              S1
874          *     S1             S1+S2            S1
875          *     S1               S2             S2
876          *     S1               S1             S1
877          *     N                N              N
878          *     N              S1+S2            S2
879          *     N                S2             S2
880          *     N                S1             S1
881          *
882          * Note that you can't actually request stage-2 mappings.
883          */
884         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
885                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
886         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
887                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
888
889         /*
890          * Choosing a suitable context format is even more fiddly. Until we
891          * grow some way for the caller to express a preference, and/or move
892          * the decision into the io-pgtable code where it arguably belongs,
893          * just aim for the closest thing to the rest of the system, and hope
894          * that the hardware isn't esoteric enough that we can't assume AArch64
895          * support to be a superset of AArch32 support...
896          */
897         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
898                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
899         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
900             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
901             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
902             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
903                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
904         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
905             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
906                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
907                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
908                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
909
910         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
911                 ret = -EINVAL;
912                 goto out_unlock;
913         }
914
915         switch (smmu_domain->stage) {
916         case ARM_SMMU_DOMAIN_S1:
917                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
918                 start = smmu->num_s2_context_banks;
919                 ias = smmu->va_size;
920                 oas = smmu->ipa_size;
921                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
922                         fmt = ARM_64_LPAE_S1;
923                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
924                         fmt = ARM_32_LPAE_S1;
925                         ias = min(ias, 32UL);
926                         oas = min(oas, 40UL);
927                 } else {
928                         fmt = ARM_V7S;
929                         ias = min(ias, 32UL);
930                         oas = min(oas, 32UL);
931                 }
932                 break;
933         case ARM_SMMU_DOMAIN_NESTED:
934                 /*
935                  * We will likely want to change this if/when KVM gets
936                  * involved.
937                  */
938         case ARM_SMMU_DOMAIN_S2:
939                 cfg->cbar = CBAR_TYPE_S2_TRANS;
940                 start = 0;
941                 ias = smmu->ipa_size;
942                 oas = smmu->pa_size;
943                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
944                         fmt = ARM_64_LPAE_S2;
945                 } else {
946                         fmt = ARM_32_LPAE_S2;
947                         ias = min(ias, 40UL);
948                         oas = min(oas, 40UL);
949                 }
950                 break;
951         default:
952                 ret = -EINVAL;
953                 goto out_unlock;
954         }
955
956         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
957                                       smmu->num_context_banks);
958         if (ret < 0)
959                 goto out_unlock;
960
961         cfg->cbndx = ret;
962         if (smmu->version < ARM_SMMU_V2) {
963                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
964                 cfg->irptndx %= smmu->num_context_irqs;
965         } else {
966                 cfg->irptndx = cfg->cbndx;
967         }
968
969         pgtbl_cfg = (struct io_pgtable_cfg) {
970                 .pgsize_bitmap  = smmu->pgsize_bitmap,
971                 .ias            = ias,
972                 .oas            = oas,
973                 .tlb            = &arm_smmu_gather_ops,
974                 .iommu_dev      = smmu->dev,
975         };
976
977         smmu_domain->smmu = smmu;
978         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
979         if (!pgtbl_ops) {
980                 ret = -ENOMEM;
981                 goto out_clear_smmu;
982         }
983
984         /* Update the domain's page sizes to reflect the page table format */
985         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
986
987         /* Initialise the context bank with our page table cfg */
988         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
989
990         /*
991          * Request context fault interrupt. Do this last to avoid the
992          * handler seeing a half-initialised domain state.
993          */
994         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
995         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
996                                IRQF_SHARED, "arm-smmu-context-fault", domain);
997         if (ret < 0) {
998                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
999                         cfg->irptndx, irq);
1000                 cfg->irptndx = INVALID_IRPTNDX;
1001         }
1002
1003         mutex_unlock(&smmu_domain->init_mutex);
1004
1005         /* Publish page table ops for map/unmap */
1006         smmu_domain->pgtbl_ops = pgtbl_ops;
1007         return 0;
1008
1009 out_clear_smmu:
1010         smmu_domain->smmu = NULL;
1011 out_unlock:
1012         mutex_unlock(&smmu_domain->init_mutex);
1013         return ret;
1014 }
1015
1016 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
1017 {
1018         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1019         struct arm_smmu_device *smmu = smmu_domain->smmu;
1020         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1021         void __iomem *cb_base;
1022         int irq;
1023
1024         if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
1025                 return;
1026
1027         /*
1028          * Disable the context bank and free the page tables before
1029          * releasing the context bank itself.
1030          */
1031         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1032         writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1033
1034         if (cfg->irptndx != INVALID_IRPTNDX) {
1035                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1036                 devm_free_irq(smmu->dev, irq, domain);
1037         }
1038
1039         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1040         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
1041 }
1042
1043 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1044 {
1045         struct arm_smmu_domain *smmu_domain;
1046
1047         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1048                 return NULL;
1049         /*
1050          * Allocate the domain and initialise some of its data structures.
1051          * We can't really do anything meaningful until we've added a
1052          * master.
1053          */
1054         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1055         if (!smmu_domain)
1056                 return NULL;
1057
1058         if (type == IOMMU_DOMAIN_DMA &&
1059             iommu_get_dma_cookie(&smmu_domain->domain)) {
1060                 kfree(smmu_domain);
1061                 return NULL;
1062         }
1063
1064         mutex_init(&smmu_domain->init_mutex);
1065         spin_lock_init(&smmu_domain->pgtbl_lock);
1066
1067         return &smmu_domain->domain;
1068 }
1069
1070 static void arm_smmu_domain_free(struct iommu_domain *domain)
1071 {
1072         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1073
1074         /*
1075          * Free the domain resources. We assume that all devices have
1076          * already been detached.
1077          */
1078         iommu_put_dma_cookie(domain);
1079         arm_smmu_destroy_domain_context(domain);
1080         kfree(smmu_domain);
1081 }
1082
1083 static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
1084                                           struct arm_smmu_master_cfg *cfg)
1085 {
1086         int i;
1087         struct arm_smmu_smr *smrs;
1088         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1089
1090         if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
1091                 return 0;
1092
1093         if (cfg->smrs)
1094                 return -EEXIST;
1095
1096         smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
1097         if (!smrs) {
1098                 dev_err(smmu->dev, "failed to allocate %d SMRs\n",
1099                         cfg->num_streamids);
1100                 return -ENOMEM;
1101         }
1102
1103         /* Allocate the SMRs on the SMMU */
1104         for (i = 0; i < cfg->num_streamids; ++i) {
1105                 int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
1106                                                   smmu->num_mapping_groups);
1107                 if (idx < 0) {
1108                         dev_err(smmu->dev, "failed to allocate free SMR\n");
1109                         goto err_free_smrs;
1110                 }
1111
1112                 smrs[i] = (struct arm_smmu_smr) {
1113                         .idx    = idx,
1114                         .mask   = 0, /* We don't currently share SMRs */
1115                         .id     = cfg->streamids[i],
1116                 };
1117         }
1118
1119         /* It worked! Now, poke the actual hardware */
1120         for (i = 0; i < cfg->num_streamids; ++i) {
1121                 u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
1122                           smrs[i].mask << SMR_MASK_SHIFT;
1123                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
1124         }
1125
1126         cfg->smrs = smrs;
1127         return 0;
1128
1129 err_free_smrs:
1130         while (--i >= 0)
1131                 __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
1132         kfree(smrs);
1133         return -ENOSPC;
1134 }
1135
1136 static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
1137                                       struct arm_smmu_master_cfg *cfg)
1138 {
1139         int i;
1140         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1141         struct arm_smmu_smr *smrs = cfg->smrs;
1142
1143         if (!smrs)
1144                 return;
1145
1146         /* Invalidate the SMRs before freeing back to the allocator */
1147         for (i = 0; i < cfg->num_streamids; ++i) {
1148                 u8 idx = smrs[i].idx;
1149
1150                 writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
1151                 __arm_smmu_free_bitmap(smmu->smr_map, idx);
1152         }
1153
1154         cfg->smrs = NULL;
1155         kfree(smrs);
1156 }
1157
1158 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1159                                       struct arm_smmu_master_cfg *cfg)
1160 {
1161         int i, ret;
1162         struct arm_smmu_device *smmu = smmu_domain->smmu;
1163         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1164
1165         /*
1166          * FIXME: This won't be needed once we have IOMMU-backed DMA ops
1167          * for all devices behind the SMMU. Note that we need to take
1168          * care configuring SMRs for devices which are both a platform_device
1169          * and a PCI device (i.e. a PCI host controller)
1170          */
1171         if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
1172                 return 0;
1173
1174         /* Devices in an IOMMU group may already be configured */
1175         ret = arm_smmu_master_configure_smrs(smmu, cfg);
1176         if (ret)
1177                 return ret == -EEXIST ? 0 : ret;
1178
1179         for (i = 0; i < cfg->num_streamids; ++i) {
1180                 u32 idx, s2cr;
1181
1182                 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
1183                 s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
1184                        (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
1185                 writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
1186         }
1187
1188         return 0;
1189 }
1190
1191 static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
1192                                           struct arm_smmu_master_cfg *cfg)
1193 {
1194         int i;
1195         struct arm_smmu_device *smmu = smmu_domain->smmu;
1196         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1197
1198         /* An IOMMU group is torn down by the first device to be removed */
1199         if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs)
1200                 return;
1201
1202         /*
1203          * We *must* clear the S2CR first, because freeing the SMR means
1204          * that it can be re-allocated immediately.
1205          */
1206         for (i = 0; i < cfg->num_streamids; ++i) {
1207                 u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
1208                 u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
1209
1210                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
1211         }
1212
1213         arm_smmu_master_free_smrs(smmu, cfg);
1214 }
1215
1216 static void arm_smmu_detach_dev(struct device *dev,
1217                                 struct arm_smmu_master_cfg *cfg)
1218 {
1219         struct iommu_domain *domain = dev->archdata.iommu;
1220         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1221
1222         dev->archdata.iommu = NULL;
1223         arm_smmu_domain_remove_master(smmu_domain, cfg);
1224 }
1225
1226 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1227 {
1228         int ret;
1229         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1230         struct arm_smmu_device *smmu;
1231         struct arm_smmu_master_cfg *cfg;
1232
1233         smmu = find_smmu_for_device(dev);
1234         if (!smmu) {
1235                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1236                 return -ENXIO;
1237         }
1238
1239         /* Ensure that the domain is finalised */
1240         ret = arm_smmu_init_domain_context(domain, smmu);
1241         if (ret < 0)
1242                 return ret;
1243
1244         /*
1245          * Sanity check the domain. We don't support domains across
1246          * different SMMUs.
1247          */
1248         if (smmu_domain->smmu != smmu) {
1249                 dev_err(dev,
1250                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1251                         dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1252                 return -EINVAL;
1253         }
1254
1255         /* Looks ok, so add the device to the domain */
1256         cfg = find_smmu_master_cfg(dev);
1257         if (!cfg)
1258                 return -ENODEV;
1259
1260         /* Detach the dev from its current domain */
1261         if (dev->archdata.iommu)
1262                 arm_smmu_detach_dev(dev, cfg);
1263
1264         ret = arm_smmu_domain_add_master(smmu_domain, cfg);
1265         if (!ret)
1266                 dev->archdata.iommu = domain;
1267         return ret;
1268 }
1269
1270 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1271                         phys_addr_t paddr, size_t size, int prot)
1272 {
1273         int ret;
1274         unsigned long flags;
1275         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1276         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1277
1278         if (!ops)
1279                 return -ENODEV;
1280
1281         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1282         ret = ops->map(ops, iova, paddr, size, prot);
1283         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1284         return ret;
1285 }
1286
1287 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1288                              size_t size)
1289 {
1290         size_t ret;
1291         unsigned long flags;
1292         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1293         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1294
1295         if (!ops)
1296                 return 0;
1297
1298         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1299         ret = ops->unmap(ops, iova, size);
1300         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1301         return ret;
1302 }
1303
1304 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1305                                               dma_addr_t iova)
1306 {
1307         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1308         struct arm_smmu_device *smmu = smmu_domain->smmu;
1309         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1310         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1311         struct device *dev = smmu->dev;
1312         void __iomem *cb_base;
1313         u32 tmp;
1314         u64 phys;
1315         unsigned long va;
1316
1317         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1318
1319         /* ATS1 registers can only be written atomically */
1320         va = iova & ~0xfffUL;
1321         if (smmu->version == ARM_SMMU_V2)
1322                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1323         else /* Register is only 32-bit in v1 */
1324                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1325
1326         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1327                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1328                 dev_err(dev,
1329                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1330                         &iova);
1331                 return ops->iova_to_phys(ops, iova);
1332         }
1333
1334         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1335         if (phys & CB_PAR_F) {
1336                 dev_err(dev, "translation fault!\n");
1337                 dev_err(dev, "PAR = 0x%llx\n", phys);
1338                 return 0;
1339         }
1340
1341         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1342 }
1343
1344 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1345                                         dma_addr_t iova)
1346 {
1347         phys_addr_t ret;
1348         unsigned long flags;
1349         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1350         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1351
1352         if (!ops)
1353                 return 0;
1354
1355         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1356         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1357                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1358                 ret = arm_smmu_iova_to_phys_hard(domain, iova);
1359         } else {
1360                 ret = ops->iova_to_phys(ops, iova);
1361         }
1362
1363         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1364
1365         return ret;
1366 }
1367
1368 static bool arm_smmu_capable(enum iommu_cap cap)
1369 {
1370         switch (cap) {
1371         case IOMMU_CAP_CACHE_COHERENCY:
1372                 /*
1373                  * Return true here as the SMMU can always send out coherent
1374                  * requests.
1375                  */
1376                 return true;
1377         case IOMMU_CAP_INTR_REMAP:
1378                 return true; /* MSIs are just memory writes */
1379         case IOMMU_CAP_NOEXEC:
1380                 return true;
1381         default:
1382                 return false;
1383         }
1384 }
1385
1386 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
1387 {
1388         *((u16 *)data) = alias;
1389         return 0; /* Continue walking */
1390 }
1391
1392 static void __arm_smmu_release_pci_iommudata(void *data)
1393 {
1394         kfree(data);
1395 }
1396
1397 static int arm_smmu_init_pci_device(struct pci_dev *pdev,
1398                                     struct iommu_group *group)
1399 {
1400         struct arm_smmu_master_cfg *cfg;
1401         u16 sid;
1402         int i;
1403
1404         cfg = iommu_group_get_iommudata(group);
1405         if (!cfg) {
1406                 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
1407                 if (!cfg)
1408                         return -ENOMEM;
1409
1410                 iommu_group_set_iommudata(group, cfg,
1411                                           __arm_smmu_release_pci_iommudata);
1412         }
1413
1414         if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
1415                 return -ENOSPC;
1416
1417         /*
1418          * Assume Stream ID == Requester ID for now.
1419          * We need a way to describe the ID mappings in FDT.
1420          */
1421         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1422         for (i = 0; i < cfg->num_streamids; ++i)
1423                 if (cfg->streamids[i] == sid)
1424                         break;
1425
1426         /* Avoid duplicate SIDs, as this can lead to SMR conflicts */
1427         if (i == cfg->num_streamids)
1428                 cfg->streamids[cfg->num_streamids++] = sid;
1429
1430         return 0;
1431 }
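
/*
 * Illustrative only: given the Stream ID == Requester ID assumption above,
 * the u16 that __arm_smmu_get_pci_sid() collects for a device with no DMA
 * aliases is just the bus/devfn pair, i.e. what PCI_DEVID() computes
 * (example_pci_sid is hypothetical):
 */
#if 0
static u16 example_pci_sid(struct pci_dev *pdev)
{
	/* Requester ID: bus number in bits [15:8], devfn in bits [7:0] */
	return PCI_DEVID(pdev->bus->number, pdev->devfn);
}
#endif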
1432
1433 static int arm_smmu_init_platform_device(struct device *dev,
1434                                          struct iommu_group *group)
1435 {
1436         struct arm_smmu_device *smmu = find_smmu_for_device(dev);
1437         struct arm_smmu_master *master;
1438
1439         if (!smmu)
1440                 return -ENODEV;
1441
1442         master = find_smmu_master(smmu, dev->of_node);
1443         if (!master)
1444                 return -ENODEV;
1445
1446         iommu_group_set_iommudata(group, &master->cfg, NULL);
1447
1448         return 0;
1449 }
1450
1451 static int arm_smmu_add_device(struct device *dev)
1452 {
1453         struct iommu_group *group;
1454
1455         group = iommu_group_get_for_dev(dev);
1456         if (IS_ERR(group))
1457                 return PTR_ERR(group);
1458
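	/* The IOMMU core keeps its own reference to the group; drop ours. */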
1459         iommu_group_put(group);
1460         return 0;
1461 }
1462
1463 static void arm_smmu_remove_device(struct device *dev)
1464 {
1465         iommu_group_remove_device(dev);
1466 }
1467
1468 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1469 {
1470         struct iommu_group *group;
1471         int ret;
1472
1473         if (dev_is_pci(dev))
1474                 group = pci_device_group(dev);
1475         else
1476                 group = generic_device_group(dev);
1477
1478         if (IS_ERR(group))
1479                 return group;
1480
1481         if (dev_is_pci(dev))
1482                 ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
1483         else
1484                 ret = arm_smmu_init_platform_device(dev, group);
1485
1486         if (ret) {
1487                 iommu_group_put(group);
1488                 group = ERR_PTR(ret);
1489         }
1490
1491         return group;
1492 }
1493
1494 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1495                                     enum iommu_attr attr, void *data)
1496 {
1497         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1498
1499         switch (attr) {
1500         case DOMAIN_ATTR_NESTING:
1501                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1502                 return 0;
1503         default:
1504                 return -ENODEV;
1505         }
1506 }
1507
1508 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1509                                     enum iommu_attr attr, void *data)
1510 {
1511         int ret = 0;
1512         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513
1514         mutex_lock(&smmu_domain->init_mutex);
1515
1516         switch (attr) {
1517         case DOMAIN_ATTR_NESTING:
1518                 if (smmu_domain->smmu) {
1519                         ret = -EPERM;
1520                         goto out_unlock;
1521                 }
1522
1523                 if (*(int *)data)
1524                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1525                 else
1526                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1527
1528                 break;
1529         default:
1530                 ret = -ENODEV;
1531         }
1532
1533 out_unlock:
1534         mutex_unlock(&smmu_domain->init_mutex);
1535         return ret;
1536 }
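
/*
 * Illustrative only: DOMAIN_ATTR_NESTING must be set via the core API before
 * the domain is attached to an SMMU (i.e. while smmu_domain->smmu is still
 * NULL), as callers such as VFIO do when they want nested translation
 * (example_enable_nesting is hypothetical):
 */
#if 0
static int example_enable_nesting(struct iommu_domain *domain)
{
	int nesting = 1;

	return iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
}
#endif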
1537
1538 static struct iommu_ops arm_smmu_ops = {
1539         .capable                = arm_smmu_capable,
1540         .domain_alloc           = arm_smmu_domain_alloc,
1541         .domain_free            = arm_smmu_domain_free,
1542         .attach_dev             = arm_smmu_attach_dev,
1543         .map                    = arm_smmu_map,
1544         .unmap                  = arm_smmu_unmap,
1545         .map_sg                 = default_iommu_map_sg,
1546         .iova_to_phys           = arm_smmu_iova_to_phys,
1547         .add_device             = arm_smmu_add_device,
1548         .remove_device          = arm_smmu_remove_device,
1549         .device_group           = arm_smmu_device_group,
1550         .domain_get_attr        = arm_smmu_domain_get_attr,
1551         .domain_set_attr        = arm_smmu_domain_set_attr,
1552         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1553 };
1554
1555 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1556 {
1557         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1558         void __iomem *cb_base;
1559         int i = 0;
1560         u32 reg, major;
1561
1562         /* clear global FSR */
1563         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1564         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1565
1566         /* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
1567         reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
1568         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1569                 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
1570                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
1571         }
1572
1573         /*
1574          * Before clearing ARM_MMU500_ACTLR_CPRE, we need to clear the
1575          * CACHE_LOCK bit of the ACR first. Note that the CACHE_LOCK bit
1576          * is only present in MMU-500r2 onwards.
1577          */
1578         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1579         major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1580         if ((smmu->model == ARM_MMU500) && (major >= 2)) {
1581                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1582                 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1583                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1584         }
1585
1586         /* Make sure all context banks are disabled and clear CB_FSR  */
1587         for (i = 0; i < smmu->num_context_banks; ++i) {
1588                 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
1589                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1590                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1591                 /*
1592                  * Disable MMU-500's not-particularly-beneficial next-page
1593                  * prefetcher for the sake of errata #841119 and #826419.
1594                  */
1595                 if (smmu->model == ARM_MMU500) {
1596                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1597                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1598                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1599                 }
1600         }
1601
1602         /* Invalidate the TLB, just in case */
1603         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1604         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1605
1606         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1607
1608         /* Enable fault reporting */
1609         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1610
1611         /* Disable TLB broadcasting. */
1612         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1613
1614         /* Enable client access, handling unmatched streams as appropriate */
1615         reg &= ~sCR0_CLIENTPD;
1616         if (disable_bypass)
1617                 reg |= sCR0_USFCFG;
1618         else
1619                 reg &= ~sCR0_USFCFG;
1620
1621         /* Disable forced broadcasting */
1622         reg &= ~sCR0_FB;
1623
1624         /* Don't upgrade barriers */
1625         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1626
1627         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1628                 reg |= sCR0_VMID16EN;
1629
1630         /* Push the button */
1631         __arm_smmu_tlb_sync(smmu);
1632         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1633 }
1634
1635 static int arm_smmu_id_size_to_bits(int size)
1636 {
1637         switch (size) {
1638         case 0:
1639                 return 32;
1640         case 1:
1641                 return 36;
1642         case 2:
1643                 return 40;
1644         case 3:
1645                 return 42;
1646         case 4:
1647                 return 44;
1648         case 5:
1649         default:
1650                 return 48;
1651         }
1652 }
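
/*
 * Illustrative only: a size field of 2 (as found in, say, ID2.IAS) therefore
 * yields a 40-bit address space, and a field of 5 a 48-bit one.
 */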
1653
1654 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1655 {
1656         unsigned long size;
1657         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1658         u32 id;
1659         bool cttw_dt, cttw_reg;
1660
1661         dev_notice(smmu->dev, "probing hardware configuration...\n");
1662         dev_notice(smmu->dev, "SMMUv%d with:\n",
1663                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1664
1665         /* ID0 */
1666         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1667
1668         /* Restrict available stages based on module parameter */
1669         if (force_stage == 1)
1670                 id &= ~(ID0_S2TS | ID0_NTS);
1671         else if (force_stage == 2)
1672                 id &= ~(ID0_S1TS | ID0_NTS);
1673
1674         if (id & ID0_S1TS) {
1675                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1676                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1677         }
1678
1679         if (id & ID0_S2TS) {
1680                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1681                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1682         }
1683
1684         if (id & ID0_NTS) {
1685                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1686                 dev_notice(smmu->dev, "\tnested translation\n");
1687         }
1688
1689         if (!(smmu->features &
1690                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1691                 dev_err(smmu->dev, "\tno translation support!\n");
1692                 return -ENODEV;
1693         }
1694
1695         if ((id & ID0_S1TS) &&
1696                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1697                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1698                 dev_notice(smmu->dev, "\taddress translation ops\n");
1699         }
1700
1701         /*
1702          * In order for DMA API calls to work properly, we must defer to what
1703          * the DT says about coherency, regardless of what the hardware claims.
1704          * Fortunately, this also opens up a workaround for systems where the
1705          * ID register value has ended up configured incorrectly.
1706          */
1707         cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
1708         cttw_reg = !!(id & ID0_CTTW);
1709         if (cttw_dt)
1710                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1711         if (cttw_dt || cttw_reg)
1712                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1713                            cttw_dt ? "" : "non-");
1714         if (cttw_dt != cttw_reg)
1715                 dev_notice(smmu->dev,
1716                            "\t(IDR0.CTTW overridden by dma-coherent property)\n");
1717
1718         /* Max. number of entries we have for stream matching/indexing */
1719         size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1720         smmu->streamid_mask = size - 1;
1721         if (id & ID0_SMS) {
1722                 u32 smr;
1723
1724                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1725                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1726                 if (size == 0) {
1727                         dev_err(smmu->dev,
1728                                 "stream-matching supported, but no SMRs present!\n");
1729                         return -ENODEV;
1730                 }
1731
1732                 /*
1733                  * SMR.ID bits may not be preserved if the corresponding MASK
1734                  * bits are set, so check each one separately. We can reject
1735                  * masters later if they try to claim IDs outside these masks.
1736                  */
1737                 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1738                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1739                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1740                 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1741
1742                 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1743                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1744                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1745                 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1746
1747                 dev_notice(smmu->dev,
1748                            "\tstream matching with %lu register groups, mask 0x%x\n",
1749                            size, smmu->smr_mask_mask);
1750         }
1751         smmu->num_mapping_groups = size;
1752
1753         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1754                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1755                 if (!(id & ID0_PTFS_NO_AARCH32S))
1756                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1757         }
1758
1759         /* ID1 */
1760         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1761         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1762
1763         /* Check for size mismatch of SMMU address space from mapped region */
1764         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1765         size *= 2 << smmu->pgshift;
1766         if (smmu->size != size)
1767                 dev_warn(smmu->dev,
1768                         "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
1769                         size, smmu->size);
1770
1771         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1772         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1773         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1774                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1775                 return -ENODEV;
1776         }
1777         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1778                    smmu->num_context_banks, smmu->num_s2_context_banks);
1779         /*
1780          * Cavium CN88xx erratum #27704.
1781          * Ensure ASID and VMID allocation is unique across all SMMUs in
1782          * the system.
1783          */
1784         if (smmu->model == CAVIUM_SMMUV2) {
1785                 smmu->cavium_id_base =
1786                         atomic_add_return(smmu->num_context_banks,
1787                                           &cavium_smmu_context_count);
1788                 smmu->cavium_id_base -= smmu->num_context_banks;
1789         }
1790
1791         /* ID2 */
1792         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1793         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1794         smmu->ipa_size = size;
1795
1796         /* The output mask is also applied for bypass */
1797         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1798         smmu->pa_size = size;
1799
1800         if (id & ID2_VMID16)
1801                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1802
1803         /*
1804          * What the page table walker can address actually depends on which
1805          * descriptor format is in use, but since a) we don't know that yet,
1806          * and b) it can vary per context bank, this will have to do...
1807          */
1808         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1809                 dev_warn(smmu->dev,
1810                          "failed to set DMA mask for table walker\n");
1811
1812         if (smmu->version < ARM_SMMU_V2) {
1813                 smmu->va_size = smmu->ipa_size;
1814                 if (smmu->version == ARM_SMMU_V1_64K)
1815                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1816         } else {
1817                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1818                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1819                 if (id & ID2_PTFS_4K)
1820                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1821                 if (id & ID2_PTFS_16K)
1822                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1823                 if (id & ID2_PTFS_64K)
1824                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1825         }
1826
1827         /* Now we've corralled the various formats, what'll it do? */
1828         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1829                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1830         if (smmu->features &
1831             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1832                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1833         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1834                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1835         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1836                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1837
1838         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1839                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1840         else
1841                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1842         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1843                    smmu->pgsize_bitmap);
1844
1845
1846         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1847                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1848                            smmu->va_size, smmu->ipa_size);
1849
1850         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1851                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1852                            smmu->ipa_size, smmu->pa_size);
1853
1854         return 0;
1855 }
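
/*
 * Illustrative only: a worked example of the ID1-based size check above.
 * With 4KB pages (pgshift == 12) and a hypothetical ID1.NUMPAGENDXB of 3,
 * each half of the register map (global space and context banks) holds
 * 2^(3+1) = 16 pages, so the expected region size is 16 * 2 * 4KB = 128KB:
 */
#if 0
static unsigned long example_smmu_region_size(void)
{
	unsigned long pgshift = 12;			/* ID1.PAGESIZE == 0 */
	unsigned long numpagendxb = 3;			/* hypothetical */
	unsigned long size = 1 << (numpagendxb + 1);	/* 16 pages per half */

	return size * (2 << pgshift);			/* 131072 == 128KB */
}
#endif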
1856
1857 struct arm_smmu_match_data {
1858         enum arm_smmu_arch_version version;
1859         enum arm_smmu_implementation model;
1860 };
1861
1862 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1863 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1864
1865 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1866 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1867 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1868 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1869 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1870
1871 static const struct of_device_id arm_smmu_of_match[] = {
1872         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1873         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1874         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1875         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1876         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1877         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1878         { },
1879 };
1880 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
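
/*
 * Illustrative only: a hypothetical flattened device-tree fragment using the
 * legacy "mmu-masters" binding that arm_smmu_device_dt_probe() parses below
 * (node names, addresses and Stream ID values are made up):
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0x2b400000 0x10000>;
 *		#global-interrupts = <1>;
 *		interrupts = <0 32 4>,
 *			     <0 33 4>;
 *		mmu-masters = <&dma0 0xd01>,
 *			      <&pcie 0x100>;
 *	};
 *
 * Each master node referenced from "mmu-masters" carries a
 * "#stream-id-cells" property telling of_for_each_phandle() how many Stream
 * ID cells follow its phandle.
 */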
1881
1882 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
1883 {
1884         const struct of_device_id *of_id;
1885         const struct arm_smmu_match_data *data;
1886         struct resource *res;
1887         struct arm_smmu_device *smmu;
1888         struct device *dev = &pdev->dev;
1889         struct rb_node *node;
1890         struct of_phandle_iterator it;
1891         struct arm_smmu_phandle_args *masterspec;
1892         int num_irqs, i, err;
1893
1894         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
1895         if (!smmu) {
1896                 dev_err(dev, "failed to allocate arm_smmu_device\n");
1897                 return -ENOMEM;
1898         }
1899         smmu->dev = dev;
1900
1901         of_id = of_match_node(arm_smmu_of_match, dev->of_node);
1902         data = of_id->data;
1903         smmu->version = data->version;
1904         smmu->model = data->model;
1905
1906         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1907         smmu->base = devm_ioremap_resource(dev, res);
1908         if (IS_ERR(smmu->base))
1909                 return PTR_ERR(smmu->base);
1910         smmu->size = resource_size(res);
1911
1912         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1913                                  &smmu->num_global_irqs)) {
1914                 dev_err(dev, "missing #global-interrupts property\n");
1915                 return -ENODEV;
1916         }
1917
1918         num_irqs = 0;
1919         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
1920                 num_irqs++;
1921                 if (num_irqs > smmu->num_global_irqs)
1922                         smmu->num_context_irqs++;
1923         }
1924
1925         if (!smmu->num_context_irqs) {
1926                 dev_err(dev, "found %d interrupts but expected at least %d\n",
1927                         num_irqs, smmu->num_global_irqs + 1);
1928                 return -ENODEV;
1929         }
1930
1931         smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
1932                                   GFP_KERNEL);
1933         if (!smmu->irqs) {
1934                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
1935                 return -ENOMEM;
1936         }
1937
1938         for (i = 0; i < num_irqs; ++i) {
1939                 int irq = platform_get_irq(pdev, i);
1940
1941                 if (irq < 0) {
1942                         dev_err(dev, "failed to get irq index %d\n", i);
1943                         return -ENODEV;
1944                 }
1945                 smmu->irqs[i] = irq;
1946         }
1947
1948         err = arm_smmu_device_cfg_probe(smmu);
1949         if (err)
1950                 return err;
1951
1952         i = 0;
1953         smmu->masters = RB_ROOT;
1954
1955         err = -ENOMEM;
1956         /* No need to zero the memory for masterspec */
1957         masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL);
1958         if (!masterspec)
1959                 goto out_put_masters;
1960
1961         of_for_each_phandle(&it, err, dev->of_node,
1962                             "mmu-masters", "#stream-id-cells", 0) {
1963                 int count = of_phandle_iterator_args(&it, masterspec->args,
1964                                                      MAX_MASTER_STREAMIDS);
1965                 masterspec->np          = of_node_get(it.node);
1966                 masterspec->args_count  = count;
1967
1968                 err = register_smmu_master(smmu, dev, masterspec);
1969                 if (err) {
1970                         dev_err(dev, "failed to add master %s\n",
1971                                 masterspec->np->name);
1972                         kfree(masterspec);
1973                         goto out_put_masters;
1974                 }
1975
1976                 i++;
1977         }
1978
1979         dev_notice(dev, "registered %d master devices\n", i);
1980
1981         kfree(masterspec);
1982
1983         parse_driver_options(smmu);
1984
1985         if (smmu->version == ARM_SMMU_V2 &&
1986             smmu->num_context_banks != smmu->num_context_irqs) {
1987                 dev_err(dev,
1988                         "found only %d context interrupt(s) but %d required\n",
1989                         smmu->num_context_irqs, smmu->num_context_banks);
1990                 err = -ENODEV;
1991                 goto out_put_masters;
1992         }
1993
1994         for (i = 0; i < smmu->num_global_irqs; ++i) {
1995                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
1996                                        arm_smmu_global_fault,
1997                                        IRQF_SHARED,
1998                                        "arm-smmu global fault",
1999                                        smmu);
2000                 if (err) {
2001                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
2002                                 i, smmu->irqs[i]);
2003                         goto out_put_masters;
2004                 }
2005         }
2006
2007         INIT_LIST_HEAD(&smmu->list);
2008         spin_lock(&arm_smmu_devices_lock);
2009         list_add(&smmu->list, &arm_smmu_devices);
2010         spin_unlock(&arm_smmu_devices_lock);
2011
2012         arm_smmu_device_reset(smmu);
2013         return 0;
2014
2015 out_put_masters:
2016         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
2017                 struct arm_smmu_master *master
2018                         = container_of(node, struct arm_smmu_master, node);
2019                 of_node_put(master->of_node);
2020         }
2021
2022         return err;
2023 }
2024
2025 static int arm_smmu_device_remove(struct platform_device *pdev)
2026 {
2027         struct device *dev = &pdev->dev;
2028         struct arm_smmu_device *curr, *smmu = NULL;
2029         struct rb_node *node;
2030
2031         spin_lock(&arm_smmu_devices_lock);
2032         list_for_each_entry(curr, &arm_smmu_devices, list) {
2033                 if (curr->dev == dev) {
2034                         smmu = curr;
2035                         list_del(&smmu->list);
2036                         break;
2037                 }
2038         }
2039         spin_unlock(&arm_smmu_devices_lock);
2040
2041         if (!smmu)
2042                 return -ENODEV;
2043
2044         for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
2045                 struct arm_smmu_master *master
2046                         = container_of(node, struct arm_smmu_master, node);
2047                 of_node_put(master->of_node);
2048         }
2049
2050         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2051                 dev_err(dev, "removing device with active domains!\n");
2052
2053         /* Turn the thing off */
2054         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2055         return 0;
2056 }
2057
2058 static struct platform_driver arm_smmu_driver = {
2059         .driver = {
2060                 .name           = "arm-smmu",
2061                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2062         },
2063         .probe  = arm_smmu_device_dt_probe,
2064         .remove = arm_smmu_device_remove,
2065 };
2066
2067 static int __init arm_smmu_init(void)
2068 {
2069         struct device_node *np;
2070         int ret;
2071
2072         /*
2073          * Play nice with systems that don't have an ARM SMMU by checking that
2074          * an ARM SMMU exists in the system before proceeding with the driver
2075          * and IOMMU bus operation registration.
2076          */
2077         np = of_find_matching_node(NULL, arm_smmu_of_match);
2078         if (!np)
2079                 return 0;
2080
2081         of_node_put(np);
2082
2083         ret = platform_driver_register(&arm_smmu_driver);
2084         if (ret)
2085                 return ret;
2086
2087         /* Oh, for a proper bus abstraction */
2088         if (!iommu_present(&platform_bus_type))
2089                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2090
2091 #ifdef CONFIG_ARM_AMBA
2092         if (!iommu_present(&amba_bustype))
2093                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2094 #endif
2095
2096 #ifdef CONFIG_PCI
2097         if (!iommu_present(&pci_bus_type)) {
2098                 pci_request_acs();
2099                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2100         }
2101 #endif
2102
2103         return 0;
2104 }
2105
2106 static void __exit arm_smmu_exit(void)
2107 {
2108         return platform_driver_unregister(&arm_smmu_driver);
2109 }
2110
2111 subsys_initcall(arm_smmu_init);
2112 module_exit(arm_smmu_exit);
2113
2114 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2115 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2116 MODULE_LICENSE("GPL v2");