1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/iommu.h>
22 #include <linux/iopoll.h>
23 #include <linux/module.h>
24 #include <linux/msi.h>
25 #include <linux/of.h>
26 #include <linux/of_address.h>
27 #include <linux/of_iommu.h>
28 #include <linux/of_platform.h>
29 #include <linux/pci.h>
30 #include <linux/platform_device.h>
31
32 #include <linux/amba/bus.h>
33
34 #include "io-pgtable.h"
35
36 /* MMIO registers */
37 #define ARM_SMMU_IDR0                   0x0
38 #define IDR0_ST_LVL                     GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL                1
40 #define IDR0_STALL_MODEL                GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL          0
42 #define IDR0_STALL_MODEL_FORCE          2
43 #define IDR0_TTENDIAN                   GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED             0
45 #define IDR0_TTENDIAN_LE                2
46 #define IDR0_TTENDIAN_BE                3
47 #define IDR0_CD2L                       (1 << 19)
48 #define IDR0_VMID16                     (1 << 18)
49 #define IDR0_PRI                        (1 << 16)
50 #define IDR0_SEV                        (1 << 14)
51 #define IDR0_MSI                        (1 << 13)
52 #define IDR0_ASID16                     (1 << 12)
53 #define IDR0_ATS                        (1 << 10)
54 #define IDR0_HYP                        (1 << 9)
55 #define IDR0_COHACC                     (1 << 4)
56 #define IDR0_TTF                        GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64                2
58 #define IDR0_TTF_AARCH32_64             3
59 #define IDR0_S1P                        (1 << 1)
60 #define IDR0_S2P                        (1 << 0)
61
62 #define ARM_SMMU_IDR1                   0x4
63 #define IDR1_TABLES_PRESET              (1 << 30)
64 #define IDR1_QUEUES_PRESET              (1 << 29)
65 #define IDR1_REL                        (1 << 28)
66 #define IDR1_CMDQS                      GENMASK(25, 21)
67 #define IDR1_EVTQS                      GENMASK(20, 16)
68 #define IDR1_PRIQS                      GENMASK(15, 11)
69 #define IDR1_SSIDSIZE                   GENMASK(10, 6)
70 #define IDR1_SIDSIZE                    GENMASK(5, 0)
71
72 #define ARM_SMMU_IDR5                   0x14
73 #define IDR5_STALL_MAX                  GENMASK(31, 16)
74 #define IDR5_GRAN64K                    (1 << 6)
75 #define IDR5_GRAN16K                    (1 << 5)
76 #define IDR5_GRAN4K                     (1 << 4)
77 #define IDR5_OAS                        GENMASK(2, 0)
78 #define IDR5_OAS_32_BIT                 0
79 #define IDR5_OAS_36_BIT                 1
80 #define IDR5_OAS_40_BIT                 2
81 #define IDR5_OAS_42_BIT                 3
82 #define IDR5_OAS_44_BIT                 4
83 #define IDR5_OAS_48_BIT                 5
84 #define IDR5_OAS_52_BIT                 6
85 #define IDR5_VAX                        GENMASK(11, 10)
86 #define IDR5_VAX_52_BIT                 1
87
88 #define ARM_SMMU_CR0                    0x20
89 #define CR0_CMDQEN                      (1 << 3)
90 #define CR0_EVTQEN                      (1 << 2)
91 #define CR0_PRIQEN                      (1 << 1)
92 #define CR0_SMMUEN                      (1 << 0)
93
94 #define ARM_SMMU_CR0ACK                 0x24
95
96 #define ARM_SMMU_CR1                    0x28
97 #define CR1_TABLE_SH                    GENMASK(11, 10)
98 #define CR1_TABLE_OC                    GENMASK(9, 8)
99 #define CR1_TABLE_IC                    GENMASK(7, 6)
100 #define CR1_QUEUE_SH                    GENMASK(5, 4)
101 #define CR1_QUEUE_OC                    GENMASK(3, 2)
102 #define CR1_QUEUE_IC                    GENMASK(1, 0)
103 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
104 #define CR1_CACHE_NC                    0
105 #define CR1_CACHE_WB                    1
106 #define CR1_CACHE_WT                    2
107
108 #define ARM_SMMU_CR2                    0x2c
109 #define CR2_PTM                         (1 << 2)
110 #define CR2_RECINVSID                   (1 << 1)
111 #define CR2_E2H                         (1 << 0)
112
113 #define ARM_SMMU_GBPA                   0x44
114 #define GBPA_UPDATE                     (1 << 31)
115 #define GBPA_ABORT                      (1 << 20)
116
117 #define ARM_SMMU_IRQ_CTRL               0x50
118 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
119 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
120 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
121
122 #define ARM_SMMU_IRQ_CTRLACK            0x54
123
124 #define ARM_SMMU_GERROR                 0x60
125 #define GERROR_SFM_ERR                  (1 << 8)
126 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
127 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
128 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
129 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
130 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
131 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
132 #define GERROR_CMDQ_ERR                 (1 << 0)
133 #define GERROR_ERR_MASK                 0xfd
134
135 #define ARM_SMMU_GERRORN                0x64
136
137 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
138 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
139 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
140
141 #define ARM_SMMU_STRTAB_BASE            0x80
142 #define STRTAB_BASE_RA                  (1UL << 62)
143 #define STRTAB_BASE_ADDR_MASK           GENMASK_ULL(51, 6)
144
145 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
146 #define STRTAB_BASE_CFG_FMT             GENMASK(17, 16)
147 #define STRTAB_BASE_CFG_FMT_LINEAR      0
148 #define STRTAB_BASE_CFG_FMT_2LVL        1
149 #define STRTAB_BASE_CFG_SPLIT           GENMASK(10, 6)
150 #define STRTAB_BASE_CFG_LOG2SIZE        GENMASK(5, 0)
151
152 #define ARM_SMMU_CMDQ_BASE              0x90
153 #define ARM_SMMU_CMDQ_PROD              0x98
154 #define ARM_SMMU_CMDQ_CONS              0x9c
155
156 #define ARM_SMMU_EVTQ_BASE              0xa0
157 #define ARM_SMMU_EVTQ_PROD              0x100a8
158 #define ARM_SMMU_EVTQ_CONS              0x100ac
159 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
160 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
161 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
162
163 #define ARM_SMMU_PRIQ_BASE              0xc0
164 #define ARM_SMMU_PRIQ_PROD              0x100c8
165 #define ARM_SMMU_PRIQ_CONS              0x100cc
166 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
167 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
168 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
169
170 /* Common MSI config fields */
171 #define MSI_CFG0_ADDR_MASK              GENMASK_ULL(51, 2)
172 #define MSI_CFG2_SH                     GENMASK(5, 4)
173 #define MSI_CFG2_MEMATTR                GENMASK(3, 0)
174
175 /* Common memory attribute values */
176 #define ARM_SMMU_SH_NSH                 0
177 #define ARM_SMMU_SH_OSH                 2
178 #define ARM_SMMU_SH_ISH                 3
179 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE   0x1
180 #define ARM_SMMU_MEMATTR_OIWB           0xf
181
182 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
183 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
184 #define Q_OVERFLOW_FLAG                 (1 << 31)
185 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
186 #define Q_ENT(q, p)                     ((q)->base +                    \
187                                          Q_IDX(q, p) * (q)->ent_dwords)
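/*
 * Queue pointer encoding, e.g. for max_n_shift == 8 (256 entries): bits [7:0]
 * of prod/cons hold the ring index, bit 8 is the wrap flag which toggles on
 * each wrap-around, and bit 31 is the overflow flag (see queue_sync_prod()).
 * queue_full() and queue_empty() below compare the index and wrap bits.
 */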
188
189 #define Q_BASE_RWA                      (1UL << 62)
190 #define Q_BASE_ADDR_MASK                GENMASK_ULL(51, 5)
191 #define Q_BASE_LOG2SIZE                 GENMASK(4, 0)
192
193 /*
194  * Stream table.
195  *
196  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
197  * 2lvl: 128k L1 entries,
198  *       256 lazy entries per table (each table covers a PCI bus)
199  */
200 #define STRTAB_L1_SZ_SHIFT              20
201 #define STRTAB_SPLIT                    8
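/*
 * For example: with STRTAB_SPLIT == 8, each L1 descriptor covers 256 STEs of
 * 8 dwords (64 bytes) each, i.e. a 16KiB L2 table allocated lazily by
 * arm_smmu_init_l2_strtab(). The 128k L1 entries above correspond to
 * STRTAB_L1_SZ_SHIFT: 2^20 bytes of single-dword L1 descriptors.
 */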
202
203 #define STRTAB_L1_DESC_DWORDS           1
204 #define STRTAB_L1_DESC_SPAN             GENMASK_ULL(4, 0)
205 #define STRTAB_L1_DESC_L2PTR_MASK       GENMASK_ULL(51, 6)
206
207 #define STRTAB_STE_DWORDS               8
208 #define STRTAB_STE_0_V                  (1UL << 0)
209 #define STRTAB_STE_0_CFG                GENMASK_ULL(3, 1)
210 #define STRTAB_STE_0_CFG_ABORT          0
211 #define STRTAB_STE_0_CFG_BYPASS         4
212 #define STRTAB_STE_0_CFG_S1_TRANS       5
213 #define STRTAB_STE_0_CFG_S2_TRANS       6
214
215 #define STRTAB_STE_0_S1FMT              GENMASK_ULL(5, 4)
216 #define STRTAB_STE_0_S1FMT_LINEAR       0
217 #define STRTAB_STE_0_S1CTXPTR_MASK      GENMASK_ULL(51, 6)
218 #define STRTAB_STE_0_S1CDMAX            GENMASK_ULL(63, 59)
219
220 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
221 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
222 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
223 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
224 #define STRTAB_STE_1_S1CIR              GENMASK_ULL(3, 2)
225 #define STRTAB_STE_1_S1COR              GENMASK_ULL(5, 4)
226 #define STRTAB_STE_1_S1CSH              GENMASK_ULL(7, 6)
227
228 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
229
230 #define STRTAB_STE_1_EATS               GENMASK_ULL(29, 28)
231 #define STRTAB_STE_1_EATS_ABT           0UL
232 #define STRTAB_STE_1_EATS_TRANS         1UL
233 #define STRTAB_STE_1_EATS_S1CHK         2UL
234
235 #define STRTAB_STE_1_STRW               GENMASK_ULL(31, 30)
236 #define STRTAB_STE_1_STRW_NSEL1         0UL
237 #define STRTAB_STE_1_STRW_EL2           2UL
238
239 #define STRTAB_STE_1_SHCFG              GENMASK_ULL(45, 44)
240 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
241
242 #define STRTAB_STE_2_S2VMID             GENMASK_ULL(15, 0)
243 #define STRTAB_STE_2_VTCR               GENMASK_ULL(50, 32)
244 #define STRTAB_STE_2_S2AA64             (1UL << 51)
245 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
246 #define STRTAB_STE_2_S2PTW              (1UL << 54)
247 #define STRTAB_STE_2_S2R                (1UL << 58)
248
249 #define STRTAB_STE_3_S2TTB_MASK         GENMASK_ULL(51, 4)
250
251 /* Context descriptor (stage-1 only) */
252 #define CTXDESC_CD_DWORDS               8
253 #define CTXDESC_CD_0_TCR_T0SZ           GENMASK_ULL(5, 0)
254 #define ARM64_TCR_T0SZ                  GENMASK_ULL(5, 0)
255 #define CTXDESC_CD_0_TCR_TG0            GENMASK_ULL(7, 6)
256 #define ARM64_TCR_TG0                   GENMASK_ULL(15, 14)
257 #define CTXDESC_CD_0_TCR_IRGN0          GENMASK_ULL(9, 8)
258 #define ARM64_TCR_IRGN0                 GENMASK_ULL(9, 8)
259 #define CTXDESC_CD_0_TCR_ORGN0          GENMASK_ULL(11, 10)
260 #define ARM64_TCR_ORGN0                 GENMASK_ULL(11, 10)
261 #define CTXDESC_CD_0_TCR_SH0            GENMASK_ULL(13, 12)
262 #define ARM64_TCR_SH0                   GENMASK_ULL(13, 12)
263 #define CTXDESC_CD_0_TCR_EPD0           (1ULL << 14)
264 #define ARM64_TCR_EPD0                  (1ULL << 7)
265 #define CTXDESC_CD_0_TCR_EPD1           (1ULL << 30)
266 #define ARM64_TCR_EPD1                  (1ULL << 23)
267
268 #define CTXDESC_CD_0_ENDI               (1UL << 15)
269 #define CTXDESC_CD_0_V                  (1UL << 31)
270
271 #define CTXDESC_CD_0_TCR_IPS            GENMASK_ULL(34, 32)
272 #define ARM64_TCR_IPS                   GENMASK_ULL(34, 32)
273 #define CTXDESC_CD_0_TCR_TBI0           (1ULL << 38)
274 #define ARM64_TCR_TBI0                  (1ULL << 37)
275
276 #define CTXDESC_CD_0_AA64               (1UL << 41)
277 #define CTXDESC_CD_0_S                  (1UL << 44)
278 #define CTXDESC_CD_0_R                  (1UL << 45)
279 #define CTXDESC_CD_0_A                  (1UL << 46)
280 #define CTXDESC_CD_0_ASET               (1UL << 47)
281 #define CTXDESC_CD_0_ASID               GENMASK_ULL(63, 48)
282
283 #define CTXDESC_CD_1_TTB0_MASK          GENMASK_ULL(51, 4)
284
285 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
286 #define ARM_SMMU_TCR2CD(tcr, fld)       FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
287                                         FIELD_GET(ARM64_TCR_##fld, tcr))
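/*
 * e.g. ARM_SMMU_TCR2CD(tcr, TG0) moves the TG0 field from bits [15:14] of the
 * CPU's TCR value into bits [7:6] of context descriptor word 0.
 */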
288
289 /* Command queue */
290 #define CMDQ_ENT_DWORDS                 2
291 #define CMDQ_MAX_SZ_SHIFT               8
292
293 #define CMDQ_CONS_ERR                   GENMASK(30, 24)
294 #define CMDQ_ERR_CERROR_NONE_IDX        0
295 #define CMDQ_ERR_CERROR_ILL_IDX         1
296 #define CMDQ_ERR_CERROR_ABT_IDX         2
297
298 #define CMDQ_0_OP                       GENMASK_ULL(7, 0)
299 #define CMDQ_0_SSV                      (1UL << 11)
300
301 #define CMDQ_PREFETCH_0_SID             GENMASK_ULL(63, 32)
302 #define CMDQ_PREFETCH_1_SIZE            GENMASK_ULL(4, 0)
303 #define CMDQ_PREFETCH_1_ADDR_MASK       GENMASK_ULL(63, 12)
304
305 #define CMDQ_CFGI_0_SID                 GENMASK_ULL(63, 32)
306 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
307 #define CMDQ_CFGI_1_RANGE               GENMASK_ULL(4, 0)
308
309 #define CMDQ_TLBI_0_VMID                GENMASK_ULL(47, 32)
310 #define CMDQ_TLBI_0_ASID                GENMASK_ULL(63, 48)
311 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
312 #define CMDQ_TLBI_1_VA_MASK             GENMASK_ULL(63, 12)
313 #define CMDQ_TLBI_1_IPA_MASK            GENMASK_ULL(51, 12)
314
315 #define CMDQ_PRI_0_SSID                 GENMASK_ULL(31, 12)
316 #define CMDQ_PRI_0_SID                  GENMASK_ULL(63, 32)
317 #define CMDQ_PRI_1_GRPID                GENMASK_ULL(8, 0)
318 #define CMDQ_PRI_1_RESP                 GENMASK_ULL(13, 12)
319
320 #define CMDQ_SYNC_0_CS                  GENMASK_ULL(13, 12)
321 #define CMDQ_SYNC_0_CS_NONE             0
322 #define CMDQ_SYNC_0_CS_IRQ              1
323 #define CMDQ_SYNC_0_CS_SEV              2
324 #define CMDQ_SYNC_0_MSH                 GENMASK_ULL(23, 22)
325 #define CMDQ_SYNC_0_MSIATTR             GENMASK_ULL(27, 24)
326 #define CMDQ_SYNC_0_MSIDATA             GENMASK_ULL(63, 32)
327 #define CMDQ_SYNC_1_MSIADDR_MASK        GENMASK_ULL(51, 2)
328
329 /* Event queue */
330 #define EVTQ_ENT_DWORDS                 4
331 #define EVTQ_MAX_SZ_SHIFT               7
332
333 #define EVTQ_0_ID                       GENMASK_ULL(7, 0)
334
335 /* PRI queue */
336 #define PRIQ_ENT_DWORDS                 2
337 #define PRIQ_MAX_SZ_SHIFT               8
338
339 #define PRIQ_0_SID                      GENMASK_ULL(31, 0)
340 #define PRIQ_0_SSID                     GENMASK_ULL(51, 32)
341 #define PRIQ_0_PERM_PRIV                (1UL << 58)
342 #define PRIQ_0_PERM_EXEC                (1UL << 59)
343 #define PRIQ_0_PERM_READ                (1UL << 60)
344 #define PRIQ_0_PERM_WRITE               (1UL << 61)
345 #define PRIQ_0_PRG_LAST                 (1UL << 62)
346 #define PRIQ_0_SSID_V                   (1UL << 63)
347
348 #define PRIQ_1_PRG_IDX                  GENMASK_ULL(8, 0)
349 #define PRIQ_1_ADDR_MASK                GENMASK_ULL(63, 12)
350
351 /* High-level queue structures */
352 #define ARM_SMMU_POLL_TIMEOUT_US        100
353 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US   1000000 /* 1s! */
354 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT   10
355
356 #define MSI_IOVA_BASE                   0x8000000
357 #define MSI_IOVA_LENGTH                 0x100000
358
 359 static bool disable_bypass = true;
360 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
361 MODULE_PARM_DESC(disable_bypass,
362         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
363
364 enum pri_resp {
365         PRI_RESP_DENY = 0,
366         PRI_RESP_FAIL = 1,
367         PRI_RESP_SUCC = 2,
368 };
369
370 enum arm_smmu_msi_index {
371         EVTQ_MSI_INDEX,
372         GERROR_MSI_INDEX,
373         PRIQ_MSI_INDEX,
374         ARM_SMMU_MAX_MSIS,
375 };
376
377 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
378         [EVTQ_MSI_INDEX] = {
379                 ARM_SMMU_EVTQ_IRQ_CFG0,
380                 ARM_SMMU_EVTQ_IRQ_CFG1,
381                 ARM_SMMU_EVTQ_IRQ_CFG2,
382         },
383         [GERROR_MSI_INDEX] = {
384                 ARM_SMMU_GERROR_IRQ_CFG0,
385                 ARM_SMMU_GERROR_IRQ_CFG1,
386                 ARM_SMMU_GERROR_IRQ_CFG2,
387         },
388         [PRIQ_MSI_INDEX] = {
389                 ARM_SMMU_PRIQ_IRQ_CFG0,
390                 ARM_SMMU_PRIQ_IRQ_CFG1,
391                 ARM_SMMU_PRIQ_IRQ_CFG2,
392         },
393 };
394
395 struct arm_smmu_cmdq_ent {
396         /* Common fields */
397         u8                              opcode;
398         bool                            substream_valid;
399
400         /* Command-specific fields */
401         union {
402                 #define CMDQ_OP_PREFETCH_CFG    0x1
403                 struct {
404                         u32                     sid;
405                         u8                      size;
406                         u64                     addr;
407                 } prefetch;
408
409                 #define CMDQ_OP_CFGI_STE        0x3
410                 #define CMDQ_OP_CFGI_ALL        0x4
411                 struct {
412                         u32                     sid;
413                         union {
414                                 bool            leaf;
415                                 u8              span;
416                         };
417                 } cfgi;
418
419                 #define CMDQ_OP_TLBI_NH_ASID    0x11
420                 #define CMDQ_OP_TLBI_NH_VA      0x12
421                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
422                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
423                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
424                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
425                 struct {
426                         u16                     asid;
427                         u16                     vmid;
428                         bool                    leaf;
429                         u64                     addr;
430                 } tlbi;
431
432                 #define CMDQ_OP_PRI_RESP        0x41
433                 struct {
434                         u32                     sid;
435                         u32                     ssid;
436                         u16                     grpid;
437                         enum pri_resp           resp;
438                 } pri;
439
440                 #define CMDQ_OP_CMD_SYNC        0x46
441                 struct {
442                         u32                     msidata;
443                         u64                     msiaddr;
444                 } sync;
445         };
446 };
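/*
 * Commands are issued by filling in an arm_smmu_cmdq_ent and handing it to
 * arm_smmu_cmdq_issue_cmd(), which encodes it via arm_smmu_cmdq_build_cmd()
 * and inserts it into the command queue; arm_smmu_cmdq_issue_sync() then
 * waits for completion. A sketch, as done in arm_smmu_tlb_inv_context()
 * below:
 *
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode	= CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi	= { .asid = asid },
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 *	arm_smmu_cmdq_issue_sync(smmu);
 */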
447
448 struct arm_smmu_queue {
449         int                             irq; /* Wired interrupt */
450
451         __le64                          *base;
452         dma_addr_t                      base_dma;
453         u64                             q_base;
454
455         size_t                          ent_dwords;
456         u32                             max_n_shift;
457         u32                             prod;
458         u32                             cons;
459
460         u32 __iomem                     *prod_reg;
461         u32 __iomem                     *cons_reg;
462 };
463
464 struct arm_smmu_cmdq {
465         struct arm_smmu_queue           q;
466         spinlock_t                      lock;
467 };
468
469 struct arm_smmu_evtq {
470         struct arm_smmu_queue           q;
471         u32                             max_stalls;
472 };
473
474 struct arm_smmu_priq {
475         struct arm_smmu_queue           q;
476 };
477
478 /* High-level stream table and context descriptor structures */
479 struct arm_smmu_strtab_l1_desc {
480         u8                              span;
481
482         __le64                          *l2ptr;
483         dma_addr_t                      l2ptr_dma;
484 };
485
486 struct arm_smmu_s1_cfg {
487         __le64                          *cdptr;
488         dma_addr_t                      cdptr_dma;
489
490         struct arm_smmu_ctx_desc {
491                 u16     asid;
492                 u64     ttbr;
493                 u64     tcr;
494                 u64     mair;
495         }                               cd;
496 };
497
498 struct arm_smmu_s2_cfg {
499         u16                             vmid;
500         u64                             vttbr;
501         u64                             vtcr;
502 };
503
504 struct arm_smmu_strtab_ent {
505         /*
506          * An STE is "assigned" if the master emitting the corresponding SID
507          * is attached to a domain. The behaviour of an unassigned STE is
508          * determined by the disable_bypass parameter, whereas an assigned
509          * STE behaves according to s1_cfg/s2_cfg, which themselves are
510          * configured according to the domain type.
511          */
512         bool                            assigned;
513         struct arm_smmu_s1_cfg          *s1_cfg;
514         struct arm_smmu_s2_cfg          *s2_cfg;
515 };
516
517 struct arm_smmu_strtab_cfg {
518         __le64                          *strtab;
519         dma_addr_t                      strtab_dma;
520         struct arm_smmu_strtab_l1_desc  *l1_desc;
521         unsigned int                    num_l1_ents;
522
523         u64                             strtab_base;
524         u32                             strtab_base_cfg;
525 };
526
527 /* An SMMUv3 instance */
528 struct arm_smmu_device {
529         struct device                   *dev;
530         void __iomem                    *base;
531
532 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
533 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
534 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
535 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
536 #define ARM_SMMU_FEAT_PRI               (1 << 4)
537 #define ARM_SMMU_FEAT_ATS               (1 << 5)
538 #define ARM_SMMU_FEAT_SEV               (1 << 6)
539 #define ARM_SMMU_FEAT_MSI               (1 << 7)
540 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
541 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
542 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
543 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
544 #define ARM_SMMU_FEAT_HYP               (1 << 12)
545 #define ARM_SMMU_FEAT_STALL_FORCE       (1 << 13)
546 #define ARM_SMMU_FEAT_VAX               (1 << 14)
547         u32                             features;
548
549 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
550 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY    (1 << 1)
551         u32                             options;
552
553         struct arm_smmu_cmdq            cmdq;
554         struct arm_smmu_evtq            evtq;
555         struct arm_smmu_priq            priq;
556
557         int                             gerr_irq;
558         int                             combined_irq;
559         u32                             sync_nr;
560         u8                              prev_cmd_opcode;
561
562         unsigned long                   ias; /* IPA */
563         unsigned long                   oas; /* PA */
564         unsigned long                   pgsize_bitmap;
565
566 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
567         unsigned int                    asid_bits;
568         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
569
570 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
571         unsigned int                    vmid_bits;
572         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
573
574         unsigned int                    ssid_bits;
575         unsigned int                    sid_bits;
576
577         struct arm_smmu_strtab_cfg      strtab_cfg;
578
579         u32                             sync_count;
580
581         /* IOMMU core code handle */
582         struct iommu_device             iommu;
583 };
584
585 /* SMMU private data for each master */
586 struct arm_smmu_master_data {
587         struct arm_smmu_device          *smmu;
588         struct arm_smmu_strtab_ent      ste;
589 };
590
591 /* SMMU private data for an IOMMU domain */
592 enum arm_smmu_domain_stage {
593         ARM_SMMU_DOMAIN_S1 = 0,
594         ARM_SMMU_DOMAIN_S2,
595         ARM_SMMU_DOMAIN_NESTED,
596         ARM_SMMU_DOMAIN_BYPASS,
597 };
598
599 struct arm_smmu_domain {
600         struct arm_smmu_device          *smmu;
601         struct mutex                    init_mutex; /* Protects smmu pointer */
602
603         struct io_pgtable_ops           *pgtbl_ops;
604         bool                            non_strict;
605
606         enum arm_smmu_domain_stage      stage;
607         union {
608                 struct arm_smmu_s1_cfg  s1_cfg;
609                 struct arm_smmu_s2_cfg  s2_cfg;
610         };
611
612         struct iommu_domain             domain;
613 };
614
615 struct arm_smmu_option_prop {
616         u32 opt;
617         const char *prop;
618 };
619
620 static struct arm_smmu_option_prop arm_smmu_options[] = {
621         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
622         { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
623         { 0, NULL},
624 };
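/*
 * These are matched by parse_driver_options() below against boolean
 * properties on the SMMU's DT node, e.g. (sketch):
 *
 *	smmu {
 *		compatible = "arm,smmu-v3";
 *		...
 *		hisilicon,broken-prefetch-cmd;
 *	};
 */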
625
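/*
 * Registers above 64K (e.g. the EVTQ/PRIQ PROD/CONS registers at 0x100xx
 * above) live in page 1 of the register space. Implementations with the
 * PAGE0_REGS_ONLY erratum expose them in page 0 instead, so fold the offset
 * back down for those.
 */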
626 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
627                                                  struct arm_smmu_device *smmu)
628 {
629         if ((offset > SZ_64K) &&
630             (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
631                 offset -= SZ_64K;
632
633         return smmu->base + offset;
634 }
635
636 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
637 {
638         return container_of(dom, struct arm_smmu_domain, domain);
639 }
640
641 static void parse_driver_options(struct arm_smmu_device *smmu)
642 {
643         int i = 0;
644
645         do {
646                 if (of_property_read_bool(smmu->dev->of_node,
647                                                 arm_smmu_options[i].prop)) {
648                         smmu->options |= arm_smmu_options[i].opt;
649                         dev_notice(smmu->dev, "option %s\n",
650                                 arm_smmu_options[i].prop);
651                 }
652         } while (arm_smmu_options[++i].opt);
653 }
654
655 /* Low-level queue manipulation functions */
656 static bool queue_full(struct arm_smmu_queue *q)
657 {
658         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
659                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
660 }
661
662 static bool queue_empty(struct arm_smmu_queue *q)
663 {
664         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
665                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
666 }
667
668 static void queue_sync_cons(struct arm_smmu_queue *q)
669 {
670         q->cons = readl_relaxed(q->cons_reg);
671 }
672
673 static void queue_inc_cons(struct arm_smmu_queue *q)
674 {
675         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
676
677         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
678         writel(q->cons, q->cons_reg);
679 }
680
681 static int queue_sync_prod(struct arm_smmu_queue *q)
682 {
683         int ret = 0;
684         u32 prod = readl_relaxed(q->prod_reg);
685
686         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
687                 ret = -EOVERFLOW;
688
689         q->prod = prod;
690         return ret;
691 }
692
693 static void queue_inc_prod(struct arm_smmu_queue *q)
694 {
695         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
696
697         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
698         writel(q->prod, q->prod_reg);
699 }
700
701 /*
702  * Wait for the SMMU to consume items. If sync is true, wait until the queue
703  * is empty. Otherwise, wait until there is at least one free slot.
704  */
705 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
706 {
707         ktime_t timeout;
708         unsigned int delay = 1, spin_cnt = 0;
709
710         /* Wait longer if it's a CMD_SYNC */
711         timeout = ktime_add_us(ktime_get(), sync ?
712                                             ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
713                                             ARM_SMMU_POLL_TIMEOUT_US);
714
715         while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
716                 if (ktime_compare(ktime_get(), timeout) > 0)
717                         return -ETIMEDOUT;
718
719                 if (wfe) {
720                         wfe();
721                 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
722                         cpu_relax();
723                         continue;
724                 } else {
725                         udelay(delay);
726                         delay *= 2;
727                         spin_cnt = 0;
728                 }
729         }
730
731         return 0;
732 }
733
734 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
735 {
736         int i;
737
738         for (i = 0; i < n_dwords; ++i)
739                 *dst++ = cpu_to_le64(*src++);
740 }
741
742 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
743 {
744         if (queue_full(q))
745                 return -ENOSPC;
746
747         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
748         queue_inc_prod(q);
749         return 0;
750 }
751
752 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
753 {
754         int i;
755
756         for (i = 0; i < n_dwords; ++i)
757                 *dst++ = le64_to_cpu(*src++);
758 }
759
760 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
761 {
762         if (queue_empty(q))
763                 return -EAGAIN;
764
765         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
766         queue_inc_cons(q);
767         return 0;
768 }
769
770 /* High-level queue accessors */
771 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
772 {
773         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
774         cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
775
776         switch (ent->opcode) {
777         case CMDQ_OP_TLBI_EL2_ALL:
778         case CMDQ_OP_TLBI_NSNH_ALL:
779                 break;
780         case CMDQ_OP_PREFETCH_CFG:
781                 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
782                 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
783                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
784                 break;
785         case CMDQ_OP_CFGI_STE:
786                 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
787                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
788                 break;
789         case CMDQ_OP_CFGI_ALL:
790                 /* Cover the entire SID range */
791                 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
792                 break;
793         case CMDQ_OP_TLBI_NH_VA:
794                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
795                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
796                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
797                 break;
798         case CMDQ_OP_TLBI_S2_IPA:
799                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
800                 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
801                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
802                 break;
803         case CMDQ_OP_TLBI_NH_ASID:
804                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
805                 /* Fallthrough */
806         case CMDQ_OP_TLBI_S12_VMALL:
807                 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
808                 break;
809         case CMDQ_OP_PRI_RESP:
810                 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
811                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
812                 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
813                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
814                 switch (ent->pri.resp) {
815                 case PRI_RESP_DENY:
816                 case PRI_RESP_FAIL:
817                 case PRI_RESP_SUCC:
818                         break;
819                 default:
820                         return -EINVAL;
821                 }
822                 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
823                 break;
824         case CMDQ_OP_CMD_SYNC:
825                 if (ent->sync.msiaddr)
826                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
827                 else
828                         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
829                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
830                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
831                 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
832                 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
833                 break;
834         default:
835                 return -ENOENT;
836         }
837
838         return 0;
839 }
840
841 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
842 {
843         static const char *cerror_str[] = {
844                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
845                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
846                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
847         };
848
849         int i;
850         u64 cmd[CMDQ_ENT_DWORDS];
851         struct arm_smmu_queue *q = &smmu->cmdq.q;
852         u32 cons = readl_relaxed(q->cons_reg);
853         u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
854         struct arm_smmu_cmdq_ent cmd_sync = {
855                 .opcode = CMDQ_OP_CMD_SYNC,
856         };
857
858         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
859                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
860
861         switch (idx) {
862         case CMDQ_ERR_CERROR_ABT_IDX:
863                 dev_err(smmu->dev, "retrying command fetch\n");
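                /* Fallthrough */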
864         case CMDQ_ERR_CERROR_NONE_IDX:
865                 return;
866         case CMDQ_ERR_CERROR_ILL_IDX:
867                 /* Fallthrough */
868         default:
869                 break;
870         }
871
872         /*
873          * We may have concurrent producers, so we need to be careful
874          * not to touch any of the shadow cmdq state.
875          */
876         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
877         dev_err(smmu->dev, "skipping command in error state:\n");
878         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
879                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
880
881         /* Convert the erroneous command into a CMD_SYNC */
882         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
883                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
884                 return;
885         }
886
887         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
888 }
889
890 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
891 {
892         struct arm_smmu_queue *q = &smmu->cmdq.q;
893         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
894
895         smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
896
897         while (queue_insert_raw(q, cmd) == -ENOSPC) {
898                 if (queue_poll_cons(q, false, wfe))
899                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
900         }
901 }
902
903 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
904                                     struct arm_smmu_cmdq_ent *ent)
905 {
906         u64 cmd[CMDQ_ENT_DWORDS];
907         unsigned long flags;
908
909         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
910                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911                          ent->opcode);
912                 return;
913         }
914
915         spin_lock_irqsave(&smmu->cmdq.lock, flags);
916         arm_smmu_cmdq_insert_cmd(smmu, cmd);
917         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
918 }
919
920 /*
921  * The difference between val and sync_idx is bounded by the maximum size of
922  * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
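 * The signed comparison (int)(val - sync_idx) >= 0 below therefore stays
 * correct even when sync_count wraps around 2^32.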
923  */
924 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
925 {
926         ktime_t timeout;
927         u32 val;
928
929         timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
930         val = smp_cond_load_acquire(&smmu->sync_count,
931                                     (int)(VAL - sync_idx) >= 0 ||
932                                     !ktime_before(ktime_get(), timeout));
933
934         return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
935 }
936
937 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
938 {
939         u64 cmd[CMDQ_ENT_DWORDS];
940         unsigned long flags;
941         struct arm_smmu_cmdq_ent ent = {
942                 .opcode = CMDQ_OP_CMD_SYNC,
943                 .sync   = {
944                         .msiaddr = virt_to_phys(&smmu->sync_count),
945                 },
946         };
947
948         spin_lock_irqsave(&smmu->cmdq.lock, flags);
949
950         /* Piggy-back on the previous command if it's a SYNC */
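        /* (it was queued with msidata == smmu->sync_nr, the value we poll for) */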
951         if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
952                 ent.sync.msidata = smmu->sync_nr;
953         } else {
954                 ent.sync.msidata = ++smmu->sync_nr;
955                 arm_smmu_cmdq_build_cmd(cmd, &ent);
956                 arm_smmu_cmdq_insert_cmd(smmu, cmd);
957         }
958
959         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
960
961         return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
962 }
963
964 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
965 {
966         u64 cmd[CMDQ_ENT_DWORDS];
967         unsigned long flags;
968         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
969         struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
970         int ret;
971
972         arm_smmu_cmdq_build_cmd(cmd, &ent);
973
974         spin_lock_irqsave(&smmu->cmdq.lock, flags);
975         arm_smmu_cmdq_insert_cmd(smmu, cmd);
976         ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
977         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
978
979         return ret;
980 }
981
982 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
983 {
984         int ret;
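        /*
         * The MSI-based CMD_SYNC completion writes its sequence number
         * straight to sync_count in normal memory, so it is only usable when
         * the SMMU is cache-coherent.
         */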
985         bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
986                    (smmu->features & ARM_SMMU_FEAT_COHERENCY);
987
988         ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
989                   : __arm_smmu_cmdq_issue_sync(smmu);
990         if (ret)
991                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
992 }
993
994 /* Context descriptor manipulation functions */
995 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
996 {
997         u64 val = 0;
998
999         /* Repack the TCR. Just care about TTBR0 for now */
1000         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1001         val |= ARM_SMMU_TCR2CD(tcr, TG0);
1002         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1003         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1004         val |= ARM_SMMU_TCR2CD(tcr, SH0);
1005         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1006         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1007         val |= ARM_SMMU_TCR2CD(tcr, IPS);
1008         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1009
1010         return val;
1011 }
1012
1013 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1014                                     struct arm_smmu_s1_cfg *cfg)
1015 {
1016         u64 val;
1017
1018         /*
1019          * We don't need to issue any invalidation here, as we'll invalidate
1020          * the STE when installing the new entry anyway.
1021          */
1022         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1023 #ifdef __BIG_ENDIAN
1024               CTXDESC_CD_0_ENDI |
1025 #endif
1026               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1027               CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1028               CTXDESC_CD_0_V;
1029
1030         /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1031         if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1032                 val |= CTXDESC_CD_0_S;
1033
1034         cfg->cdptr[0] = cpu_to_le64(val);
1035
1036         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1037         cfg->cdptr[1] = cpu_to_le64(val);
1038
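        /* Word 2 (TTB1) is not written: only TTBR0 is used for now */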
1039         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1040 }
1041
1042 /* Stream table manipulation functions */
1043 static void
1044 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1045 {
1046         u64 val = 0;
1047
1048         val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1049         val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1050
1051         *dst = cpu_to_le64(val);
1052 }
1053
1054 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1055 {
1056         struct arm_smmu_cmdq_ent cmd = {
1057                 .opcode = CMDQ_OP_CFGI_STE,
1058                 .cfgi   = {
1059                         .sid    = sid,
1060                         .leaf   = true,
1061                 },
1062         };
1063
1064         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1065         arm_smmu_cmdq_issue_sync(smmu);
1066 }
1067
1068 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1069                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
1070 {
1071         /*
1072          * This is hideously complicated, but we only really care about
1073          * three cases at the moment:
1074          *
1075          * 1. Invalid (all zero) -> bypass/fault (init)
1076          * 2. Bypass/fault -> translation/bypass (attach)
1077          * 3. Translation/bypass -> bypass/fault (detach)
1078          *
1079          * Given that we can't update the STE atomically and the SMMU
1080          * doesn't read the thing in a defined order, that leaves us
1081          * with the following maintenance requirements:
1082          *
1083          * 1. Update Config, return (init time STEs aren't live)
1084          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1085          * 3. Update Config, sync
1086          */
1087         u64 val = le64_to_cpu(dst[0]);
1088         bool ste_live = false;
1089         struct arm_smmu_cmdq_ent prefetch_cmd = {
1090                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1091                 .prefetch       = {
1092                         .sid    = sid,
1093                 },
1094         };
1095
1096         if (val & STRTAB_STE_0_V) {
1097                 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1098                 case STRTAB_STE_0_CFG_BYPASS:
1099                         break;
1100                 case STRTAB_STE_0_CFG_S1_TRANS:
1101                 case STRTAB_STE_0_CFG_S2_TRANS:
1102                         ste_live = true;
1103                         break;
1104                 case STRTAB_STE_0_CFG_ABORT:
1105                         if (disable_bypass)
1106                                 break;
1107                 default:
1108                         BUG(); /* STE corruption */
1109                 }
1110         }
1111
1112         /* Nuke the existing STE_0 value, as we're going to rewrite it */
1113         val = STRTAB_STE_0_V;
1114
1115         /* Bypass/fault */
1116         if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1117                 if (!ste->assigned && disable_bypass)
1118                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1119                 else
1120                         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1121
1122                 dst[0] = cpu_to_le64(val);
1123                 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1124                                                 STRTAB_STE_1_SHCFG_INCOMING));
1125                 dst[2] = 0; /* Nuke the VMID */
1126                 /*
1127                  * The SMMU can perform negative caching, so we must sync
1128                  * the STE regardless of whether the old value was live.
1129                  */
1130                 if (smmu)
1131                         arm_smmu_sync_ste_for_sid(smmu, sid);
1132                 return;
1133         }
1134
1135         if (ste->s1_cfg) {
1136                 BUG_ON(ste_live);
1137                 dst[1] = cpu_to_le64(
1138                          FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1139                          FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1140                          FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1141 #ifdef CONFIG_PCI_ATS
1142                          FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1143 #endif
1144                          FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1145
1146                 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1147                    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1148                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1149
1150                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1151                         FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1152         }
1153
1154         if (ste->s2_cfg) {
1155                 BUG_ON(ste_live);
1156                 dst[2] = cpu_to_le64(
1157                          FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1158                          FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1159 #ifdef __BIG_ENDIAN
1160                          STRTAB_STE_2_S2ENDI |
1161 #endif
1162                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1163                          STRTAB_STE_2_S2R);
1164
1165                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1166
1167                 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1168         }
1169
1170         arm_smmu_sync_ste_for_sid(smmu, sid);
1171         dst[0] = cpu_to_le64(val);
1172         arm_smmu_sync_ste_for_sid(smmu, sid);
1173
1174         /* It's likely that we'll want to use the new STE soon */
1175         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1176                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1177 }
1178
1179 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1180 {
1181         unsigned int i;
1182         struct arm_smmu_strtab_ent ste = { .assigned = false };
1183
1184         for (i = 0; i < nent; ++i) {
1185                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1186                 strtab += STRTAB_STE_DWORDS;
1187         }
1188 }
1189
1190 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1191 {
1192         size_t size;
1193         void *strtab;
1194         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1195         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1196
1197         if (desc->l2ptr)
1198                 return 0;
1199
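        /* 2^STRTAB_SPLIT (256) STEs of 8 dwords each: a 16KiB L2 table */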
1200         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1201         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1202
1203         desc->span = STRTAB_SPLIT + 1;
1204         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1205                                           GFP_KERNEL | __GFP_ZERO);
1206         if (!desc->l2ptr) {
1207                 dev_err(smmu->dev,
1208                         "failed to allocate l2 stream table for SID %u\n",
1209                         sid);
1210                 return -ENOMEM;
1211         }
1212
1213         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1214         arm_smmu_write_strtab_l1_desc(strtab, desc);
1215         return 0;
1216 }
1217
1218 /* IRQ and event handlers */
1219 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1220 {
1221         int i;
1222         struct arm_smmu_device *smmu = dev;
1223         struct arm_smmu_queue *q = &smmu->evtq.q;
1224         u64 evt[EVTQ_ENT_DWORDS];
1225
1226         do {
1227                 while (!queue_remove_raw(q, evt)) {
1228                         u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1229
1230                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1231                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1232                                 dev_info(smmu->dev, "\t0x%016llx\n",
1233                                          (unsigned long long)evt[i]);
1234
1235                 }
1236
1237                 /*
1238                  * Not much we can do on overflow, so scream and pretend we're
1239                  * trying harder.
1240                  */
1241                 if (queue_sync_prod(q) == -EOVERFLOW)
1242                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1243         } while (!queue_empty(q));
1244
1245         /* Sync our overflow flag, as we believe we're up to speed */
1246         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1247         return IRQ_HANDLED;
1248 }
1249
1250 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1251 {
1252         u32 sid, ssid;
1253         u16 grpid;
1254         bool ssv, last;
1255
1256         sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1257         ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1258         ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1259         last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1260         grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1261
1262         dev_info(smmu->dev, "unexpected PRI request received:\n");
1263         dev_info(smmu->dev,
1264                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1265                  sid, ssid, grpid, last ? "L" : "",
1266                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1267                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1268                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1269                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1270                  evt[1] & PRIQ_1_ADDR_MASK);
1271
1272         if (last) {
1273                 struct arm_smmu_cmdq_ent cmd = {
1274                         .opcode                 = CMDQ_OP_PRI_RESP,
1275                         .substream_valid        = ssv,
1276                         .pri                    = {
1277                                 .sid    = sid,
1278                                 .ssid   = ssid,
1279                                 .grpid  = grpid,
1280                                 .resp   = PRI_RESP_DENY,
1281                         },
1282                 };
1283
1284                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1285         }
1286 }
1287
1288 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1289 {
1290         struct arm_smmu_device *smmu = dev;
1291         struct arm_smmu_queue *q = &smmu->priq.q;
1292         u64 evt[PRIQ_ENT_DWORDS];
1293
1294         do {
1295                 while (!queue_remove_raw(q, evt))
1296                         arm_smmu_handle_ppr(smmu, evt);
1297
1298                 if (queue_sync_prod(q) == -EOVERFLOW)
1299                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1300         } while (!queue_empty(q));
1301
1302         /* Sync our overflow flag, as we believe we're up to speed */
1303         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1304         writel(q->cons, q->cons_reg);
1305         return IRQ_HANDLED;
1306 }
1307
1308 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1309
1310 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1311 {
1312         u32 gerror, gerrorn, active;
1313         struct arm_smmu_device *smmu = dev;
1314
1315         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1316         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1317
1318         active = gerror ^ gerrorn;
1319         if (!(active & GERROR_ERR_MASK))
1320                 return IRQ_NONE; /* No errors pending */
1321
1322         dev_warn(smmu->dev,
1323                  "unexpected global error reported (0x%08x), this could be serious\n",
1324                  active);
1325
1326         if (active & GERROR_SFM_ERR) {
1327                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1328                 arm_smmu_device_disable(smmu);
1329         }
1330
1331         if (active & GERROR_MSI_GERROR_ABT_ERR)
1332                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1333
1334         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1335                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1336
1337         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1338                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1339
1340         if (active & GERROR_MSI_CMDQ_ABT_ERR)
1341                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1342
1343         if (active & GERROR_PRIQ_ABT_ERR)
1344                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1345
1346         if (active & GERROR_EVTQ_ABT_ERR)
1347                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1348
1349         if (active & GERROR_CMDQ_ERR)
1350                 arm_smmu_cmdq_skip_err(smmu);
1351
1352         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1353         return IRQ_HANDLED;
1354 }
1355
1356 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1357 {
1358         struct arm_smmu_device *smmu = dev;
1359
1360         arm_smmu_evtq_thread(irq, dev);
1361         if (smmu->features & ARM_SMMU_FEAT_PRI)
1362                 arm_smmu_priq_thread(irq, dev);
1363
1364         return IRQ_HANDLED;
1365 }
1366
1367 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1368 {
1369         arm_smmu_gerror_handler(irq, dev);
1370         return IRQ_WAKE_THREAD;
1371 }
1372
1373 /* IO_PGTABLE API */
1374 static void arm_smmu_tlb_sync(void *cookie)
1375 {
1376         struct arm_smmu_domain *smmu_domain = cookie;
1377
1378         arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
1379 }
1380
1381 static void arm_smmu_tlb_inv_context(void *cookie)
1382 {
1383         struct arm_smmu_domain *smmu_domain = cookie;
1384         struct arm_smmu_device *smmu = smmu_domain->smmu;
1385         struct arm_smmu_cmdq_ent cmd;
1386
1387         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1388                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1389                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1390                 cmd.tlbi.vmid   = 0;
1391         } else {
1392                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1393                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1394         }
1395
1396         /*
1397          * NOTE: when io-pgtable is in non-strict mode, we may get here with
1398          * PTEs previously cleared by unmaps on the current CPU not yet visible
1399          * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
1400          * to guarantee those are observed before the TLBI. Do be careful, 007.
1401          */
1402         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1403         arm_smmu_cmdq_issue_sync(smmu);
1404 }
1405
1406 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1407                                           size_t granule, bool leaf, void *cookie)
1408 {
1409         struct arm_smmu_domain *smmu_domain = cookie;
1410         struct arm_smmu_device *smmu = smmu_domain->smmu;
1411         struct arm_smmu_cmdq_ent cmd = {
1412                 .tlbi = {
1413                         .leaf   = leaf,
1414                         .addr   = iova,
1415                 },
1416         };
1417
1418         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1419                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1420                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1421         } else {
1422                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1423                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1424         }
1425
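             /*
              * Issue one TLBI command per granule in the range; completion
              * is deferred to the ->tlb_sync callback, which emits a single
              * CMD_SYNC for the whole batch.
              */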
1426         do {
1427                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1428                 cmd.tlbi.addr += granule;
1429         } while (size -= granule);
1430 }
1431
1432 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1433         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1434         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1435         .tlb_sync       = arm_smmu_tlb_sync,
1436 };
1437
1438 /* IOMMU API */
1439 static bool arm_smmu_capable(enum iommu_cap cap)
1440 {
1441         switch (cap) {
1442         case IOMMU_CAP_CACHE_COHERENCY:
1443                 return true;
1444         case IOMMU_CAP_NOEXEC:
1445                 return true;
1446         default:
1447                 return false;
1448         }
1449 }
1450
1451 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1452 {
1453         struct arm_smmu_domain *smmu_domain;
1454
1455         if (type != IOMMU_DOMAIN_UNMANAGED &&
1456             type != IOMMU_DOMAIN_DMA &&
1457             type != IOMMU_DOMAIN_IDENTITY)
1458                 return NULL;
1459
1460         /*
1461          * Allocate the domain and initialise some of its data structures.
1462          * We can't really do anything meaningful until we've added a
1463          * master.
1464          */
1465         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1466         if (!smmu_domain)
1467                 return NULL;
1468
1469         if (type == IOMMU_DOMAIN_DMA &&
1470             iommu_get_dma_cookie(&smmu_domain->domain)) {
1471                 kfree(smmu_domain);
1472                 return NULL;
1473         }
1474
1475         mutex_init(&smmu_domain->init_mutex);
1476         return &smmu_domain->domain;
1477 }
1478
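     /*
      * Allocate a free index from a bitmap of (1 << span) bits. The search
      * itself is not atomic, so retry until test_and_set_bit() confirms
      * that we really claimed the bit we found.
      */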
1479 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1480 {
1481         int idx, size = 1 << span;
1482
1483         do {
1484                 idx = find_first_zero_bit(map, size);
1485                 if (idx == size)
1486                         return -ENOSPC;
1487         } while (test_and_set_bit(idx, map));
1488
1489         return idx;
1490 }
1491
1492 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1493 {
1494         clear_bit(idx, map);
1495 }
1496
1497 static void arm_smmu_domain_free(struct iommu_domain *domain)
1498 {
1499         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1500         struct arm_smmu_device *smmu = smmu_domain->smmu;
1501
1502         iommu_put_dma_cookie(domain);
1503         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1504
1505         /* Free the CD and ASID, if we allocated them */
1506         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1507                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1508
1509                 if (cfg->cdptr) {
1510                         dmam_free_coherent(smmu_domain->smmu->dev,
1511                                            CTXDESC_CD_DWORDS << 3,
1512                                            cfg->cdptr,
1513                                            cfg->cdptr_dma);
1514
1515                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1516                 }
1517         } else {
1518                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1519                 if (cfg->vmid)
1520                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1521         }
1522
1523         kfree(smmu_domain);
1524 }
1525
1526 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1527                                        struct io_pgtable_cfg *pgtbl_cfg)
1528 {
1529         int ret;
1530         int asid;
1531         struct arm_smmu_device *smmu = smmu_domain->smmu;
1532         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1533
1534         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1535         if (asid < 0)
1536                 return asid;
1537
1538         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1539                                          &cfg->cdptr_dma,
1540                                          GFP_KERNEL | __GFP_ZERO);
1541         if (!cfg->cdptr) {
1542                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1543                 ret = -ENOMEM;
1544                 goto out_free_asid;
1545         }
1546
1547         cfg->cd.asid    = (u16)asid;
1548         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1549         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1550         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1551         return 0;
1552
1553 out_free_asid:
1554         arm_smmu_bitmap_free(smmu->asid_map, asid);
1555         return ret;
1556 }
1557
1558 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1559                                        struct io_pgtable_cfg *pgtbl_cfg)
1560 {
1561         int vmid;
1562         struct arm_smmu_device *smmu = smmu_domain->smmu;
1563         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1564
1565         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1566         if (vmid < 0)
1567                 return vmid;
1568
1569         cfg->vmid       = (u16)vmid;
1570         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1571         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1572         return 0;
1573 }
1574
1575 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1576 {
1577         int ret;
1578         unsigned long ias, oas;
1579         enum io_pgtable_fmt fmt;
1580         struct io_pgtable_cfg pgtbl_cfg;
1581         struct io_pgtable_ops *pgtbl_ops;
1582         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1583                                  struct io_pgtable_cfg *);
1584         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1585         struct arm_smmu_device *smmu = smmu_domain->smmu;
1586
1587         if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1588                 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1589                 return 0;
1590         }
1591
1592         /* Restrict the stage to what we can actually support */
1593         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1594                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1595         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1596                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1597
1598         switch (smmu_domain->stage) {
1599         case ARM_SMMU_DOMAIN_S1:
1600                 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1601                 ias = min_t(unsigned long, ias, VA_BITS);
1602                 oas = smmu->ias;
1603                 fmt = ARM_64_LPAE_S1;
1604                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1605                 break;
1606         case ARM_SMMU_DOMAIN_NESTED:
1607         case ARM_SMMU_DOMAIN_S2:
1608                 ias = smmu->ias;
1609                 oas = smmu->oas;
1610                 fmt = ARM_64_LPAE_S2;
1611                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1612                 break;
1613         default:
1614                 return -EINVAL;
1615         }
1616
1617         pgtbl_cfg = (struct io_pgtable_cfg) {
1618                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1619                 .ias            = ias,
1620                 .oas            = oas,
1621                 .tlb            = &arm_smmu_gather_ops,
1622                 .iommu_dev      = smmu->dev,
1623         };
1624
1625         if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1626                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1627
1628         if (smmu_domain->non_strict)
1629                 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1630
1631         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1632         if (!pgtbl_ops)
1633                 return -ENOMEM;
1634
1635         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1636         domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1637         domain->geometry.force_aperture = true;
1638
1639         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1640         if (ret < 0) {
1641                 free_io_pgtable_ops(pgtbl_ops);
1642                 return ret;
1643         }
1644
1645         smmu_domain->pgtbl_ops = pgtbl_ops;
1646         return 0;
1647 }
1648
1649 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1650 {
1651         __le64 *step;
1652         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1653
1654         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1655                 struct arm_smmu_strtab_l1_desc *l1_desc;
1656                 int idx;
1657
1658                 /* Two-level walk */
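                     /*
                      * SID bits above STRTAB_SPLIT select the L1 descriptor;
                      * the low STRTAB_SPLIT bits select the STE within that
                      * descriptor's L2 table.
                      */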
1659                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1660                 l1_desc = &cfg->l1_desc[idx];
1661                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1662                 step = &l1_desc->l2ptr[idx];
1663         } else {
1664                 /* Simple linear lookup */
1665                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1666         }
1667
1668         return step;
1669 }
1670
1671 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1672 {
1673         int i, j;
1674         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1675         struct arm_smmu_device *smmu = master->smmu;
1676
1677         for (i = 0; i < fwspec->num_ids; ++i) {
1678                 u32 sid = fwspec->ids[i];
1679                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1680
1681                 /* Bridged PCI devices may end up with duplicated IDs */
1682                 for (j = 0; j < i; j++)
1683                         if (fwspec->ids[j] == sid)
1684                                 break;
1685                 if (j < i)
1686                         continue;
1687
1688                 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1689         }
1690 }
1691
1692 static void arm_smmu_detach_dev(struct device *dev)
1693 {
1694         struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1695
1696         master->ste.assigned = false;
1697         arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1698 }
1699
1700 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1701 {
1702         int ret = 0;
1703         struct arm_smmu_device *smmu;
1704         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1705         struct arm_smmu_master_data *master;
1706         struct arm_smmu_strtab_ent *ste;
1707
1708         if (!dev->iommu_fwspec)
1709                 return -ENOENT;
1710
1711         master = dev->iommu_fwspec->iommu_priv;
1712         smmu = master->smmu;
1713         ste = &master->ste;
1714
1715         /* Already attached to a different domain? */
1716         if (ste->assigned)
1717                 arm_smmu_detach_dev(dev);
1718
1719         mutex_lock(&smmu_domain->init_mutex);
1720
1721         if (!smmu_domain->smmu) {
1722                 smmu_domain->smmu = smmu;
1723                 ret = arm_smmu_domain_finalise(domain);
1724                 if (ret) {
1725                         smmu_domain->smmu = NULL;
1726                         goto out_unlock;
1727                 }
1728         } else if (smmu_domain->smmu != smmu) {
1729                 dev_err(dev,
1730                         "cannot attach to SMMU %s (upstream of %s)\n",
1731                         dev_name(smmu_domain->smmu->dev),
1732                         dev_name(smmu->dev));
1733                 ret = -ENXIO;
1734                 goto out_unlock;
1735         }
1736
1737         ste->assigned = true;
1738
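             /*
              * Bypass domains install neither config; stage-1 domains hook
              * up the context descriptor, stage-2 domains the VMID/VTTBR
              * configuration. The STE itself is rewritten below.
              */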
1739         if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1740                 ste->s1_cfg = NULL;
1741                 ste->s2_cfg = NULL;
1742         } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1743                 ste->s1_cfg = &smmu_domain->s1_cfg;
1744                 ste->s2_cfg = NULL;
1745                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1746         } else {
1747                 ste->s1_cfg = NULL;
1748                 ste->s2_cfg = &smmu_domain->s2_cfg;
1749         }
1750
1751         arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1752 out_unlock:
1753         mutex_unlock(&smmu_domain->init_mutex);
1754         return ret;
1755 }
1756
1757 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1758                         phys_addr_t paddr, size_t size, int prot)
1759 {
1760         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1761
1762         if (!ops)
1763                 return -ENODEV;
1764
1765         return ops->map(ops, iova, paddr, size, prot);
1766 }
1767
1768 static size_t
1769 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1770 {
1771         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1772
1773         if (!ops)
1774                 return 0;
1775
1776         return ops->unmap(ops, iova, size);
1777 }
1778
1779 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1780 {
1781         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1782
1783         if (smmu_domain->smmu)
1784                 arm_smmu_tlb_inv_context(smmu_domain);
1785 }
1786
1787 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1788 {
1789         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1790
1791         if (smmu)
1792                 arm_smmu_cmdq_issue_sync(smmu);
1793 }
1794
1795 static phys_addr_t
1796 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1797 {
1798         struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1799
1800         if (domain->type == IOMMU_DOMAIN_IDENTITY)
1801                 return iova;
1802
1803         if (!ops)
1804                 return 0;
1805
1806         return ops->iova_to_phys(ops, iova);
1807 }
1808
1809 static struct platform_driver arm_smmu_driver;
1810
1811 static int arm_smmu_match_node(struct device *dev, void *data)
1812 {
1813         return dev->fwnode == data;
1814 }
1815
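     /*
      * driver_find_device() takes a reference on the device it returns; we
      * only need the driver data, so drop the reference straight away.
      */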
1816 static
1817 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1818 {
1819         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1820                                                 fwnode, arm_smmu_match_node);
1821         put_device(dev);
1822         return dev ? dev_get_drvdata(dev) : NULL;
1823 }
1824
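     /*
      * Only the L1 descriptors of a two-level stream table are allocated
      * up front, but each of them covers 1 << STRTAB_SPLIT STEs, so scale
      * the SID limit accordingly.
      */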
1825 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1826 {
1827         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1828
1829         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1830                 limit *= 1UL << STRTAB_SPLIT;
1831
1832         return sid < limit;
1833 }
1834
1835 static struct iommu_ops arm_smmu_ops;
1836
1837 static int arm_smmu_add_device(struct device *dev)
1838 {
1839         int i, ret;
1840         struct arm_smmu_device *smmu;
1841         struct arm_smmu_master_data *master;
1842         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1843         struct iommu_group *group;
1844
1845         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1846                 return -ENODEV;
1847         /*
1848          * We _can_ actually withstand dodgy bus code re-calling add_device()
1849          * without an intervening remove_device()/of_xlate() sequence, but
1850          * we're not going to do so quietly...
1851          */
1852         if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1853                 master = fwspec->iommu_priv;
1854                 smmu = master->smmu;
1855         } else {
1856                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1857                 if (!smmu)
1858                         return -ENODEV;
1859                 master = kzalloc(sizeof(*master), GFP_KERNEL);
1860                 if (!master)
1861                         return -ENOMEM;
1862
1863                 master->smmu = smmu;
1864                 fwspec->iommu_priv = master;
1865         }
1866
1867         /* Check the SIDs are in range of the SMMU and our stream table */
1868         for (i = 0; i < fwspec->num_ids; i++) {
1869                 u32 sid = fwspec->ids[i];
1870
1871                 if (!arm_smmu_sid_in_range(smmu, sid))
1872                         return -ERANGE;
1873
1874                 /* Ensure l2 strtab is initialised */
1875                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1876                         ret = arm_smmu_init_l2_strtab(smmu, sid);
1877                         if (ret)
1878                                 return ret;
1879                 }
1880         }
1881
1882         group = iommu_group_get_for_dev(dev);
1883         if (!IS_ERR(group)) {
1884                 iommu_group_put(group);
1885                 iommu_device_link(&smmu->iommu, dev);
1886         }
1887
1888         return PTR_ERR_OR_ZERO(group);
1889 }
1890
1891 static void arm_smmu_remove_device(struct device *dev)
1892 {
1893         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1894         struct arm_smmu_master_data *master;
1895         struct arm_smmu_device *smmu;
1896
1897         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1898                 return;
1899
1900         master = fwspec->iommu_priv;
1901         smmu = master->smmu;
1902         if (master->ste.assigned)
1903                 arm_smmu_detach_dev(dev);
1904         iommu_group_remove_device(dev);
1905         iommu_device_unlink(&smmu->iommu, dev);
1906         kfree(master);
1907         iommu_fwspec_free(dev);
1908 }
1909
1910 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1911 {
1912         struct iommu_group *group;
1913
1914         /*
1915          * We don't support devices sharing stream IDs other than PCI RID
1916          * aliases, since the necessary ID-to-device lookup becomes rather
1917          * impractical given a potentially sparse 32-bit stream ID space.
1918          */
1919         if (dev_is_pci(dev))
1920                 group = pci_device_group(dev);
1921         else
1922                 group = generic_device_group(dev);
1923
1924         return group;
1925 }
1926
1927 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1928                                     enum iommu_attr attr, void *data)
1929 {
1930         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1931
1932         switch (domain->type) {
1933         case IOMMU_DOMAIN_UNMANAGED:
1934                 switch (attr) {
1935                 case DOMAIN_ATTR_NESTING:
1936                         *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1937                         return 0;
1938                 default:
1939                         return -ENODEV;
1940                 }
1941                 break;
1942         case IOMMU_DOMAIN_DMA:
1943                 switch (attr) {
1944                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1945                         *(int *)data = smmu_domain->non_strict;
1946                         return 0;
1947                 default:
1948                         return -ENODEV;
1949                 }
1950                 break;
1951         default:
1952                 return -EINVAL;
1953         }
1954 }
1955
1956 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1957                                     enum iommu_attr attr, void *data)
1958 {
1959         int ret = 0;
1960         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1961
1962         mutex_lock(&smmu_domain->init_mutex);
1963
1964         switch (domain->type) {
1965         case IOMMU_DOMAIN_UNMANAGED:
1966                 switch (attr) {
1967                 case DOMAIN_ATTR_NESTING:
1968                         if (smmu_domain->smmu) {
1969                                 ret = -EPERM;
1970                                 goto out_unlock;
1971                         }
1972
1973                         if (*(int *)data)
1974                                 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1975                         else
1976                                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1977                         break;
1978                 default:
1979                         ret = -ENODEV;
1980                 }
1981                 break;
1982         case IOMMU_DOMAIN_DMA:
1983                 switch (attr) {
1984                 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1985                         smmu_domain->non_strict = *(int *)data;
1986                         break;
1987                 default:
1988                         ret = -ENODEV;
1989                 }
1990                 break;
1991         default:
1992                 ret = -EINVAL;
1993         }
1994
1995 out_unlock:
1996         mutex_unlock(&smmu_domain->init_mutex);
1997         return ret;
1998 }
1999
2000 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2001 {
2002         return iommu_fwspec_add_ids(dev, args->args, 1);
2003 }
2004
2005 static void arm_smmu_get_resv_regions(struct device *dev,
2006                                       struct list_head *head)
2007 {
2008         struct iommu_resv_region *region;
2009         int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2010
2011         region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2012                                          prot, IOMMU_RESV_SW_MSI);
2013         if (!region)
2014                 return;
2015
2016         list_add_tail(&region->list, head);
2017
2018         iommu_dma_get_resv_regions(dev, head);
2019 }
2020
2021 static void arm_smmu_put_resv_regions(struct device *dev,
2022                                       struct list_head *head)
2023 {
2024         struct iommu_resv_region *entry, *next;
2025
2026         list_for_each_entry_safe(entry, next, head, list)
2027                 kfree(entry);
2028 }
2029
2030 static struct iommu_ops arm_smmu_ops = {
2031         .capable                = arm_smmu_capable,
2032         .domain_alloc           = arm_smmu_domain_alloc,
2033         .domain_free            = arm_smmu_domain_free,
2034         .attach_dev             = arm_smmu_attach_dev,
2035         .map                    = arm_smmu_map,
2036         .unmap                  = arm_smmu_unmap,
2037         .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
2038         .iotlb_sync             = arm_smmu_iotlb_sync,
2039         .iova_to_phys           = arm_smmu_iova_to_phys,
2040         .add_device             = arm_smmu_add_device,
2041         .remove_device          = arm_smmu_remove_device,
2042         .device_group           = arm_smmu_device_group,
2043         .domain_get_attr        = arm_smmu_domain_get_attr,
2044         .domain_set_attr        = arm_smmu_domain_set_attr,
2045         .of_xlate               = arm_smmu_of_xlate,
2046         .get_resv_regions       = arm_smmu_get_resv_regions,
2047         .put_resv_regions       = arm_smmu_put_resv_regions,
2048         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
2049 };
2050
2051 /* Probing and initialisation functions */
2052 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2053                                    struct arm_smmu_queue *q,
2054                                    unsigned long prod_off,
2055                                    unsigned long cons_off,
2056                                    size_t dwords)
2057 {
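             /* (1 << max_n_shift) entries of 'dwords' 64-bit words each */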
2058         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2059
2060         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2061         if (!q->base) {
2062                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2063                         qsz);
2064                 return -ENOMEM;
2065         }
2066
2067         q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
2068         q->cons_reg     = arm_smmu_page1_fixup(cons_off, smmu);
2069         q->ent_dwords   = dwords;
2070
2071         q->q_base  = Q_BASE_RWA;
2072         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2073         q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2074
2075         q->prod = q->cons = 0;
2076         return 0;
2077 }
2078
2079 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2080 {
2081         int ret;
2082
2083         /* cmdq */
2084         spin_lock_init(&smmu->cmdq.lock);
2085         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2086                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2087         if (ret)
2088                 return ret;
2089
2090         /* evtq */
2091         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2092                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2093         if (ret)
2094                 return ret;
2095
2096         /* priq */
2097         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2098                 return 0;
2099
2100         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2101                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2102 }
2103
2104 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2105 {
2106         unsigned int i;
2107         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2108         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2109         void *strtab = smmu->strtab_cfg.strtab;
2110
2111         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2112         if (!cfg->l1_desc) {
2113                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2114                 return -ENOMEM;
2115         }
2116
2117         for (i = 0; i < cfg->num_l1_ents; ++i) {
2118                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2119                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2120         }
2121
2122         return 0;
2123 }
2124
2125 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2126 {
2127         void *strtab;
2128         u64 reg;
2129         u32 size, l1size;
2130         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2131
2132         /* Calculate the L1 size, capped to the SIDSIZE. */
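             /*
              * The L1 table is capped at 1 << STRTAB_L1_SZ_SHIFT bytes, so
              * its maximum entry count is that divided by the descriptor
              * size (STRTAB_L1_DESC_DWORDS * 8 bytes). Each L1 entry covers
              * 1 << STRTAB_SPLIT SIDs, hence the additional cap below.
              */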
2133         size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2134         size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2135         cfg->num_l1_ents = 1 << size;
2136
2137         size += STRTAB_SPLIT;
2138         if (size < smmu->sid_bits)
2139                 dev_warn(smmu->dev,
2140                          "2-level strtab only covers %u/%u bits of SID\n",
2141                          size, smmu->sid_bits);
2142
2143         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2144         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2145                                      GFP_KERNEL | __GFP_ZERO);
2146         if (!strtab) {
2147                 dev_err(smmu->dev,
2148                         "failed to allocate l1 stream table (%u bytes)\n",
2149                         l1size);
2150                 return -ENOMEM;
2151         }
2152         cfg->strtab = strtab;
2153
2154         /* Configure strtab_base_cfg for 2 levels */
2155         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2156         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2157         reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2158         cfg->strtab_base_cfg = reg;
2159
2160         return arm_smmu_init_l1_strtab(smmu);
2161 }
2162
2163 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2164 {
2165         void *strtab;
2166         u64 reg;
2167         u32 size;
2168         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2169
2170         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2171         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2172                                      GFP_KERNEL | __GFP_ZERO);
2173         if (!strtab) {
2174                 dev_err(smmu->dev,
2175                         "failed to allocate linear stream table (%u bytes)\n",
2176                         size);
2177                 return -ENOMEM;
2178         }
2179         cfg->strtab = strtab;
2180         cfg->num_l1_ents = 1 << smmu->sid_bits;
2181
2182         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2183         reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2184         reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2185         cfg->strtab_base_cfg = reg;
2186
2187         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2188         return 0;
2189 }
2190
2191 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2192 {
2193         u64 reg;
2194         int ret;
2195
2196         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2197                 ret = arm_smmu_init_strtab_2lvl(smmu);
2198         else
2199                 ret = arm_smmu_init_strtab_linear(smmu);
2200
2201         if (ret)
2202                 return ret;
2203
2204         /* Set the strtab base address */
2205         reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2206         reg |= STRTAB_BASE_RA;
2207         smmu->strtab_cfg.strtab_base = reg;
2208
2209         /* Allocate the first VMID for stage-2 bypass STEs */
2210         set_bit(0, smmu->vmid_map);
2211         return 0;
2212 }
2213
2214 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2215 {
2216         int ret;
2217
2218         ret = arm_smmu_init_queues(smmu);
2219         if (ret)
2220                 return ret;
2221
2222         return arm_smmu_init_strtab(smmu);
2223 }
2224
2225 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2226                                    unsigned int reg_off, unsigned int ack_off)
2227 {
2228         u32 reg;
2229
2230         writel_relaxed(val, smmu->base + reg_off);
2231         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2232                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2233 }
2234
2235 /*
      * GBPA is "special": it has no ack register. GBPA.Update must read
      * back as zero before the register is modified, and the new value is
      * then written with Update set; the SMMU clears Update again once the
      * change has taken effect, so it is polled on both sides of the write.
      */
2236 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2237 {
2238         int ret;
2239         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2240
2241         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2242                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2243         if (ret)
2244                 return ret;
2245
2246         reg &= ~clr;
2247         reg |= set;
2248         writel_relaxed(reg | GBPA_UPDATE, gbpa);
2249         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2250                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2251
2252         if (ret)
2253                 dev_err(smmu->dev, "GBPA not responding to update\n");
2254         return ret;
2255 }
2256
2257 static void arm_smmu_free_msis(void *data)
2258 {
2259         struct device *dev = data;
2260         platform_msi_domain_free_irqs(dev);
2261 }
2262
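     /*
      * Program the doorbell registers for one MSI vector: cfg[] holds the
      * address, data and memory-attribute register offsets for the queue
      * or GERROR vector selected by desc->platform.msi_index.
      */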
2263 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2264 {
2265         phys_addr_t doorbell;
2266         struct device *dev = msi_desc_to_dev(desc);
2267         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2268         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2269
2270         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2271         doorbell &= MSI_CFG0_ADDR_MASK;
2272
2273         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2274         writel_relaxed(msg->data, smmu->base + cfg[1]);
2275         writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2276 }
2277
2278 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2279 {
2280         struct msi_desc *desc;
2281         int ret, nvec = ARM_SMMU_MAX_MSIS;
2282         struct device *dev = smmu->dev;
2283
2284         /* Clear the MSI address regs */
2285         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2286         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2287
2288         if (smmu->features & ARM_SMMU_FEAT_PRI)
2289                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2290         else
2291                 nvec--;
2292
2293         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2294                 return;
2295
2296         if (!dev->msi_domain) {
2297                 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2298                 return;
2299         }
2300
2301         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2302         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2303         if (ret) {
2304                 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2305                 return;
2306         }
2307
2308         for_each_msi_entry(desc, dev) {
2309                 switch (desc->platform.msi_index) {
2310                 case EVTQ_MSI_INDEX:
2311                         smmu->evtq.q.irq = desc->irq;
2312                         break;
2313                 case GERROR_MSI_INDEX:
2314                         smmu->gerr_irq = desc->irq;
2315                         break;
2316                 case PRIQ_MSI_INDEX:
2317                         smmu->priq.q.irq = desc->irq;
2318                         break;
2319                 default:        /* Unknown */
2320                         continue;
2321                 }
2322         }
2323
2324         /* Add callback to free MSIs on teardown */
2325         devm_add_action(dev, arm_smmu_free_msis, dev);
2326 }
2327
2328 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2329 {
2330         int irq, ret;
2331
2332         arm_smmu_setup_msis(smmu);
2333
2334         /* Request interrupt lines */
2335         irq = smmu->evtq.q.irq;
2336         if (irq) {
2337                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2338                                                 arm_smmu_evtq_thread,
2339                                                 IRQF_ONESHOT,
2340                                                 "arm-smmu-v3-evtq", smmu);
2341                 if (ret < 0)
2342                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2343         } else {
2344                 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2345         }
2346
2347         irq = smmu->gerr_irq;
2348         if (irq) {
2349                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2350                                        0, "arm-smmu-v3-gerror", smmu);
2351                 if (ret < 0)
2352                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2353         } else {
2354                 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2355         }
2356
2357         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2358                 irq = smmu->priq.q.irq;
2359                 if (irq) {
2360                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2361                                                         arm_smmu_priq_thread,
2362                                                         IRQF_ONESHOT,
2363                                                         "arm-smmu-v3-priq",
2364                                                         smmu);
2365                         if (ret < 0)
2366                                 dev_warn(smmu->dev,
2367                                          "failed to enable priq irq\n");
2368                 } else {
2369                         dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2370                 }
2371         }
2372 }
2373
2374 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2375 {
2376         int ret, irq;
2377         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2378
2379         /* Disable IRQs first */
2380         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2381                                       ARM_SMMU_IRQ_CTRLACK);
2382         if (ret) {
2383                 dev_err(smmu->dev, "failed to disable irqs\n");
2384                 return ret;
2385         }
2386
2387         irq = smmu->combined_irq;
2388         if (irq) {
2389                 /*
2390                  * Cavium ThunderX2 implementation doesn't support unique irq
2391                  * lines. Use a single irq line for all the SMMUv3 interrupts.
2392                  */
2393                 ret = devm_request_threaded_irq(smmu->dev, irq,
2394                                         arm_smmu_combined_irq_handler,
2395                                         arm_smmu_combined_irq_thread,
2396                                         IRQF_ONESHOT,
2397                                         "arm-smmu-v3-combined-irq", smmu);
2398                 if (ret < 0)
2399                         dev_warn(smmu->dev, "failed to enable combined irq\n");
2400         } else
2401                 arm_smmu_setup_unique_irqs(smmu);
2402
2403         if (smmu->features & ARM_SMMU_FEAT_PRI)
2404                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2405
2406         /* Enable interrupt generation on the SMMU */
2407         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2408                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2409         if (ret)
2410                 dev_warn(smmu->dev, "failed to enable irqs\n");
2411
2412         return 0;
2413 }
2414
2415 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2416 {
2417         int ret;
2418
2419         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2420         if (ret)
2421                 dev_err(smmu->dev, "failed to clear cr0\n");
2422
2423         return ret;
2424 }
2425
2426 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2427 {
2428         int ret;
2429         u32 reg, enables;
2430         struct arm_smmu_cmdq_ent cmd;
2431
2432         /* Clear CR0 and sync (disables SMMU and queue processing) */
2433         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2434         if (reg & CR0_SMMUEN) {
2435                 if (is_kdump_kernel()) {
2436                         arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2437                         arm_smmu_device_disable(smmu);
2438                         return -EBUSY;
2439                 }
2440
2441                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2442         }
2443
2444         ret = arm_smmu_device_disable(smmu);
2445         if (ret)
2446                 return ret;
2447
2448         /* CR1 (table and queue memory attributes) */
2449         reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2450               FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2451               FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2452               FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2453               FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2454               FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2455         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2456
2457         /* CR2 (random crap) */
2458         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2459         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2460
2461         /* Stream table */
2462         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2463                        smmu->base + ARM_SMMU_STRTAB_BASE);
2464         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2465                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2466
2467         /* Command queue */
2468         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2469         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2470         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2471
2472         enables = CR0_CMDQEN;
2473         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2474                                       ARM_SMMU_CR0ACK);
2475         if (ret) {
2476                 dev_err(smmu->dev, "failed to enable command queue\n");
2477                 return ret;
2478         }
2479
2480         /* Invalidate any cached configuration */
2481         cmd.opcode = CMDQ_OP_CFGI_ALL;
2482         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2483         arm_smmu_cmdq_issue_sync(smmu);
2484
2485         /* Invalidate any stale TLB entries */
2486         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2487                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2488                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2489         }
2490
2491         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2492         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2493         arm_smmu_cmdq_issue_sync(smmu);
2494
2495         /* Event queue */
2496         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2497         writel_relaxed(smmu->evtq.q.prod,
2498                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2499         writel_relaxed(smmu->evtq.q.cons,
2500                        arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2501
2502         enables |= CR0_EVTQEN;
2503         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2504                                       ARM_SMMU_CR0ACK);
2505         if (ret) {
2506                 dev_err(smmu->dev, "failed to enable event queue\n");
2507                 return ret;
2508         }
2509
2510         /* PRI queue */
2511         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2512                 writeq_relaxed(smmu->priq.q.q_base,
2513                                smmu->base + ARM_SMMU_PRIQ_BASE);
2514                 writel_relaxed(smmu->priq.q.prod,
2515                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2516                 writel_relaxed(smmu->priq.q.cons,
2517                                arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2518
2519                 enables |= CR0_PRIQEN;
2520                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2521                                               ARM_SMMU_CR0ACK);
2522                 if (ret) {
2523                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2524                         return ret;
2525                 }
2526         }
2527
2528         ret = arm_smmu_setup_irqs(smmu);
2529         if (ret) {
2530                 dev_err(smmu->dev, "failed to setup irqs\n");
2531                 return ret;
2532         }
2533
2534
2535         /* Enable the SMMU interface, or ensure bypass */
2536         if (!bypass || disable_bypass) {
2537                 enables |= CR0_SMMUEN;
2538         } else {
2539                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2540                 if (ret)
2541                         return ret;
2542         }
2543         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2544                                       ARM_SMMU_CR0ACK);
2545         if (ret) {
2546                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2547                 return ret;
2548         }
2549
2550         return 0;
2551 }
2552
2553 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2554 {
2555         u32 reg;
2556         bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2557
2558         /* IDR0 */
2559         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2560
2561         /* 2-level structures */
2562         if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2563                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2564
2565         if (reg & IDR0_CD2L)
2566                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2567
2568         /*
2569          * Translation table endianness.
2570          * We currently require the same endianness as the CPU, but this
2571          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2572          */
2573         switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2574         case IDR0_TTENDIAN_MIXED:
2575                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2576                 break;
2577 #ifdef __BIG_ENDIAN
2578         case IDR0_TTENDIAN_BE:
2579                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2580                 break;
2581 #else
2582         case IDR0_TTENDIAN_LE:
2583                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2584                 break;
2585 #endif
2586         default:
2587                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2588                 return -ENXIO;
2589         }
2590
2591         /* Boolean feature flags */
2592         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2593                 smmu->features |= ARM_SMMU_FEAT_PRI;
2594
2595         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2596                 smmu->features |= ARM_SMMU_FEAT_ATS;
2597
2598         if (reg & IDR0_SEV)
2599                 smmu->features |= ARM_SMMU_FEAT_SEV;
2600
2601         if (reg & IDR0_MSI)
2602                 smmu->features |= ARM_SMMU_FEAT_MSI;
2603
2604         if (reg & IDR0_HYP)
2605                 smmu->features |= ARM_SMMU_FEAT_HYP;
2606
2607         /*
2608          * The coherency feature as set by FW is used in preference to the ID
2609          * register, but warn on mismatch.
2610          */
2611         if (!!(reg & IDR0_COHACC) != coherent)
2612                 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2613                          coherent ? "true" : "false");
2614
2615         switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2616         case IDR0_STALL_MODEL_FORCE:
2617                 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2618                 /* Fallthrough */
2619         case IDR0_STALL_MODEL_STALL:
2620                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2621         }
2622
2623         if (reg & IDR0_S1P)
2624                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2625
2626         if (reg & IDR0_S2P)
2627                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2628
2629         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2630                 dev_err(smmu->dev, "no translation support!\n");
2631                 return -ENXIO;
2632         }
2633
2634         /* We only support the AArch64 table format at present */
2635         switch (FIELD_GET(IDR0_TTF, reg)) {
2636         case IDR0_TTF_AARCH32_64:
2637                 smmu->ias = 40;
2638                 /* Fallthrough */
2639         case IDR0_TTF_AARCH64:
2640                 break;
2641         default:
2642                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2643                 return -ENXIO;
2644         }
2645
2646         /* ASID/VMID sizes */
2647         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2648         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2649
2650         /* IDR1 */
2651         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2652         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2653                 dev_err(smmu->dev, "embedded implementation not supported\n");
2654                 return -ENXIO;
2655         }
2656
2657         /* Queue sizes, capped at 4k */
2658         smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2659                                          FIELD_GET(IDR1_CMDQS, reg));
2660         if (!smmu->cmdq.q.max_n_shift) {
2661                 /* Odd alignment restrictions on the base, so ignore for now */
2662                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2663                 return -ENXIO;
2664         }
2665
2666         smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2667                                          FIELD_GET(IDR1_EVTQS, reg));
2668         smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2669                                          FIELD_GET(IDR1_PRIQS, reg));
2670
2671         /* SID/SSID sizes */
2672         smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2673         smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2674
2675         /*
2676          * If the SMMU supports fewer bits than would fill a single L2 stream
2677          * table, use a linear table instead.
2678          */
2679         if (smmu->sid_bits <= STRTAB_SPLIT)
2680                 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2681
2682         /* IDR5 */
2683         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2684
2685         /* Maximum number of outstanding stalls */
2686         smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2687
2688         /* Page sizes */
2689         if (reg & IDR5_GRAN64K)
2690                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2691         if (reg & IDR5_GRAN16K)
2692                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2693         if (reg & IDR5_GRAN4K)
2694                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2695
2696         /* Input address size */
2697         if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2698                 smmu->features |= ARM_SMMU_FEAT_VAX;
2699
2700         /* Output address size */
2701         switch (FIELD_GET(IDR5_OAS, reg)) {
2702         case IDR5_OAS_32_BIT:
2703                 smmu->oas = 32;
2704                 break;
2705         case IDR5_OAS_36_BIT:
2706                 smmu->oas = 36;
2707                 break;
2708         case IDR5_OAS_40_BIT:
2709                 smmu->oas = 40;
2710                 break;
2711         case IDR5_OAS_42_BIT:
2712                 smmu->oas = 42;
2713                 break;
2714         case IDR5_OAS_44_BIT:
2715                 smmu->oas = 44;
2716                 break;
2717         case IDR5_OAS_52_BIT:
2718                 smmu->oas = 52;
2719                 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2720                 break;
2721         default:
2722                 dev_info(smmu->dev,
2723                         "unknown output address size. Truncating to 48-bit\n");
2724                 /* Fallthrough */
2725         case IDR5_OAS_48_BIT:
2726                 smmu->oas = 48;
2727         }
2728
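             /*
              * arm_smmu_ops is shared by all SMMUs in the system: the first
              * device probed narrows pgsize_bitmap from -1UL, and any later
              * devices OR in their own supported page sizes.
              */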
2729         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2730                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2731         else
2732                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2733
2734         /* Set the DMA mask for our table walker */
2735         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2736                 dev_warn(smmu->dev,
2737                          "failed to set DMA mask for table walker\n");
2738
2739         smmu->ias = max(smmu->ias, smmu->oas);
2740
2741         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2742                  smmu->ias, smmu->oas, smmu->features);
2743         return 0;
2744 }
2745
2746 #ifdef CONFIG_ACPI
2747 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2748 {
2749         switch (model) {
2750         case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2751                 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2752                 break;
2753         case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2754                 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2755                 break;
2756         }
2757
2758         dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2759 }
2760
2761 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2762                                       struct arm_smmu_device *smmu)
2763 {
2764         struct acpi_iort_smmu_v3 *iort_smmu;
2765         struct device *dev = smmu->dev;
2766         struct acpi_iort_node *node;
2767
2768         node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2769
2770         /* Retrieve SMMUv3 specific data */
2771         iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2772
2773         acpi_smmu_get_options(iort_smmu->model, smmu);
2774
2775         if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2776                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2777
2778         return 0;
2779 }
2780 #else
2781 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2782                                              struct arm_smmu_device *smmu)
2783 {
2784         return -ENODEV;
2785 }
2786 #endif
2787
2788 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2789                                     struct arm_smmu_device *smmu)
2790 {
2791         struct device *dev = &pdev->dev;
2792         u32 cells;
2793         int ret = -EINVAL;
2794
2795         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2796                 dev_err(dev, "missing #iommu-cells property\n");
2797         else if (cells != 1)
2798                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2799         else
2800                 ret = 0;
2801
2802         parse_driver_options(smmu);
2803
2804         if (of_dma_is_coherent(dev->of_node))
2805                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2806
2807         return ret;
2808 }
2809
2810 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2811 {
2812         if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2813                 return SZ_64K;
2814         else
2815                 return SZ_128K;
2816 }
2817
2818 static int arm_smmu_device_probe(struct platform_device *pdev)
2819 {
2820         int irq, ret;
2821         struct resource *res;
2822         resource_size_t ioaddr;
2823         struct arm_smmu_device *smmu;
2824         struct device *dev = &pdev->dev;
2825         bool bypass;
2826
2827         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2828         if (!smmu) {
2829                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2830                 return -ENOMEM;
2831         }
2832         smmu->dev = dev;
2833
2834         if (dev->of_node) {
2835                 ret = arm_smmu_device_dt_probe(pdev, smmu);
2836         } else {
2837                 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2838                 if (ret == -ENODEV)
2839                         return ret;
2840         }
2841
2842         /* Set bypass mode according to firmware probing result */
2843         bypass = !!ret;
2844
2845         /* Base address */
2846         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2847         if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2848                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2849                 return -EINVAL;
2850         }
2851         ioaddr = res->start;
2852
2853         smmu->base = devm_ioremap_resource(dev, res);
2854         if (IS_ERR(smmu->base))
2855                 return PTR_ERR(smmu->base);
2856
2857         /* Interrupt lines */
2858
2859         irq = platform_get_irq_byname(pdev, "combined");
2860         if (irq > 0)
2861                 smmu->combined_irq = irq;
2862         else {
2863                 irq = platform_get_irq_byname(pdev, "eventq");
2864                 if (irq > 0)
2865                         smmu->evtq.q.irq = irq;
2866
2867                 irq = platform_get_irq_byname(pdev, "priq");
2868                 if (irq > 0)
2869                         smmu->priq.q.irq = irq;
2870
2871                 irq = platform_get_irq_byname(pdev, "gerror");
2872                 if (irq > 0)
2873                         smmu->gerr_irq = irq;
2874         }
2875         /* Probe the h/w */
2876         ret = arm_smmu_device_hw_probe(smmu);
2877         if (ret)
2878                 return ret;
2879
2880         /* Initialise in-memory data structures */
2881         ret = arm_smmu_init_structures(smmu);
2882         if (ret)
2883                 return ret;
2884
2885         /* Record our private device structure */
2886         platform_set_drvdata(pdev, smmu);
2887
2888         /* Reset the device */
2889         ret = arm_smmu_device_reset(smmu, bypass);
2890         if (ret)
2891                 return ret;
2892
2893         /* And we're up. Go go go! */
2894         ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2895                                      "smmu3.%pa", &ioaddr);
2896         if (ret)
2897                 return ret;
2898
2899         iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2900         iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2901
2902         ret = iommu_device_register(&smmu->iommu);
2903         if (ret) {
2904                 dev_err(dev, "Failed to register iommu\n");
2905                 return ret;
2906         }
2907
2908 #ifdef CONFIG_PCI
2909         if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2910                 pci_request_acs();
2911                 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2912                 if (ret)
2913                         return ret;
2914         }
2915 #endif
2916 #ifdef CONFIG_ARM_AMBA
2917         if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2918                 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2919                 if (ret)
2920                         return ret;
2921         }
2922 #endif
2923         if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2924                 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2925                 if (ret)
2926                         return ret;
2927         }
2928         return 0;
2929 }
2930
2931 static int arm_smmu_device_remove(struct platform_device *pdev)
2932 {
2933         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2934
2935         arm_smmu_device_disable(smmu);
2936
2937         return 0;
2938 }
2939
2940 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2941 {
2942         arm_smmu_device_remove(pdev);
2943 }
2944
2945 static const struct of_device_id arm_smmu_of_match[] = {
2946         { .compatible = "arm,smmu-v3", },
2947         { },
2948 };
2949 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2950
2951 static struct platform_driver arm_smmu_driver = {
2952         .driver = {
2953                 .name           = "arm-smmu-v3",
2954                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2955         },
2956         .probe  = arm_smmu_device_probe,
2957         .remove = arm_smmu_device_remove,
2958         .shutdown = arm_smmu_device_shutdown,
2959 };
2960 module_platform_driver(arm_smmu_driver);
2961
2962 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2963 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2964 MODULE_LICENSE("GPL v2");