[linux-2.6-block.git] drivers/iommu/arm-smmu-v3.c @ 52860bcf80f23679c9bbc108e64a87eed3b11ff3
1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/iommu.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/msi.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_iommu.h>
34 #include <linux/of_platform.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37
38 #include <linux/amba/bus.h>
39
40 #include "io-pgtable.h"
41
42 /* MMIO registers */
43 #define ARM_SMMU_IDR0                   0x0
44 #define IDR0_ST_LVL_SHIFT               27
45 #define IDR0_ST_LVL_MASK                0x3
46 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
47 #define IDR0_STALL_MODEL_SHIFT          24
48 #define IDR0_STALL_MODEL_MASK           0x3
49 #define IDR0_STALL_MODEL_STALL          (0 << IDR0_STALL_MODEL_SHIFT)
50 #define IDR0_STALL_MODEL_FORCE          (2 << IDR0_STALL_MODEL_SHIFT)
51 #define IDR0_TTENDIAN_SHIFT             21
52 #define IDR0_TTENDIAN_MASK              0x3
53 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
54 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
55 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
56 #define IDR0_CD2L                       (1 << 19)
57 #define IDR0_VMID16                     (1 << 18)
58 #define IDR0_PRI                        (1 << 16)
59 #define IDR0_SEV                        (1 << 14)
60 #define IDR0_MSI                        (1 << 13)
61 #define IDR0_ASID16                     (1 << 12)
62 #define IDR0_ATS                        (1 << 10)
63 #define IDR0_HYP                        (1 << 9)
64 #define IDR0_COHACC                     (1 << 4)
65 #define IDR0_TTF_SHIFT                  2
66 #define IDR0_TTF_MASK                   0x3
67 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
68 #define IDR0_TTF_AARCH32_64             (3 << IDR0_TTF_SHIFT)
69 #define IDR0_S1P                        (1 << 1)
70 #define IDR0_S2P                        (1 << 0)
71
72 #define ARM_SMMU_IDR1                   0x4
73 #define IDR1_TABLES_PRESET              (1 << 30)
74 #define IDR1_QUEUES_PRESET              (1 << 29)
75 #define IDR1_REL                        (1 << 28)
76 #define IDR1_CMDQ_SHIFT                 21
77 #define IDR1_CMDQ_MASK                  0x1f
78 #define IDR1_EVTQ_SHIFT                 16
79 #define IDR1_EVTQ_MASK                  0x1f
80 #define IDR1_PRIQ_SHIFT                 11
81 #define IDR1_PRIQ_MASK                  0x1f
82 #define IDR1_SSID_SHIFT                 6
83 #define IDR1_SSID_MASK                  0x1f
84 #define IDR1_SID_SHIFT                  0
85 #define IDR1_SID_MASK                   0x3f
86
87 #define ARM_SMMU_IDR5                   0x14
88 #define IDR5_STALL_MAX_SHIFT            16
89 #define IDR5_STALL_MAX_MASK             0xffff
90 #define IDR5_GRAN64K                    (1 << 6)
91 #define IDR5_GRAN16K                    (1 << 5)
92 #define IDR5_GRAN4K                     (1 << 4)
93 #define IDR5_OAS_SHIFT                  0
94 #define IDR5_OAS_MASK                   0x7
95 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
96 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
97 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
98 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
99 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
100 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
101
102 #define ARM_SMMU_CR0                    0x20
103 #define CR0_CMDQEN                      (1 << 3)
104 #define CR0_EVTQEN                      (1 << 2)
105 #define CR0_PRIQEN                      (1 << 1)
106 #define CR0_SMMUEN                      (1 << 0)
107
108 #define ARM_SMMU_CR0ACK                 0x24
109
110 #define ARM_SMMU_CR1                    0x28
111 #define CR1_SH_NSH                      0
112 #define CR1_SH_OSH                      2
113 #define CR1_SH_ISH                      3
114 #define CR1_CACHE_NC                    0
115 #define CR1_CACHE_WB                    1
116 #define CR1_CACHE_WT                    2
117 #define CR1_TABLE_SH_SHIFT              10
118 #define CR1_TABLE_OC_SHIFT              8
119 #define CR1_TABLE_IC_SHIFT              6
120 #define CR1_QUEUE_SH_SHIFT              4
121 #define CR1_QUEUE_OC_SHIFT              2
122 #define CR1_QUEUE_IC_SHIFT              0
123
124 #define ARM_SMMU_CR2                    0x2c
125 #define CR2_PTM                         (1 << 2)
126 #define CR2_RECINVSID                   (1 << 1)
127 #define CR2_E2H                         (1 << 0)
128
129 #define ARM_SMMU_GBPA                   0x44
130 #define GBPA_ABORT                      (1 << 20)
131 #define GBPA_UPDATE                     (1 << 31)
132
133 #define ARM_SMMU_IRQ_CTRL               0x50
134 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
135 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
136 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
137
138 #define ARM_SMMU_IRQ_CTRLACK            0x54
139
140 #define ARM_SMMU_GERROR                 0x60
141 #define GERROR_SFM_ERR                  (1 << 8)
142 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
143 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
144 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
145 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
146 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
147 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
148 #define GERROR_CMDQ_ERR                 (1 << 0)
149 #define GERROR_ERR_MASK                 0xfd
150
151 #define ARM_SMMU_GERRORN                0x64
152
153 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
154 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
155 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
156
157 #define ARM_SMMU_STRTAB_BASE            0x80
158 #define STRTAB_BASE_RA                  (1UL << 62)
159 #define STRTAB_BASE_ADDR_SHIFT          6
160 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
161
162 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
163 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
164 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
165 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
166 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
167 #define STRTAB_BASE_CFG_FMT_SHIFT       16
168 #define STRTAB_BASE_CFG_FMT_MASK        0x3
169 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
170 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
171
172 #define ARM_SMMU_CMDQ_BASE              0x90
173 #define ARM_SMMU_CMDQ_PROD              0x98
174 #define ARM_SMMU_CMDQ_CONS              0x9c
175
176 #define ARM_SMMU_EVTQ_BASE              0xa0
177 #define ARM_SMMU_EVTQ_PROD              0x100a8
178 #define ARM_SMMU_EVTQ_CONS              0x100ac
179 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
180 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
181 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
182
183 #define ARM_SMMU_PRIQ_BASE              0xc0
184 #define ARM_SMMU_PRIQ_PROD              0x100c8
185 #define ARM_SMMU_PRIQ_CONS              0x100cc
186 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
187 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
188 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
189
190 /* Common MSI config fields */
191 #define MSI_CFG0_ADDR_SHIFT             2
192 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
193 #define MSI_CFG2_SH_SHIFT               4
194 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
195 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
196 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
197 #define MSI_CFG2_MEMATTR_SHIFT          0
198 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
199
200 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
201 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
202 #define Q_OVERFLOW_FLAG                 (1 << 31)
203 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
204 #define Q_ENT(q, p)                     ((q)->base +                    \
205                                          Q_IDX(q, p) * (q)->ent_dwords)
206
207 #define Q_BASE_RWA                      (1UL << 62)
208 #define Q_BASE_ADDR_SHIFT               5
209 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
210 #define Q_BASE_LOG2SIZE_SHIFT           0
211 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
212
213 /*
214  * Stream table.
215  *
216  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
217  * 2lvl: 128k L1 entries,
218  *       256 lazy entries per table (each table covers a PCI bus)
219  */
220 #define STRTAB_L1_SZ_SHIFT              20
221 #define STRTAB_SPLIT                    8
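/* The bottom STRTAB_SPLIT bits of a SID index the L2 table; the rest select the L1 descriptor */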
222
223 #define STRTAB_L1_DESC_DWORDS           1
224 #define STRTAB_L1_DESC_SPAN_SHIFT       0
225 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
226 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
227 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
228
229 #define STRTAB_STE_DWORDS               8
230 #define STRTAB_STE_0_V                  (1UL << 0)
231 #define STRTAB_STE_0_CFG_SHIFT          1
232 #define STRTAB_STE_0_CFG_MASK           0x7UL
233 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
234 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
235 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
236 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
237
238 #define STRTAB_STE_0_S1FMT_SHIFT        4
239 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
240 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
241 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
242 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
243 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
244
245 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
246 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
247 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
248 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
249 #define STRTAB_STE_1_S1C_SH_NSH         0UL
250 #define STRTAB_STE_1_S1C_SH_OSH         2UL
251 #define STRTAB_STE_1_S1C_SH_ISH         3UL
252 #define STRTAB_STE_1_S1CIR_SHIFT        2
253 #define STRTAB_STE_1_S1COR_SHIFT        4
254 #define STRTAB_STE_1_S1CSH_SHIFT        6
255
256 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
257
258 #define STRTAB_STE_1_EATS_ABT           0UL
259 #define STRTAB_STE_1_EATS_TRANS         1UL
260 #define STRTAB_STE_1_EATS_S1CHK         2UL
261 #define STRTAB_STE_1_EATS_SHIFT         28
262
263 #define STRTAB_STE_1_STRW_NSEL1         0UL
264 #define STRTAB_STE_1_STRW_EL2           2UL
265 #define STRTAB_STE_1_STRW_SHIFT         30
266
267 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
268 #define STRTAB_STE_1_SHCFG_SHIFT        44
269
270 #define STRTAB_STE_2_S2VMID_SHIFT       0
271 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
272 #define STRTAB_STE_2_VTCR_SHIFT         32
273 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
274 #define STRTAB_STE_2_S2AA64             (1UL << 51)
275 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
276 #define STRTAB_STE_2_S2PTW              (1UL << 54)
277 #define STRTAB_STE_2_S2R                (1UL << 58)
278
279 #define STRTAB_STE_3_S2TTB_SHIFT        4
280 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
281
282 /* Context descriptor (stage-1 only) */
283 #define CTXDESC_CD_DWORDS               8
284 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
285 #define ARM64_TCR_T0SZ_SHIFT            0
286 #define ARM64_TCR_T0SZ_MASK             0x1fUL
287 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
288 #define ARM64_TCR_TG0_SHIFT             14
289 #define ARM64_TCR_TG0_MASK              0x3UL
290 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
291 #define ARM64_TCR_IRGN0_SHIFT           8
292 #define ARM64_TCR_IRGN0_MASK            0x3UL
293 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
294 #define ARM64_TCR_ORGN0_SHIFT           10
295 #define ARM64_TCR_ORGN0_MASK            0x3UL
296 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
297 #define ARM64_TCR_SH0_SHIFT             12
298 #define ARM64_TCR_SH0_MASK              0x3UL
299 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
300 #define ARM64_TCR_EPD0_SHIFT            7
301 #define ARM64_TCR_EPD0_MASK             0x1UL
302 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
303 #define ARM64_TCR_EPD1_SHIFT            23
304 #define ARM64_TCR_EPD1_MASK             0x1UL
305
306 #define CTXDESC_CD_0_ENDI               (1UL << 15)
307 #define CTXDESC_CD_0_V                  (1UL << 31)
308
309 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
310 #define ARM64_TCR_IPS_SHIFT             32
311 #define ARM64_TCR_IPS_MASK              0x7UL
312 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
313 #define ARM64_TCR_TBI0_SHIFT            37
314 #define ARM64_TCR_TBI0_MASK             0x1UL
315
316 #define CTXDESC_CD_0_AA64               (1UL << 41)
317 #define CTXDESC_CD_0_R                  (1UL << 45)
318 #define CTXDESC_CD_0_A                  (1UL << 46)
319 #define CTXDESC_CD_0_ASET_SHIFT         47
320 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
321 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
322 #define CTXDESC_CD_0_ASID_SHIFT         48
323 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
324
325 #define CTXDESC_CD_1_TTB0_SHIFT         4
326 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
327
328 #define CTXDESC_CD_3_MAIR_SHIFT         0
329
330 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
331 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
332         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
333          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
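/* e.g. ARM_SMMU_TCR2CD(tcr, TG0) moves TG0 from bit 14 of the CPU TCR to bit 6 of CD dword 0 */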
334
335 /* Command queue */
336 #define CMDQ_ENT_DWORDS                 2
337 #define CMDQ_MAX_SZ_SHIFT               8
338
339 #define CMDQ_ERR_SHIFT                  24
340 #define CMDQ_ERR_MASK                   0x7f
341 #define CMDQ_ERR_CERROR_NONE_IDX        0
342 #define CMDQ_ERR_CERROR_ILL_IDX         1
343 #define CMDQ_ERR_CERROR_ABT_IDX         2
344
345 #define CMDQ_0_OP_SHIFT                 0
346 #define CMDQ_0_OP_MASK                  0xffUL
347 #define CMDQ_0_SSV                      (1UL << 11)
348
349 #define CMDQ_PREFETCH_0_SID_SHIFT       32
350 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
351 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
352
353 #define CMDQ_CFGI_0_SID_SHIFT           32
354 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
355 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
356 #define CMDQ_CFGI_1_RANGE_SHIFT         0
357 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
358
359 #define CMDQ_TLBI_0_VMID_SHIFT          32
360 #define CMDQ_TLBI_0_ASID_SHIFT          48
361 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
362 #define CMDQ_TLBI_1_VA_MASK             ~0xfffUL
363 #define CMDQ_TLBI_1_IPA_MASK            0xfffffffff000UL
364
365 #define CMDQ_PRI_0_SSID_SHIFT           12
366 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
367 #define CMDQ_PRI_0_SID_SHIFT            32
368 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
369 #define CMDQ_PRI_1_GRPID_SHIFT          0
370 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
371 #define CMDQ_PRI_1_RESP_SHIFT           12
372 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
373 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
374 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
375
376 #define CMDQ_SYNC_0_CS_SHIFT            12
377 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
378 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
379
380 /* Event queue */
381 #define EVTQ_ENT_DWORDS                 4
382 #define EVTQ_MAX_SZ_SHIFT               7
383
384 #define EVTQ_0_ID_SHIFT                 0
385 #define EVTQ_0_ID_MASK                  0xffUL
386
387 /* PRI queue */
388 #define PRIQ_ENT_DWORDS                 2
389 #define PRIQ_MAX_SZ_SHIFT               8
390
391 #define PRIQ_0_SID_SHIFT                0
392 #define PRIQ_0_SID_MASK                 0xffffffffUL
393 #define PRIQ_0_SSID_SHIFT               32
394 #define PRIQ_0_SSID_MASK                0xfffffUL
395 #define PRIQ_0_PERM_PRIV                (1UL << 58)
396 #define PRIQ_0_PERM_EXEC                (1UL << 59)
397 #define PRIQ_0_PERM_READ                (1UL << 60)
398 #define PRIQ_0_PERM_WRITE               (1UL << 61)
399 #define PRIQ_0_PRG_LAST                 (1UL << 62)
400 #define PRIQ_0_SSID_V                   (1UL << 63)
401
402 #define PRIQ_1_PRG_IDX_SHIFT            0
403 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
404 #define PRIQ_1_ADDR_SHIFT               12
405 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
406
407 /* High-level queue structures */
408 #define ARM_SMMU_POLL_TIMEOUT_US        100
409
410 static bool disable_bypass;
411 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
412 MODULE_PARM_DESC(disable_bypass,
413         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
414
415 enum pri_resp {
416         PRI_RESP_DENY,
417         PRI_RESP_FAIL,
418         PRI_RESP_SUCC,
419 };
420
421 enum arm_smmu_msi_index {
422         EVTQ_MSI_INDEX,
423         GERROR_MSI_INDEX,
424         PRIQ_MSI_INDEX,
425         ARM_SMMU_MAX_MSIS,
426 };
427
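/* Per-interrupt-source MSI register offsets: CFG0 (address), CFG1 (data), CFG2 (memory attributes) */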
428 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
429         [EVTQ_MSI_INDEX] = {
430                 ARM_SMMU_EVTQ_IRQ_CFG0,
431                 ARM_SMMU_EVTQ_IRQ_CFG1,
432                 ARM_SMMU_EVTQ_IRQ_CFG2,
433         },
434         [GERROR_MSI_INDEX] = {
435                 ARM_SMMU_GERROR_IRQ_CFG0,
436                 ARM_SMMU_GERROR_IRQ_CFG1,
437                 ARM_SMMU_GERROR_IRQ_CFG2,
438         },
439         [PRIQ_MSI_INDEX] = {
440                 ARM_SMMU_PRIQ_IRQ_CFG0,
441                 ARM_SMMU_PRIQ_IRQ_CFG1,
442                 ARM_SMMU_PRIQ_IRQ_CFG2,
443         },
444 };
445
446 struct arm_smmu_cmdq_ent {
447         /* Common fields */
448         u8                              opcode;
449         bool                            substream_valid;
450
451         /* Command-specific fields */
452         union {
453                 #define CMDQ_OP_PREFETCH_CFG    0x1
454                 struct {
455                         u32                     sid;
456                         u8                      size;
457                         u64                     addr;
458                 } prefetch;
459
460                 #define CMDQ_OP_CFGI_STE        0x3
461                 #define CMDQ_OP_CFGI_ALL        0x4
462                 struct {
463                         u32                     sid;
464                         union {
465                                 bool            leaf;
466                                 u8              span;
467                         };
468                 } cfgi;
469
470                 #define CMDQ_OP_TLBI_NH_ASID    0x11
471                 #define CMDQ_OP_TLBI_NH_VA      0x12
472                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
473                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
474                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
475                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
476                 struct {
477                         u16                     asid;
478                         u16                     vmid;
479                         bool                    leaf;
480                         u64                     addr;
481                 } tlbi;
482
483                 #define CMDQ_OP_PRI_RESP        0x41
484                 struct {
485                         u32                     sid;
486                         u32                     ssid;
487                         u16                     grpid;
488                         enum pri_resp           resp;
489                 } pri;
490
491                 #define CMDQ_OP_CMD_SYNC        0x46
492         };
493 };
494
495 struct arm_smmu_queue {
496         int                             irq; /* Wired interrupt */
497
498         __le64                          *base;
499         dma_addr_t                      base_dma;
500         u64                             q_base;
501
502         size_t                          ent_dwords;
503         u32                             max_n_shift;
504         u32                             prod;
505         u32                             cons;
506
507         u32 __iomem                     *prod_reg;
508         u32 __iomem                     *cons_reg;
509 };
510
511 struct arm_smmu_cmdq {
512         struct arm_smmu_queue           q;
513         spinlock_t                      lock;
514 };
515
516 struct arm_smmu_evtq {
517         struct arm_smmu_queue           q;
518         u32                             max_stalls;
519 };
520
521 struct arm_smmu_priq {
522         struct arm_smmu_queue           q;
523 };
524
525 /* High-level stream table and context descriptor structures */
526 struct arm_smmu_strtab_l1_desc {
527         u8                              span;
528
529         __le64                          *l2ptr;
530         dma_addr_t                      l2ptr_dma;
531 };
532
533 struct arm_smmu_s1_cfg {
534         __le64                          *cdptr;
535         dma_addr_t                      cdptr_dma;
536
537         struct arm_smmu_ctx_desc {
538                 u16     asid;
539                 u64     ttbr;
540                 u64     tcr;
541                 u64     mair;
542         }                               cd;
543 };
544
545 struct arm_smmu_s2_cfg {
546         u16                             vmid;
547         u64                             vttbr;
548         u64                             vtcr;
549 };
550
551 struct arm_smmu_strtab_ent {
552         bool                            valid;
553
554         bool                            bypass; /* Overrides s1/s2 config */
555         struct arm_smmu_s1_cfg          *s1_cfg;
556         struct arm_smmu_s2_cfg          *s2_cfg;
557 };
558
559 struct arm_smmu_strtab_cfg {
560         __le64                          *strtab;
561         dma_addr_t                      strtab_dma;
562         struct arm_smmu_strtab_l1_desc  *l1_desc;
563         unsigned int                    num_l1_ents;
564
565         u64                             strtab_base;
566         u32                             strtab_base_cfg;
567 };
568
569 /* An SMMUv3 instance */
570 struct arm_smmu_device {
571         struct device                   *dev;
572         void __iomem                    *base;
573
574 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
575 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
576 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
577 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
578 #define ARM_SMMU_FEAT_PRI               (1 << 4)
579 #define ARM_SMMU_FEAT_ATS               (1 << 5)
580 #define ARM_SMMU_FEAT_SEV               (1 << 6)
581 #define ARM_SMMU_FEAT_MSI               (1 << 7)
582 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
583 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
584 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
585 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
586 #define ARM_SMMU_FEAT_HYP               (1 << 12)
587         u32                             features;
588
589 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
590         u32                             options;
591
592         struct arm_smmu_cmdq            cmdq;
593         struct arm_smmu_evtq            evtq;
594         struct arm_smmu_priq            priq;
595
596         int                             gerr_irq;
597
598         unsigned long                   ias; /* IPA */
599         unsigned long                   oas; /* PA */
600         unsigned long                   pgsize_bitmap;
601
602 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
603         unsigned int                    asid_bits;
604         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
605
606 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
607         unsigned int                    vmid_bits;
608         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
609
610         unsigned int                    ssid_bits;
611         unsigned int                    sid_bits;
612
613         struct arm_smmu_strtab_cfg      strtab_cfg;
614 };
615
616 /* SMMU private data for each master */
617 struct arm_smmu_master_data {
618         struct arm_smmu_device          *smmu;
619         struct arm_smmu_strtab_ent      ste;
620 };
621
622 /* SMMU private data for an IOMMU domain */
623 enum arm_smmu_domain_stage {
624         ARM_SMMU_DOMAIN_S1 = 0,
625         ARM_SMMU_DOMAIN_S2,
626         ARM_SMMU_DOMAIN_NESTED,
627 };
628
629 struct arm_smmu_domain {
630         struct arm_smmu_device          *smmu;
631         struct mutex                    init_mutex; /* Protects smmu pointer */
632
633         struct io_pgtable_ops           *pgtbl_ops;
634         spinlock_t                      pgtbl_lock;
635
636         enum arm_smmu_domain_stage      stage;
637         union {
638                 struct arm_smmu_s1_cfg  s1_cfg;
639                 struct arm_smmu_s2_cfg  s2_cfg;
640         };
641
642         struct iommu_domain             domain;
643 };
644
645 struct arm_smmu_option_prop {
646         u32 opt;
647         const char *prop;
648 };
649
650 static struct arm_smmu_option_prop arm_smmu_options[] = {
651         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
652         { 0, NULL},
653 };
654
655 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
656 {
657         return container_of(dom, struct arm_smmu_domain, domain);
658 }
659
660 static void parse_driver_options(struct arm_smmu_device *smmu)
661 {
662         int i = 0;
663
664         do {
665                 if (of_property_read_bool(smmu->dev->of_node,
666                                                 arm_smmu_options[i].prop)) {
667                         smmu->options |= arm_smmu_options[i].opt;
668                         dev_notice(smmu->dev, "option %s\n",
669                                 arm_smmu_options[i].prop);
670                 }
671         } while (arm_smmu_options[++i].opt);
672 }
673
674 /* Low-level queue manipulation functions */
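/*
 * A queue is full when the producer and consumer indices are equal but the
 * wrap flags differ, and empty when both the indices and the wrap flags
 * match. For example, with max_n_shift == 8, prod == 0x105 and cons == 0x005
 * both point at slot 5 but on different wraps, so the queue is full.
 */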
675 static bool queue_full(struct arm_smmu_queue *q)
676 {
677         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
678                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
679 }
680
681 static bool queue_empty(struct arm_smmu_queue *q)
682 {
683         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
684                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
685 }
686
687 static void queue_sync_cons(struct arm_smmu_queue *q)
688 {
689         q->cons = readl_relaxed(q->cons_reg);
690 }
691
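/*
 * Advancing a pointer as (WRP | IDX) + 1 lets the carry out of the index
 * bits toggle the wrap flag automatically; Q_OVF() preserves the existing
 * overflow flag across the update.
 */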
692 static void queue_inc_cons(struct arm_smmu_queue *q)
693 {
694         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
695
696         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
697         writel(q->cons, q->cons_reg);
698 }
699
700 static int queue_sync_prod(struct arm_smmu_queue *q)
701 {
702         int ret = 0;
703         u32 prod = readl_relaxed(q->prod_reg);
704
705         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
706                 ret = -EOVERFLOW;
707
708         q->prod = prod;
709         return ret;
710 }
711
712 static void queue_inc_prod(struct arm_smmu_queue *q)
713 {
714         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
715
716         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
717         writel(q->prod, q->prod_reg);
718 }
719
720 /*
721  * Wait for the SMMU to consume items. If drain is true, wait until the queue
722  * is empty. Otherwise, wait until there is at least one free slot.
723  */
724 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
725 {
726         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
727
728         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
729                 if (ktime_compare(ktime_get(), timeout) > 0)
730                         return -ETIMEDOUT;
731
732                 if (wfe) {
733                         wfe();
734                 } else {
735                         cpu_relax();
736                         udelay(1);
737                 }
738         }
739
740         return 0;
741 }
742
743 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 {
745         int i;
746
747         for (i = 0; i < n_dwords; ++i)
748                 *dst++ = cpu_to_le64(*src++);
749 }
750
751 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 {
753         if (queue_full(q))
754                 return -ENOSPC;
755
756         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
757         queue_inc_prod(q);
758         return 0;
759 }
760
761 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 {
763         int i;
764
765         for (i = 0; i < n_dwords; ++i)
766                 *dst++ = le64_to_cpu(*src++);
767 }
768
769 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 {
771         if (queue_empty(q))
772                 return -EAGAIN;
773
774         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
775         queue_inc_cons(q);
776         return 0;
777 }
778
779 /* High-level queue accessors */
780 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 {
782         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
783         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
784
785         switch (ent->opcode) {
786         case CMDQ_OP_TLBI_EL2_ALL:
787         case CMDQ_OP_TLBI_NSNH_ALL:
788                 break;
789         case CMDQ_OP_PREFETCH_CFG:
790                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
791                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
792                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
793                 break;
794         case CMDQ_OP_CFGI_STE:
795                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
796                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
797                 break;
798         case CMDQ_OP_CFGI_ALL:
799                 /* Cover the entire SID range */
800                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
801                 break;
802         case CMDQ_OP_TLBI_NH_VA:
803                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
804                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
805                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
806                 break;
807         case CMDQ_OP_TLBI_S2_IPA:
808                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
809                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
810                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
811                 break;
812         case CMDQ_OP_TLBI_NH_ASID:
813                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
814                 /* Fallthrough */
815         case CMDQ_OP_TLBI_S12_VMALL:
816                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
817                 break;
818         case CMDQ_OP_PRI_RESP:
819                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
820                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
821                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
822                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
823                 switch (ent->pri.resp) {
824                 case PRI_RESP_DENY:
825                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
826                         break;
827                 case PRI_RESP_FAIL:
828                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
829                         break;
830                 case PRI_RESP_SUCC:
831                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
832                         break;
833                 default:
834                         return -EINVAL;
835                 }
836                 break;
837         case CMDQ_OP_CMD_SYNC:
838                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
839                 break;
840         default:
841                 return -ENOENT;
842         }
843
844         return 0;
845 }
846
847 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
848 {
849         static const char *cerror_str[] = {
850                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
851                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
852                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
853         };
854
855         int i;
856         u64 cmd[CMDQ_ENT_DWORDS];
857         struct arm_smmu_queue *q = &smmu->cmdq.q;
858         u32 cons = readl_relaxed(q->cons_reg);
859         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
860         struct arm_smmu_cmdq_ent cmd_sync = {
861                 .opcode = CMDQ_OP_CMD_SYNC,
862         };
863
864         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
865                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
866
867         switch (idx) {
868         case CMDQ_ERR_CERROR_ABT_IDX:
869                 dev_err(smmu->dev, "retrying command fetch\n");
870         case CMDQ_ERR_CERROR_NONE_IDX:
871                 return;
872         case CMDQ_ERR_CERROR_ILL_IDX:
873                 /* Fallthrough */
874         default:
875                 break;
876         }
877
878         /*
879          * We may have concurrent producers, so we need to be careful
880          * not to touch any of the shadow cmdq state.
881          */
882         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
883         dev_err(smmu->dev, "skipping command in error state:\n");
884         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
885                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
886
887         /* Convert the erroneous command into a CMD_SYNC */
888         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
889                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
890                 return;
891         }
892
893         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
894 }
895
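/*
 * Build and post a single command, spinning (or using WFE when SEV is
 * supported) while the queue is full. CMD_SYNC additionally waits for the
 * queue to drain, so the command has been consumed before we return.
 */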
896 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
897                                     struct arm_smmu_cmdq_ent *ent)
898 {
899         u64 cmd[CMDQ_ENT_DWORDS];
900         unsigned long flags;
901         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
902         struct arm_smmu_queue *q = &smmu->cmdq.q;
903
904         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
905                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
906                          ent->opcode);
907                 return;
908         }
909
910         spin_lock_irqsave(&smmu->cmdq.lock, flags);
911         while (queue_insert_raw(q, cmd) == -ENOSPC) {
912                 if (queue_poll_cons(q, false, wfe))
913                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
914         }
915
916         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
917                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
918         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
919 }
920
921 /* Context descriptor manipulation functions */
922 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
923 {
924         u64 val = 0;
925
926         /* Repack the TCR. Just care about TTBR0 for now */
927         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
928         val |= ARM_SMMU_TCR2CD(tcr, TG0);
929         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
930         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
931         val |= ARM_SMMU_TCR2CD(tcr, SH0);
932         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
933         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
934         val |= ARM_SMMU_TCR2CD(tcr, IPS);
935         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
936
937         return val;
938 }
939
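/*
 * Write the (single) context descriptor: dword 0 carries the repacked TCR
 * fields, the ASID and the V/R/A flags, dword 1 the TTB0 pointer and
 * dword 3 the MAIR.
 */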
940 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
941                                     struct arm_smmu_s1_cfg *cfg)
942 {
943         u64 val;
944
945         /*
946          * We don't need to issue any invalidation here, as we'll invalidate
947          * the STE when installing the new entry anyway.
948          */
949         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
950 #ifdef __BIG_ENDIAN
951               CTXDESC_CD_0_ENDI |
952 #endif
953               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
954               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
955               CTXDESC_CD_0_V;
956         cfg->cdptr[0] = cpu_to_le64(val);
957
958         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
959         cfg->cdptr[1] = cpu_to_le64(val);
960
961         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
962 }
963
964 /* Stream table manipulation functions */
965 static void
966 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
967 {
968         u64 val = 0;
969
970         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
971                 << STRTAB_L1_DESC_SPAN_SHIFT;
972         val |= desc->l2ptr_dma &
973                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
974
975         *dst = cpu_to_le64(val);
976 }
977
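/* Invalidate any cached copy of the STE for this SID and wait for completion */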
978 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
979 {
980         struct arm_smmu_cmdq_ent cmd = {
981                 .opcode = CMDQ_OP_CFGI_STE,
982                 .cfgi   = {
983                         .sid    = sid,
984                         .leaf   = true,
985                 },
986         };
987
988         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
989         cmd.opcode = CMDQ_OP_CMD_SYNC;
990         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
991 }
992
993 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
994                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
995 {
996         /*
997          * This is hideously complicated, but we only really care about
998          * three cases at the moment:
999          *
1000          * 1. Invalid (all zero) -> bypass  (init)
1001          * 2. Bypass -> translation (attach)
1002          * 3. Translation -> bypass (detach)
1003          *
1004          * Given that we can't update the STE atomically and the SMMU
1005          * doesn't read the thing in a defined order, that leaves us
1006          * with the following maintenance requirements:
1007          *
1008          * 1. Update Config, return (init time STEs aren't live)
1009          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1010          * 3. Update Config, sync
1011          */
1012         u64 val = le64_to_cpu(dst[0]);
1013         bool ste_live = false;
1014         struct arm_smmu_cmdq_ent prefetch_cmd = {
1015                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1016                 .prefetch       = {
1017                         .sid    = sid,
1018                 },
1019         };
1020
1021         if (val & STRTAB_STE_0_V) {
1022                 u64 cfg;
1023
1024                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1025                 switch (cfg) {
1026                 case STRTAB_STE_0_CFG_BYPASS:
1027                         break;
1028                 case STRTAB_STE_0_CFG_S1_TRANS:
1029                 case STRTAB_STE_0_CFG_S2_TRANS:
1030                         ste_live = true;
1031                         break;
1032                 case STRTAB_STE_0_CFG_ABORT:
1033                         if (disable_bypass)
1034                                 break;
1035                 default:
1036                         BUG(); /* STE corruption */
1037                 }
1038         }
1039
1040         /* Nuke the existing Config, as we're going to rewrite it */
1041         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1042
1043         if (ste->valid)
1044                 val |= STRTAB_STE_0_V;
1045         else
1046                 val &= ~STRTAB_STE_0_V;
1047
1048         if (ste->bypass) {
1049                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1050                                       : STRTAB_STE_0_CFG_BYPASS;
1051                 dst[0] = cpu_to_le64(val);
1052                 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1053                          << STRTAB_STE_1_SHCFG_SHIFT);
1054                 dst[2] = 0; /* Nuke the VMID */
1055                 if (ste_live)
1056                         arm_smmu_sync_ste_for_sid(smmu, sid);
1057                 return;
1058         }
1059
1060         if (ste->s1_cfg) {
1061                 BUG_ON(ste_live);
1062                 dst[1] = cpu_to_le64(
1063                          STRTAB_STE_1_S1C_CACHE_WBRA
1064                          << STRTAB_STE_1_S1CIR_SHIFT |
1065                          STRTAB_STE_1_S1C_CACHE_WBRA
1066                          << STRTAB_STE_1_S1COR_SHIFT |
1067                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1068 #ifdef CONFIG_PCI_ATS
1069                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1070 #endif
1071                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1072
1073                 if (smmu->features & ARM_SMMU_FEAT_STALLS)
1074                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1075
1076                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1077                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1078                         STRTAB_STE_0_CFG_S1_TRANS;
1079
1080         }
1081
1082         if (ste->s2_cfg) {
1083                 BUG_ON(ste_live);
1084                 dst[2] = cpu_to_le64(
1085                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1086                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1087                           << STRTAB_STE_2_VTCR_SHIFT |
1088 #ifdef __BIG_ENDIAN
1089                          STRTAB_STE_2_S2ENDI |
1090 #endif
1091                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1092                          STRTAB_STE_2_S2R);
1093
1094                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1095                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1096
1097                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1098         }
1099
1100         arm_smmu_sync_ste_for_sid(smmu, sid);
1101         dst[0] = cpu_to_le64(val);
1102         arm_smmu_sync_ste_for_sid(smmu, sid);
1103
1104         /* It's likely that we'll want to use the new STE soon */
1105         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1106                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1107 }
1108
1109 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1110 {
1111         unsigned int i;
1112         struct arm_smmu_strtab_ent ste = {
1113                 .valid  = true,
1114                 .bypass = true,
1115         };
1116
1117         for (i = 0; i < nent; ++i) {
1118                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1119                 strtab += STRTAB_STE_DWORDS;
1120         }
1121 }
1122
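/*
 * Lazily allocate the level-2 stream table covering this SID
 * (1 << STRTAB_SPLIT bypass STEs) and point its L1 descriptor at it.
 */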
1123 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1124 {
1125         size_t size;
1126         void *strtab;
1127         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1128         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1129
1130         if (desc->l2ptr)
1131                 return 0;
1132
1133         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1134         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1135
1136         desc->span = STRTAB_SPLIT + 1;
1137         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1138                                           GFP_KERNEL | __GFP_ZERO);
1139         if (!desc->l2ptr) {
1140                 dev_err(smmu->dev,
1141                         "failed to allocate l2 stream table for SID %u\n",
1142                         sid);
1143                 return -ENOMEM;
1144         }
1145
1146         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1147         arm_smmu_write_strtab_l1_desc(strtab, desc);
1148         return 0;
1149 }
1150
1151 /* IRQ and event handlers */
1152 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1153 {
1154         int i;
1155         struct arm_smmu_device *smmu = dev;
1156         struct arm_smmu_queue *q = &smmu->evtq.q;
1157         u64 evt[EVTQ_ENT_DWORDS];
1158
1159         do {
1160                 while (!queue_remove_raw(q, evt)) {
1161                         u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1162
1163                         dev_info(smmu->dev, "event 0x%02x received:\n", id);
1164                         for (i = 0; i < ARRAY_SIZE(evt); ++i)
1165                                 dev_info(smmu->dev, "\t0x%016llx\n",
1166                                          (unsigned long long)evt[i]);
1167
1168                 }
1169
1170                 /*
1171                  * Not much we can do on overflow, so scream and pretend we're
1172                  * trying harder.
1173                  */
1174                 if (queue_sync_prod(q) == -EOVERFLOW)
1175                         dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1176         } while (!queue_empty(q));
1177
1178         /* Sync our overflow flag, as we believe we're up to speed */
1179         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1180         return IRQ_HANDLED;
1181 }
1182
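/*
 * Page requests aren't handled, so log the decoded PRI queue entry and,
 * if this was the last request in its group, deny the group.
 */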
1183 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1184 {
1185         u32 sid, ssid;
1186         u16 grpid;
1187         bool ssv, last;
1188
1189         sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1190         ssv = evt[0] & PRIQ_0_SSID_V;
1191         ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1192         last = evt[0] & PRIQ_0_PRG_LAST;
1193         grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1194
1195         dev_info(smmu->dev, "unexpected PRI request received:\n");
1196         dev_info(smmu->dev,
1197                  "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1198                  sid, ssid, grpid, last ? "L" : "",
1199                  evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1200                  evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1201                  evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1202                  evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1203                  evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1204
1205         if (last) {
1206                 struct arm_smmu_cmdq_ent cmd = {
1207                         .opcode                 = CMDQ_OP_PRI_RESP,
1208                         .substream_valid        = ssv,
1209                         .pri                    = {
1210                                 .sid    = sid,
1211                                 .ssid   = ssid,
1212                                 .grpid  = grpid,
1213                                 .resp   = PRI_RESP_DENY,
1214                         },
1215                 };
1216
1217                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1218         }
1219 }
1220
1221 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1222 {
1223         struct arm_smmu_device *smmu = dev;
1224         struct arm_smmu_queue *q = &smmu->priq.q;
1225         u64 evt[PRIQ_ENT_DWORDS];
1226
1227         do {
1228                 while (!queue_remove_raw(q, evt))
1229                         arm_smmu_handle_ppr(smmu, evt);
1230
1231                 if (queue_sync_prod(q) == -EOVERFLOW)
1232                         dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1233         } while (!queue_empty(q));
1234
1235         /* Sync our overflow flag, as we believe we're up to speed */
1236         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1237         return IRQ_HANDLED;
1238 }
1239
1240 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1241 {
1242         /* We don't actually use CMD_SYNC interrupts for anything */
1243         return IRQ_HANDLED;
1244 }
1245
1246 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1247
1248 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1249 {
1250         u32 gerror, gerrorn, active;
1251         struct arm_smmu_device *smmu = dev;
1252
1253         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1254         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1255
1256         active = gerror ^ gerrorn;
1257         if (!(active & GERROR_ERR_MASK))
1258                 return IRQ_NONE; /* No errors pending */
1259
1260         dev_warn(smmu->dev,
1261                  "unexpected global error reported (0x%08x), this could be serious\n",
1262                  active);
1263
1264         if (active & GERROR_SFM_ERR) {
1265                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1266                 arm_smmu_device_disable(smmu);
1267         }
1268
1269         if (active & GERROR_MSI_GERROR_ABT_ERR)
1270                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1271
1272         if (active & GERROR_MSI_PRIQ_ABT_ERR)
1273                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1274
1275         if (active & GERROR_MSI_EVTQ_ABT_ERR)
1276                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1277
1278         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1279                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1280                 arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1281         }
1282
1283         if (active & GERROR_PRIQ_ABT_ERR)
1284                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1285
1286         if (active & GERROR_EVTQ_ABT_ERR)
1287                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1288
1289         if (active & GERROR_CMDQ_ERR)
1290                 arm_smmu_cmdq_skip_err(smmu);
1291
1292         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1293         return IRQ_HANDLED;
1294 }
1295
1296 /* IO_PGTABLE API */
1297 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1298 {
1299         struct arm_smmu_cmdq_ent cmd;
1300
1301         cmd.opcode = CMDQ_OP_CMD_SYNC;
1302         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1303 }
1304
1305 static void arm_smmu_tlb_sync(void *cookie)
1306 {
1307         struct arm_smmu_domain *smmu_domain = cookie;
1308         __arm_smmu_tlb_sync(smmu_domain->smmu);
1309 }
1310
1311 static void arm_smmu_tlb_inv_context(void *cookie)
1312 {
1313         struct arm_smmu_domain *smmu_domain = cookie;
1314         struct arm_smmu_device *smmu = smmu_domain->smmu;
1315         struct arm_smmu_cmdq_ent cmd;
1316
1317         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1318                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1319                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1320                 cmd.tlbi.vmid   = 0;
1321         } else {
1322                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1323                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1324         }
1325
1326         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1327         __arm_smmu_tlb_sync(smmu);
1328 }
1329
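/*
 * Invalidate a range of IOVAs one granule at a time; the CMD_SYNC that makes
 * the invalidation visible is deferred to the ->tlb_sync() callback.
 */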
1330 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1331                                           size_t granule, bool leaf, void *cookie)
1332 {
1333         struct arm_smmu_domain *smmu_domain = cookie;
1334         struct arm_smmu_device *smmu = smmu_domain->smmu;
1335         struct arm_smmu_cmdq_ent cmd = {
1336                 .tlbi = {
1337                         .leaf   = leaf,
1338                         .addr   = iova,
1339                 },
1340         };
1341
1342         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1343                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1344                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1345         } else {
1346                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1347                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1348         }
1349
1350         do {
1351                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1352                 cmd.tlbi.addr += granule;
1353         } while (size -= granule);
1354 }
1355
1356 static struct iommu_gather_ops arm_smmu_gather_ops = {
1357         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1358         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1359         .tlb_sync       = arm_smmu_tlb_sync,
1360 };
1361
1362 /* IOMMU API */
1363 static bool arm_smmu_capable(enum iommu_cap cap)
1364 {
1365         switch (cap) {
1366         case IOMMU_CAP_CACHE_COHERENCY:
1367                 return true;
1368         case IOMMU_CAP_INTR_REMAP:
1369                 return true; /* MSIs are just memory writes */
1370         case IOMMU_CAP_NOEXEC:
1371                 return true;
1372         default:
1373                 return false;
1374         }
1375 }
1376
1377 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1378 {
1379         struct arm_smmu_domain *smmu_domain;
1380
1381         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1382                 return NULL;
1383
1384         /*
1385          * Allocate the domain and initialise some of its data structures.
1386          * We can't really do anything meaningful until we've added a
1387          * master.
1388          */
1389         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1390         if (!smmu_domain)
1391                 return NULL;
1392
1393         if (type == IOMMU_DOMAIN_DMA &&
1394             iommu_get_dma_cookie(&smmu_domain->domain)) {
1395                 kfree(smmu_domain);
1396                 return NULL;
1397         }
1398
1399         mutex_init(&smmu_domain->init_mutex);
1400         spin_lock_init(&smmu_domain->pgtbl_lock);
1401         return &smmu_domain->domain;
1402 }
1403
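/*
 * Allocate the first free index from an ASID/VMID bitmap of 1 << span bits,
 * retrying if another CPU grabs it between the find and the set.
 */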
1404 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1405 {
1406         int idx, size = 1 << span;
1407
1408         do {
1409                 idx = find_first_zero_bit(map, size);
1410                 if (idx == size)
1411                         return -ENOSPC;
1412         } while (test_and_set_bit(idx, map));
1413
1414         return idx;
1415 }
1416
1417 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1418 {
1419         clear_bit(idx, map);
1420 }
1421
1422 static void arm_smmu_domain_free(struct iommu_domain *domain)
1423 {
1424         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1425         struct arm_smmu_device *smmu = smmu_domain->smmu;
1426
1427         iommu_put_dma_cookie(domain);
1428         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1429
1430         /* Free the CD and ASID, if we allocated them */
1431         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1432                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1433
1434                 if (cfg->cdptr) {
1435                         dmam_free_coherent(smmu_domain->smmu->dev,
1436                                            CTXDESC_CD_DWORDS << 3,
1437                                            cfg->cdptr,
1438                                            cfg->cdptr_dma);
1439
1440                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1441                 }
1442         } else {
1443                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1444                 if (cfg->vmid)
1445                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1446         }
1447
1448         kfree(smmu_domain);
1449 }
1450
1451 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1452                                        struct io_pgtable_cfg *pgtbl_cfg)
1453 {
1454         int ret;
1455         int asid;
1456         struct arm_smmu_device *smmu = smmu_domain->smmu;
1457         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1458
1459         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1460         if (asid < 0)
1461                 return asid;
1462
1463         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1464                                          &cfg->cdptr_dma,
1465                                          GFP_KERNEL | __GFP_ZERO);
1466         if (!cfg->cdptr) {
1467                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1468                 ret = -ENOMEM;
1469                 goto out_free_asid;
1470         }
1471
1472         cfg->cd.asid    = (u16)asid;
1473         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1474         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1475         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1476         return 0;
1477
1478 out_free_asid:
1479         arm_smmu_bitmap_free(smmu->asid_map, asid);
1480         return ret;
1481 }
1482
1483 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1484                                        struct io_pgtable_cfg *pgtbl_cfg)
1485 {
1486         int vmid;
1487         struct arm_smmu_device *smmu = smmu_domain->smmu;
1488         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1489
1490         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1491         if (vmid < 0)
1492                 return vmid;
1493
1494         cfg->vmid       = (u16)vmid;
1495         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1496         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1497         return 0;
1498 }
1499
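     /*
      * Finalise a domain on first attach: restrict the stage to what the
      * hardware supports, build the io-pgtable for that stage, and then
      * allocate the per-stage resources (an ASID and context descriptor
      * for stage 1, a VMID for stage 2).
      */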
1500 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1501 {
1502         int ret;
1503         unsigned long ias, oas;
1504         enum io_pgtable_fmt fmt;
1505         struct io_pgtable_cfg pgtbl_cfg;
1506         struct io_pgtable_ops *pgtbl_ops;
1507         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1508                                  struct io_pgtable_cfg *);
1509         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1510         struct arm_smmu_device *smmu = smmu_domain->smmu;
1511
1512         /* Restrict the stage to what we can actually support */
1513         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1514                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1515         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1516                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1517
1518         switch (smmu_domain->stage) {
1519         case ARM_SMMU_DOMAIN_S1:
1520                 ias = VA_BITS;
1521                 oas = smmu->ias;
1522                 fmt = ARM_64_LPAE_S1;
1523                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1524                 break;
1525         case ARM_SMMU_DOMAIN_NESTED:
1526         case ARM_SMMU_DOMAIN_S2:
1527                 ias = smmu->ias;
1528                 oas = smmu->oas;
1529                 fmt = ARM_64_LPAE_S2;
1530                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1531                 break;
1532         default:
1533                 return -EINVAL;
1534         }
1535
1536         pgtbl_cfg = (struct io_pgtable_cfg) {
1537                 .pgsize_bitmap  = smmu->pgsize_bitmap,
1538                 .ias            = ias,
1539                 .oas            = oas,
1540                 .tlb            = &arm_smmu_gather_ops,
1541                 .iommu_dev      = smmu->dev,
1542         };
1543
1544         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1545         if (!pgtbl_ops)
1546                 return -ENOMEM;
1547
1548         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1549         smmu_domain->pgtbl_ops = pgtbl_ops;
1550
1551         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1552         if (ret < 0)
1553                 free_io_pgtable_ops(pgtbl_ops);
1554
1555         return ret;
1556 }
1557
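     /*
      * Return a pointer to the STE for @sid. With a 2-level stream table,
      * the upper SID bits select the L1 descriptor and the low STRTAB_SPLIT
      * bits select the STE within that descriptor's L2 table; otherwise the
      * SID indexes the linear table directly.
      */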
1558 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1559 {
1560         __le64 *step;
1561         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1562
1563         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1564                 struct arm_smmu_strtab_l1_desc *l1_desc;
1565                 int idx;
1566
1567                 /* Two-level walk */
1568                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1569                 l1_desc = &cfg->l1_desc[idx];
1570                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1571                 step = &l1_desc->l2ptr[idx];
1572         } else {
1573                 /* Simple linear lookup */
1574                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1575         }
1576
1577         return step;
1578 }
1579
1580 static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1581 {
1582         int i;
1583         struct arm_smmu_master_data *master = fwspec->iommu_priv;
1584         struct arm_smmu_device *smmu = master->smmu;
1585
1586         for (i = 0; i < fwspec->num_ids; ++i) {
1587                 u32 sid = fwspec->ids[i];
1588                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1589
1590                 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1591         }
1592
1593         return 0;
1594 }
1595
1596 static void arm_smmu_detach_dev(struct device *dev)
1597 {
1598         struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1599
1600         master->ste.bypass = true;
1601         if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
1602                 dev_warn(dev, "failed to install bypass STE\n");
1603 }
1604
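     /*
      * Attach flow: if the master's STE is live, drop it back to bypass
      * first, finalise the domain against this SMMU if it hasn't been
      * already, then point the STE at the stage-1 or stage-2 config and
      * reinstall it for every StreamID of the device.
      */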
1605 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1606 {
1607         int ret = 0;
1608         struct arm_smmu_device *smmu;
1609         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1610         struct arm_smmu_master_data *master;
1611         struct arm_smmu_strtab_ent *ste;
1612
1613         if (!dev->iommu_fwspec)
1614                 return -ENOENT;
1615
1616         master = dev->iommu_fwspec->iommu_priv;
1617         smmu = master->smmu;
1618         ste = &master->ste;
1619
1620         /* Already attached to a different domain? */
1621         if (!ste->bypass)
1622                 arm_smmu_detach_dev(dev);
1623
1624         mutex_lock(&smmu_domain->init_mutex);
1625
1626         if (!smmu_domain->smmu) {
1627                 smmu_domain->smmu = smmu;
1628                 ret = arm_smmu_domain_finalise(domain);
1629                 if (ret) {
1630                         smmu_domain->smmu = NULL;
1631                         goto out_unlock;
1632                 }
1633         } else if (smmu_domain->smmu != smmu) {
1634                 dev_err(dev,
1635                         "cannot attach to SMMU %s (upstream of %s)\n",
1636                         dev_name(smmu_domain->smmu->dev),
1637                         dev_name(smmu->dev));
1638                 ret = -ENXIO;
1639                 goto out_unlock;
1640         }
1641
1642         ste->bypass = false;
1643         ste->valid = true;
1644
1645         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1646                 ste->s1_cfg = &smmu_domain->s1_cfg;
1647                 ste->s2_cfg = NULL;
1648                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1649         } else {
1650                 ste->s1_cfg = NULL;
1651                 ste->s2_cfg = &smmu_domain->s2_cfg;
1652         }
1653
1654         ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1655         if (ret < 0)
1656                 ste->valid = false;
1657
1658 out_unlock:
1659         mutex_unlock(&smmu_domain->init_mutex);
1660         return ret;
1661 }
1662
1663 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1664                         phys_addr_t paddr, size_t size, int prot)
1665 {
1666         int ret;
1667         unsigned long flags;
1668         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1669         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1670
1671         if (!ops)
1672                 return -ENODEV;
1673
1674         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1675         ret = ops->map(ops, iova, paddr, size, prot);
1676         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1677         return ret;
1678 }
1679
1680 static size_t
1681 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1682 {
1683         size_t ret;
1684         unsigned long flags;
1685         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1686         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1687
1688         if (!ops)
1689                 return 0;
1690
1691         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1692         ret = ops->unmap(ops, iova, size);
1693         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1694         return ret;
1695 }
1696
1697 static phys_addr_t
1698 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1699 {
1700         phys_addr_t ret;
1701         unsigned long flags;
1702         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1703         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1704
1705         if (!ops)
1706                 return 0;
1707
1708         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1709         ret = ops->iova_to_phys(ops, iova);
1710         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1711
1712         return ret;
1713 }
1714
1715 static struct platform_driver arm_smmu_driver;
1716
1717 static int arm_smmu_match_node(struct device *dev, void *data)
1718 {
1719         return dev->of_node == data;
1720 }
1721
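     /* Find the (already probed) SMMU instance whose of_node matches @np */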
1722 static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
1723 {
1724         struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1725                                                 np, arm_smmu_match_node);
1726         put_device(dev);
1727         return dev ? dev_get_drvdata(dev) : NULL;
1728 }
1729
1730 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1731 {
1732         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1733
1734         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1735                 limit *= 1UL << STRTAB_SPLIT;
1736
1737         return sid < limit;
1738 }
1739
1740 static struct iommu_ops arm_smmu_ops;
1741
1742 static int arm_smmu_add_device(struct device *dev)
1743 {
1744         int i, ret;
1745         struct arm_smmu_device *smmu;
1746         struct arm_smmu_master_data *master;
1747         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1748         struct iommu_group *group;
1749
1750         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1751                 return -ENODEV;
1752         /*
1753          * We _can_ actually withstand dodgy bus code re-calling add_device()
1754          * without an intervening remove_device()/of_xlate() sequence, but
1755          * we're not going to do so quietly...
1756          */
1757         if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1758                 master = fwspec->iommu_priv;
1759                 smmu = master->smmu;
1760         } else {
1761                 smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
1762                 if (!smmu)
1763                         return -ENODEV;
1764                 master = kzalloc(sizeof(*master), GFP_KERNEL);
1765                 if (!master)
1766                         return -ENOMEM;
1767
1768                 master->smmu = smmu;
1769                 fwspec->iommu_priv = master;
1770         }
1771
1772         /* Check the SIDs are in range of the SMMU and our stream table */
1773         for (i = 0; i < fwspec->num_ids; i++) {
1774                 u32 sid = fwspec->ids[i];
1775
1776                 if (!arm_smmu_sid_in_range(smmu, sid))
1777                         return -ERANGE;
1778
1779                 /* Ensure l2 strtab is initialised */
1780                 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1781                         ret = arm_smmu_init_l2_strtab(smmu, sid);
1782                         if (ret)
1783                                 return ret;
1784                 }
1785         }
1786
1787         group = iommu_group_get_for_dev(dev);
1788         if (!IS_ERR(group))
1789                 iommu_group_put(group);
1790
1791         return PTR_ERR_OR_ZERO(group);
1792 }
1793
1794 static void arm_smmu_remove_device(struct device *dev)
1795 {
1796         struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1797         struct arm_smmu_master_data *master;
1798
1799         if (!fwspec || fwspec->ops != &arm_smmu_ops)
1800                 return;
1801
1802         master = fwspec->iommu_priv;
1803         if (master && master->ste.valid)
1804                 arm_smmu_detach_dev(dev);
1805         iommu_group_remove_device(dev);
1806         kfree(master);
1807         iommu_fwspec_free(dev);
1808 }
1809
1810 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1811 {
1812         struct iommu_group *group;
1813
1814         /*
1815          * We don't support devices sharing stream IDs other than PCI RID
1816          * aliases, since the necessary ID-to-device lookup becomes rather
1817          * impractical given a potentially sparse 32-bit stream ID space.
1818          */
1819         if (dev_is_pci(dev))
1820                 group = pci_device_group(dev);
1821         else
1822                 group = generic_device_group(dev);
1823
1824         return group;
1825 }
1826
1827 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1828                                     enum iommu_attr attr, void *data)
1829 {
1830         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1831
1832         switch (attr) {
1833         case DOMAIN_ATTR_NESTING:
1834                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1835                 return 0;
1836         default:
1837                 return -ENODEV;
1838         }
1839 }
1840
1841 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1842                                     enum iommu_attr attr, void *data)
1843 {
1844         int ret = 0;
1845         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1846
1847         mutex_lock(&smmu_domain->init_mutex);
1848
1849         switch (attr) {
1850         case DOMAIN_ATTR_NESTING:
1851                 if (smmu_domain->smmu) {
1852                         ret = -EPERM;
1853                         goto out_unlock;
1854                 }
1855
1856                 if (*(int *)data)
1857                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1858                 else
1859                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1860
1861                 break;
1862         default:
1863                 ret = -ENODEV;
1864         }
1865
1866 out_unlock:
1867         mutex_unlock(&smmu_domain->init_mutex);
1868         return ret;
1869 }
1870
1871 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1872 {
1873         return iommu_fwspec_add_ids(dev, args->args, 1);
1874 }
1875
1876 static struct iommu_ops arm_smmu_ops = {
1877         .capable                = arm_smmu_capable,
1878         .domain_alloc           = arm_smmu_domain_alloc,
1879         .domain_free            = arm_smmu_domain_free,
1880         .attach_dev             = arm_smmu_attach_dev,
1881         .map                    = arm_smmu_map,
1882         .unmap                  = arm_smmu_unmap,
1883         .map_sg                 = default_iommu_map_sg,
1884         .iova_to_phys           = arm_smmu_iova_to_phys,
1885         .add_device             = arm_smmu_add_device,
1886         .remove_device          = arm_smmu_remove_device,
1887         .device_group           = arm_smmu_device_group,
1888         .domain_get_attr        = arm_smmu_domain_get_attr,
1889         .domain_set_attr        = arm_smmu_domain_set_attr,
1890         .of_xlate               = arm_smmu_of_xlate,
1891         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1892 };
1893
1894 /* Probing and initialisation functions */
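     /*
      * Allocate a DMA-coherent ring for one queue, record its prod/cons
      * register offsets and precompute the Q_BASE register value (ring
      * base address plus log2 of the number of entries).
      */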
1895 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1896                                    struct arm_smmu_queue *q,
1897                                    unsigned long prod_off,
1898                                    unsigned long cons_off,
1899                                    size_t dwords)
1900 {
1901         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1902
1903         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1904         if (!q->base) {
1905                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1906                         qsz);
1907                 return -ENOMEM;
1908         }
1909
1910         q->prod_reg     = smmu->base + prod_off;
1911         q->cons_reg     = smmu->base + cons_off;
1912         q->ent_dwords   = dwords;
1913
1914         q->q_base  = Q_BASE_RWA;
1915         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1916         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1917                      << Q_BASE_LOG2SIZE_SHIFT;
1918
1919         q->prod = q->cons = 0;
1920         return 0;
1921 }
1922
1923 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1924 {
1925         int ret;
1926
1927         /* cmdq */
1928         spin_lock_init(&smmu->cmdq.lock);
1929         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1930                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1931         if (ret)
1932                 return ret;
1933
1934         /* evtq */
1935         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
1936                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
1937         if (ret)
1938                 return ret;
1939
1940         /* priq */
1941         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
1942                 return 0;
1943
1944         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
1945                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
1946 }
1947
1948 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
1949 {
1950         unsigned int i;
1951         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1952         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
1953         void *strtab = smmu->strtab_cfg.strtab;
1954
1955         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
1956         if (!cfg->l1_desc) {
1957                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
1958                 return -ENOMEM;
1959         }
1960
1961         for (i = 0; i < cfg->num_l1_ents; ++i) {
1962                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
1963                 strtab += STRTAB_L1_DESC_DWORDS << 3;
1964         }
1965
1966         return 0;
1967 }
1968
1969 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
1970 {
1971         void *strtab;
1972         u64 reg;
1973         u32 size, l1size;
1974         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1975
1976         /*
1977          * If we can resolve everything with a single L2 table, then we
1978          * just need a single L1 descriptor. Otherwise, calculate the L1
1979          * size, capped to the SIDSIZE.
1980          */
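             /*
              * For example, with STRTAB_SPLIT == 8, a 16-bit StreamID space
              * needs 1 << (16 - 8) = 256 L1 descriptors, each backing an L2
              * table of 256 STEs.
              */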
1981         if (smmu->sid_bits < STRTAB_SPLIT) {
1982                 size = 0;
1983         } else {
1984                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
1985                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
1986         }
1987         cfg->num_l1_ents = 1 << size;
1988
1989         size += STRTAB_SPLIT;
1990         if (size < smmu->sid_bits)
1991                 dev_warn(smmu->dev,
1992                          "2-level strtab only covers %u/%u bits of SID\n",
1993                          size, smmu->sid_bits);
1994
1995         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
1996         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
1997                                      GFP_KERNEL | __GFP_ZERO);
1998         if (!strtab) {
1999                 dev_err(smmu->dev,
2000                         "failed to allocate l1 stream table (%u bytes)\n",
2001                         l1size);
2002                 return -ENOMEM;
2003         }
2004         cfg->strtab = strtab;
2005
2006         /* Configure strtab_base_cfg for 2 levels */
2007         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2008         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2009                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2010         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2011                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2012         cfg->strtab_base_cfg = reg;
2013
2014         return arm_smmu_init_l1_strtab(smmu);
2015 }
2016
2017 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2018 {
2019         void *strtab;
2020         u64 reg;
2021         u32 size;
2022         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2023
2024         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2025         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2026                                      GFP_KERNEL | __GFP_ZERO);
2027         if (!strtab) {
2028                 dev_err(smmu->dev,
2029                         "failed to allocate linear stream table (%u bytes)\n",
2030                         size);
2031                 return -ENOMEM;
2032         }
2033         cfg->strtab = strtab;
2034         cfg->num_l1_ents = 1 << smmu->sid_bits;
2035
2036         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2037         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2038         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2039                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2040         cfg->strtab_base_cfg = reg;
2041
2042         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2043         return 0;
2044 }
2045
2046 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2047 {
2048         u64 reg;
2049         int ret;
2050
2051         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2052                 ret = arm_smmu_init_strtab_2lvl(smmu);
2053         else
2054                 ret = arm_smmu_init_strtab_linear(smmu);
2055
2056         if (ret)
2057                 return ret;
2058
2059         /* Set the strtab base address */
2060         reg  = smmu->strtab_cfg.strtab_dma &
2061                STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2062         reg |= STRTAB_BASE_RA;
2063         smmu->strtab_cfg.strtab_base = reg;
2064
2065         /* Allocate the first VMID for stage-2 bypass STEs */
2066         set_bit(0, smmu->vmid_map);
2067         return 0;
2068 }
2069
2070 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2071 {
2072         int ret;
2073
2074         ret = arm_smmu_init_queues(smmu);
2075         if (ret)
2076                 return ret;
2077
2078         return arm_smmu_init_strtab(smmu);
2079 }
2080
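     /*
      * Write @val to a control register and poll the corresponding
      * acknowledgement register until it reads back the same value (or the
      * poll times out).
      */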
2081 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2082                                    unsigned int reg_off, unsigned int ack_off)
2083 {
2084         u32 reg;
2085
2086         writel_relaxed(val, smmu->base + reg_off);
2087         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2088                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2089 }
2090
2091 /*
      * GBPA is "special": changes only take effect through the GBPA_UPDATE
      * bit, so wait for any update in flight to complete, modify the fields,
      * then set GBPA_UPDATE and poll until the SMMU clears it again.
      */
2092 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2093 {
2094         int ret;
2095         u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2096
2097         ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2098                                          1, ARM_SMMU_POLL_TIMEOUT_US);
2099         if (ret)
2100                 return ret;
2101
2102         reg &= ~clr;
2103         reg |= set;
2104         writel_relaxed(reg | GBPA_UPDATE, gbpa);
2105         return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2106                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2107 }
2108
2109 static void arm_smmu_free_msis(void *data)
2110 {
2111         struct device *dev = data;
2112         platform_msi_domain_free_irqs(dev);
2113 }
2114
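     /*
      * platform MSI write_msg callback: program the doorbell address, data
      * and memory attributes for whichever interrupt (evtq, gerror or priq)
      * this MSI descriptor maps to.
      */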
2115 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2116 {
2117         phys_addr_t doorbell;
2118         struct device *dev = msi_desc_to_dev(desc);
2119         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2120         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2121
2122         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2123         doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2124
2125         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2126         writel_relaxed(msg->data, smmu->base + cfg[1]);
2127         writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2128 }
2129
2130 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2131 {
2132         struct msi_desc *desc;
2133         int ret, nvec = ARM_SMMU_MAX_MSIS;
2134         struct device *dev = smmu->dev;
2135
2136         /* Clear the MSI address regs */
2137         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2138         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2139
2140         if (smmu->features & ARM_SMMU_FEAT_PRI)
2141                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2142         else
2143                 nvec--;
2144
2145         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2146                 return;
2147
2148         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2149         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2150         if (ret) {
2151                 dev_warn(dev, "failed to allocate MSIs\n");
2152                 return;
2153         }
2154
2155         for_each_msi_entry(desc, dev) {
2156                 switch (desc->platform.msi_index) {
2157                 case EVTQ_MSI_INDEX:
2158                         smmu->evtq.q.irq = desc->irq;
2159                         break;
2160                 case GERROR_MSI_INDEX:
2161                         smmu->gerr_irq = desc->irq;
2162                         break;
2163                 case PRIQ_MSI_INDEX:
2164                         smmu->priq.q.irq = desc->irq;
2165                         break;
2166                 default:        /* Unknown */
2167                         continue;
2168                 }
2169         }
2170
2171         /* Add callback to free MSIs on teardown */
2172         devm_add_action(dev, arm_smmu_free_msis, dev);
2173 }
2174
2175 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2176 {
2177         int ret, irq;
2178         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2179
2180         /* Disable IRQs first */
2181         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2182                                       ARM_SMMU_IRQ_CTRLACK);
2183         if (ret) {
2184                 dev_err(smmu->dev, "failed to disable irqs\n");
2185                 return ret;
2186         }
2187
2188         arm_smmu_setup_msis(smmu);
2189
2190         /* Request interrupt lines */
2191         irq = smmu->evtq.q.irq;
2192         if (irq) {
2193                 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2194                                                 arm_smmu_evtq_thread,
2195                                                 IRQF_ONESHOT,
2196                                                 "arm-smmu-v3-evtq", smmu);
2197                 if (ret < 0)
2198                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2199         }
2200
2201         irq = smmu->cmdq.q.irq;
2202         if (irq) {
2203                 ret = devm_request_irq(smmu->dev, irq,
2204                                        arm_smmu_cmdq_sync_handler, 0,
2205                                        "arm-smmu-v3-cmdq-sync", smmu);
2206                 if (ret < 0)
2207                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2208         }
2209
2210         irq = smmu->gerr_irq;
2211         if (irq) {
2212                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2213                                        0, "arm-smmu-v3-gerror", smmu);
2214                 if (ret < 0)
2215                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2216         }
2217
2218         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2219                 irq = smmu->priq.q.irq;
2220                 if (irq) {
2221                         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2222                                                         arm_smmu_priq_thread,
2223                                                         IRQF_ONESHOT,
2224                                                         "arm-smmu-v3-priq",
2225                                                         smmu);
2226                         if (ret < 0)
2227                                 dev_warn(smmu->dev,
2228                                          "failed to enable priq irq\n");
2229                         else
2230                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2231                 }
2232         }
2233
2234         /* Enable interrupt generation on the SMMU */
2235         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2236                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2237         if (ret)
2238                 dev_warn(smmu->dev, "failed to enable irqs\n");
2239
2240         return 0;
2241 }
2242
2243 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2244 {
2245         int ret;
2246
2247         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2248         if (ret)
2249                 dev_err(smmu->dev, "failed to clear cr0\n");
2250
2251         return ret;
2252 }
2253
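     /*
      * Bring the SMMU up from scratch: disable it, set the table/queue
      * memory attributes, program the stream table and queue base registers,
      * invalidate any cached configuration and TLB state, wire up the
      * interrupts, and finally either enable translation or configure
      * global bypass via GBPA.
      */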
2254 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2255 {
2256         int ret;
2257         u32 reg, enables;
2258         struct arm_smmu_cmdq_ent cmd;
2259
2260         /* Clear CR0 and sync (disables SMMU and queue processing) */
2261         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2262         if (reg & CR0_SMMUEN)
2263                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2264
2265         ret = arm_smmu_device_disable(smmu);
2266         if (ret)
2267                 return ret;
2268
2269         /* CR1 (table and queue memory attributes) */
2270         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2271               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2272               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2273               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2274               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2275               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2276         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2277
2278         /* CR2 (PTM, RECINVSID and E2H) */
2279         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2280         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2281
2282         /* Stream table */
2283         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2284                        smmu->base + ARM_SMMU_STRTAB_BASE);
2285         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2286                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2287
2288         /* Command queue */
2289         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2290         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2291         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2292
2293         enables = CR0_CMDQEN;
2294         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2295                                       ARM_SMMU_CR0ACK);
2296         if (ret) {
2297                 dev_err(smmu->dev, "failed to enable command queue\n");
2298                 return ret;
2299         }
2300
2301         /* Invalidate any cached configuration */
2302         cmd.opcode = CMDQ_OP_CFGI_ALL;
2303         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2304         cmd.opcode = CMDQ_OP_CMD_SYNC;
2305         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2306
2307         /* Invalidate any stale TLB entries */
2308         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2309                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2310                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2311         }
2312
2313         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2314         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2315         cmd.opcode = CMDQ_OP_CMD_SYNC;
2316         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2317
2318         /* Event queue */
2319         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2320         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2321         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2322
2323         enables |= CR0_EVTQEN;
2324         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2325                                       ARM_SMMU_CR0ACK);
2326         if (ret) {
2327                 dev_err(smmu->dev, "failed to enable event queue\n");
2328                 return ret;
2329         }
2330
2331         /* PRI queue */
2332         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2333                 writeq_relaxed(smmu->priq.q.q_base,
2334                                smmu->base + ARM_SMMU_PRIQ_BASE);
2335                 writel_relaxed(smmu->priq.q.prod,
2336                                smmu->base + ARM_SMMU_PRIQ_PROD);
2337                 writel_relaxed(smmu->priq.q.cons,
2338                                smmu->base + ARM_SMMU_PRIQ_CONS);
2339
2340                 enables |= CR0_PRIQEN;
2341                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2342                                               ARM_SMMU_CR0ACK);
2343                 if (ret) {
2344                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2345                         return ret;
2346                 }
2347         }
2348
2349         ret = arm_smmu_setup_irqs(smmu);
2350         if (ret) {
2351                 dev_err(smmu->dev, "failed to setup irqs\n");
2352                 return ret;
2353         }
2354
2356         /* Enable the SMMU interface, or ensure bypass */
2357         if (!bypass || disable_bypass) {
2358                 enables |= CR0_SMMUEN;
2359         } else {
2360                 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2361                 if (ret) {
2362                         dev_err(smmu->dev, "GBPA not responding to update\n");
2363                         return ret;
2364                 }
2365         }
2366         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2367                                       ARM_SMMU_CR0ACK);
2368         if (ret) {
2369                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2370                 return ret;
2371         }
2372
2373         return 0;
2374 }
2375
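     /* Probe hardware features and sizes from the IDR0/IDR1/IDR5 registers */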
2376 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2377 {
2378         u32 reg;
2379         bool coherent;
2380
2381         /* IDR0 */
2382         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2383
2384         /* 2-level structures */
2385         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2386                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2387
2388         if (reg & IDR0_CD2L)
2389                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2390
2391         /*
2392          * Translation table endianness.
2393          * We currently require the same endianness as the CPU, but this
2394          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2395          */
2396         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2397         case IDR0_TTENDIAN_MIXED:
2398                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2399                 break;
2400 #ifdef __BIG_ENDIAN
2401         case IDR0_TTENDIAN_BE:
2402                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2403                 break;
2404 #else
2405         case IDR0_TTENDIAN_LE:
2406                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2407                 break;
2408 #endif
2409         default:
2410                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2411                 return -ENXIO;
2412         }
2413
2414         /* Boolean feature flags */
2415         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2416                 smmu->features |= ARM_SMMU_FEAT_PRI;
2417
2418         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2419                 smmu->features |= ARM_SMMU_FEAT_ATS;
2420
2421         if (reg & IDR0_SEV)
2422                 smmu->features |= ARM_SMMU_FEAT_SEV;
2423
2424         if (reg & IDR0_MSI)
2425                 smmu->features |= ARM_SMMU_FEAT_MSI;
2426
2427         if (reg & IDR0_HYP)
2428                 smmu->features |= ARM_SMMU_FEAT_HYP;
2429
2430         /*
2431          * The dma-coherent property is used in preference to the ID
2432          * register, but warn on mismatch.
2433          */
2434         coherent = of_dma_is_coherent(smmu->dev->of_node);
2435         if (coherent)
2436                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2437
2438         if (!!(reg & IDR0_COHACC) != coherent)
2439                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2440                          coherent ? "true" : "false");
2441
2442         switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2443         case IDR0_STALL_MODEL_STALL:
2444                 /* Fallthrough */
2445         case IDR0_STALL_MODEL_FORCE:
2446                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2447         }
2448
2449         if (reg & IDR0_S1P)
2450                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2451
2452         if (reg & IDR0_S2P)
2453                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2454
2455         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2456                 dev_err(smmu->dev, "no translation support!\n");
2457                 return -ENXIO;
2458         }
2459
2460         /* We only support the AArch64 table format at present */
2461         switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2462         case IDR0_TTF_AARCH32_64:
2463                 smmu->ias = 40;
2464                 /* Fallthrough */
2465         case IDR0_TTF_AARCH64:
2466                 break;
2467         default:
2468                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2469                 return -ENXIO;
2470         }
2471
2472         /* ASID/VMID sizes */
2473         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2474         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2475
2476         /* IDR1 */
2477         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2478         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2479                 dev_err(smmu->dev, "embedded implementation not supported\n");
2480                 return -ENXIO;
2481         }
2482
2483         /* Queue sizes, capped at 4k */
2484         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2485                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2486         if (!smmu->cmdq.q.max_n_shift) {
2487                 /* Odd alignment restrictions on the base, so ignore for now */
2488                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2489                 return -ENXIO;
2490         }
2491
2492         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2493                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2494         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2495                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2496
2497         /* SID/SSID sizes */
2498         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2499         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2500
2501         /* IDR5 */
2502         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2503
2504         /* Maximum number of outstanding stalls */
2505         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2506                                 & IDR5_STALL_MAX_MASK;
2507
2508         /* Page sizes */
2509         if (reg & IDR5_GRAN64K)
2510                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2511         if (reg & IDR5_GRAN16K)
2512                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2513         if (reg & IDR5_GRAN4K)
2514                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2515
2516         if (arm_smmu_ops.pgsize_bitmap == -1UL)
2517                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2518         else
2519                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2520
2521         /* Output address size */
2522         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2523         case IDR5_OAS_32_BIT:
2524                 smmu->oas = 32;
2525                 break;
2526         case IDR5_OAS_36_BIT:
2527                 smmu->oas = 36;
2528                 break;
2529         case IDR5_OAS_40_BIT:
2530                 smmu->oas = 40;
2531                 break;
2532         case IDR5_OAS_42_BIT:
2533                 smmu->oas = 42;
2534                 break;
2535         case IDR5_OAS_44_BIT:
2536                 smmu->oas = 44;
2537                 break;
2538         default:
2539                 dev_info(smmu->dev,
2540                         "unknown output address size. Truncating to 48-bit\n");
2541                 /* Fallthrough */
2542         case IDR5_OAS_48_BIT:
2543                 smmu->oas = 48;
2544         }
2545
2546         /* Set the DMA mask for our table walker */
2547         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2548                 dev_warn(smmu->dev,
2549                          "failed to set DMA mask for table walker\n");
2550
2551         smmu->ias = max(smmu->ias, smmu->oas);
2552
2553         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2554                  smmu->ias, smmu->oas, smmu->features);
2555         return 0;
2556 }
2557
2558 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2559 {
2560         int irq, ret;
2561         struct resource *res;
2562         struct arm_smmu_device *smmu;
2563         struct device *dev = &pdev->dev;
2564         bool bypass = true;
2565         u32 cells;
2566
2567         if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2568                 dev_err(dev, "missing #iommu-cells property\n");
2569         else if (cells != 1)
2570                 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2571         else
2572                 bypass = false;
2573
2574         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2575         if (!smmu) {
2576                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2577                 return -ENOMEM;
2578         }
2579         smmu->dev = dev;
2580
2581         /* Base address */
2582         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2583         if (resource_size(res) + 1 < SZ_128K) {
2584                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2585                 return -EINVAL;
2586         }
2587
2588         smmu->base = devm_ioremap_resource(dev, res);
2589         if (IS_ERR(smmu->base))
2590                 return PTR_ERR(smmu->base);
2591
2592         /* Interrupt lines */
2593         irq = platform_get_irq_byname(pdev, "eventq");
2594         if (irq > 0)
2595                 smmu->evtq.q.irq = irq;
2596
2597         irq = platform_get_irq_byname(pdev, "priq");
2598         if (irq > 0)
2599                 smmu->priq.q.irq = irq;
2600
2601         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2602         if (irq > 0)
2603                 smmu->cmdq.q.irq = irq;
2604
2605         irq = platform_get_irq_byname(pdev, "gerror");
2606         if (irq > 0)
2607                 smmu->gerr_irq = irq;
2608
2609         parse_driver_options(smmu);
2610
2611         /* Probe the h/w */
2612         ret = arm_smmu_device_probe(smmu);
2613         if (ret)
2614                 return ret;
2615
2616         /* Initialise in-memory data structures */
2617         ret = arm_smmu_init_structures(smmu);
2618         if (ret)
2619                 return ret;
2620
2621         /* Record our private device structure */
2622         platform_set_drvdata(pdev, smmu);
2623
2624         /* Reset the device */
2625         ret = arm_smmu_device_reset(smmu, bypass);
2626         if (ret)
2627                 return ret;
2628
2629         /* And we're up. Go go go! */
2630         of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
2631 #ifdef CONFIG_PCI
2632         pci_request_acs();
2633         ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2634         if (ret)
2635                 return ret;
2636 #endif
2637 #ifdef CONFIG_ARM_AMBA
2638         ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2639         if (ret)
2640                 return ret;
2641 #endif
2642         return bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2643 }
2644
2645 static int arm_smmu_device_remove(struct platform_device *pdev)
2646 {
2647         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2648
2649         arm_smmu_device_disable(smmu);
2650         return 0;
2651 }
2652
2653 static struct of_device_id arm_smmu_of_match[] = {
2654         { .compatible = "arm,smmu-v3", },
2655         { },
2656 };
2657 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2658
2659 static struct platform_driver arm_smmu_driver = {
2660         .driver = {
2661                 .name           = "arm-smmu-v3",
2662                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2663         },
2664         .probe  = arm_smmu_device_dt_probe,
2665         .remove = arm_smmu_device_remove,
2666 };
2667
2668 static int __init arm_smmu_init(void)
2669 {
2670         static bool registered;
2671         int ret = 0;
2672
2673         if (!registered) {
2674                 ret = platform_driver_register(&arm_smmu_driver);
2675                 registered = !ret;
2676         }
2677         return ret;
2678 }
2679
2680 static void __exit arm_smmu_exit(void)
2681 {
2682         return platform_driver_unregister(&arm_smmu_driver);
2683 }
2684
2685 subsys_initcall(arm_smmu_init);
2686 module_exit(arm_smmu_exit);
2687
2688 static int __init arm_smmu_of_init(struct device_node *np)
2689 {
2690         int ret = arm_smmu_init();
2691
2692         if (ret)
2693                 return ret;
2694
2695         if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
2696                 return -ENODEV;
2697
2698         return 0;
2699 }
2700 IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init);
2701
2702 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2703 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2704 MODULE_LICENSE("GPL v2");