Commit | Line | Data |
---|---|---|
2d7ca2c4 | 1 | // SPDX-License-Identifier: GPL-2.0 |
48ec83bc WD |
2 | /* |
3 | * IOMMU API for ARM architected SMMUv3 implementations. | |
4 | * | |
48ec83bc WD |
5 | * Copyright (C) 2015 ARM Limited |
6 | * | |
7 | * Author: Will Deacon <will.deacon@arm.com> | |
8 | * | |
9 | * This driver is powered by bad coffee and bombay mix. | |
10 | */ | |
11 | ||
e4dadfa8 LP |
12 | #include <linux/acpi.h> |
13 | #include <linux/acpi_iort.h> | |
1cf9e54e | 14 | #include <linux/bitops.h> |
b63b3439 | 15 | #include <linux/crash_dump.h> |
48ec83bc WD |
16 | #include <linux/delay.h> |
17 | #include <linux/err.h> | |
18 | #include <linux/interrupt.h> | |
b77cf11f | 19 | #include <linux/io-pgtable.h> |
48ec83bc | 20 | #include <linux/iopoll.h> |
6e8fa740 | 21 | #include <linux/module.h> |
166bdbd2 | 22 | #include <linux/msi.h> |
48ec83bc WD |
23 | #include <linux/of.h> |
24 | #include <linux/of_address.h> | |
941a802d | 25 | #include <linux/of_platform.h> |
48ec83bc | 26 | #include <linux/pci.h> |
9ce27afc | 27 | #include <linux/pci-ats.h> |
48ec83bc WD |
28 | #include <linux/platform_device.h> |
29 | ||
e881e783 | 30 | #include "arm-smmu-v3.h" |
f2042ed2 | 31 | #include "../../dma-iommu.h" |
757636ed | 32 | #include "../../iommu-sva.h" |
50019f09 | 33 | |
3045fe45 | 34 | static bool disable_bypass = true; |
9305d02a | 35 | module_param(disable_bypass, bool, 0444); |
48ec83bc WD |
36 | MODULE_PARM_DESC(disable_bypass, |
37 | "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU."); | |
38 | ||
bd07a20a BS |
39 | static bool disable_msipolling; |
40 | module_param(disable_msipolling, bool, 0444); | |
41 | MODULE_PARM_DESC(disable_msipolling, | |
42 | "Disable MSI-based polling for CMD_SYNC completion."); | |
43 | ||
166bdbd2 MZ |
44 | enum arm_smmu_msi_index { |
45 | EVTQ_MSI_INDEX, | |
46 | GERROR_MSI_INDEX, | |
47 | PRIQ_MSI_INDEX, | |
48 | ARM_SMMU_MAX_MSIS, | |
49 | }; | |
50 | ||
51 | static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { | |
52 | [EVTQ_MSI_INDEX] = { | |
53 | ARM_SMMU_EVTQ_IRQ_CFG0, | |
54 | ARM_SMMU_EVTQ_IRQ_CFG1, | |
55 | ARM_SMMU_EVTQ_IRQ_CFG2, | |
56 | }, | |
57 | [GERROR_MSI_INDEX] = { | |
58 | ARM_SMMU_GERROR_IRQ_CFG0, | |
59 | ARM_SMMU_GERROR_IRQ_CFG1, | |
60 | ARM_SMMU_GERROR_IRQ_CFG2, | |
61 | }, | |
62 | [PRIQ_MSI_INDEX] = { | |
63 | ARM_SMMU_PRIQ_IRQ_CFG0, | |
64 | ARM_SMMU_PRIQ_IRQ_CFG1, | |
65 | ARM_SMMU_PRIQ_IRQ_CFG2, | |
66 | }, | |
67 | }; | |
68 | ||
5e92946c ZL |
69 | struct arm_smmu_option_prop { |
70 | u32 opt; | |
71 | const char *prop; | |
72 | }; | |
73 | ||
3f1ce8e8 JPB |
74 | DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa); |
75 | DEFINE_MUTEX(arm_smmu_asid_lock); | |
0299a1a8 | 76 | |
32784a95 JPB |
77 | /* |
78 | * Special value used by SVA when a process dies, to quiesce a CD without | |
79 | * disabling it. | |
80 | */ | |
81 | struct arm_smmu_ctx_desc quiet_cd = { 0 }; | |
82 | ||
5e92946c ZL |
83 | static struct arm_smmu_option_prop arm_smmu_options[] = { |
84 | { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, | |
e5b829de | 85 | { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, |
5e92946c ZL |
86 | { 0, NULL}, |
87 | }; | |
88 | ||
5e92946c ZL |
89 | static void parse_driver_options(struct arm_smmu_device *smmu) |
90 | { | |
91 | int i = 0; | |
92 | ||
93 | do { | |
94 | if (of_property_read_bool(smmu->dev->of_node, | |
95 | arm_smmu_options[i].prop)) { | |
96 | smmu->options |= arm_smmu_options[i].opt; | |
97 | dev_notice(smmu->dev, "option %s\n", | |
98 | arm_smmu_options[i].prop); | |
99 | } | |
100 | } while (arm_smmu_options[++i].opt); | |
101 | } | |
102 | ||
48ec83bc | 103 | /* Low-level queue manipulation functions */ |
587e6c10 WD |
104 | static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n) |
105 | { | |
106 | u32 space, prod, cons; | |
107 | ||
108 | prod = Q_IDX(q, q->prod); | |
109 | cons = Q_IDX(q, q->cons); | |
110 | ||
111 | if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons)) | |
112 | space = (1 << q->max_n_shift) - (prod - cons); | |
113 | else | |
114 | space = cons - prod; | |
115 | ||
116 | return space >= n; | |
117 | } | |
118 | ||
7c288a5b | 119 | static bool queue_full(struct arm_smmu_ll_queue *q) |
48ec83bc WD |
120 | { |
121 | return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) && | |
122 | Q_WRP(q, q->prod) != Q_WRP(q, q->cons); | |
123 | } | |
124 | ||
7c288a5b | 125 | static bool queue_empty(struct arm_smmu_ll_queue *q) |
48ec83bc WD |
126 | { |
127 | return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) && | |
128 | Q_WRP(q, q->prod) == Q_WRP(q, q->cons); | |
129 | } | |
130 | ||
587e6c10 | 131 | static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod) |
48ec83bc | 132 | { |
587e6c10 WD |
133 | return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) && |
134 | (Q_IDX(q, q->cons) > Q_IDX(q, prod))) || | |
135 | ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) && | |
136 | (Q_IDX(q, q->cons) <= Q_IDX(q, prod))); | |
48ec83bc WD |
137 | } |
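
An aside on the encoding used by these predicates: prod and cons each pack an entry index in their low max_n_shift bits, a wrap bit directly above it, and (for prod) an overflow bit above that. The following is a minimal, standalone userspace model of that arithmetic, assuming an 8-entry queue; q_idx()/q_wrp() are hypothetical stand-ins for the Q_IDX()/Q_WRP() macros in arm-smmu-v3.h.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SHIFT    3                      /* 8-entry queue, like max_n_shift */
#define IDX_MASK ((1u << SHIFT) - 1)    /* low bits: entry index */
#define WRP_BIT  (1u << SHIFT)          /* next bit up: wrap flag */

static uint32_t q_idx(uint32_t p) { return p & IDX_MASK; }
static uint32_t q_wrp(uint32_t p) { return p & WRP_BIT; }

/* Same arithmetic as queue_has_space(): equal wrap bits mean prod and
 * cons are on the same lap; different wrap bits mean prod has lapped
 * cons exactly once. */
static bool has_space(uint32_t prod, uint32_t cons, uint32_t n)
{
	uint32_t space;

	if (q_wrp(prod) == q_wrp(cons))
		space = (1u << SHIFT) - (q_idx(prod) - q_idx(cons));
	else
		space = q_idx(cons) - q_idx(prod);

	return space >= n;
}

int main(void)
{
	uint32_t prod = 0, cons = 0;

	assert(has_space(prod, cons, 8));     /* empty: whole queue free */

	prod = WRP_BIT;                       /* produced 8 entries: wrapped */
	assert(!has_space(prod, cons, 1));    /* full: same idx, wrap differs */

	cons = 3;                             /* consumer drained 3 entries */
	assert(has_space(prod, cons, 3) && !has_space(prod, cons, 4));
	return 0;
}

The point of the wrap bit is that a full queue and an empty queue both have equal prod and cons indices; only the wrap bits tell them apart, which is exactly the distinction queue_full() and queue_empty() draw above.
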
138 | ||
2a8868f1 | 139 | static void queue_sync_cons_out(struct arm_smmu_queue *q) |
48ec83bc | 140 | { |
a868e853 WD |
141 | /* |
142 | * Ensure that all CPU accesses (reads and writes) to the queue | |
143 | * are complete before we update the cons pointer. | |
144 | */ | |
a76a3777 | 145 | __iomb(); |
52be8637 | 146 | writel_relaxed(q->llq.cons, q->cons_reg); |
48ec83bc WD |
147 | } |
148 | ||
7c288a5b | 149 | static void queue_inc_cons(struct arm_smmu_ll_queue *q) |
2a8868f1 | 150 | { |
7c288a5b WD |
151 | u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; |
152 | q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); | |
48ec83bc WD |
153 | } |
154 | ||
2a8868f1 | 155 | static int queue_sync_prod_in(struct arm_smmu_queue *q) |
48ec83bc | 156 | { |
a76a3777 | 157 | u32 prod; |
48ec83bc | 158 | int ret = 0; |
a76a3777 ZW |
159 | |
160 | /* | |
161 | * We can't use the _relaxed() variant here, as we must prevent | |
162 | * speculative reads of the queue before we have determined that | |
163 | * prod has indeed moved. | |
164 | */ | |
165 | prod = readl(q->prod_reg); | |
48ec83bc | 166 | |
52be8637 | 167 | if (Q_OVF(prod) != Q_OVF(q->llq.prod)) |
48ec83bc WD |
168 | ret = -EOVERFLOW; |
169 | ||
52be8637 | 170 | q->llq.prod = prod; |
48ec83bc WD |
171 | return ret; |
172 | } | |
173 | ||
587e6c10 | 174 | static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n) |
48ec83bc | 175 | { |
587e6c10 WD |
176 | u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n; |
177 | return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod); | |
48ec83bc WD |
178 | } |
179 | ||
587e6c10 WD |
180 | static void queue_poll_init(struct arm_smmu_device *smmu, |
181 | struct arm_smmu_queue_poll *qp) | |
48ec83bc | 182 | { |
587e6c10 WD |
183 | qp->delay = 1; |
184 | qp->spin_cnt = 0; | |
185 | qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); | |
186 | qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); | |
48ec83bc | 187 | } |
48ec83bc | 188 | |
587e6c10 | 189 | static int queue_poll(struct arm_smmu_queue_poll *qp) |
48ec83bc | 190 | { |
587e6c10 WD |
191 | if (ktime_compare(ktime_get(), qp->timeout) > 0) |
192 | return -ETIMEDOUT; | |
48ec83bc | 193 | |
587e6c10 WD |
194 | if (qp->wfe) { |
195 | wfe(); | |
196 | } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) { | |
197 | cpu_relax(); | |
198 | } else { | |
199 | udelay(qp->delay); | |
200 | qp->delay *= 2; | |
201 | qp->spin_cnt = 0; | |
48ec83bc WD |
202 | } |
203 | ||
204 | return 0; | |
205 | } | |
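
As a rough userspace sketch of queue_poll()'s strategy (spin a bounded number of times, then sleep with an exponentially growing delay until a deadline), with clock_gettime()/usleep() standing in for ktime and udelay(), and placeholder constants rather than the driver's ARM_SMMU_POLL_* values:

#include <errno.h>
#include <time.h>
#include <unistd.h>

#define POLL_SPIN_COUNT 10      /* placeholder, not the driver's value */
#define POLL_TIMEOUT_US 100000  /* placeholder */

struct poll_state {
	unsigned int delay_us, spin_cnt;
	struct timespec timeout;
};

static void poll_init(struct poll_state *qp)
{
	clock_gettime(CLOCK_MONOTONIC, &qp->timeout);
	qp->timeout.tv_sec  += POLL_TIMEOUT_US / 1000000;
	qp->timeout.tv_nsec += (POLL_TIMEOUT_US % 1000000) * 1000L;
	if (qp->timeout.tv_nsec >= 1000000000L) {
		qp->timeout.tv_nsec -= 1000000000L;
		qp->timeout.tv_sec++;
	}
	qp->delay_us = 1;
	qp->spin_cnt = 0;
}

/* Spin a few times, then sleep with an exponentially growing delay,
 * mirroring queue_poll() minus the WFE path. */
static int poll_once(struct poll_state *qp)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	if (now.tv_sec > qp->timeout.tv_sec ||
	    (now.tv_sec == qp->timeout.tv_sec &&
	     now.tv_nsec > qp->timeout.tv_nsec))
		return -ETIMEDOUT;

	if (++qp->spin_cnt < POLL_SPIN_COUNT) {
		/* cpu_relax() equivalent: just go around again */
	} else {
		usleep(qp->delay_us);
		qp->delay_us *= 2;
		qp->spin_cnt = 0;
	}
	return 0;
}

int main(void)
{
	struct poll_state qp;
	int ret;

	poll_init(&qp);
	do {
		ret = poll_once(&qp);  /* nothing ever completes here... */
	} while (!ret);
	return ret == -ETIMEDOUT ? 0 : 1;  /* ...so the timeout must fire */
}
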
206 | ||
207 | static void queue_write(__le64 *dst, u64 *src, size_t n_dwords) | |
208 | { | |
209 | int i; | |
210 | ||
211 | for (i = 0; i < n_dwords; ++i) | |
212 | *dst++ = cpu_to_le64(*src++); | |
213 | } | |
214 | ||
376cdf66 | 215 | static void queue_read(u64 *dst, __le64 *src, size_t n_dwords) |
48ec83bc WD |
216 | { |
217 | int i; | |
218 | ||
219 | for (i = 0; i < n_dwords; ++i) | |
220 | *dst++ = le64_to_cpu(*src++); | |
221 | } | |
222 | ||
223 | static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent) | |
224 | { | |
7c288a5b | 225 | if (queue_empty(&q->llq)) |
48ec83bc WD |
226 | return -EAGAIN; |
227 | ||
52be8637 | 228 | queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords); |
7c288a5b | 229 | queue_inc_cons(&q->llq); |
2a8868f1 | 230 | queue_sync_cons_out(q); |
48ec83bc WD |
231 | return 0; |
232 | } | |
233 | ||
234 | /* High-level queue accessors */ | |
235 | static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) | |
236 | { | |
d25f6ead | 237 | memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT); |
7417b99c | 238 | cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode); |
48ec83bc WD |
239 | |
240 | switch (ent->opcode) { | |
241 | case CMDQ_OP_TLBI_EL2_ALL: | |
242 | case CMDQ_OP_TLBI_NSNH_ALL: | |
243 | break; | |
244 | case CMDQ_OP_PREFETCH_CFG: | |
7417b99c | 245 | cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid); |
48ec83bc | 246 | break; |
87f42391 JPB |
247 | case CMDQ_OP_CFGI_CD: |
248 | cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); | |
df561f66 | 249 | fallthrough; |
48ec83bc | 250 | case CMDQ_OP_CFGI_STE: |
7417b99c RM |
251 | cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); |
252 | cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); | |
48ec83bc | 253 | break; |
87f42391 JPB |
254 | case CMDQ_OP_CFGI_CD_ALL: |
255 | cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); | |
256 | break; | |
48ec83bc WD |
257 | case CMDQ_OP_CFGI_ALL: |
258 | /* Cover the entire SID range */ | |
7417b99c | 259 | cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); |
48ec83bc WD |
260 | break; |
261 | case CMDQ_OP_TLBI_NH_VA: | |
9111aebf JPB |
262 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); |
263 | fallthrough; | |
264 | case CMDQ_OP_TLBI_EL2_VA: | |
6a481a95 RH |
265 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); |
266 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); | |
7417b99c RM |
267 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); |
268 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); | |
6a481a95 RH |
269 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); |
270 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); | |
1c27df1c WD |
271 | cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; |
272 | break; | |
48ec83bc | 273 | case CMDQ_OP_TLBI_S2_IPA: |
6a481a95 RH |
274 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); |
275 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); | |
7417b99c RM |
276 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); |
277 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); | |
6a481a95 RH |
278 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); |
279 | cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); | |
1c27df1c | 280 | cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK; |
48ec83bc WD |
281 | break; |
282 | case CMDQ_OP_TLBI_NH_ASID: | |
7417b99c | 283 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); |
df561f66 | 284 | fallthrough; |
48ec83bc | 285 | case CMDQ_OP_TLBI_S12_VMALL: |
7417b99c | 286 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); |
48ec83bc | 287 | break; |
9111aebf JPB |
288 | case CMDQ_OP_TLBI_EL2_ASID: |
289 | cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); | |
290 | break; | |
9ce27afc JPB |
291 | case CMDQ_OP_ATC_INV: |
292 | cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); | |
293 | cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); | |
294 | cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid); | |
295 | cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid); | |
296 | cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size); | |
297 | cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK; | |
298 | break; | |
48ec83bc | 299 | case CMDQ_OP_PRI_RESP: |
7417b99c RM |
300 | cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); |
301 | cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid); | |
302 | cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid); | |
303 | cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid); | |
48ec83bc WD |
304 | switch (ent->pri.resp) { |
305 | case PRI_RESP_DENY: | |
48ec83bc | 306 | case PRI_RESP_FAIL: |
48ec83bc | 307 | case PRI_RESP_SUCC: |
48ec83bc WD |
308 | break; |
309 | default: | |
310 | return -EINVAL; | |
311 | } | |
7417b99c | 312 | cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp); |
48ec83bc | 313 | break; |
395ad89d JPB |
314 | case CMDQ_OP_RESUME: |
315 | cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid); | |
316 | cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp); | |
317 | cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag); | |
318 | break; | |
48ec83bc | 319 | case CMDQ_OP_CMD_SYNC: |
587e6c10 | 320 | if (ent->sync.msiaddr) { |
7417b99c | 321 | cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ); |
587e6c10 WD |
322 | cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; |
323 | } else { | |
7417b99c | 324 | cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); |
587e6c10 | 325 | } |
7417b99c RM |
326 | cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH); |
327 | cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); | |
48ec83bc WD |
328 | break; |
329 | default: | |
330 | return -ENOENT; | |
331 | } | |
332 | ||
333 | return 0; | |
334 | } | |
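
The builder above is mostly FIELD_PREP() calls: shift a value up to the bit positions named by a GENMASK()-defined field and mask it. Below is a self-contained userspace rendition of that idiom (linux/bitfield.h does the same with extra compile-time checking, and __builtin_ctzll is a GCC/Clang builtin); the two field layouts mirror CMDQ_0_OP (bits 7:0) and CMDQ_CFGI_0_SID (bits 63:32) from the driver header.

#include <assert.h>
#include <stdint.h>

/* GENMASK(h, l): bits h..l set, as in linux/bits.h */
#define GENMASK64(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

/* Simplified FIELD_PREP/FIELD_GET: shift by the field's lowest set bit */
#define FIELD_PREP64(mask, val) \
	(((uint64_t)(val) << __builtin_ctzll(mask)) & (mask))
#define FIELD_GET64(mask, reg) \
	(((uint64_t)(reg) & (mask)) >> __builtin_ctzll(mask))

#define CMD_0_OP	GENMASK64(7, 0)     /* like CMDQ_0_OP */
#define CMD_CFGI_0_SID	GENMASK64(63, 32)   /* like CMDQ_CFGI_0_SID */

int main(void)
{
	uint64_t cmd0 = 0;

	cmd0 |= FIELD_PREP64(CMD_0_OP, 0x03);          /* an opcode */
	cmd0 |= FIELD_PREP64(CMD_CFGI_0_SID, 0x1234);  /* a stream ID */

	assert(FIELD_GET64(CMD_0_OP, cmd0) == 0x03);
	assert(FIELD_GET64(CMD_CFGI_0_SID, cmd0) == 0x1234);
	return 0;
}
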
335 | ||
8639cc83 ZL |
336 | static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) |
337 | { | |
338 | return &smmu->cmdq; | |
339 | } | |
340 | ||
587e6c10 | 341 | static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, |
8639cc83 | 342 | struct arm_smmu_queue *q, u32 prod) |
587e6c10 | 343 | { |
587e6c10 WD |
344 | struct arm_smmu_cmdq_ent ent = { |
345 | .opcode = CMDQ_OP_CMD_SYNC, | |
346 | }; | |
347 | ||
348 | /* | |
349 | * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI | |
350 | * payload, so the write will zero the entire command on that platform. | |
351 | */ | |
bd07a20a | 352 | if (smmu->options & ARM_SMMU_OPT_MSIPOLL) { |
587e6c10 WD |
353 | ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) * |
354 | q->ent_dwords * 8; | |
355 | } | |
356 | ||
357 | arm_smmu_cmdq_build_cmd(cmd, &ent); | |
358 | } | |
359 | ||
2cbeaf3f ZL |
360 | static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, |
361 | struct arm_smmu_queue *q) | |
48ec83bc | 362 | { |
d56d5162 | 363 | static const char * const cerror_str[] = { |
48ec83bc WD |
364 | [CMDQ_ERR_CERROR_NONE_IDX] = "No error", |
365 | [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", | |
366 | [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", | |
9ce27afc | 367 | [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", |
48ec83bc WD |
368 | }; |
369 | ||
370 | int i; | |
371 | u64 cmd[CMDQ_ENT_DWORDS]; | |
48ec83bc | 372 | u32 cons = readl_relaxed(q->cons_reg); |
cbcee19a | 373 | u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons); |
48ec83bc WD |
374 | struct arm_smmu_cmdq_ent cmd_sync = { |
375 | .opcode = CMDQ_OP_CMD_SYNC, | |
376 | }; | |
377 | ||
378 | dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, | |
a0d5c04c | 379 | idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown"); |
48ec83bc WD |
380 | |
381 | switch (idx) { | |
48ec83bc WD |
382 | case CMDQ_ERR_CERROR_ABT_IDX: |
383 | dev_err(smmu->dev, "retrying command fetch\n"); | |
5a1ab5c0 | 384 | return; |
48ec83bc WD |
385 | case CMDQ_ERR_CERROR_NONE_IDX: |
386 | return; | |
9ce27afc JPB |
387 | case CMDQ_ERR_CERROR_ATC_INV_IDX: |
388 | /* | |
389 | * ATC Invalidation Completion timeout. CONS is still pointing | |
390 | * at the CMD_SYNC. Attempt to complete other pending commands | |
391 | * by repeating the CMD_SYNC, though we might well end up back | |
392 | * here since the ATC invalidation may still be pending. | |
393 | */ | |
394 | return; | |
a0d5c04c | 395 | case CMDQ_ERR_CERROR_ILL_IDX: |
a0d5c04c WD |
396 | default: |
397 | break; | |
48ec83bc WD |
398 | } |
399 | ||
400 | /* | |
401 | * We may have concurrent producers, so we need to be careful | |
402 | * not to touch any of the shadow cmdq state. | |
403 | */ | |
aea2037e | 404 | queue_read(cmd, Q_ENT(q, cons), q->ent_dwords); |
48ec83bc WD |
405 | dev_err(smmu->dev, "skipping command in error state:\n"); |
406 | for (i = 0; i < ARRAY_SIZE(cmd); ++i) | |
407 | dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); | |
408 | ||
409 | /* Convert the erroneous command into a CMD_SYNC */ | |
59d9bd72 | 410 | arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); |
48ec83bc | 411 | |
aea2037e | 412 | queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); |
48ec83bc WD |
413 | } |
414 | ||
2cbeaf3f ZL |
415 | static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) |
416 | { | |
417 | __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); | |
418 | } | |
419 | ||
587e6c10 WD |
420 | /* |
421 | * Command queue locking. | |
422 | * This is a form of bastardised rwlock with the following major changes: | |
423 | * | |
424 | * - The only LOCK routines are exclusive_trylock() and shared_lock(). | |
425 | * Neither have barrier semantics, and instead provide only a control | |
426 | * dependency. | |
427 | * | |
428 | * - The UNLOCK routines are supplemented with shared_tryunlock(), which | |
429 | * fails if the caller appears to be the last lock holder (yes, this is | |
430 | * racy). All successful UNLOCK routines have RELEASE semantics. | |
431 | */ | |
432 | static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq) | |
2f657add | 433 | { |
587e6c10 | 434 | int val; |
2f657add | 435 | |
587e6c10 WD |
436 | /* |
437 | * We can try to avoid the cmpxchg() loop by simply incrementing the | |
438 | * lock counter. When held in exclusive state, the lock counter is set | |
439 | * to INT_MIN so these increments won't hurt as the value will remain | |
440 | * negative. | |
441 | */ | |
442 | if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0) | |
443 | return; | |
444 | ||
445 | do { | |
446 | val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0); | |
447 | } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val); | |
448 | } | |
449 | ||
450 | static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq) | |
451 | { | |
452 | (void)atomic_dec_return_release(&cmdq->lock); | |
453 | } | |
454 | ||
455 | static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq) | |
456 | { | |
457 | if (atomic_read(&cmdq->lock) == 1) | |
458 | return false; | |
459 | ||
460 | arm_smmu_cmdq_shared_unlock(cmdq); | |
461 | return true; | |
462 | } | |
463 | ||
464 | #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \ | |
465 | ({ \ | |
466 | bool __ret; \ | |
467 | local_irq_save(flags); \ | |
468 | __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \ | |
469 | if (!__ret) \ | |
470 | local_irq_restore(flags); \ | |
471 | __ret; \ | |
472 | }) | |
473 | ||
474 | #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \ | |
475 | ({ \ | |
476 | atomic_set_release(&cmdq->lock, 0); \ | |
477 | local_irq_restore(flags); \ | |
478 | }) | |
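
Stripped of kernel plumbing, the lock is one atomic counter: positive values count shared holders, and INT_MIN marks the exclusive owner — which is also why the stray increments from shared_lock() "won't hurt": exclusive unlock stores 0 and discards them. A self-contained C11 model of the same discipline (stdatomic stands in for atomic_t; the IRQ save/restore is omitted):

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int lock = 0;

/* Shared lock: bump the counter; if it was negative, an exclusive
 * holder is in — wait until the count is non-negative again. */
static void shared_lock(void)
{
	int val;

	if (atomic_fetch_add_explicit(&lock, 1, memory_order_relaxed) >= 0)
		return;

	do {
		while ((val = atomic_load_explicit(&lock,
						   memory_order_relaxed)) < 0)
			; /* spin, like atomic_cond_read_relaxed() */
	} while (!atomic_compare_exchange_weak_explicit(&lock, &val, val + 1,
							memory_order_relaxed,
							memory_order_relaxed));
}

static void shared_unlock(void)
{
	atomic_fetch_sub_explicit(&lock, 1, memory_order_release);
}

/* Fails (racily, by design) if we look like the last shared holder. */
static bool shared_tryunlock(void)
{
	if (atomic_load_explicit(&lock, memory_order_relaxed) == 1)
		return false;
	shared_unlock();
	return true;
}

/* Exclusive trylock: only succeeds on a completely idle lock. */
static bool exclusive_trylock(void)
{
	int expected = 0;

	return atomic_compare_exchange_strong_explicit(&lock, &expected,
						       INT_MIN,
						       memory_order_relaxed,
						       memory_order_relaxed);
}

static void exclusive_unlock(void)
{
	atomic_store_explicit(&lock, 0, memory_order_release);
}

int main(void)
{
	shared_lock();
	if (!shared_tryunlock())   /* we're the only holder: this fails... */
		shared_unlock();   /* ...so drop the count explicitly */

	if (exclusive_trylock())
		exclusive_unlock();
	return 0;
}
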
479 | ||
480 | ||
481 | /* | |
482 | * Command queue insertion. | |
483 | * This is made fiddly by our attempts to achieve some sort of scalability | |
484 | * since there is one queue shared amongst all of the CPUs in the system. If | |
485 | * you like mixed-size concurrency, dependency ordering and relaxed atomics, | |
486 | * then you'll *love* this monstrosity. | |
487 | * | |
488 | * The basic idea is to split the queue up into ranges of commands that are | |
489 | * owned by a given CPU; the owner may not have written all of the commands | |
490 | * itself, but is responsible for advancing the hardware prod pointer when | |
491 | * the time comes. The algorithm is roughly: | |
492 | * | |
493 | * 1. Allocate some space in the queue. At this point we also discover | |
494 | * whether the head of the queue is currently owned by another CPU, | |
495 | * or whether we are the owner. | |
496 | * | |
497 | * 2. Write our commands into our allocated slots in the queue. | |
498 | * | |
499 | * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map. | |
500 | * | |
501 | * 4. If we are an owner: | |
502 | * a. Wait for the previous owner to finish. | |
503 | * b. Mark the queue head as unowned, which tells us the range | |
504 | * that we are responsible for publishing. | |
505 | * c. Wait for all commands in our owned range to become valid. | |
506 | * d. Advance the hardware prod pointer. | |
507 | * e. Tell the next owner we've finished. | |
508 | * | |
509 | * 5. If we are inserting a CMD_SYNC (we may or may not have been an | |
510 | * owner), then we need to stick around until it has completed: | |
511 | * a. If we have MSIs, the SMMU can write back into the CMD_SYNC | |
512 | * to clear the first 4 bytes. | |
513 | * b. Otherwise, we spin waiting for the hardware cons pointer to | |
514 | * advance past our command. | |
515 | * | |
516 | * The devil is in the details, particularly the use of locking for handling | |
517 | * SYNC completion and freeing up space in the queue before we think that it is | |
518 | * full. | |
519 | */ | |
520 | static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq, | |
521 | u32 sprod, u32 eprod, bool set) | |
522 | { | |
523 | u32 swidx, sbidx, ewidx, ebidx; | |
524 | struct arm_smmu_ll_queue llq = { | |
525 | .max_n_shift = cmdq->q.llq.max_n_shift, | |
526 | .prod = sprod, | |
527 | }; | |
901510ee | 528 | |
587e6c10 WD |
529 | ewidx = BIT_WORD(Q_IDX(&llq, eprod)); |
530 | ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG; | |
2f657add | 531 | |
587e6c10 WD |
532 | while (llq.prod != eprod) { |
533 | unsigned long mask; | |
534 | atomic_long_t *ptr; | |
535 | u32 limit = BITS_PER_LONG; | |
901510ee | 536 | |
587e6c10 WD |
537 | swidx = BIT_WORD(Q_IDX(&llq, llq.prod)); |
538 | sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG; | |
539 | ||
540 | ptr = &cmdq->valid_map[swidx]; | |
541 | ||
542 | if ((swidx == ewidx) && (sbidx < ebidx)) | |
543 | limit = ebidx; | |
544 | ||
545 | mask = GENMASK(limit - 1, sbidx); | |
546 | ||
547 | /* | |
548 | * The valid bit is the inverse of the wrap bit. This means | |
549 | * that a zero-initialised queue is invalid and, after marking | |
550 | * all entries as valid, they become invalid again when we | |
551 | * wrap. | |
552 | */ | |
553 | if (set) { | |
554 | atomic_long_xor(mask, ptr); | |
555 | } else { /* Poll */ | |
556 | unsigned long valid; | |
557 | ||
558 | valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask; | |
559 | atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid); | |
560 | } | |
561 | ||
562 | llq.prod = queue_inc_prod_n(&llq, limit - sbidx); | |
2f657add RM |
563 | } |
564 | } | |
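
The "valid bit is the inverse of the wrap bit" comment rewards a worked example: producers xor their mask into the bitmap, so bits written on an even lap read as 1 and on an odd lap as 0, and a poller's expected pattern is the mask itself when the wrap bit is clear and all-zeroes when it is set — precisely what (ULONG_MAX + !!wrap) & mask evaluates to. A two-lap demonstration:

#include <assert.h>
#include <limits.h>

int main(void)
{
	unsigned long map = 0;           /* zero-initialised: all invalid */
	unsigned long mask = 0xF0UL;     /* slots 4..7 */

	/* Lap 0 (wrap bit clear): the producer xors its mask in; a poller
	 * expects the bits to read back as the mask itself. */
	map ^= mask;
	assert((map & mask) == ((ULONG_MAX + !!0) & mask));

	/* Lap 1 (wrap bit set): the same xor flips the bits back to 0,
	 * so now "valid" means all-zero under the mask. */
	map ^= mask;
	assert((map & mask) == ((ULONG_MAX + !!1) & mask));
	return 0;
}

This is why a zero-initialised queue starts out invalid, and why no explicit clearing pass is ever needed between laps.
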
565 | ||
587e6c10 WD |
566 | /* Mark all entries in the range [sprod, eprod) as valid */ |
567 | static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq, | |
568 | u32 sprod, u32 eprod) | |
569 | { | |
570 | __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true); | |
571 | } | |
572 | ||
573 | /* Wait for all entries in the range [sprod, eprod) to become valid */ | |
574 | static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, | |
575 | u32 sprod, u32 eprod) | |
576 | { | |
577 | __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false); | |
578 | } | |
579 | ||
580 | /* Wait for the command queue to become non-full */ | |
581 | static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, | |
582 | struct arm_smmu_ll_queue *llq) | |
48ec83bc | 583 | { |
8ded2909 | 584 | unsigned long flags; |
587e6c10 | 585 | struct arm_smmu_queue_poll qp; |
8639cc83 | 586 | struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); |
587e6c10 | 587 | int ret = 0; |
48ec83bc | 588 | |
587e6c10 WD |
589 | /* |
590 | * Try to update our copy of cons by grabbing exclusive cmdq access. If | |
591 | * that fails, spin until somebody else updates it for us. | |
592 | */ | |
593 | if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) { | |
594 | WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg)); | |
595 | arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags); | |
596 | llq->val = READ_ONCE(cmdq->q.llq.val); | |
597 | return 0; | |
48ec83bc WD |
598 | } |
599 | ||
587e6c10 WD |
600 | queue_poll_init(smmu, &qp); |
601 | do { | |
8639cc83 | 602 | llq->val = READ_ONCE(cmdq->q.llq.val); |
587e6c10 WD |
603 | if (!queue_full(llq)) |
604 | break; | |
605 | ||
606 | ret = queue_poll(&qp); | |
607 | } while (!ret); | |
608 | ||
609 | return ret; | |
2f657add | 610 | } |
48ec83bc | 611 | |
37de98f8 | 612 | /* |
587e6c10 WD |
613 | * Wait until the SMMU signals a CMD_SYNC completion MSI. |
614 | * Must be called with the cmdq lock held in some capacity. | |
37de98f8 | 615 | */ |
587e6c10 WD |
616 | static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, |
617 | struct arm_smmu_ll_queue *llq) | |
37de98f8 | 618 | { |
587e6c10 WD |
619 | int ret = 0; |
620 | struct arm_smmu_queue_poll qp; | |
8639cc83 | 621 | struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); |
587e6c10 | 622 | u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); |
a529ea19 | 623 | |
587e6c10 | 624 | queue_poll_init(smmu, &qp); |
37de98f8 | 625 | |
587e6c10 WD |
626 | /* |
627 | * The MSI won't generate an event, since it's being written back | |
628 | * into the command queue. | |
629 | */ | |
630 | qp.wfe = false; | |
631 | smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp))); | |
632 | llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1); | |
633 | return ret; | |
37de98f8 RM |
634 | } |
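
Completion here means the SMMU has written its (zero) MSI payload over the first 32-bit word of the CMD_SYNC slot, since the sync command was built with msiaddr pointing back into the queue. A toy two-thread model of that handshake, with a pthread playing the SMMU and a plain polling loop in place of smp_cond_load_relaxed() (and, unlike the real code, no timeout):

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <unistd.h>

/* First 32-bit word of a queued CMD_SYNC; non-zero until the "hardware"
 * writes its MSI payload (zero) back into the slot. */
static _Atomic uint32_t cmd_sync_word0 = 0xdeadbeef;

static void *fake_smmu(void *arg)
{
	(void)arg;
	usleep(1000);	/* pretend to drain the queue */
	atomic_store_explicit(&cmd_sync_word0, 0, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t smmu;

	pthread_create(&smmu, NULL, fake_smmu, NULL);

	/* __arm_smmu_cmdq_poll_until_msi() in miniature: spin until the
	 * word reads zero. */
	while (atomic_load_explicit(&cmd_sync_word0, memory_order_acquire))
		;

	pthread_join(smmu, NULL);
	assert(cmd_sync_word0 == 0);
	return 0;
}
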
635 | ||
587e6c10 WD |
636 | /* |
637 | * Wait until the SMMU cons index passes llq->prod. | |
638 | * Must be called with the cmdq lock held in some capacity. | |
639 | */ | |
640 | static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, | |
641 | struct arm_smmu_ll_queue *llq) | |
49806599 | 642 | { |
587e6c10 | 643 | struct arm_smmu_queue_poll qp; |
8639cc83 | 644 | struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); |
587e6c10 WD |
645 | u32 prod = llq->prod; |
646 | int ret = 0; | |
49806599 | 647 | |
587e6c10 | 648 | queue_poll_init(smmu, &qp); |
8639cc83 | 649 | llq->val = READ_ONCE(cmdq->q.llq.val); |
587e6c10 WD |
650 | do { |
651 | if (queue_consumed(llq, prod)) | |
652 | break; | |
901510ee | 653 | |
587e6c10 | 654 | ret = queue_poll(&qp); |
901510ee | 655 | |
587e6c10 WD |
656 | /* |
657 | * This needs to be a readl() so that our subsequent call | |
658 | * to arm_smmu_cmdq_shared_tryunlock() can fail accurately. | |
659 | * | |
660 | * Specifically, we need to ensure that we observe all | |
661 | * shared_lock()s by other CMD_SYNCs that share our owner, | |
662 | * so that a failing call to tryunlock() means that we're | |
663 | * the last one out and therefore we can safely advance | |
664 | * cmdq->q.llq.cons. Roughly speaking: | |
665 | * | |
666 | * CPU 0 CPU1 CPU2 (us) | |
667 | * | |
668 | * if (sync) | |
669 | * shared_lock(); | |
670 | * | |
671 | * dma_wmb(); | |
672 | * set_valid_map(); | |
673 | * | |
674 | * if (owner) { | |
675 | * poll_valid_map(); | |
676 | * <control dependency> | |
677 | * writel(prod_reg); | |
678 | * | |
679 | * readl(cons_reg); | |
680 | * tryunlock(); | |
681 | * | |
682 | * Requires us to see CPU 0's shared_lock() acquisition. | |
683 | */ | |
684 | llq->cons = readl(cmdq->q.cons_reg); | |
685 | } while (!ret); | |
49806599 | 686 | |
587e6c10 | 687 | return ret; |
49806599 WD |
688 | } |
689 | ||
587e6c10 WD |
690 | static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, |
691 | struct arm_smmu_ll_queue *llq) | |
2f657add | 692 | { |
bd07a20a | 693 | if (smmu->options & ARM_SMMU_OPT_MSIPOLL) |
587e6c10 WD |
694 | return __arm_smmu_cmdq_poll_until_msi(smmu, llq); |
695 | ||
696 | return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); | |
697 | } | |
698 | ||
699 | static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, | |
700 | u32 prod, int n) | |
701 | { | |
702 | int i; | |
703 | struct arm_smmu_ll_queue llq = { | |
704 | .max_n_shift = cmdq->q.llq.max_n_shift, | |
705 | .prod = prod, | |
706 | }; | |
707 | ||
708 | for (i = 0; i < n; ++i) { | |
709 | u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS]; | |
710 | ||
711 | prod = queue_inc_prod_n(&llq, i); | |
712 | queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS); | |
713 | } | |
714 | } | |
715 | ||
05cbaf4d WD |
716 | /* |
717 | * This is the actual insertion function, and provides the following | |
718 | * ordering guarantees to callers: | |
719 | * | |
720 | * - There is a dma_wmb() before publishing any commands to the queue. | |
721 | * This can be relied upon to order prior writes to data structures | |
722 | * in memory (such as a CD or an STE) before the command. | |
723 | * | |
724 | * - On completion of a CMD_SYNC, there is a control dependency. | |
725 | * This can be relied upon to order subsequent writes to memory (e.g. | |
726 | * freeing an IOVA) after completion of the CMD_SYNC. | |
727 | * | |
728 | * - Command insertion is totally ordered, so if two CPUs each race to | |
729 | * insert their own list of commands then all of the commands from one | |
730 | * CPU will appear before any of the commands from the other CPU. | |
731 | */ | |
587e6c10 WD |
732 | static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, |
733 | u64 *cmds, int n, bool sync) | |
734 | { | |
735 | u64 cmd_sync[CMDQ_ENT_DWORDS]; | |
736 | u32 prod; | |
2f657add | 737 | unsigned long flags; |
587e6c10 | 738 | bool owner; |
8639cc83 | 739 | struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); |
5c08c5ac | 740 | struct arm_smmu_ll_queue llq, head; |
587e6c10 | 741 | int ret = 0; |
2f657add | 742 | |
5c08c5ac JG |
743 | llq.max_n_shift = cmdq->q.llq.max_n_shift; |
744 | ||
587e6c10 WD |
745 | /* 1. Allocate some space in the queue */ |
746 | local_irq_save(flags); | |
747 | llq.val = READ_ONCE(cmdq->q.llq.val); | |
748 | do { | |
749 | u64 old; | |
750 | ||
751 | while (!queue_has_space(&llq, n + sync)) { | |
752 | local_irq_restore(flags); | |
753 | if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) | |
754 | dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); | |
755 | local_irq_save(flags); | |
756 | } | |
757 | ||
758 | head.cons = llq.cons; | |
759 | head.prod = queue_inc_prod_n(&llq, n + sync) | | |
760 | CMDQ_PROD_OWNED_FLAG; | |
761 | ||
762 | old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val); | |
763 | if (old == llq.val) | |
764 | break; | |
765 | ||
766 | llq.val = old; | |
767 | } while (1); | |
768 | owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG); | |
769 | head.prod &= ~CMDQ_PROD_OWNED_FLAG; | |
770 | llq.prod &= ~CMDQ_PROD_OWNED_FLAG; | |
771 | ||
772 | /* | |
773 | * 2. Write our commands into the queue | |
774 | * Dependency ordering from the cmpxchg() loop above. | |
775 | */ | |
776 | arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); | |
777 | if (sync) { | |
778 | prod = queue_inc_prod_n(&llq, n); | |
8639cc83 | 779 | arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); |
587e6c10 WD |
780 | queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); |
781 | ||
782 | /* | |
783 | * In order to determine completion of our CMD_SYNC, we must | |
784 | * ensure that the queue can't wrap twice without us noticing. | |
785 | * We achieve that by taking the cmdq lock as shared before | |
786 | * marking our slot as valid. | |
787 | */ | |
788 | arm_smmu_cmdq_shared_lock(cmdq); | |
789 | } | |
2f657add | 790 | |
587e6c10 WD |
791 | /* 3. Mark our slots as valid, ensuring commands are visible first */ |
792 | dma_wmb(); | |
793 | arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod); | |
2f657add | 794 | |
587e6c10 WD |
795 | /* 4. If we are the owner, take control of the SMMU hardware */ |
796 | if (owner) { | |
797 | /* a. Wait for previous owner to finish */ | |
798 | atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod); | |
799 | ||
800 | /* b. Stop gathering work by clearing the owned flag */ | |
801 | prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG, | |
802 | &cmdq->q.llq.atomic.prod); | |
803 | prod &= ~CMDQ_PROD_OWNED_FLAG; | |
804 | ||
805 | /* | |
806 | * c. Wait for any gathered work to be written to the queue. | |
807 | * Note that we read our own entries so that we have the control | |
808 | * dependency required by (d). | |
809 | */ | |
810 | arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod); | |
811 | ||
812 | /* | |
813 | * d. Advance the hardware prod pointer | |
814 | * Control dependency ordering from the entries becoming valid. | |
815 | */ | |
816 | writel_relaxed(prod, cmdq->q.prod_reg); | |
817 | ||
818 | /* | |
819 | * e. Tell the next owner we're done | |
820 | * Make sure we've updated the hardware first, so that we don't | |
821 | * race to update prod and potentially move it backwards. | |
822 | */ | |
823 | atomic_set_release(&cmdq->owner_prod, prod); | |
824 | } | |
825 | ||
826 | /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ | |
827 | if (sync) { | |
828 | llq.prod = queue_inc_prod_n(&llq, n); | |
829 | ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); | |
830 | if (ret) { | |
831 | dev_err_ratelimited(smmu->dev, | |
832 | "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", | |
833 | llq.prod, | |
834 | readl_relaxed(cmdq->q.prod_reg), | |
835 | readl_relaxed(cmdq->q.cons_reg)); | |
836 | } | |
2f657add | 837 | |
587e6c10 | 838 | /* |
49fbb250 | 839 | * Try to unlock the cmdq lock. This will fail if we're the last |
587e6c10 WD |
840 | * reader, in which case we can safely update cmdq->q.llq.cons |
841 | */ | |
842 | if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) { | |
843 | WRITE_ONCE(cmdq->q.llq.cons, llq.cons); | |
844 | arm_smmu_cmdq_shared_unlock(cmdq); | |
845 | } | |
846 | } | |
2f657add | 847 | |
587e6c10 | 848 | local_irq_restore(flags); |
49806599 WD |
849 | return ret; |
850 | } | |
851 | ||
4537f6f1 ZL |
852 | static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, |
853 | struct arm_smmu_cmdq_ent *ent, | |
854 | bool sync) | |
49806599 | 855 | { |
587e6c10 | 856 | u64 cmd[CMDQ_ENT_DWORDS]; |
49806599 | 857 | |
59d9bd72 | 858 | if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) { |
587e6c10 WD |
859 | dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", |
860 | ent->opcode); | |
861 | return -EINVAL; | |
862 | } | |
863 | ||
4537f6f1 | 864 | return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); |
587e6c10 WD |
865 | } |
866 | ||
4537f6f1 ZL |
867 | static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, |
868 | struct arm_smmu_cmdq_ent *ent) | |
587e6c10 | 869 | { |
4537f6f1 | 870 | return __arm_smmu_cmdq_issue_cmd(smmu, ent, false); |
587e6c10 WD |
871 | } |
872 | ||
4537f6f1 ZL |
873 | static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, |
874 | struct arm_smmu_cmdq_ent *ent) | |
587e6c10 | 875 | { |
4537f6f1 | 876 | return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); |
48ec83bc WD |
877 | } |
878 | ||
4ce8da45 JPB |
879 | static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, |
880 | struct arm_smmu_cmdq_batch *cmds, | |
881 | struct arm_smmu_cmdq_ent *cmd) | |
882 | { | |
59d9bd72 ZL |
883 | int index; |
884 | ||
4ce8da45 JPB |
885 | if (cmds->num == CMDQ_BATCH_ENTRIES) { |
886 | arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); | |
887 | cmds->num = 0; | |
888 | } | |
59d9bd72 ZL |
889 | |
890 | index = cmds->num * CMDQ_ENT_DWORDS; | |
891 | if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) { | |
892 | dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", | |
893 | cmd->opcode); | |
894 | return; | |
895 | } | |
896 | ||
4ce8da45 JPB |
897 | cmds->num++; |
898 | } | |
899 | ||
900 | static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, | |
901 | struct arm_smmu_cmdq_batch *cmds) | |
902 | { | |
903 | return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); | |
904 | } | |
905 | ||
395ad89d JPB |
906 | static int arm_smmu_page_response(struct device *dev, |
907 | struct iommu_fault_event *unused, | |
908 | struct iommu_page_response *resp) | |
909 | { | |
910 | struct arm_smmu_cmdq_ent cmd = {0}; | |
911 | struct arm_smmu_master *master = dev_iommu_priv_get(dev); | |
912 | int sid = master->streams[0].id; | |
913 | ||
914 | if (master->stall_enabled) { | |
915 | cmd.opcode = CMDQ_OP_RESUME; | |
916 | cmd.resume.sid = sid; | |
917 | cmd.resume.stag = resp->grpid; | |
918 | switch (resp->code) { | |
919 | case IOMMU_PAGE_RESP_INVALID: | |
920 | case IOMMU_PAGE_RESP_FAILURE: | |
921 | cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; | |
922 | break; | |
923 | case IOMMU_PAGE_RESP_SUCCESS: | |
924 | cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; | |
925 | break; | |
926 | default: | |
927 | return -EINVAL; | |
928 | } | |
929 | } else { | |
930 | return -ENODEV; | |
931 | } | |
932 | ||
933 | arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); | |
934 | /* | |
935 | * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP. | |
936 | * RESUME consumption guarantees that the stalled transaction will be | |
937 | * terminated... at some point in the future. PRI_RESP is fire and | |
938 | * forget. | |
939 | */ | |
940 | ||
941 | return 0; | |
942 | } | |
943 | ||
48ec83bc | 944 | /* Context descriptor manipulation functions */ |
3e630336 JPB |
945 | void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) |
946 | { | |
947 | struct arm_smmu_cmdq_ent cmd = { | |
9111aebf JPB |
948 | .opcode = smmu->features & ARM_SMMU_FEAT_E2H ? |
949 | CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID, | |
3e630336 JPB |
950 | .tlbi.asid = asid, |
951 | }; | |
952 | ||
4537f6f1 | 953 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
3e630336 JPB |
954 | } |
955 | ||
87f42391 JPB |
956 | static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, |
957 | int ssid, bool leaf) | |
48ec83bc | 958 | { |
87f42391 JPB |
959 | size_t i; |
960 | unsigned long flags; | |
961 | struct arm_smmu_master *master; | |
fac95671 | 962 | struct arm_smmu_cmdq_batch cmds; |
87f42391 JPB |
963 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
964 | struct arm_smmu_cmdq_ent cmd = { | |
965 | .opcode = CMDQ_OP_CFGI_CD, | |
966 | .cfgi = { | |
967 | .ssid = ssid, | |
968 | .leaf = leaf, | |
969 | }, | |
970 | }; | |
971 | ||
fac95671 JG |
972 | cmds.num = 0; |
973 | ||
87f42391 JPB |
974 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); |
975 | list_for_each_entry(master, &smmu_domain->devices, domain_head) { | |
cdf315f9 JPB |
976 | for (i = 0; i < master->num_streams; i++) { |
977 | cmd.cfgi.sid = master->streams[i].id; | |
edd0351e | 978 | arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); |
87f42391 JPB |
979 | } |
980 | } | |
981 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | |
48ec83bc | 982 | |
edd0351e | 983 | arm_smmu_cmdq_batch_submit(smmu, &cmds); |
87f42391 | 984 | } |
48ec83bc | 985 | |
73af06f5 JPB |
986 | static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, |
987 | struct arm_smmu_l1_ctx_desc *l1_desc) | |
988 | { | |
989 | size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); | |
48ec83bc | 990 | |
73af06f5 JPB |
991 | l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, |
992 | &l1_desc->l2ptr_dma, GFP_KERNEL); | |
993 | if (!l1_desc->l2ptr) { | |
994 | dev_warn(smmu->dev, | |
995 | "failed to allocate context descriptor table\n"); | |
996 | return -ENOMEM; | |
997 | } | |
998 | return 0; | |
48ec83bc WD |
999 | } |
1000 | ||
73af06f5 JPB |
1001 | static void arm_smmu_write_cd_l1_desc(__le64 *dst, |
1002 | struct arm_smmu_l1_ctx_desc *l1_desc) | |
48ec83bc | 1003 | { |
73af06f5 JPB |
1004 | u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | |
1005 | CTXDESC_L1_DESC_V; | |
1006 | ||
87e5fe5b | 1007 | /* See comment in arm_smmu_write_ctx_desc() */ |
73af06f5 JPB |
1008 | WRITE_ONCE(*dst, cpu_to_le64(val)); |
1009 | } | |
1010 | ||
1011 | static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, | |
1012 | u32 ssid) | |
1013 | { | |
1014 | __le64 *l1ptr; | |
1015 | unsigned int idx; | |
1016 | struct arm_smmu_l1_ctx_desc *l1_desc; | |
1017 | struct arm_smmu_device *smmu = smmu_domain->smmu; | |
1018 | struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; | |
48ec83bc | 1019 | |
73af06f5 JPB |
1020 | if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) |
1021 | return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; | |
1022 | ||
1023 | idx = ssid >> CTXDESC_SPLIT; | |
1024 | l1_desc = &cdcfg->l1_desc[idx]; | |
1025 | if (!l1_desc->l2ptr) { | |
1026 | if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) | |
1027 | return NULL; | |
1028 | ||
1029 | l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; | |
1030 | arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); | |
1031 | /* An invalid L1CD can be cached */ | |
1032 | arm_smmu_sync_cd(smmu_domain, ssid, false); | |
1033 | } | |
1034 | idx = ssid & (CTXDESC_L2_ENTRIES - 1); | |
1035 | return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; | |
1036 | } | |
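
The index arithmetic in the two-level case is worth a concrete number: the SSID's high bits pick an L1 descriptor, and its low CTXDESC_SPLIT bits pick a CD inside that leaf. A small standalone check, assuming the driver's 10-bit split (1024 CDs per leaf):

#include <assert.h>

#define SPLIT      10                /* CTXDESC_SPLIT in the driver */
#define L2_ENTRIES (1 << SPLIT)      /* CDs per leaf table */

int main(void)
{
	unsigned int ssid = 0x1234;  /* some substream ID */

	unsigned int l1_idx = ssid >> SPLIT;            /* which leaf */
	unsigned int l2_idx = ssid & (L2_ENTRIES - 1);  /* CD in that leaf */

	assert(l1_idx == 4);      /* 0x1234 >> 10 */
	assert(l2_idx == 0x234);  /* 0x1234 & 0x3FF */
	return 0;
}

Leaves are allocated lazily in arm_smmu_get_cd_ptr() above, so a sparse SSID space only pays for the leaves it actually touches.
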
1037 | ||
3e630336 JPB |
1038 | int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, |
1039 | struct arm_smmu_ctx_desc *cd) | |
87f42391 | 1040 | { |
48ec83bc | 1041 | /* |
87f42391 JPB |
1042 | * This function handles the following cases: |
1043 | * | |
1044 | * (1) Install primary CD, for normal DMA traffic (SSID = 0). | |
1045 | * (2) Install a secondary CD, for SID+SSID traffic. | |
1046 | * (3) Update ASID of a CD. Atomically write the first 64 bits of the | |
1047 | * CD, then invalidate the old entry and mappings. | |
32784a95 JPB |
1048 | * (4) Quiesce the context without clearing the valid bit. Disable |
1049 | * translation, and ignore any translation fault. | |
1050 | * (5) Remove a secondary CD. | |
48ec83bc | 1051 | */ |
87f42391 JPB |
1052 | u64 val; |
1053 | bool cd_live; | |
73af06f5 | 1054 | __le64 *cdptr; |
73af06f5 JPB |
1055 | |
1056 | if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) | |
1057 | return -E2BIG; | |
1058 | ||
1059 | cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); | |
1060 | if (!cdptr) | |
1061 | return -ENOMEM; | |
87f42391 JPB |
1062 | |
1063 | val = le64_to_cpu(cdptr[0]); | |
1064 | cd_live = !!(val & CTXDESC_CD_0_V); | |
9cff86fd | 1065 | |
32784a95 | 1066 | if (!cd) { /* (5) */ |
87f42391 | 1067 | val = 0; |
32784a95 JPB |
1068 | } else if (cd == &quiet_cd) { /* (4) */ |
1069 | val |= CTXDESC_CD_0_TCR_EPD0; | |
87f42391 JPB |
1070 | } else if (cd_live) { /* (3) */ |
1071 | val &= ~CTXDESC_CD_0_ASID; | |
1072 | val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); | |
1073 | /* | |
1074 | * Until CD+TLB invalidation, both ASIDs may be used for tagging | |
1075 | * this substream's traffic | |
1076 | */ | |
1077 | } else { /* (1) and (2) */ | |
1078 | cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); | |
1079 | cdptr[2] = 0; | |
1080 | cdptr[3] = cpu_to_le64(cd->mair); | |
9cff86fd | 1081 | |
87f42391 JPB |
1082 | /* |
1083 | * STE is live, and the SMMU might read dwords of this CD in any | |
1084 | * order. Ensure that it observes valid values before reading | |
1085 | * V=1. | |
1086 | */ | |
1087 | arm_smmu_sync_cd(smmu_domain, ssid, true); | |
48ec83bc | 1088 | |
87f42391 | 1089 | val = cd->tcr | |
48ec83bc | 1090 | #ifdef __BIG_ENDIAN |
87f42391 | 1091 | CTXDESC_CD_0_ENDI | |
48ec83bc | 1092 | #endif |
3f1ce8e8 JPB |
1093 | CTXDESC_CD_0_R | CTXDESC_CD_0_A | |
1094 | (cd->mm ? 0 : CTXDESC_CD_0_ASET) | | |
87f42391 JPB |
1095 | CTXDESC_CD_0_AA64 | |
1096 | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | | |
1097 | CTXDESC_CD_0_V; | |
9cff86fd | 1098 | |
395ad89d | 1099 | if (smmu_domain->stall_enabled) |
87f42391 JPB |
1100 | val |= CTXDESC_CD_0_S; |
1101 | } | |
9cff86fd | 1102 | |
87f42391 JPB |
1103 | /* |
1104 | * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3 | |
1105 | * "Configuration structures and configuration invalidation completion" | |
1106 | * | |
1107 | * The size of single-copy atomic reads made by the SMMU is | |
1108 | * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single | |
1109 | * field within an aligned 64-bit span of a structure can be altered | |
1110 | * without first making the structure invalid. | |
1111 | */ | |
1112 | WRITE_ONCE(cdptr[0], cpu_to_le64(val)); | |
1113 | arm_smmu_sync_cd(smmu_domain, ssid, true); | |
1114 | return 0; | |
48ec83bc WD |
1115 | } |
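
Cases (1) and (2) follow a publication pattern: fill dwords 1-3 while the CD is still invalid, invalidate any cached copy (CFGI_CD + CMD_SYNC), then set V=1 in dword 0 with a single-copy-atomic store, and sync once more. Below is a CPU-only sketch of the shape of that sequence; a C11 release store is only an analogue for the SMMU invalidation steps (which memory ordering alone cannot replace), and the descriptor layout and names are illustrative, not the driver's.

#include <stdatomic.h>
#include <stdint.h>

#define DESC_V (1ULL << 0)  /* illustrative valid bit in dword 0 */

/* Publish a 4-dword descriptor to a reader that may load the dwords in
 * any order. Dword 0 carries the valid bit; dwords 1..3 the payload. */
static void publish_desc(_Atomic uint64_t *desc, uint64_t ttbr,
			 uint64_t mair, uint64_t tcr)
{
	/* 1. Fill in the body while the descriptor is still invalid. */
	atomic_store_explicit(&desc[1], ttbr, memory_order_relaxed);
	atomic_store_explicit(&desc[2], 0, memory_order_relaxed);
	atomic_store_explicit(&desc[3], mair, memory_order_relaxed);

	/* 2. The driver issues CFGI_CD + CMD_SYNC here so the SMMU drops
	 * any cached copy before it can see V=1. In this CPU-only model,
	 * a release store on dword 0 is the closest analogue: the body
	 * becomes visible no later than the valid bit does. */
	atomic_store_explicit(&desc[0], tcr | DESC_V, memory_order_release);

	/* 3. The driver syncs once more afterwards, so the new entry is
	 * guaranteed observed before the call returns. */
}

int main(void)
{
	_Atomic uint64_t cd[4] = { 0, 0, 0, 0 };

	publish_desc(cd, 0x40001000, 0xff44, 0x3520);  /* arbitrary values */
	return !(atomic_load(&cd[0]) & DESC_V);
}
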
1116 | ||
a557aff0 JPB |
1117 | static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) |
1118 | { | |
73af06f5 | 1119 | int ret; |
a557aff0 | 1120 | size_t l1size; |
73af06f5 | 1121 | size_t max_contexts; |
a557aff0 JPB |
1122 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
1123 | struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; | |
1124 | struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; | |
1125 | ||
73af06f5 JPB |
1126 | max_contexts = 1 << cfg->s1cdmax; |
1127 | ||
1128 | if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || | |
1129 | max_contexts <= CTXDESC_L2_ENTRIES) { | |
1130 | cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; | |
1131 | cdcfg->num_l1_ents = max_contexts; | |
48ec83bc | 1132 | |
73af06f5 JPB |
1133 | l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); |
1134 | } else { | |
1135 | cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; | |
1136 | cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, | |
1137 | CTXDESC_L2_ENTRIES); | |
1138 | ||
1139 | cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, | |
1140 | sizeof(*cdcfg->l1_desc), | |
1141 | GFP_KERNEL); | |
1142 | if (!cdcfg->l1_desc) | |
1143 | return -ENOMEM; | |
1144 | ||
1145 | l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); | |
1146 | } | |
87f42391 | 1147 | |
a557aff0 JPB |
1148 | cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, |
1149 | GFP_KERNEL); | |
1150 | if (!cdcfg->cdtab) { | |
1151 | dev_warn(smmu->dev, "failed to allocate context descriptor\n"); | |
73af06f5 JPB |
1152 | ret = -ENOMEM; |
1153 | goto err_free_l1; | |
a557aff0 | 1154 | } |
73af06f5 | 1155 | |
a557aff0 | 1156 | return 0; |
48ec83bc | 1157 | |
73af06f5 JPB |
1158 | err_free_l1: |
1159 | if (cdcfg->l1_desc) { | |
1160 | devm_kfree(smmu->dev, cdcfg->l1_desc); | |
1161 | cdcfg->l1_desc = NULL; | |
1162 | } | |
1163 | return ret; | |
a557aff0 JPB |
1164 | } |
1165 | ||
1166 | static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) | |
1167 | { | |
73af06f5 JPB |
1168 | int i; |
1169 | size_t size, l1size; | |
a557aff0 JPB |
1170 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
1171 | struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; | |
73af06f5 JPB |
1172 | |
1173 | if (cdcfg->l1_desc) { | |
1174 | size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); | |
1175 | ||
1176 | for (i = 0; i < cdcfg->num_l1_ents; i++) { | |
1177 | if (!cdcfg->l1_desc[i].l2ptr) | |
1178 | continue; | |
1179 | ||
1180 | dmam_free_coherent(smmu->dev, size, | |
1181 | cdcfg->l1_desc[i].l2ptr, | |
1182 | cdcfg->l1_desc[i].l2ptr_dma); | |
1183 | } | |
1184 | devm_kfree(smmu->dev, cdcfg->l1_desc); | |
1185 | cdcfg->l1_desc = NULL; | |
1186 | ||
1187 | l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); | |
1188 | } else { | |
1189 | l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); | |
1190 | } | |
a557aff0 JPB |
1191 | |
1192 | dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); | |
1193 | cdcfg->cdtab_dma = 0; | |
1194 | cdcfg->cdtab = NULL; | |
48ec83bc WD |
1195 | } |
1196 | ||
3f1ce8e8 | 1197 | bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) |
0299a1a8 | 1198 | { |
3f1ce8e8 JPB |
1199 | bool free; |
1200 | struct arm_smmu_ctx_desc *old_cd; | |
1201 | ||
0299a1a8 | 1202 | if (!cd->asid) |
3f1ce8e8 | 1203 | return false; |
0299a1a8 | 1204 | |
3f1ce8e8 JPB |
1205 | free = refcount_dec_and_test(&cd->refs); |
1206 | if (free) { | |
1207 | old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid); | |
1208 | WARN_ON(old_cd != cd); | |
1209 | } | |
1210 | return free; | |
0299a1a8 JPB |
1211 | } |
1212 | ||
48ec83bc WD |
1213 | /* Stream table manipulation functions */ |
1214 | static void | |
1215 | arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) | |
1216 | { | |
1217 | u64 val = 0; | |
1218 | ||
ba08bdcb | 1219 | val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); |
1cf9e54e | 1220 | val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; |
48ec83bc | 1221 | |
87e5fe5b JPB |
1222 | /* See comment in arm_smmu_write_ctx_desc() */ |
1223 | WRITE_ONCE(*dst, cpu_to_le64(val)); | |
48ec83bc WD |
1224 | } |
1225 | ||
1226 | static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) | |
1227 | { | |
1228 | struct arm_smmu_cmdq_ent cmd = { | |
1229 | .opcode = CMDQ_OP_CFGI_STE, | |
1230 | .cfgi = { | |
1231 | .sid = sid, | |
1232 | .leaf = true, | |
1233 | }, | |
1234 | }; | |
1235 | ||
4537f6f1 | 1236 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
48ec83bc WD |
1237 | } |
1238 | ||
8be39a1a JPB |
1239 | static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, |
1240 | __le64 *dst) | |
48ec83bc WD |
1241 | { |
1242 | /* | |
1243 | * This is hideously complicated, but we only really care about | |
1244 | * three cases at the moment: | |
1245 | * | |
beb3c6a0 WD |
1246 | * 1. Invalid (all zero) -> bypass/fault (init) |
1247 | * 2. Bypass/fault -> translation/bypass (attach) | |
1248 | * 3. Translation/bypass -> bypass/fault (detach) | |
48ec83bc WD |
1249 | * |
1250 | * Given that we can't update the STE atomically and the SMMU | |
1251 | * doesn't read the thing in a defined order, that leaves us | |
1252 | * with the following maintenance requirements: | |
1253 | * | |
1254 | * 1. Update Config, return (init time STEs aren't live) | |
1255 | * 2. Write everything apart from dword 0, sync, write dword 0, sync | |
1256 | * 3. Update Config, sync | |
1257 | */ | |
1258 | u64 val = le64_to_cpu(dst[0]); | |
1259 | bool ste_live = false; | |
8be39a1a JPB |
1260 | struct arm_smmu_device *smmu = NULL; |
1261 | struct arm_smmu_s1_cfg *s1_cfg = NULL; | |
1262 | struct arm_smmu_s2_cfg *s2_cfg = NULL; | |
1263 | struct arm_smmu_domain *smmu_domain = NULL; | |
48ec83bc WD |
1264 | struct arm_smmu_cmdq_ent prefetch_cmd = { |
1265 | .opcode = CMDQ_OP_PREFETCH_CFG, | |
1266 | .prefetch = { | |
1267 | .sid = sid, | |
1268 | }, | |
1269 | }; | |
1270 | ||
8be39a1a JPB |
1271 | if (master) { |
1272 | smmu_domain = master->domain; | |
1273 | smmu = master->smmu; | |
1274 | } | |
1275 | ||
1276 | if (smmu_domain) { | |
1277 | switch (smmu_domain->stage) { | |
1278 | case ARM_SMMU_DOMAIN_S1: | |
1279 | s1_cfg = &smmu_domain->s1_cfg; | |
1280 | break; | |
1281 | case ARM_SMMU_DOMAIN_S2: | |
1282 | case ARM_SMMU_DOMAIN_NESTED: | |
1283 | s2_cfg = &smmu_domain->s2_cfg; | |
1284 | break; | |
1285 | default: | |
1286 | break; | |
1287 | } | |
1288 | } | |
1289 | ||
48ec83bc | 1290 | if (val & STRTAB_STE_0_V) { |
ba08bdcb | 1291 | switch (FIELD_GET(STRTAB_STE_0_CFG, val)) { |
48ec83bc WD |
1292 | case STRTAB_STE_0_CFG_BYPASS: |
1293 | break; | |
1294 | case STRTAB_STE_0_CFG_S1_TRANS: | |
1295 | case STRTAB_STE_0_CFG_S2_TRANS: | |
1296 | ste_live = true; | |
1297 | break; | |
5bc0a116 | 1298 | case STRTAB_STE_0_CFG_ABORT: |
11f4fe9b AR |
1299 | BUG_ON(!disable_bypass); |
1300 | break; | |
48ec83bc WD |
1301 | default: |
1302 | BUG(); /* STE corruption */ | |
1303 | } | |
1304 | } | |
1305 | ||
810871c5 | 1306 | /* Nuke the existing STE_0 value, as we're going to rewrite it */ |
beb3c6a0 WD |
1307 | val = STRTAB_STE_0_V; |
1308 | ||
1309 | /* Bypass/fault */ | |
8be39a1a JPB |
1310 | if (!smmu_domain || !(s1_cfg || s2_cfg)) { |
1311 | if (!smmu_domain && disable_bypass) | |
ba08bdcb | 1312 | val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); |
beb3c6a0 | 1313 | else |
ba08bdcb | 1314 | val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); |
48ec83bc | 1315 | |
48ec83bc | 1316 | dst[0] = cpu_to_le64(val); |
ba08bdcb RM |
1317 | dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, |
1318 | STRTAB_STE_1_SHCFG_INCOMING)); | |
48ec83bc | 1319 | dst[2] = 0; /* Nuke the VMID */ |
704c0382 WD |
1320 | /* |
1321 | * The SMMU can perform negative caching, so we must sync | |
1322 | * the STE regardless of whether the old value was live. | |
1323 | */ | |
1324 | if (smmu) | |
48ec83bc WD |
1325 | arm_smmu_sync_ste_for_sid(smmu, sid); |
1326 | return; | |
1327 | } | |
1328 | ||
8be39a1a | 1329 | if (s1_cfg) { |
9111aebf JPB |
1330 | u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? |
1331 | STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; | |
1332 | ||
48ec83bc WD |
1333 | BUG_ON(ste_live); |
1334 | dst[1] = cpu_to_le64( | |
87f42391 | 1335 | FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | |
ba08bdcb RM |
1336 | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | |
1337 | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | | |
1338 | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | | |
9111aebf | 1339 | FIELD_PREP(STRTAB_STE_1_STRW, strw)); |
48ec83bc | 1340 | |
9cff86fd | 1341 | if (smmu->features & ARM_SMMU_FEAT_STALLS && |
395ad89d | 1342 | !master->stall_enabled) |
6380be05 PM |
1343 | dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); |
1344 | ||
7bc4f3fa | 1345 | val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | |
87f42391 JPB |
1346 | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | |
1347 | FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | | |
1348 | FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); | |
48ec83bc WD |
1349 | } |
1350 | ||
8be39a1a | 1351 | if (s2_cfg) { |
48ec83bc WD |
1352 | BUG_ON(ste_live); |
1353 | dst[2] = cpu_to_le64( | |
8be39a1a JPB |
1354 | FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | |
1355 | FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | | |
48ec83bc WD |
1356 | #ifdef __BIG_ENDIAN |
1357 | STRTAB_STE_2_S2ENDI | | |
1358 | #endif | |
1359 | STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | | |
1360 | STRTAB_STE_2_S2R); | |
1361 | ||
8be39a1a | 1362 | dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); |
48ec83bc | 1363 | |
ba08bdcb | 1364 | val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); |
48ec83bc WD |
1365 | } |
1366 | ||
9ce27afc JPB |
1367 | if (master->ats_enabled) |
1368 | dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, | |
1369 | STRTAB_STE_1_EATS_TRANS)); | |
1370 | ||
48ec83bc | 1371 | arm_smmu_sync_ste_for_sid(smmu, sid); |
d71e0171 WD |
1372 | /* See comment in arm_smmu_write_ctx_desc() */ |
1373 | WRITE_ONCE(dst[0], cpu_to_le64(val)); | |
48ec83bc WD |
1374 | arm_smmu_sync_ste_for_sid(smmu, sid); |
1375 | ||
1376 | /* It's likely that we'll want to use the new STE soon */ | |
5e92946c ZL |
1377 | if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) |
1378 | arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); | |
48ec83bc WD |
1379 | } |
1380 | ||
6c998abb | 1381 | static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force) |
48ec83bc WD |
1382 | { |
1383 | unsigned int i; | |
6c998abb SK |
1384 | u64 val = STRTAB_STE_0_V; |
1385 | ||
1386 | if (disable_bypass && !force) | |
1387 | val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); | |
1388 | else | |
1389 | val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); | |
48ec83bc WD |
1390 | |
1391 | for (i = 0; i < nent; ++i) { | |
6c998abb SK |
1392 | strtab[0] = cpu_to_le64(val); |
1393 | strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, | |
1394 | STRTAB_STE_1_SHCFG_INCOMING)); | |
1395 | strtab[2] = 0; | |
48ec83bc WD |
1396 | strtab += STRTAB_STE_DWORDS; |
1397 | } | |
1398 | } | |
1399 | ||
1400 | static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) | |
1401 | { | |
1402 | size_t size; | |
1403 | void *strtab; | |
1404 | struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; | |
1405 | struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; | |
1406 | ||
1407 | if (desc->l2ptr) | |
1408 | return 0; | |
1409 | ||
1410 | size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); | |
69146e7b | 1411 | strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; |
48ec83bc WD |
1412 | |
1413 | desc->span = STRTAB_SPLIT + 1; | |
04fa26c7 | 1414 | desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, |
9bb9069c | 1415 | GFP_KERNEL); |
48ec83bc WD |
1416 | if (!desc->l2ptr) { |
1417 | dev_err(smmu->dev, | |
1418 | "failed to allocate l2 stream table for SID %u\n", | |
1419 | sid); | |
1420 | return -ENOMEM; | |
1421 | } | |
1422 | ||
6c998abb | 1423 | arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false); |
48ec83bc WD |
1424 | arm_smmu_write_strtab_l1_desc(strtab, desc); |
1425 | return 0; | |
1426 | } | |
1427 | ||
cdf315f9 JPB |
1428 | static struct arm_smmu_master * |
1429 | arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) | |
1430 | { | |
1431 | struct rb_node *node; | |
1432 | struct arm_smmu_stream *stream; | |
1433 | ||
1434 | lockdep_assert_held(&smmu->streams_mutex); | |
1435 | ||
1436 | node = smmu->streams.rb_node; | |
1437 | while (node) { | |
1438 | stream = rb_entry(node, struct arm_smmu_stream, node); | |
1439 | if (stream->id < sid) | |
1440 | node = node->rb_right; | |
1441 | else if (stream->id > sid) | |
1442 | node = node->rb_left; | |
1443 | else | |
1444 | return stream->master; | |
1445 | } | |
1446 | ||
1447 | return NULL; | |
1448 | } | |
1449 | ||
48ec83bc | 1450 | /* IRQ and event handlers */ |
395ad89d JPB |
1451 | static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) |
1452 | { | |
1453 | int ret; | |
1454 | u32 reason; | |
1455 | u32 perm = 0; | |
1456 | struct arm_smmu_master *master; | |
1457 | bool ssid_valid = evt[0] & EVTQ_0_SSV; | |
1458 | u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); | |
1459 | struct iommu_fault_event fault_evt = { }; | |
1460 | struct iommu_fault *flt = &fault_evt.fault; | |
1461 | ||
1462 | switch (FIELD_GET(EVTQ_0_ID, evt[0])) { | |
1463 | case EVT_ID_TRANSLATION_FAULT: | |
1464 | reason = IOMMU_FAULT_REASON_PTE_FETCH; | |
1465 | break; | |
1466 | case EVT_ID_ADDR_SIZE_FAULT: | |
1467 | reason = IOMMU_FAULT_REASON_OOR_ADDRESS; | |
1468 | break; | |
1469 | case EVT_ID_ACCESS_FAULT: | |
1470 | reason = IOMMU_FAULT_REASON_ACCESS; | |
1471 | break; | |
1472 | case EVT_ID_PERMISSION_FAULT: | |
1473 | reason = IOMMU_FAULT_REASON_PERMISSION; | |
1474 | break; | |
1475 | default: | |
1476 | return -EOPNOTSUPP; | |
1477 | } | |
1478 | ||
1479 | /* Stage-2 is always pinned at the moment */ | |
1480 | if (evt[1] & EVTQ_1_S2) | |
1481 | return -EFAULT; | |
1482 | ||
1483 | if (evt[1] & EVTQ_1_RnW) | |
1484 | perm |= IOMMU_FAULT_PERM_READ; | |
1485 | else | |
1486 | perm |= IOMMU_FAULT_PERM_WRITE; | |
1487 | ||
1488 | if (evt[1] & EVTQ_1_InD) | |
1489 | perm |= IOMMU_FAULT_PERM_EXEC; | |
1490 | ||
1491 | if (evt[1] & EVTQ_1_PnU) | |
1492 | perm |= IOMMU_FAULT_PERM_PRIV; | |
1493 | ||
1494 | if (evt[1] & EVTQ_1_STALL) { | |
1495 | flt->type = IOMMU_FAULT_PAGE_REQ; | |
1496 | flt->prm = (struct iommu_fault_page_request) { | |
1497 | .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, | |
1498 | .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), | |
1499 | .perm = perm, | |
1500 | .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), | |
1501 | }; | |
1502 | ||
1503 | if (ssid_valid) { | |
1504 | flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; | |
1505 | flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); | |
1506 | } | |
1507 | } else { | |
1508 | flt->type = IOMMU_FAULT_DMA_UNRECOV; | |
1509 | flt->event = (struct iommu_fault_unrecoverable) { | |
1510 | .reason = reason, | |
1511 | .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID, | |
1512 | .perm = perm, | |
1513 | .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), | |
1514 | }; | |
1515 | ||
1516 | if (ssid_valid) { | |
1517 | flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID; | |
1518 | flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); | |
1519 | } | |
1520 | } | |
1521 | ||
1522 | mutex_lock(&smmu->streams_mutex); | |
1523 | master = arm_smmu_find_master(smmu, sid); | |
1524 | if (!master) { | |
1525 | ret = -EINVAL; | |
1526 | goto out_unlock; | |
1527 | } | |
1528 | ||
1529 | ret = iommu_report_device_fault(master->dev, &fault_evt); | |
1530 | if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) { | |
1531 | /* Nobody cared, abort the access */ | |
1532 | struct iommu_page_response resp = { | |
1533 | .pasid = flt->prm.pasid, | |
1534 | .grpid = flt->prm.grpid, | |
1535 | .code = IOMMU_PAGE_RESP_FAILURE, | |
1536 | }; | |
1537 | arm_smmu_page_response(master->dev, &fault_evt, &resp); | |
1538 | } | |
1539 | ||
1540 | out_unlock: | |
1541 | mutex_unlock(&smmu->streams_mutex); | |
1542 | return ret; | |
1543 | } | |
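/*
 * By way of illustration (field layout per arm-smmu-v3.h, values
 * hypothetical): a stalled read that faulted translation for SID 0x10,
 * SSID 2 would decode roughly as
 *
 *	FIELD_GET(EVTQ_0_ID, evt[0]) == EVT_ID_TRANSLATION_FAULT
 *	evt[0] & EVTQ_0_SSV, SSID == 2, SID == 0x10
 *	evt[1] & EVTQ_1_STALL, evt[1] & EVTQ_1_RnW
 *
 * and be reported as an IOMMU_FAULT_PAGE_REQ with pasid == 2. Whoever
 * consumes the fault must eventually answer via
 * arm_smmu_page_response() so the stalled transaction can be resumed
 * or terminated.
 */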
1544 | ||
48ec83bc WD |
1545 | static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) |
1546 | { | |
395ad89d | 1547 | int i, ret; |
48ec83bc WD |
1548 | struct arm_smmu_device *smmu = dev; |
1549 | struct arm_smmu_queue *q = &smmu->evtq.q; | |
7c288a5b | 1550 | struct arm_smmu_ll_queue *llq = &q->llq; |
9cff922b JPB |
1551 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, |
1552 | DEFAULT_RATELIMIT_BURST); | |
48ec83bc WD |
1553 | u64 evt[EVTQ_ENT_DWORDS]; |
1554 | ||
b4163fb3 JPB |
1555 | do { |
1556 | while (!queue_remove_raw(q, evt)) { | |
7417b99c | 1557 | u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); |
48ec83bc | 1558 | |
395ad89d | 1559 | ret = arm_smmu_handle_evt(smmu, evt); |
9cff922b | 1560 | if (!ret || !__ratelimit(&rs)) |
395ad89d JPB |
1561 | continue; |
1562 | ||
b4163fb3 JPB |
1563 | dev_info(smmu->dev, "event 0x%02x received:\n", id); |
1564 | for (i = 0; i < ARRAY_SIZE(evt); ++i) | |
1565 | dev_info(smmu->dev, "\t0x%016llx\n", | |
1566 | (unsigned long long)evt[i]); | |
1567 | ||
30de2b54 | 1568 | cond_resched(); |
b4163fb3 JPB |
1569 | } |
1570 | ||
1571 | /* | |
1572 | * Not much we can do on overflow, so scream and pretend we're | |
1573 | * trying harder. | |
1574 | */ | |
2a8868f1 | 1575 | if (queue_sync_prod_in(q) == -EOVERFLOW) |
b4163fb3 | 1576 | dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n"); |
7c288a5b | 1577 | } while (!queue_empty(llq)); |
48ec83bc WD |
1578 | |
1579 | /* Sync our overflow flag, as we believe we're up to speed */ | |
7c288a5b WD |
1580 | llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | |
1581 | Q_IDX(llq, llq->cons); | |
48ec83bc WD |
1582 | return IRQ_HANDLED; |
1583 | } | |
1584 | ||
b4163fb3 JPB |
1585 | static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) |
1586 | { | |
1587 | u32 sid, ssid; | |
1588 | u16 grpid; | |
1589 | bool ssv, last; | |
1590 | ||
7417b99c RM |
1591 | sid = FIELD_GET(PRIQ_0_SID, evt[0]); |
1592 | ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); | |
1593 | ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; | |
1594 | last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); | |
1595 | grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); | |
b4163fb3 JPB |
1596 | |
1597 | dev_info(smmu->dev, "unexpected PRI request received:\n"); | |
1598 | dev_info(smmu->dev, | |
1599 | "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n", | |
1600 | sid, ssid, grpid, last ? "L" : "", | |
1601 | evt[0] & PRIQ_0_PERM_PRIV ? "" : "un", | |
1602 | evt[0] & PRIQ_0_PERM_READ ? "R" : "", | |
1603 | evt[0] & PRIQ_0_PERM_WRITE ? "W" : "", | |
1604 | evt[0] & PRIQ_0_PERM_EXEC ? "X" : "", | |
1cf9e54e | 1605 | evt[1] & PRIQ_1_ADDR_MASK); |
b4163fb3 JPB |
1606 | |
1607 | if (last) { | |
1608 | struct arm_smmu_cmdq_ent cmd = { | |
1609 | .opcode = CMDQ_OP_PRI_RESP, | |
1610 | .substream_valid = ssv, | |
1611 | .pri = { | |
1612 | .sid = sid, | |
1613 | .ssid = ssid, | |
1614 | .grpid = grpid, | |
1615 | .resp = PRI_RESP_DENY, | |
1616 | }, | |
1617 | }; | |
48ec83bc | 1618 | |
b4163fb3 JPB |
1619 | arm_smmu_cmdq_issue_cmd(smmu, &cmd); |
1620 | } | |
48ec83bc WD |
1621 | } |
1622 | ||
1623 | static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) | |
1624 | { | |
1625 | struct arm_smmu_device *smmu = dev; | |
1626 | struct arm_smmu_queue *q = &smmu->priq.q; | |
7c288a5b | 1627 | struct arm_smmu_ll_queue *llq = &q->llq; |
48ec83bc WD |
1628 | u64 evt[PRIQ_ENT_DWORDS]; |
1629 | ||
b4163fb3 JPB |
1630 | do { |
1631 | while (!queue_remove_raw(q, evt)) | |
1632 | arm_smmu_handle_ppr(smmu, evt); | |
48ec83bc | 1633 | |
2a8868f1 | 1634 | if (queue_sync_prod_in(q) == -EOVERFLOW) |
b4163fb3 | 1635 | dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n"); |
7c288a5b | 1636 | } while (!queue_empty(llq)); |
48ec83bc WD |
1637 | |
1638 | /* Sync our overflow flag, as we believe we're up to speed */ | |
7c288a5b WD |
1639 | llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | |
1640 | Q_IDX(llq, llq->cons); | |
1641 | queue_sync_cons_out(q); | |
48ec83bc WD |
1642 | return IRQ_HANDLED; |
1643 | } | |
1644 | ||
48ec83bc WD |
1645 | static int arm_smmu_device_disable(struct arm_smmu_device *smmu); |
1646 | ||
1647 | static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) | |
1648 | { | |
324ba108 | 1649 | u32 gerror, gerrorn, active; |
48ec83bc WD |
1650 | struct arm_smmu_device *smmu = dev; |
1651 | ||
1652 | gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); | |
1653 | gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); | |
1654 | ||
324ba108 PM |
1655 | active = gerror ^ gerrorn; |
1656 | if (!(active & GERROR_ERR_MASK)) | |
48ec83bc WD |
1657 | return IRQ_NONE; /* No errors pending */ |
1658 | ||
1659 | dev_warn(smmu->dev, | |
1660 | "unexpected global error reported (0x%08x), this could be serious\n", | |
324ba108 | 1661 | active); |
48ec83bc | 1662 | |
324ba108 | 1663 | if (active & GERROR_SFM_ERR) { |
48ec83bc WD |
1664 | dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); |
1665 | arm_smmu_device_disable(smmu); | |
1666 | } | |
1667 | ||
324ba108 | 1668 | if (active & GERROR_MSI_GERROR_ABT_ERR) |
48ec83bc WD |
1669 | dev_warn(smmu->dev, "GERROR MSI write aborted\n"); |
1670 | ||
b4163fb3 | 1671 | if (active & GERROR_MSI_PRIQ_ABT_ERR) |
48ec83bc | 1672 | dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); |
48ec83bc | 1673 | |
b4163fb3 | 1674 | if (active & GERROR_MSI_EVTQ_ABT_ERR) |
48ec83bc | 1675 | dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); |
48ec83bc | 1676 | |
dce032a1 | 1677 | if (active & GERROR_MSI_CMDQ_ABT_ERR) |
48ec83bc | 1678 | dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); |
48ec83bc | 1679 | |
324ba108 | 1680 | if (active & GERROR_PRIQ_ABT_ERR) |
48ec83bc WD |
1681 | dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); |
1682 | ||
324ba108 | 1683 | if (active & GERROR_EVTQ_ABT_ERR) |
48ec83bc WD |
1684 | dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); |
1685 | ||
324ba108 | 1686 | if (active & GERROR_CMDQ_ERR) |
48ec83bc WD |
1687 | arm_smmu_cmdq_skip_err(smmu); |
1688 | ||
1689 | writel(gerror, smmu->base + ARM_SMMU_GERRORN); | |
1690 | return IRQ_HANDLED; | |
1691 | } | |
1692 | ||
f935448a GS |
1693 | static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) |
1694 | { | |
1695 | struct arm_smmu_device *smmu = dev; | |
1696 | ||
1697 | arm_smmu_evtq_thread(irq, dev); | |
1698 | if (smmu->features & ARM_SMMU_FEAT_PRI) | |
1699 | arm_smmu_priq_thread(irq, dev); | |
1700 | ||
1701 | return IRQ_HANDLED; | |
1702 | } | |
1703 | ||
1704 | static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) | |
1705 | { | |
1706 | arm_smmu_gerror_handler(irq, dev); | |
f935448a GS |
1707 | return IRQ_WAKE_THREAD; |
1708 | } | |
1709 | ||
9ce27afc JPB |
1710 | static void |
1711 | arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, | |
1712 | struct arm_smmu_cmdq_ent *cmd) | |
1713 | { | |
1714 | size_t log2_span; | |
1715 | size_t span_mask; | |
1716 | /* ATC invalidates are always on 4096-byte pages */ | |
1717 | size_t inval_grain_shift = 12; | |
1718 | unsigned long page_start, page_end; | |
1719 | ||
2f7e8c55 JPB |
1720 | /* |
1721 | * ATS and PASID: | |
1722 | * | |
1723 | * If substream_valid is clear, the PCIe TLP is sent without a PASID | |
1724 | * prefix. In that case all ATC entries within the address range are | |
1725 | * invalidated, including those that were requested with a PASID! There | |
1726 | * is no way to invalidate only entries without PASID. | |
1727 | * | |
1728 | * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID | |
1729 | * traffic), translation requests without PASID create ATC entries | |
1730 | * without PASID, which must be invalidated with substream_valid clear. | |
1731 | * This has the unpleasant side-effect of invalidating all PASID-tagged | |
1732 | * ATC entries within the address range. | |
1733 | */ | |
9ce27afc JPB |
1734 | *cmd = (struct arm_smmu_cmdq_ent) { |
1735 | .opcode = CMDQ_OP_ATC_INV, | |
1736 | .substream_valid = !!ssid, | |
1737 | .atc.ssid = ssid, | |
1738 | }; | |
1739 | ||
1740 | if (!size) { | |
1741 | cmd->atc.size = ATC_INV_SIZE_ALL; | |
1742 | return; | |
1743 | } | |
1744 | ||
1745 | page_start = iova >> inval_grain_shift; | |
1746 | page_end = (iova + size - 1) >> inval_grain_shift; | |
1747 | ||
1748 | /* | |
1749 | * In an ATS Invalidate Request, the address must be aligned on the | |
1750 | * range size, which must be a power of two number of page sizes. We | |
1751 | * thus have to choose between grossly over-invalidating the region, or | |
1752 | * splitting the invalidation into multiple commands. For simplicity | |
1753 | * we'll go with the first solution, but should refine it in the future | |
1754 | * if multiple commands are shown to be more efficient. | |
1755 | * | |
1756 | * Find the smallest power of two that covers the range. The most | |
1757 | * significant differing bit between the start and end addresses, | |
1758 | * fls(start ^ end), indicates the required span. For example: | |
1759 | * | |
1760 | * We want to invalidate pages [8; 11]. This is already the ideal range: | |
1761 | * x = 0b1000 ^ 0b1011 = 0b11 | |
1762 | * span = 1 << fls(x) = 4 | |
1763 | * | |
1764 | * To invalidate pages [7; 10], we need to invalidate [0; 15]: | |
1765 | * x = 0b0111 ^ 0b1010 = 0b1101 | |
1766 | * span = 1 << fls(x) = 16 | |
1767 | */ | |
1768 | log2_span = fls_long(page_start ^ page_end); | |
1769 | span_mask = (1ULL << log2_span) - 1; | |
1770 | ||
1771 | page_start &= ~span_mask; | |
1772 | ||
1773 | cmd->atc.addr = page_start << inval_grain_shift; | |
1774 | cmd->atc.size = log2_span; | |
1775 | } | |
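/*
 * Continuing the [7; 10] example from the comment above, with
 * iova == 0x7000 and size == 0x4000 (4KiB grains):
 *
 *	page_start = 7, page_end = 10
 *	log2_span  = fls(7 ^ 10) = fls(0b1101) = 4
 *	page_start &= ~0xf	-> 0
 *	cmd->atc.addr = 0, cmd->atc.size = 4
 *
 * so a single ATC_INV covers pages [0; 15] (64KiB), over-invalidating
 * exactly as the comment describes.
 */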
1776 | ||
9e773aee | 1777 | static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) |
9ce27afc JPB |
1778 | { |
1779 | int i; | |
9e773aee | 1780 | struct arm_smmu_cmdq_ent cmd; |
93f9f795 | 1781 | struct arm_smmu_cmdq_batch cmds; |
9ce27afc | 1782 | |
9e773aee | 1783 | arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); |
9ce27afc | 1784 | |
93f9f795 | 1785 | cmds.num = 0; |
cdf315f9 JPB |
1786 | for (i = 0; i < master->num_streams; i++) { |
1787 | cmd.atc.sid = master->streams[i].id; | |
eff19474 | 1788 | arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); |
9ce27afc JPB |
1789 | } |
1790 | ||
eff19474 | 1791 | return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); |
9ce27afc JPB |
1792 | } |
1793 | ||
2f7e8c55 JPB |
1794 | int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, |
1795 | unsigned long iova, size_t size) | |
9ce27afc | 1796 | { |
9e773aee | 1797 | int i; |
9ce27afc JPB |
1798 | unsigned long flags; |
1799 | struct arm_smmu_cmdq_ent cmd; | |
1800 | struct arm_smmu_master *master; | |
fac95671 | 1801 | struct arm_smmu_cmdq_batch cmds; |
9ce27afc JPB |
1802 | |
1803 | if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) | |
1804 | return 0; | |
1805 | ||
cdb8a3c3 WD |
1806 | /* |
1807 | * Ensure that we've completed prior invalidation of the main TLBs | |
1808 | * before we read 'nr_ats_masters' in case of a concurrent call to | |
1809 | * arm_smmu_enable_ats(): | |
1810 | * | |
1811 | * // unmap() // arm_smmu_enable_ats() | |
1812 | * TLBI+SYNC atomic_inc(&nr_ats_masters); | |
1813 | * smp_mb(); [...] | |
1814 | * atomic_read(&nr_ats_masters); pci_enable_ats() // writel() | |
1815 | * | |
1816 | * Ensures that we always see the incremented 'nr_ats_masters' count if | |
1817 | * ATS was enabled at the PCI device before completion of the TLBI. | |
1818 | */ | |
1819 | smp_mb(); | |
1820 | if (!atomic_read(&smmu_domain->nr_ats_masters)) | |
1821 | return 0; | |
1822 | ||
9ce27afc JPB |
1823 | arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); |
1824 | ||
fac95671 JG |
1825 | cmds.num = 0; |
1826 | ||
9ce27afc | 1827 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); |
9e773aee RH |
1828 | list_for_each_entry(master, &smmu_domain->devices, domain_head) { |
1829 | if (!master->ats_enabled) | |
1830 | continue; | |
1831 | ||
cdf315f9 JPB |
1832 | for (i = 0; i < master->num_streams; i++) { |
1833 | cmd.atc.sid = master->streams[i].id; | |
9e773aee RH |
1834 | arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); |
1835 | } | |
1836 | } | |
9ce27afc JPB |
1837 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); |
1838 | ||
9e773aee | 1839 | return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); |
9ce27afc JPB |
1840 | } |
1841 | ||
48ec83bc | 1842 | /* IO_PGTABLE API */ |
48ec83bc WD |
1843 | static void arm_smmu_tlb_inv_context(void *cookie) |
1844 | { | |
1845 | struct arm_smmu_domain *smmu_domain = cookie; | |
1846 | struct arm_smmu_device *smmu = smmu_domain->smmu; | |
1847 | struct arm_smmu_cmdq_ent cmd; | |
1848 | ||
9662b99a ZL |
1849 | /* |
1850 | * NOTE: when io-pgtable is in non-strict mode, we may get here with | |
1851 | * PTEs previously cleared by unmaps on the current CPU not yet visible | |
587e6c10 WD |
1852 | * to the SMMU. We are relying on the dma_wmb() implicit during cmd |
1853 | * insertion to guarantee those are observed before the TLBI. Do be | |
1854 | * careful, 007. | |
9662b99a | 1855 | */ |
3e630336 JPB |
1856 | if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { |
1857 | arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid); | |
1858 | } else { | |
1859 | cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; | |
1860 | cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; | |
4537f6f1 | 1861 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
3e630336 | 1862 | } |
353e3cf8 | 1863 | arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); |
48ec83bc WD |
1864 | } |
1865 | ||
eba8d2f8 JPB |
1866 | static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, |
1867 | unsigned long iova, size_t size, | |
1868 | size_t granule, | |
1869 | struct arm_smmu_domain *smmu_domain) | |
48ec83bc | 1870 | { |
48ec83bc | 1871 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
eba8d2f8 | 1872 | unsigned long end = iova + size, num_pages = 0, tg = 0; |
6a481a95 | 1873 | size_t inv_range = granule; |
fac95671 | 1874 | struct arm_smmu_cmdq_batch cmds; |
48ec83bc | 1875 | |
7314ca86 WD |
1876 | if (!size) |
1877 | return; | |
1878 | ||
6a481a95 RH |
1879 | if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { |
1880 | /* Get the leaf page size */ | |
1881 | tg = __ffs(smmu_domain->domain.pgsize_bitmap); | |
1882 | ||
1883 | /* Convert page size of 12,14,16 (log2) to 1,2,3 */ | |
eba8d2f8 | 1884 | cmd->tlbi.tg = (tg - 10) / 2; |
6a481a95 RH |
1885 | |
1886 | /* Determine what level the granule is at */ | |
eba8d2f8 | 1887 | cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); |
6a481a95 RH |
1888 | |
1889 | num_pages = size >> tg; | |
1890 | } | |
1891 | ||
fac95671 JG |
1892 | cmds.num = 0; |
1893 | ||
2af2e72b | 1894 | while (iova < end) { |
6a481a95 RH |
1895 | if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { |
1896 | /* | |
1897 | * On each iteration of the loop, the range is 5 bits | |
1898 | * worth of the aligned size remaining. | |
1899 | * The range in pages is: | |
1900 | * | |
1901 | * range = (num_pages & (0x1f << __ffs(num_pages))) | |
1902 | */ | |
1903 | unsigned long scale, num; | |
1904 | ||
1905 | /* Determine the power of 2 multiple number of pages */ | |
1906 | scale = __ffs(num_pages); | |
eba8d2f8 | 1907 | cmd->tlbi.scale = scale; |
6a481a95 RH |
1908 | |
1909 | /* Determine how many chunks of 2^scale size we have */ | |
1910 | num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; | |
eba8d2f8 | 1911 | cmd->tlbi.num = num - 1; |
6a481a95 RH |
1912 | |
1913 | /* range is num * 2^scale * pgsize */ | |
1914 | inv_range = num << (scale + tg); | |
1915 | ||
1916 | /* Clear out the lower order bits for the next iteration */ | |
1917 | num_pages -= num << scale; | |
2af2e72b WD |
1918 | } |
1919 | ||
eba8d2f8 JPB |
1920 | cmd->tlbi.addr = iova; |
1921 | arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); | |
6a481a95 | 1922 | iova += inv_range; |
2af2e72b | 1923 | } |
4ce8da45 | 1924 | arm_smmu_cmdq_batch_submit(smmu, &cmds); |
eba8d2f8 JPB |
1925 | } |
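/*
 * One pass through the loop above, sketched for a range-capable SMMU
 * (ARM_SMMU_FEAT_RANGE_INV) with 4KiB leaf pages (tg == 12),
 * invalidating a 2MiB block mapping (granule == 2MiB):
 *
 *	cmd->tlbi.tg  = (12 - 10) / 2 = 1
 *	cmd->tlbi.ttl = 4 - ((21 - 3) / (12 - 3)) = 2	(level-2 block)
 *	num_pages = 0x200000 >> 12 = 512
 *	scale = __ffs(512) = 9, num = 1, cmd->tlbi.num = 0
 *	inv_range = 1 << (9 + 12) = 2MiB
 *
 * so the whole range is consumed by a single TLBI command.
 */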
1926 | ||
1927 | static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, | |
1928 | size_t granule, bool leaf, | |
1929 | struct arm_smmu_domain *smmu_domain) | |
1930 | { | |
1931 | struct arm_smmu_cmdq_ent cmd = { | |
1932 | .tlbi = { | |
1933 | .leaf = leaf, | |
1934 | }, | |
1935 | }; | |
1936 | ||
1937 | if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { | |
9111aebf JPB |
1938 | cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? |
1939 | CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; | |
eba8d2f8 JPB |
1940 | cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; |
1941 | } else { | |
1942 | cmd.opcode = CMDQ_OP_TLBI_S2_IPA; | |
1943 | cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; | |
1944 | } | |
1945 | __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); | |
353e3cf8 WD |
1946 | |
1947 | /* | |
1948 | * Unfortunately, this can't be leaf-only since we may have | |
1949 | * zapped an entire table. | |
1950 | */ | |
eba8d2f8 | 1951 | arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); |
48ec83bc WD |
1952 | } |
1953 | ||
51d113c3 JPB |
1954 | void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, |
1955 | size_t granule, bool leaf, | |
1956 | struct arm_smmu_domain *smmu_domain) | |
1957 | { | |
1958 | struct arm_smmu_cmdq_ent cmd = { | |
9111aebf JPB |
1959 | .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? |
1960 | CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA, | |
51d113c3 JPB |
1961 | .tlbi = { |
1962 | .asid = asid, | |
1963 | .leaf = leaf, | |
1964 | }, | |
1965 | }; | |
1966 | ||
1967 | __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); | |
1968 | } | |
1969 | ||
3951c41a WD |
1970 | static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, |
1971 | unsigned long iova, size_t granule, | |
abfd6fe0 WD |
1972 | void *cookie) |
1973 | { | |
2af2e72b WD |
1974 | struct arm_smmu_domain *smmu_domain = cookie; |
1975 | struct iommu_domain *domain = &smmu_domain->domain; | |
1976 | ||
1977 | iommu_iotlb_gather_add_page(domain, gather, iova, granule); | |
abfd6fe0 WD |
1978 | } |
1979 | ||
05aed941 WD |
1980 | static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, |
1981 | size_t granule, void *cookie) | |
1982 | { | |
eba8d2f8 | 1983 | arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie); |
05aed941 WD |
1984 | } |
1985 | ||
298f7889 | 1986 | static const struct iommu_flush_ops arm_smmu_flush_ops = { |
48ec83bc | 1987 | .tlb_flush_all = arm_smmu_tlb_inv_context, |
05aed941 | 1988 | .tlb_flush_walk = arm_smmu_tlb_inv_walk, |
abfd6fe0 | 1989 | .tlb_add_page = arm_smmu_tlb_inv_page_nosync, |
48ec83bc WD |
1990 | }; |
1991 | ||
1992 | /* IOMMU API */ | |
359ad157 | 1993 | static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) |
48ec83bc | 1994 | { |
df198b37 RM |
1995 | struct arm_smmu_master *master = dev_iommu_priv_get(dev); |
1996 | ||
48ec83bc WD |
1997 | switch (cap) { |
1998 | case IOMMU_CAP_CACHE_COHERENCY: | |
df198b37 RM |
1999 | /* Assume that a coherent TCU implies coherent TBUs */ |
2000 | return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; | |
48ec83bc WD |
2001 | case IOMMU_CAP_NOEXEC: |
2002 | return true; | |
2003 | default: | |
2004 | return false; | |
2005 | } | |
2006 | } | |
2007 | ||
2008 | static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) | |
2009 | { | |
2010 | struct arm_smmu_domain *smmu_domain; | |
2011 | ||
386fa64f LB |
2012 | if (type == IOMMU_DOMAIN_SVA) |
2013 | return arm_smmu_sva_domain_alloc(); | |
2014 | ||
beb3c6a0 WD |
2015 | if (type != IOMMU_DOMAIN_UNMANAGED && |
2016 | type != IOMMU_DOMAIN_DMA && | |
f9afa313 | 2017 | type != IOMMU_DOMAIN_DMA_FQ && |
beb3c6a0 | 2018 | type != IOMMU_DOMAIN_IDENTITY) |
48ec83bc WD |
2019 | return NULL; |
2020 | ||
2021 | /* | |
2022 | * Allocate the domain and initialise some of its data structures. | |
2023 | * We can't really do anything meaningful until we've added a | |
2024 | * master. | |
2025 | */ | |
2026 | smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); | |
2027 | if (!smmu_domain) | |
2028 | return NULL; | |
2029 | ||
2030 | mutex_init(&smmu_domain->init_mutex); | |
2a7e62f5 JPB |
2031 | INIT_LIST_HEAD(&smmu_domain->devices); |
2032 | spin_lock_init(&smmu_domain->devices_lock); | |
32784a95 | 2033 | INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); |
2a7e62f5 | 2034 | |
48ec83bc WD |
2035 | return &smmu_domain->domain; |
2036 | } | |
2037 | ||
2038 | static int arm_smmu_bitmap_alloc(unsigned long *map, int span) | |
2039 | { | |
2040 | int idx, size = 1 << span; | |
2041 | ||
2042 | do { | |
2043 | idx = find_first_zero_bit(map, size); | |
2044 | if (idx == size) | |
2045 | return -ENOSPC; | |
2046 | } while (test_and_set_bit(idx, map)); | |
2047 | ||
2048 | return idx; | |
2049 | } | |
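/*
 * The test_and_set_bit() loop closes the window where two CPUs spot
 * the same free bit: only one test_and_set_bit() returns 0 and claims
 * idx; the loser goes back to searching. Usage as in
 * arm_smmu_domain_finalise_s2():
 *
 *	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
 *	if (vmid < 0)
 *		return vmid;
 *
 * where vmid is -ENOSPC once all 1 << span bits are set.
 */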
2050 | ||
2051 | static void arm_smmu_bitmap_free(unsigned long *map, int idx) | |
2052 | { | |
2053 | clear_bit(idx, map); | |
2054 | } | |
2055 | ||
2056 | static void arm_smmu_domain_free(struct iommu_domain *domain) | |
2057 | { | |
2058 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); | |
2059 | struct arm_smmu_device *smmu = smmu_domain->smmu; | |
2060 | ||
a6e08fb2 | 2061 | free_io_pgtable_ops(smmu_domain->pgtbl_ops); |
48ec83bc WD |
2062 | |
2063 | /* Free the CD and ASID, if we allocated them */ | |
2064 | if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { | |
2065 | struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; | |
2066 | ||
3f1ce8e8 JPB |
2067 | /* Prevent SVA from touching the CD while we're freeing it */ |
2068 | mutex_lock(&arm_smmu_asid_lock); | |
0299a1a8 | 2069 | if (cfg->cdcfg.cdtab) |
a557aff0 | 2070 | arm_smmu_free_cd_tables(smmu_domain); |
0299a1a8 | 2071 | arm_smmu_free_asid(&cfg->cd); |
3f1ce8e8 | 2072 | mutex_unlock(&arm_smmu_asid_lock); |
48ec83bc WD |
2073 | } else { |
2074 | struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; | |
2075 | if (cfg->vmid) | |
2076 | arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid); | |
2077 | } | |
2078 | ||
2079 | kfree(smmu_domain); | |
2080 | } | |
2081 | ||
2082 | static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, | |
2505ec6f | 2083 | struct arm_smmu_master *master, |
48ec83bc WD |
2084 | struct io_pgtable_cfg *pgtbl_cfg) |
2085 | { | |
2086 | int ret; | |
0299a1a8 | 2087 | u32 asid; |
48ec83bc WD |
2088 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
2089 | struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; | |
fb485eb1 | 2090 | typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; |
48ec83bc | 2091 | |
3f1ce8e8 JPB |
2092 | refcount_set(&cfg->cd.refs, 1); |
2093 | ||
2094 | /* Prevent SVA from modifying the ASID until it is written to the CD */ | |
2095 | mutex_lock(&arm_smmu_asid_lock); | |
2096 | ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd, | |
0299a1a8 JPB |
2097 | XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); |
2098 | if (ret) | |
3f1ce8e8 | 2099 | goto out_unlock; |
48ec83bc | 2100 | |
2505ec6f JPB |
2101 | cfg->s1cdmax = master->ssid_bits; |
2102 | ||
395ad89d JPB |
2103 | smmu_domain->stall_enabled = master->stall_enabled; |
2104 | ||
a557aff0 JPB |
2105 | ret = arm_smmu_alloc_cd_tables(smmu_domain); |
2106 | if (ret) | |
48ec83bc | 2107 | goto out_free_asid; |
48ec83bc | 2108 | |
c0733a2c | 2109 | cfg->cd.asid = (u16)asid; |
d1e5f26f | 2110 | cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; |
fb485eb1 RM |
2111 | cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | |
2112 | FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | | |
2113 | FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | | |
2114 | FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | | |
2115 | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | | |
2116 | FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | | |
2117 | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; | |
205577ab | 2118 | cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; |
492ddc79 JPB |
2119 | |
2120 | /* | |
2121 | * Note that this will end up calling arm_smmu_sync_cd() before | |
2122 | * the master has been added to the devices list for this domain. | |
2123 | * This isn't an issue because the STE hasn't been installed yet. | |
2124 | */ | |
2125 | ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); | |
2126 | if (ret) | |
2127 | goto out_free_cd_tables; | |
2128 | ||
3f1ce8e8 | 2129 | mutex_unlock(&arm_smmu_asid_lock); |
48ec83bc WD |
2130 | return 0; |
2131 | ||
492ddc79 JPB |
2132 | out_free_cd_tables: |
2133 | arm_smmu_free_cd_tables(smmu_domain); | |
48ec83bc | 2134 | out_free_asid: |
0299a1a8 | 2135 | arm_smmu_free_asid(&cfg->cd); |
3f1ce8e8 JPB |
2136 | out_unlock: |
2137 | mutex_unlock(&arm_smmu_asid_lock); | |
48ec83bc WD |
2138 | return ret; |
2139 | } | |
2140 | ||
2141 | static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, | |
2505ec6f | 2142 | struct arm_smmu_master *master, |
48ec83bc WD |
2143 | struct io_pgtable_cfg *pgtbl_cfg) |
2144 | { | |
c0733a2c | 2145 | int vmid; |
48ec83bc WD |
2146 | struct arm_smmu_device *smmu = smmu_domain->smmu; |
2147 | struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; | |
ac4b80e5 | 2148 | typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; |
48ec83bc WD |
2149 | |
2150 | vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); | |
287980e4 | 2151 | if (vmid < 0) |
48ec83bc WD |
2152 | return vmid; |
2153 | ||
ac4b80e5 | 2154 | vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; |
c0733a2c | 2155 | cfg->vmid = (u16)vmid; |
48ec83bc | 2156 | cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; |
ac4b80e5 WD |
2157 | cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | |
2158 | FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | | |
2159 | FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | | |
2160 | FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | | |
2161 | FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | | |
2162 | FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | | |
2163 | FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); | |
48ec83bc WD |
2164 | return 0; |
2165 | } | |
2166 | ||
2505ec6f JPB |
2167 | static int arm_smmu_domain_finalise(struct iommu_domain *domain, |
2168 | struct arm_smmu_master *master) | |
48ec83bc WD |
2169 | { |
2170 | int ret; | |
2171 | unsigned long ias, oas; | |
2172 | enum io_pgtable_fmt fmt; | |
2173 | struct io_pgtable_cfg pgtbl_cfg; | |
2174 | struct io_pgtable_ops *pgtbl_ops; | |
2175 | int (*finalise_stage_fn)(struct arm_smmu_domain *, | |
2505ec6f | 2176 | struct arm_smmu_master *, |
48ec83bc WD |
2177 | struct io_pgtable_cfg *); |
2178 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); | |
2179 | struct arm_smmu_device *smmu = smmu_domain->smmu; | |
2180 | ||
beb3c6a0 WD |
2181 | if (domain->type == IOMMU_DOMAIN_IDENTITY) { |
2182 | smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; | |
2183 | return 0; | |
2184 | } | |
2185 | ||
48ec83bc WD |
2186 | /* Restrict the stage to what we can actually support */ |
2187 | if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) | |
2188 | smmu_domain->stage = ARM_SMMU_DOMAIN_S2; | |
2189 | if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) | |
2190 | smmu_domain->stage = ARM_SMMU_DOMAIN_S1; | |
2191 | ||
2192 | switch (smmu_domain->stage) { | |
2193 | case ARM_SMMU_DOMAIN_S1: | |
dcd189e6 RM |
2194 | ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48; |
2195 | ias = min_t(unsigned long, ias, VA_BITS); | |
48ec83bc WD |
2196 | oas = smmu->ias; |
2197 | fmt = ARM_64_LPAE_S1; | |
2198 | finalise_stage_fn = arm_smmu_domain_finalise_s1; | |
2199 | break; | |
2200 | case ARM_SMMU_DOMAIN_NESTED: | |
2201 | case ARM_SMMU_DOMAIN_S2: | |
2202 | ias = smmu->ias; | |
2203 | oas = smmu->oas; | |
2204 | fmt = ARM_64_LPAE_S2; | |
2205 | finalise_stage_fn = arm_smmu_domain_finalise_s2; | |
2206 | break; | |
2207 | default: | |
2208 | return -EINVAL; | |
2209 | } | |
2210 | ||
2211 | pgtbl_cfg = (struct io_pgtable_cfg) { | |
d5466357 | 2212 | .pgsize_bitmap = smmu->pgsize_bitmap, |
48ec83bc WD |
2213 | .ias = ias, |
2214 | .oas = oas, | |
4f41845b | 2215 | .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, |
298f7889 | 2216 | .tlb = &arm_smmu_flush_ops, |
bdc6d973 | 2217 | .iommu_dev = smmu->dev, |
48ec83bc WD |
2218 | }; |
2219 | ||
2220 | pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); | |
2221 | if (!pgtbl_ops) | |
2222 | return -ENOMEM; | |
2223 | ||
d5466357 | 2224 | domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; |
6619c913 | 2225 | domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; |
455eb7d3 | 2226 | domain->geometry.force_aperture = true; |
48ec83bc | 2227 | |
2505ec6f | 2228 | ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); |
57d72e15 | 2229 | if (ret < 0) { |
48ec83bc | 2230 | free_io_pgtable_ops(pgtbl_ops); |
57d72e15 JPB |
2231 | return ret; |
2232 | } | |
48ec83bc | 2233 | |
57d72e15 JPB |
2234 | smmu_domain->pgtbl_ops = pgtbl_ops; |
2235 | return 0; | |
48ec83bc WD |
2236 | } |
2237 | ||
48ec83bc WD |
2238 | static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) |
2239 | { | |
2240 | __le64 *step; | |
2241 | struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; | |
2242 | ||
2243 | if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { | |
2244 | struct arm_smmu_strtab_l1_desc *l1_desc; | |
2245 | int idx; | |
2246 | ||
2247 | /* Two-level walk */ | |
2248 | idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; | |
2249 | l1_desc = &cfg->l1_desc[idx]; | |
2250 | idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; | |
2251 | step = &l1_desc->l2ptr[idx]; | |
2252 | } else { | |
2253 | /* Simple linear lookup */ | |
2254 | step = &cfg->strtab[sid * STRTAB_STE_DWORDS]; | |
2255 | } | |
2256 | ||
2257 | return step; | |
2258 | } | |
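/*
 * Index arithmetic for the two-level case, assuming the arm-smmu-v3.h
 * values STRTAB_SPLIT == 8, STRTAB_L1_DESC_DWORDS == 1 and
 * STRTAB_STE_DWORDS == 8, for a hypothetical sid == 0x1234:
 *
 *	level-1 index = (0x1234 >> 8) * 1 = 0x12
 *	level-2 index = (0x1234 & 0xff) * 8 = 0x34 * 8
 *
 * The returned step points at the 64-byte STE for SID 0x1234 inside
 * the level-2 table that spans SIDs [0x1200; 0x12ff].
 */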
2259 | ||
bcecaee4 | 2260 | static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) |
48ec83bc | 2261 | { |
563b5cbe | 2262 | int i, j; |
8f785154 | 2263 | struct arm_smmu_device *smmu = master->smmu; |
48ec83bc | 2264 | |
cdf315f9 JPB |
2265 | for (i = 0; i < master->num_streams; ++i) { |
2266 | u32 sid = master->streams[i].id; | |
48ec83bc WD |
2267 | __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); |
2268 | ||
563b5cbe RM |
2269 | /* Bridged PCI devices may end up with duplicated IDs */ |
2270 | for (j = 0; j < i; j++) | |
cdf315f9 | 2271 | if (master->streams[j].id == sid) |
563b5cbe RM |
2272 | break; |
2273 | if (j < i) | |
2274 | continue; | |
2275 | ||
8be39a1a | 2276 | arm_smmu_write_strtab_ent(master, sid, step); |
48ec83bc | 2277 | } |
48ec83bc WD |
2278 | } |
2279 | ||
bfff88ec | 2280 | static bool arm_smmu_ats_supported(struct arm_smmu_master *master) |
9ce27afc | 2281 | { |
0b2527a6 | 2282 | struct device *dev = master->dev; |
9ce27afc | 2283 | struct arm_smmu_device *smmu = master->smmu; |
0b2527a6 | 2284 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); |
9ce27afc | 2285 | |
0b2527a6 | 2286 | if (!(smmu->features & ARM_SMMU_FEAT_ATS)) |
bfff88ec | 2287 | return false; |
9ce27afc | 2288 | |
0b2527a6 JPB |
2289 | if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) |
2290 | return false; | |
2291 | ||
2292 | return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); | |
097a7df2 | 2293 | } |
9ce27afc | 2294 | |
bfff88ec WD |
2295 | static void arm_smmu_enable_ats(struct arm_smmu_master *master) |
2296 | { | |
2297 | size_t stu; | |
2298 | struct pci_dev *pdev; | |
2299 | struct arm_smmu_device *smmu = master->smmu; | |
cdb8a3c3 | 2300 | struct arm_smmu_domain *smmu_domain = master->domain; |
9ce27afc | 2301 | |
bfff88ec WD |
2302 | /* Don't enable ATS at the endpoint if it's not enabled in the STE */ |
2303 | if (!master->ats_enabled) | |
2304 | return; | |
9ce27afc JPB |
2305 | |
2306 | /* Smallest Translation Unit: log2 of the smallest supported granule */ | |
2307 | stu = __ffs(smmu->pgsize_bitmap); | |
bfff88ec | 2308 | pdev = to_pci_dev(master->dev); |
9ce27afc | 2309 | |
cdb8a3c3 WD |
2310 | atomic_inc(&smmu_domain->nr_ats_masters); |
2311 | arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); | |
bfff88ec WD |
2312 | if (pci_enable_ats(pdev, stu)) |
2313 | dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); | |
9ce27afc JPB |
2314 | } |
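/*
 * For example, with a typical pgsize_bitmap of SZ_4K | SZ_2M | SZ_1G,
 * __ffs() yields 12 and the endpoint's Smallest Translation Unit is
 * programmed as 4KiB (pci_enable_ats() takes the log2 page shift, not
 * a byte count).
 */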
2315 | ||
2316 | static void arm_smmu_disable_ats(struct arm_smmu_master *master) | |
2317 | { | |
cdb8a3c3 | 2318 | struct arm_smmu_domain *smmu_domain = master->domain; |
8dd8f005 | 2319 | |
bfff88ec | 2320 | if (!master->ats_enabled) |
9ce27afc JPB |
2321 | return; |
2322 | ||
bfff88ec WD |
2323 | pci_disable_ats(to_pci_dev(master->dev)); |
2324 | /* | |
2325 | * Ensure ATS is disabled at the endpoint before we issue the | |
2326 | * ATC invalidation via the SMMU. | |
2327 | */ | |
2328 | wmb(); | |
9e773aee | 2329 | arm_smmu_atc_inv_master(master); |
cdb8a3c3 | 2330 | atomic_dec(&smmu_domain->nr_ats_masters); |
9ce27afc JPB |
2331 | } |
2332 | ||
058c59a0 JPB |
2333 | static int arm_smmu_enable_pasid(struct arm_smmu_master *master) |
2334 | { | |
2335 | int ret; | |
2336 | int features; | |
2337 | int num_pasids; | |
2338 | struct pci_dev *pdev; | |
2339 | ||
2340 | if (!dev_is_pci(master->dev)) | |
2341 | return -ENODEV; | |
2342 | ||
2343 | pdev = to_pci_dev(master->dev); | |
2344 | ||
2345 | features = pci_pasid_features(pdev); | |
2346 | if (features < 0) | |
2347 | return features; | |
2348 | ||
2349 | num_pasids = pci_max_pasids(pdev); | |
2350 | if (num_pasids <= 0) | |
2351 | return num_pasids; | |
2352 | ||
2353 | ret = pci_enable_pasid(pdev, features); | |
2354 | if (ret) { | |
2355 | dev_err(&pdev->dev, "Failed to enable PASID\n"); | |
2356 | return ret; | |
2357 | } | |
2358 | ||
2359 | master->ssid_bits = min_t(u8, ilog2(num_pasids), | |
2360 | master->smmu->ssid_bits); | |
2361 | return 0; | |
2362 | } | |
2363 | ||
2364 | static void arm_smmu_disable_pasid(struct arm_smmu_master *master) | |
2365 | { | |
2366 | struct pci_dev *pdev; | |
2367 | ||
2368 | if (!dev_is_pci(master->dev)) | |
2369 | return; | |
2370 | ||
2371 | pdev = to_pci_dev(master->dev); | |
2372 | ||
2373 | if (!pdev->pasid_enabled) | |
2374 | return; | |
2375 | ||
2376 | master->ssid_bits = 0; | |
2377 | pci_disable_pasid(pdev); | |
2378 | } | |
2379 | ||
bcecaee4 | 2380 | static void arm_smmu_detach_dev(struct arm_smmu_master *master) |
bc7f2ce0 | 2381 | { |
2a7e62f5 JPB |
2382 | unsigned long flags; |
2383 | struct arm_smmu_domain *smmu_domain = master->domain; | |
2384 | ||
2385 | if (!smmu_domain) | |
8be39a1a JPB |
2386 | return; |
2387 | ||
cdb8a3c3 WD |
2388 | arm_smmu_disable_ats(master); |
2389 | ||
2a7e62f5 JPB |
2390 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); |
2391 | list_del(&master->domain_head); | |
2392 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | |
2393 | ||
8be39a1a | 2394 | master->domain = NULL; |
bfff88ec | 2395 | master->ats_enabled = false; |
bcecaee4 | 2396 | arm_smmu_install_ste_for_dev(master); |
bc7f2ce0 WD |
2397 | } |
2398 | ||
48ec83bc WD |
2399 | static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) |
2400 | { | |
2401 | int ret = 0; | |
2a7e62f5 | 2402 | unsigned long flags; |
9b468f7d | 2403 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); |
48ec83bc WD |
2404 | struct arm_smmu_device *smmu; |
2405 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); | |
b54f4260 | 2406 | struct arm_smmu_master *master; |
48ec83bc | 2407 | |
9b468f7d | 2408 | if (!fwspec) |
48ec83bc WD |
2409 | return -ENOENT; |
2410 | ||
b7a9662f | 2411 | master = dev_iommu_priv_get(dev); |
8f785154 | 2412 | smmu = master->smmu; |
8f785154 | 2413 | |
f534d98b JPB |
2414 | /* |
2415 | * Checking that SVA is disabled ensures that this device isn't bound to | |
2416 | * any mm, and can be safely detached from its old domain. Bonds cannot | |
2417 | * be removed concurrently since we're holding the group mutex. | |
2418 | */ | |
2419 | if (arm_smmu_master_sva_enabled(master)) { | |
2420 | dev_err(dev, "cannot attach - SVA enabled\n"); | |
2421 | return -EBUSY; | |
2422 | } | |
2423 | ||
8be39a1a | 2424 | arm_smmu_detach_dev(master); |
48ec83bc | 2425 | |
48ec83bc WD |
2426 | mutex_lock(&smmu_domain->init_mutex); |
2427 | ||
2428 | if (!smmu_domain->smmu) { | |
2429 | smmu_domain->smmu = smmu; | |
2505ec6f | 2430 | ret = arm_smmu_domain_finalise(domain, master); |
48ec83bc WD |
2431 | if (ret) { |
2432 | smmu_domain->smmu = NULL; | |
2433 | goto out_unlock; | |
2434 | } | |
2435 | } else if (smmu_domain->smmu != smmu) { | |
f4a14773 | 2436 | ret = -EINVAL; |
48ec83bc | 2437 | goto out_unlock; |
2505ec6f JPB |
2438 | } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && |
2439 | master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { | |
2505ec6f JPB |
2440 | ret = -EINVAL; |
2441 | goto out_unlock; | |
395ad89d JPB |
2442 | } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && |
2443 | smmu_domain->stall_enabled != master->stall_enabled) { | |
395ad89d JPB |
2444 | ret = -EINVAL; |
2445 | goto out_unlock; | |
48ec83bc WD |
2446 | } |
2447 | ||
8be39a1a | 2448 | master->domain = smmu_domain; |
cbf8277e | 2449 | |
8c153645 JPB |
2450 | /* |
2451 | * The SMMU does not support enabling ATS with bypass. When the STE is | |
2452 | * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and | |
2453 | * Translated transactions are denied as though ATS is disabled for the | |
2454 | * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and | |
2455 | * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). | |
2456 | */ | |
9ce27afc | 2457 | if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) |
bfff88ec | 2458 | master->ats_enabled = arm_smmu_ats_supported(master); |
9ce27afc | 2459 | |
bcecaee4 | 2460 | arm_smmu_install_ste_for_dev(master); |
cdb8a3c3 WD |
2461 | |
2462 | spin_lock_irqsave(&smmu_domain->devices_lock, flags); | |
2463 | list_add(&master->domain_head, &smmu_domain->devices); | |
2464 | spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); | |
2465 | ||
bfff88ec | 2466 | arm_smmu_enable_ats(master); |
cdb8a3c3 | 2467 | |
48ec83bc WD |
2468 | out_unlock: |
2469 | mutex_unlock(&smmu_domain->init_mutex); | |
2470 | return ret; | |
2471 | } | |
2472 | ||
9eec3f9b XC |
2473 | static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, |
2474 | phys_addr_t paddr, size_t pgsize, size_t pgcount, | |
2475 | int prot, gfp_t gfp, size_t *mapped) | |
48ec83bc | 2476 | { |
58188afe | 2477 | struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; |
48ec83bc WD |
2478 | |
2479 | if (!ops) | |
2480 | return -ENODEV; | |
2481 | ||
9eec3f9b | 2482 | return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped); |
48ec83bc WD |
2483 | } |
2484 | ||
59103c79 XC |
2485 | static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova, |
2486 | size_t pgsize, size_t pgcount, | |
2487 | struct iommu_iotlb_gather *gather) | |
48ec83bc | 2488 | { |
9ce27afc JPB |
2489 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); |
2490 | struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; | |
48ec83bc WD |
2491 | |
2492 | if (!ops) | |
2493 | return 0; | |
2494 | ||
59103c79 | 2495 | return ops->unmap_pages(ops, iova, pgsize, pgcount, gather); |
48ec83bc WD |
2496 | } |
2497 | ||
07fdef34 ZL |
2498 | static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) |
2499 | { | |
2500 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); | |
2501 | ||
2502 | if (smmu_domain->smmu) | |
2503 | arm_smmu_tlb_inv_context(smmu_domain); | |
2504 | } | |
2505 | ||
56f8af5e WD |
2506 | static void arm_smmu_iotlb_sync(struct iommu_domain *domain, |
2507 | struct iommu_iotlb_gather *gather) | |
32b12449 | 2508 | { |
2af2e72b | 2509 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); |
32b12449 | 2510 | |
6cc7e5a9 XC |
2511 | if (!gather->pgsize) |
2512 | return; | |
2513 | ||
eba8d2f8 | 2514 | arm_smmu_tlb_inv_range_domain(gather->start, |
7060377c | 2515 | gather->end - gather->start + 1, |
eba8d2f8 | 2516 | gather->pgsize, true, smmu_domain); |
32b12449 RM |
2517 | } |
2518 | ||
48ec83bc WD |
2519 | static phys_addr_t |
2520 | arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) | |
2521 | { | |
58188afe | 2522 | struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; |
48ec83bc WD |
2523 | |
2524 | if (!ops) | |
2525 | return 0; | |
2526 | ||
58188afe | 2527 | return ops->iova_to_phys(ops, iova); |
48ec83bc WD |
2528 | } |
2529 | ||
8f785154 | 2530 | static struct platform_driver arm_smmu_driver; |
48ec83bc | 2531 | |
778de074 LP |
2532 | static |
2533 | struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) | |
48ec83bc | 2534 | { |
67843bba SP |
2535 | struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, |
2536 | fwnode); | |
8f785154 RM |
2537 | put_device(dev); |
2538 | return dev ? dev_get_drvdata(dev) : NULL; | |
48ec83bc WD |
2539 | } |
2540 | ||
2541 | static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) | |
2542 | { | |
2543 | unsigned long limit = smmu->strtab_cfg.num_l1_ents; | |
2544 | ||
2545 | if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) | |
2546 | limit *= 1UL << STRTAB_SPLIT; | |
2547 | ||
2548 | return sid < limit; | |
2549 | } | |
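/*
 * e.g. a two-level stream table with num_l1_ents == 256 and the
 * (assumed) STRTAB_SPLIT of 8 accepts SIDs below 256 << 8 == 0x10000,
 * whereas a linear table accepts SIDs below num_l1_ents directly.
 */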
2550 | ||
04e2afd1 SK |
2551 | static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid) |
2552 | { | |
2553 | /* Check the SID is in range of the SMMU and our stream table */ | |
2554 | if (!arm_smmu_sid_in_range(smmu, sid)) | |
2555 | return -ERANGE; | |
2556 | ||
2557 | /* Ensure l2 strtab is initialised */ | |
2558 | if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) | |
2559 | return arm_smmu_init_l2_strtab(smmu, sid); | |
2560 | ||
2561 | return 0; | |
2562 | } | |
2563 | ||
cdf315f9 JPB |
2564 | static int arm_smmu_insert_master(struct arm_smmu_device *smmu, |
2565 | struct arm_smmu_master *master) | |
2566 | { | |
2567 | int i; | |
2568 | int ret = 0; | |
2569 | struct arm_smmu_stream *new_stream, *cur_stream; | |
2570 | struct rb_node **new_node, *parent_node = NULL; | |
2571 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); | |
2572 | ||
2573 | master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams), | |
2574 | GFP_KERNEL); | |
2575 | if (!master->streams) | |
2576 | return -ENOMEM; | |
2577 | master->num_streams = fwspec->num_ids; | |
2578 | ||
2579 | mutex_lock(&smmu->streams_mutex); | |
2580 | for (i = 0; i < fwspec->num_ids; i++) { | |
2581 | u32 sid = fwspec->ids[i]; | |
2582 | ||
2583 | new_stream = &master->streams[i]; | |
2584 | new_stream->id = sid; | |
2585 | new_stream->master = master; | |
2586 | ||
04e2afd1 SK |
2587 | ret = arm_smmu_init_sid_strtab(smmu, sid); |
2588 | if (ret) | |
cdf315f9 | 2589 | break; |
cdf315f9 JPB |
2590 | |
2591 | /* Insert into SID tree */ | |
2592 | new_node = &(smmu->streams.rb_node); | |
2593 | while (*new_node) { | |
2594 | cur_stream = rb_entry(*new_node, struct arm_smmu_stream, | |
2595 | node); | |
2596 | parent_node = *new_node; | |
2597 | if (cur_stream->id > new_stream->id) { | |
2598 | new_node = &((*new_node)->rb_left); | |
2599 | } else if (cur_stream->id < new_stream->id) { | |
2600 | new_node = &((*new_node)->rb_right); | |
2601 | } else { | |
2602 | dev_warn(master->dev, | |
2603 | "stream %u already in tree\n", | |
2604 | cur_stream->id); | |
2605 | ret = -EINVAL; | |
2606 | break; | |
2607 | } | |
2608 | } | |
2609 | if (ret) | |
2610 | break; | |
2611 | ||
2612 | rb_link_node(&new_stream->node, parent_node, new_node); | |
2613 | rb_insert_color(&new_stream->node, &smmu->streams); | |
2614 | } | |
2615 | ||
2616 | if (ret) { | |
2617 | for (i--; i >= 0; i--) | |
2618 | rb_erase(&master->streams[i].node, &smmu->streams); | |
2619 | kfree(master->streams); | |
2620 | } | |
2621 | mutex_unlock(&smmu->streams_mutex); | |
2622 | ||
2623 | return ret; | |
2624 | } | |
2625 | ||
2626 | static void arm_smmu_remove_master(struct arm_smmu_master *master) | |
2627 | { | |
2628 | int i; | |
2629 | struct arm_smmu_device *smmu = master->smmu; | |
2630 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); | |
2631 | ||
2632 | if (!smmu || !master->streams) | |
2633 | return; | |
2634 | ||
2635 | mutex_lock(&smmu->streams_mutex); | |
2636 | for (i = 0; i < fwspec->num_ids; i++) | |
2637 | rb_erase(&master->streams[i].node, &smmu->streams); | |
2638 | mutex_unlock(&smmu->streams_mutex); | |
2639 | ||
2640 | kfree(master->streams); | |
2641 | } | |
2642 | ||
8f785154 RM |
2643 | static struct iommu_ops arm_smmu_ops; |
2644 | ||
cefa0d55 | 2645 | static struct iommu_device *arm_smmu_probe_device(struct device *dev) |
48ec83bc | 2646 | { |
cdf315f9 | 2647 | int ret; |
48ec83bc | 2648 | struct arm_smmu_device *smmu; |
b54f4260 | 2649 | struct arm_smmu_master *master; |
9b468f7d | 2650 | struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); |
48ec83bc | 2651 | |
8f785154 | 2652 | if (!fwspec || fwspec->ops != &arm_smmu_ops) |
cefa0d55 | 2653 | return ERR_PTR(-ENODEV); |
8f785154 | 2654 | |
b7a9662f | 2655 | if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) |
cefa0d55 | 2656 | return ERR_PTR(-EBUSY); |
92c1d360 WD |
2657 | |
2658 | smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); | |
2659 | if (!smmu) | |
cefa0d55 | 2660 | return ERR_PTR(-ENODEV); |
92c1d360 WD |
2661 | |
2662 | master = kzalloc(sizeof(*master), GFP_KERNEL); | |
2663 | if (!master) | |
cefa0d55 | 2664 | return ERR_PTR(-ENOMEM); |
92c1d360 WD |
2665 | |
2666 | master->dev = dev; | |
2667 | master->smmu = smmu; | |
f534d98b | 2668 | INIT_LIST_HEAD(&master->bonds); |
b7a9662f | 2669 | dev_iommu_priv_set(dev, master); |
48ec83bc | 2670 | |
cdf315f9 JPB |
2671 | ret = arm_smmu_insert_master(smmu, master); |
2672 | if (ret) | |
2673 | goto err_free_master; | |
48ec83bc | 2674 | |
434b73e6 JPB |
2675 | device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits); |
2676 | master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits); | |
89535821 | 2677 | |
058c59a0 JPB |
2678 | /* |
2679 | * Note that PASID must be enabled before, and disabled after ATS: | |
2680 | * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register | |
2681 | * | |
2682 | * Behavior is undefined if this bit is Set and the value of the PASID | |
2683 | * Enable, Execute Requested Enable, or Privileged Mode Requested bits | |
2684 | * are changed. | |
2685 | */ | |
2686 | arm_smmu_enable_pasid(master); | |
2687 | ||
89535821 JPB |
2688 | if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) |
2689 | master->ssid_bits = min_t(u8, master->ssid_bits, | |
2690 | CTXDESC_LINEAR_CDMAX); | |
2691 | ||
395ad89d JPB |
2692 | if ((smmu->features & ARM_SMMU_FEAT_STALLS && |
2693 | device_property_read_bool(dev, "dma-can-stall")) || | |
2694 | smmu->features & ARM_SMMU_FEAT_STALL_FORCE) | |
2695 | master->stall_enabled = true; | |
2696 | ||
cefa0d55 | 2697 | return &smmu->iommu; |
9a4a9d8c | 2698 | |
a2be6218 JPB |
2699 | err_free_master: |
2700 | kfree(master); | |
b7a9662f | 2701 | dev_iommu_priv_set(dev, NULL); |
cefa0d55 | 2702 | return ERR_PTR(ret); |
48ec83bc WD |
2703 | } |
2704 | ||
cefa0d55 | 2705 | static void arm_smmu_release_device(struct device *dev) |
48ec83bc | 2706 | { |
4d26ba67 | 2707 | struct arm_smmu_master *master = dev_iommu_priv_get(dev); |
8f785154 | 2708 | |
395ad89d JPB |
2709 | if (WARN_ON(arm_smmu_master_sva_enabled(master))) |
2710 | iopf_queue_remove_device(master->smmu->evtq.iopf, dev); | |
8be39a1a | 2711 | arm_smmu_detach_dev(master); |
058c59a0 | 2712 | arm_smmu_disable_pasid(master); |
cdf315f9 | 2713 | arm_smmu_remove_master(master); |
8f785154 | 2714 | kfree(master); |
48ec83bc WD |
2715 | } |
2716 | ||
08d4ca2a RM |
2717 | static struct iommu_group *arm_smmu_device_group(struct device *dev) |
2718 | { | |
2719 | struct iommu_group *group; | |
2720 | ||
2721 | /* | |
2722 | * We don't support devices sharing stream IDs other than PCI RID | |
2723 | * aliases, since the necessary ID-to-device lookup becomes rather | |
2724 | * impractical given a potentially sparse 32-bit stream ID space. | |
2725 | */ | |
2726 | if (dev_is_pci(dev)) | |
2727 | group = pci_device_group(dev); | |
2728 | else | |
2729 | group = generic_device_group(dev); | |
2730 | ||
2731 | return group; | |
2732 | } | |
2733 | ||
7e147547 | 2734 | static int arm_smmu_enable_nesting(struct iommu_domain *domain) |
48ec83bc WD |
2735 | { |
2736 | struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); | |
48ec83bc | 2737 | int ret = 0; |
48ec83bc WD |
2738 | |
2739 | mutex_lock(&smmu_domain->init_mutex); | |
7e147547 CH |
2740 | if (smmu_domain->smmu) |
2741 | ret = -EPERM; | |
2742 | else | |
2743 | smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; | |
48ec83bc | 2744 | mutex_unlock(&smmu_domain->init_mutex); |
7e147547 | 2745 | |
48ec83bc WD |
2746 | return ret; |
2747 | } | |
2748 | ||
8f785154 RM |
2749 | static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) |
2750 | { | |
8f785154 RM |
2751 | return iommu_fwspec_add_ids(dev, args->args, 1); |
2752 | } | |
2753 | ||
50019f09 EA |
2754 | static void arm_smmu_get_resv_regions(struct device *dev, |
2755 | struct list_head *head) | |
2756 | { | |
2757 | struct iommu_resv_region *region; | |
2758 | int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; | |
2759 | ||
2760 | region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, | |
0251d010 | 2761 | prot, IOMMU_RESV_SW_MSI, GFP_KERNEL); |
50019f09 EA |
2762 | if (!region) |
2763 | return; | |
2764 | ||
2765 | list_add_tail(®ion->list, head); | |
273df963 RM |
2766 | |
2767 | iommu_dma_get_resv_regions(dev, head); | |
50019f09 EA |
2768 | } |
2769 | ||
f534d98b JPB |
2770 | static int arm_smmu_dev_enable_feature(struct device *dev, |
2771 | enum iommu_dev_features feat) | |
2772 | { | |
395ad89d JPB |
2773 | struct arm_smmu_master *master = dev_iommu_priv_get(dev); |
2774 | ||
469b7b8a | 2775 | if (!master) |
f534d98b JPB |
2776 | return -ENODEV; |
2777 | ||
f534d98b | 2778 | switch (feat) { |
395ad89d | 2779 | case IOMMU_DEV_FEAT_IOPF: |
469b7b8a CH |
2780 | if (!arm_smmu_master_iopf_supported(master)) |
2781 | return -EINVAL; | |
2782 | if (master->iopf_enabled) | |
2783 | return -EBUSY; | |
395ad89d JPB |
2784 | master->iopf_enabled = true; |
2785 | return 0; | |
f534d98b | 2786 | case IOMMU_DEV_FEAT_SVA: |
469b7b8a CH |
2787 | if (!arm_smmu_master_sva_supported(master)) |
2788 | return -EINVAL; | |
2789 | if (arm_smmu_master_sva_enabled(master)) | |
2790 | return -EBUSY; | |
395ad89d | 2791 | return arm_smmu_master_enable_sva(master); |
f534d98b JPB |
2792 | default: |
2793 | return -EINVAL; | |
2794 | } | |
2795 | } | |
2796 | ||
2797 | static int arm_smmu_dev_disable_feature(struct device *dev, | |
2798 | enum iommu_dev_features feat) | |
2799 | { | |
395ad89d JPB |
2800 | struct arm_smmu_master *master = dev_iommu_priv_get(dev); |
2801 | ||
469b7b8a | 2802 | if (!master) |
f534d98b JPB |
2803 | return -EINVAL; |
2804 | ||
2805 | switch (feat) { | |
395ad89d | 2806 | case IOMMU_DEV_FEAT_IOPF: |
469b7b8a CH |
2807 | if (!master->iopf_enabled) |
2808 | return -EINVAL; | |
395ad89d JPB |
2809 | if (master->sva_enabled) |
2810 | return -EBUSY; | |
2811 | master->iopf_enabled = false; | |
2812 | return 0; | |
f534d98b | 2813 | case IOMMU_DEV_FEAT_SVA: |
469b7b8a CH |
2814 | if (!arm_smmu_master_sva_enabled(master)) |
2815 | return -EINVAL; | |
395ad89d | 2816 | return arm_smmu_master_disable_sva(master); |
f534d98b JPB |
2817 | default: |
2818 | return -EINVAL; | |
2819 | } | |
2820 | } | |
2821 | ||
24b6c779 YY |
2822 | /* |
2823 | * The HiSilicon PCIe tune and trace device can be used to trace TLP headers | |
2824 | * on the PCIe link and save the data to memory by DMA. The hardware is | |
2825 | * restricted to using identity mapping only. | |
2826 | */ | |
2827 | #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \ | |
2828 | (pdev)->device == 0xa12e) | |
2829 | ||
2830 | static int arm_smmu_def_domain_type(struct device *dev) | |
2831 | { | |
2832 | if (dev_is_pci(dev)) { | |
2833 | struct pci_dev *pdev = to_pci_dev(dev); | |
2834 | ||
2835 | if (IS_HISI_PTT_DEVICE(pdev)) | |
2836 | return IOMMU_DOMAIN_IDENTITY; | |
2837 | } | |
2838 | ||
2839 | return 0; | |
2840 | } | |
2841 | ||
386fa64f LB |
2842 | static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) |
2843 | { | |
2844 | struct iommu_domain *domain; | |
2845 | ||
2846 | domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA); | |
2847 | if (WARN_ON(IS_ERR(domain)) || !domain) | |
2848 | return; | |
2849 | ||
2850 | arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); | |
2851 | } | |
2852 | ||
48ec83bc WD |
2853 | static struct iommu_ops arm_smmu_ops = { |
2854 | .capable = arm_smmu_capable, | |
2855 | .domain_alloc = arm_smmu_domain_alloc, | |
cefa0d55 JR |
2856 | .probe_device = arm_smmu_probe_device, |
2857 | .release_device = arm_smmu_release_device, | |
08d4ca2a | 2858 | .device_group = arm_smmu_device_group, |
8f785154 | 2859 | .of_xlate = arm_smmu_of_xlate, |
50019f09 | 2860 | .get_resv_regions = arm_smmu_get_resv_regions, |
386fa64f | 2861 | .remove_dev_pasid = arm_smmu_remove_dev_pasid, |
f534d98b JPB |
2862 | .dev_enable_feat = arm_smmu_dev_enable_feature, |
2863 | .dev_disable_feat = arm_smmu_dev_disable_feature, | |
395ad89d | 2864 | .page_response = arm_smmu_page_response, |
24b6c779 | 2865 | .def_domain_type = arm_smmu_def_domain_type, |
48ec83bc | 2866 | .pgsize_bitmap = -1UL, /* Restricted during device attach */ |
c0aec668 | 2867 | .owner = THIS_MODULE, |
9a630a4b LB |
2868 | .default_domain_ops = &(const struct iommu_domain_ops) { |
2869 | .attach_dev = arm_smmu_attach_dev, | |
2870 | .map_pages = arm_smmu_map_pages, | |
2871 | .unmap_pages = arm_smmu_unmap_pages, | |
2872 | .flush_iotlb_all = arm_smmu_flush_iotlb_all, | |
2873 | .iotlb_sync = arm_smmu_iotlb_sync, | |
2874 | .iova_to_phys = arm_smmu_iova_to_phys, | |
2875 | .enable_nesting = arm_smmu_enable_nesting, | |
2876 | .free = arm_smmu_domain_free, | |
2877 | } | |
48ec83bc WD |
2878 | }; |
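/*
 * arm_smmu_ops.pgsize_bitmap starts out as -1UL and is populated from
 * IDR5 in arm_smmu_device_hw_probe(); with multiple SMMU instances the
 * supported page sizes are OR-ed together, and the per-domain bitmap
 * is then restricted when the domain is finalised at attach time.
 */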
2879 | ||
2880 | /* Probing and initialisation functions */ | |
2881 | static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, | |
2882 | struct arm_smmu_queue *q, | |
86d2d921 | 2883 | void __iomem *page, |
48ec83bc WD |
2884 | unsigned long prod_off, |
2885 | unsigned long cons_off, | |
d25f6ead | 2886 | size_t dwords, const char *name) |
48ec83bc | 2887 | { |
d25f6ead WD |
2888 | size_t qsz; |
2889 | ||
2890 | do { | |
52be8637 | 2891 | qsz = ((1 << q->llq.max_n_shift) * dwords) << 3; |
d25f6ead WD |
2892 | q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, |
2893 | GFP_KERNEL); | |
2894 | if (q->base || qsz < PAGE_SIZE) | |
2895 | break; | |
2896 | ||
52be8637 | 2897 | q->llq.max_n_shift--; |
d25f6ead | 2898 | } while (1); |
48ec83bc | 2899 | |
48ec83bc | 2900 | if (!q->base) { |
d25f6ead WD |
2901 | dev_err(smmu->dev, |
2902 | "failed to allocate queue (0x%zx bytes) for %s\n", | |
2903 | qsz, name); | |
48ec83bc WD |
2904 | return -ENOMEM; |
2905 | } | |
2906 | ||
d25f6ead WD |
2907 | if (!WARN_ON(q->base_dma & (qsz - 1))) { |
2908 | dev_info(smmu->dev, "allocated %u entries for %s\n", | |
52be8637 | 2909 | 1 << q->llq.max_n_shift, name); |
d25f6ead WD |
2910 | } |
2911 | ||
86d2d921 RM |
2912 | q->prod_reg = page + prod_off; |
2913 | q->cons_reg = page + cons_off; | |
48ec83bc WD |
2914 | q->ent_dwords = dwords; |
2915 | ||
2916 | q->q_base = Q_BASE_RWA; | |
1cf9e54e | 2917 | q->q_base |= q->base_dma & Q_BASE_ADDR_MASK; |
52be8637 | 2918 | q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift); |
48ec83bc | 2919 | |
52be8637 | 2920 | q->llq.prod = q->llq.cons = 0; |
48ec83bc WD |
2921 | return 0; |
2922 | } | |
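/*
 * Queue sizing, worked through for the command queue as an example
 * (constants assumed from arm-smmu-v3.h): with max_n_shift = 8 and
 * CMDQ_ENT_DWORDS = 2, qsz = (1 << 8) * 2 * 8 bytes = 4KiB. If the
 * coherent allocation fails, the loop above halves the entry count
 * until the queue fits or drops below PAGE_SIZE, rather than failing
 * outright.
 */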
2923 | ||
587e6c10 WD |
2924 | static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) |
2925 | { | |
587e6c10 WD |
2926 | struct arm_smmu_cmdq *cmdq = &smmu->cmdq; |
2927 | unsigned int nents = 1 << cmdq->q.llq.max_n_shift; | |
587e6c10 WD |
2928 | |
2929 | atomic_set(&cmdq->owner_prod, 0); | |
2930 | atomic_set(&cmdq->lock, 0); | |
2931 | ||
fcdeb8c3 CJ |
2932 | cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents, |
2933 | GFP_KERNEL); | |
2934 | if (!cmdq->valid_map) | |
2935 | return -ENOMEM; | |
587e6c10 | 2936 | |
fcdeb8c3 | 2937 | return 0; |
587e6c10 WD |
2938 | } |
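/*
 * valid_map carries one bit per command-queue slot and underpins the
 * lock-free insertion scheme: producers flip their slots' bits once
 * their commands are written into queue memory, so the current queue
 * owner can poll for a fully-valid range before publishing it to the
 * hardware prod register.
 */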
2939 | ||
48ec83bc WD |
2940 | static int arm_smmu_init_queues(struct arm_smmu_device *smmu) |
2941 | { | |
2942 | int ret; | |
2943 | ||
2944 | /* cmdq */ | |
86d2d921 RM |
2945 | ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base, |
2946 | ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, | |
2947 | CMDQ_ENT_DWORDS, "cmdq"); | |
48ec83bc | 2948 | if (ret) |
04fa26c7 | 2949 | return ret; |
48ec83bc | 2950 | |
587e6c10 WD |
2951 | ret = arm_smmu_cmdq_init(smmu); |
2952 | if (ret) | |
2953 | return ret; | |
2954 | ||
48ec83bc | 2955 | /* evtq */ |
86d2d921 RM |
2956 | ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1, |
2957 | ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, | |
2958 | EVTQ_ENT_DWORDS, "evtq"); | |
48ec83bc | 2959 | if (ret) |
04fa26c7 | 2960 | return ret; |
48ec83bc | 2961 | |
395ad89d JPB |
2962 | if ((smmu->features & ARM_SMMU_FEAT_SVA) && |
2963 | (smmu->features & ARM_SMMU_FEAT_STALLS)) { | |
2964 | smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev)); | |
2965 | if (!smmu->evtq.iopf) | |
2966 | return -ENOMEM; | |
2967 | } | |
2968 | ||
48ec83bc WD |
2969 | /* priq */ |
2970 | if (!(smmu->features & ARM_SMMU_FEAT_PRI)) | |
2971 | return 0; | |
2972 | ||
86d2d921 RM |
2973 | return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1, |
2974 | ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS, | |
2975 | PRIQ_ENT_DWORDS, "priq"); | |
48ec83bc WD |
2976 | } |
2977 | ||
2978 | static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) | |
2979 | { | |
2980 | unsigned int i; | |
2981 | struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; | |
48ec83bc WD |
2982 | void *strtab = smmu->strtab_cfg.strtab; |
2983 | ||
98b64741 CJ |
2984 | cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, |
2985 | sizeof(*cfg->l1_desc), GFP_KERNEL); | |
affa9095 | 2986 | if (!cfg->l1_desc) |
48ec83bc | 2987 | return -ENOMEM; |
48ec83bc WD |
2988 | |
2989 | for (i = 0; i < cfg->num_l1_ents; ++i) { | |
2990 | arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); | |
2991 | strtab += STRTAB_L1_DESC_DWORDS << 3; | |
2992 | } | |
2993 | ||
2994 | return 0; | |
2995 | } | |
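/*
 * At this point every L1 descriptor is invalid (span == 0): the
 * second-level tables holding the actual STEs are allocated on demand
 * as stream IDs are registered via arm_smmu_init_sid_strtab().
 */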
2996 | ||
2997 | static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) | |
2998 | { | |
2999 | void *strtab; | |
3000 | u64 reg; | |
d2e88e7c | 3001 | u32 size, l1size; |
48ec83bc WD |
3002 | struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; |
3003 | ||
692c4e42 NW |
3004 | /* Calculate the L1 size, capped to the SIDSIZE. */ |
3005 | size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); | |
3006 | size = min(size, smmu->sid_bits - STRTAB_SPLIT); | |
d2e88e7c WD |
3007 | cfg->num_l1_ents = 1 << size; |
3008 | ||
3009 | size += STRTAB_SPLIT; | |
3010 | if (size < smmu->sid_bits) | |
48ec83bc WD |
3011 | dev_warn(smmu->dev, |
3012 | "2-level strtab only covers %u/%u bits of SID\n", | |
d2e88e7c | 3013 | size, smmu->sid_bits); |
48ec83bc | 3014 | |
d2e88e7c | 3015 | l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); |
04fa26c7 | 3016 | strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, |
9bb9069c | 3017 | GFP_KERNEL); |
48ec83bc WD |
3018 | if (!strtab) { |
3019 | dev_err(smmu->dev, | |
3020 | "failed to allocate l1 stream table (%u bytes)\n", | |
dc898eb8 | 3021 | l1size); |
48ec83bc WD |
3022 | return -ENOMEM; |
3023 | } | |
3024 | cfg->strtab = strtab; | |
3025 | ||
3026 | /* Configure strtab_base_cfg for 2 levels */ | |
cbcee19a RM |
3027 | reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); |
3028 | reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); | |
3029 | reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); | |
48ec83bc WD |
3030 | cfg->strtab_base_cfg = reg; |
3031 | ||
04fa26c7 | 3032 | return arm_smmu_init_l1_strtab(smmu); |
48ec83bc WD |
3033 | } |
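/*
 * Worked example (assuming STRTAB_L1_SZ_SHIFT = 20, STRTAB_SPLIT = 8
 * and 8-byte L1 descriptors, per arm-smmu-v3.h): the L1 table is
 * capped at 1 << (20 - 3) = 128K descriptors, i.e. 1MiB of DMA memory,
 * and each descriptor spans 1 << 8 = 256 STEs, so a full two-level
 * table covers up to 25 bits of SID.
 */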
3034 | ||
3035 | static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) | |
3036 | { | |
3037 | void *strtab; | |
3038 | u64 reg; | |
3039 | u32 size; | |
3040 | struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; | |
3041 | ||
3042 | size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); | |
04fa26c7 | 3043 | strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, |
9bb9069c | 3044 | GFP_KERNEL); |
48ec83bc WD |
3045 | if (!strtab) { |
3046 | dev_err(smmu->dev, | |
3047 | "failed to allocate linear stream table (%u bytes)\n", | |
3048 | size); | |
3049 | return -ENOMEM; | |
3050 | } | |
3051 | cfg->strtab = strtab; | |
3052 | cfg->num_l1_ents = 1 << smmu->sid_bits; | |
3053 | ||
3054 | /* Configure strtab_base_cfg for a linear table covering all SIDs */ | |
cbcee19a RM |
3055 | reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); |
3056 | reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); | |
48ec83bc WD |
3057 | cfg->strtab_base_cfg = reg; |
3058 | ||
6c998abb | 3059 | arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false); |
48ec83bc WD |
3060 | return 0; |
3061 | } | |
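/*
 * Sizing sketch for the linear format (STRTAB_STE_DWORDS = 8 assumed):
 * every possible SID gets a 64-byte STE up front, so 16 SID bits would
 * need a 4MiB contiguous table - hence the preference for the 2-level
 * layout whenever the hardware supports it and sid_bits > STRTAB_SPLIT.
 */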
3062 | ||
3063 | static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) | |
3064 | { | |
3065 | u64 reg; | |
3066 | int ret; | |
3067 | ||
3068 | if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) | |
3069 | ret = arm_smmu_init_strtab_2lvl(smmu); | |
3070 | else | |
3071 | ret = arm_smmu_init_strtab_linear(smmu); | |
3072 | ||
3073 | if (ret) | |
3074 | return ret; | |
3075 | ||
3076 | /* Set the strtab base address */ | |
1cf9e54e | 3077 | reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK; |
48ec83bc WD |
3078 | reg |= STRTAB_BASE_RA; |
3079 | smmu->strtab_cfg.strtab_base = reg; | |
3080 | ||
3081 | /* Allocate the first VMID for stage-2 bypass STEs */ | |
3082 | set_bit(0, smmu->vmid_map); | |
3083 | return 0; | |
3084 | } | |
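/*
 * Reserving bit 0 of vmid_map ensures VMID 0, which the stage-2 bypass
 * STEs are left with, is never handed out to a translation domain.
 */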
3085 | ||
48ec83bc WD |
3086 | static int arm_smmu_init_structures(struct arm_smmu_device *smmu) |
3087 | { | |
3088 | int ret; | |
3089 | ||
cdf315f9 JPB |
3090 | mutex_init(&smmu->streams_mutex); |
3091 | smmu->streams = RB_ROOT; | |
3092 | ||
48ec83bc WD |
3093 | ret = arm_smmu_init_queues(smmu); |
3094 | if (ret) | |
3095 | return ret; | |
3096 | ||
04fa26c7 | 3097 | return arm_smmu_init_strtab(smmu); |
48ec83bc WD |
3098 | } |
3099 | ||
3100 | static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, | |
3101 | unsigned int reg_off, unsigned int ack_off) | |
3102 | { | |
3103 | u32 reg; | |
3104 | ||
3105 | writel_relaxed(val, smmu->base + reg_off); | |
3106 | return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val, | |
3107 | 1, ARM_SMMU_POLL_TIMEOUT_US); | |
3108 | } | |
3109 | ||
dc87a98d RM |
3110 | /* GBPA is "special": fields can only be changed via the GBPA_UPDATE handshake */ |
3111 | static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) | |
3112 | { | |
3113 | int ret; | |
3114 | u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA; | |
3115 | ||
3116 | ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), | |
3117 | 1, ARM_SMMU_POLL_TIMEOUT_US); | |
3118 | if (ret) | |
3119 | return ret; | |
3120 | ||
3121 | reg &= ~clr; | |
3122 | reg |= set; | |
3123 | writel_relaxed(reg | GBPA_UPDATE, gbpa); | |
b63b3439 WD |
3124 | ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), |
3125 | 1, ARM_SMMU_POLL_TIMEOUT_US); | |
3126 | ||
3127 | if (ret) | |
3128 | dev_err(smmu->dev, "GBPA not responding to update\n"); | |
3129 | return ret; | |
dc87a98d RM |
3130 | } |
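/*
 * Both polls matter: the first ensures no earlier update is still in
 * flight before the register is modified, the second confirms the SMMU
 * accepted the new value. arm_smmu_device_reset() relies on this to
 * flip incoming traffic to GBPA_ABORT before reconfiguring the rest of
 * the device.
 */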
3131 | ||
166bdbd2 MZ |
3132 | static void arm_smmu_free_msis(void *data) |
3133 | { | |
3134 | struct device *dev = data; | |
3135 | platform_msi_domain_free_irqs(dev); | |
3136 | } | |
3137 | ||
3138 | static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) | |
3139 | { | |
3140 | phys_addr_t doorbell; | |
3141 | struct device *dev = msi_desc_to_dev(desc); | |
3142 | struct arm_smmu_device *smmu = dev_get_drvdata(dev); | |
dba27c7f | 3143 | phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index]; |
166bdbd2 MZ |
3144 | |
3145 | doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; | |
1cf9e54e | 3146 | doorbell &= MSI_CFG0_ADDR_MASK; |
166bdbd2 MZ |
3147 | |
3148 | writeq_relaxed(doorbell, smmu->base + cfg[0]); | |
3149 | writel_relaxed(msg->data, smmu->base + cfg[1]); | |
cbcee19a | 3150 | writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); |
166bdbd2 MZ |
3151 | } |
3152 | ||
3153 | static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) | |
3154 | { | |
166bdbd2 MZ |
3155 | int ret, nvec = ARM_SMMU_MAX_MSIS; |
3156 | struct device *dev = smmu->dev; | |
3157 | ||
3158 | /* Clear the MSI address regs */ | |
3159 | writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); | |
3160 | writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); | |
3161 | ||
3162 | if (smmu->features & ARM_SMMU_FEAT_PRI) | |
3163 | writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); | |
3164 | else | |
3165 | nvec--; | |
3166 | ||
3167 | if (!(smmu->features & ARM_SMMU_FEAT_MSI)) | |
3168 | return; | |
3169 | ||
34fff628 | 3170 | if (!dev->msi.domain) { |
940ded9c NW |
3171 | dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n"); |
3172 | return; | |
3173 | } | |
3174 | ||
166bdbd2 MZ |
3175 | /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ |
3176 | ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); | |
3177 | if (ret) { | |
940ded9c | 3178 | dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n"); |
166bdbd2 MZ |
3179 | return; |
3180 | } | |
3181 | ||
065afdc9 TG |
3182 | smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX); |
3183 | smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX); | |
3184 | smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX); | |
166bdbd2 MZ |
3185 | |
3186 | /* Add callback to free MSIs on teardown */ | |
3187 | devm_add_action(dev, arm_smmu_free_msis, dev); | |
3188 | } | |
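/*
 * Once arm_smmu_write_msi_msg() has programmed the IRQ_CFG registers,
 * the SMMU signals evtq/gerror/priq completions by writing the MSI
 * payload to the configured doorbell address itself; if no MSI domain
 * is available, the driver falls back to the wired interrupts
 * requested below.
 */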
3189 | ||
f935448a | 3190 | static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) |
48ec83bc | 3191 | { |
f935448a | 3192 | int irq, ret; |
48ec83bc | 3193 | |
166bdbd2 | 3194 | arm_smmu_setup_msis(smmu); |
48ec83bc | 3195 | |
166bdbd2 | 3196 | /* Request interrupt lines */ |
48ec83bc WD |
3197 | irq = smmu->evtq.q.irq; |
3198 | if (irq) { | |
b4163fb3 | 3199 | ret = devm_request_threaded_irq(smmu->dev, irq, NULL, |
48ec83bc | 3200 | arm_smmu_evtq_thread, |
b4163fb3 JPB |
3201 | IRQF_ONESHOT, |
3202 | "arm-smmu-v3-evtq", smmu); | |
287980e4 | 3203 | if (ret < 0) |
48ec83bc | 3204 | dev_warn(smmu->dev, "failed to enable evtq irq\n"); |
4c8996d7 RM |
3205 | } else { |
3206 | dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n"); | |
48ec83bc WD |
3207 | } |
3208 | ||
48ec83bc WD |
3209 | irq = smmu->gerr_irq; |
3210 | if (irq) { | |
3211 | ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, | |
3212 | 0, "arm-smmu-v3-gerror", smmu); | |
287980e4 | 3213 | if (ret < 0) |
48ec83bc | 3214 | dev_warn(smmu->dev, "failed to enable gerror irq\n"); |
4c8996d7 RM |
3215 | } else { |
3216 | dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n"); | |
48ec83bc WD |
3217 | } |
3218 | ||
3219 | if (smmu->features & ARM_SMMU_FEAT_PRI) { | |
48ec83bc WD |
3220 | irq = smmu->priq.q.irq; |
3221 | if (irq) { | |
b4163fb3 | 3222 | ret = devm_request_threaded_irq(smmu->dev, irq, NULL, |
48ec83bc | 3223 | arm_smmu_priq_thread, |
b4163fb3 JPB |
3224 | IRQF_ONESHOT, |
3225 | "arm-smmu-v3-priq", | |
48ec83bc | 3226 | smmu); |
287980e4 | 3227 | if (ret < 0) |
48ec83bc WD |
3228 | dev_warn(smmu->dev, |
3229 | "failed to enable priq irq\n"); | |
4c8996d7 RM |
3230 | } else { |
3231 | dev_warn(smmu->dev, "no priq irq - PRI will be broken\n"); | |
48ec83bc WD |
3232 | } |
3233 | } | |
f935448a GS |
3234 | } |
3235 | ||
3236 | static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) | |
3237 | { | |
3238 | int ret, irq; | |
3239 | u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; | |
3240 | ||
3241 | /* Disable IRQs first */ | |
3242 | ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, | |
3243 | ARM_SMMU_IRQ_CTRLACK); | |
3244 | if (ret) { | |
3245 | dev_err(smmu->dev, "failed to disable irqs\n"); | |
3246 | return ret; | |
3247 | } | |
3248 | ||
3249 | irq = smmu->combined_irq; | |
3250 | if (irq) { | |
3251 | /* | |
657135f3 JG |
3252 | * Cavium ThunderX2 implementation doesn't support unique irq |
3253 | * lines. Use a single irq line for all the SMMUv3 interrupts. | |
f935448a GS |
3254 | */ |
3255 | ret = devm_request_threaded_irq(smmu->dev, irq, | |
3256 | arm_smmu_combined_irq_handler, | |
3257 | arm_smmu_combined_irq_thread, | |
3258 | IRQF_ONESHOT, | |
3259 | "arm-smmu-v3-combined-irq", smmu); | |
3260 | if (ret < 0) | |
3261 | dev_warn(smmu->dev, "failed to enable combined irq\n"); | |
3262 | } else | |
3263 | arm_smmu_setup_unique_irqs(smmu); | |
3264 | ||
3265 | if (smmu->features & ARM_SMMU_FEAT_PRI) | |
3266 | irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; | |
48ec83bc WD |
3267 | |
3268 | /* Enable interrupt generation on the SMMU */ | |
ccd6385d | 3269 | ret = arm_smmu_write_reg_sync(smmu, irqen_flags, |
48ec83bc WD |
3270 | ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); |
3271 | if (ret) | |
3272 | dev_warn(smmu->dev, "failed to enable irqs\n"); | |
3273 | ||
3274 | return 0; | |
3275 | } | |
3276 | ||
3277 | static int arm_smmu_device_disable(struct arm_smmu_device *smmu) | |
3278 | { | |
3279 | int ret; | |
3280 | ||
3281 | ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); | |
3282 | if (ret) | |
3283 | dev_err(smmu->dev, "failed to clear cr0\n"); | |
3284 | ||
3285 | return ret; | |
3286 | } | |
3287 | ||
dc87a98d | 3288 | static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) |
48ec83bc WD |
3289 | { |
3290 | int ret; | |
3291 | u32 reg, enables; | |
3292 | struct arm_smmu_cmdq_ent cmd; | |
3293 | ||
3294 | /* Clear CR0 and sync (disables SMMU and queue processing) */ | |
3295 | reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); | |
b63b3439 | 3296 | if (reg & CR0_SMMUEN) { |
48ec83bc | 3297 | dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n"); |
3f54c447 WD |
3298 | WARN_ON(is_kdump_kernel() && !disable_bypass); |
3299 | arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); | |
b63b3439 | 3300 | } |
48ec83bc WD |
3301 | |
3302 | ret = arm_smmu_device_disable(smmu); | |
3303 | if (ret) | |
3304 | return ret; | |
3305 | ||
3306 | /* CR1 (table and queue memory attributes) */ | |
cbcee19a RM |
3307 | reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) | |
3308 | FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) | | |
3309 | FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) | | |
3310 | FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) | | |
3311 | FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) | | |
3312 | FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB); | |
48ec83bc WD |
3313 | writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); |
3314 | ||
3315 | /* CR2 (random crap: private TLB maintenance, invalid SID recording, E2H) */ |
9111aebf JPB |
3316 | reg = CR2_PTM | CR2_RECINVSID; |
3317 | ||
3318 | if (smmu->features & ARM_SMMU_FEAT_E2H) | |
3319 | reg |= CR2_E2H; | |
3320 | ||
48ec83bc WD |
3321 | writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); |
3322 | ||
3323 | /* Stream table */ | |
3324 | writeq_relaxed(smmu->strtab_cfg.strtab_base, | |
3325 | smmu->base + ARM_SMMU_STRTAB_BASE); | |
3326 | writel_relaxed(smmu->strtab_cfg.strtab_base_cfg, | |
3327 | smmu->base + ARM_SMMU_STRTAB_BASE_CFG); | |
3328 | ||
3329 | /* Command queue */ | |
3330 | writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); | |
52be8637 WD |
3331 | writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); |
3332 | writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS); | |
48ec83bc WD |
3333 | |
3334 | enables = CR0_CMDQEN; | |
3335 | ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, | |
3336 | ARM_SMMU_CR0ACK); | |
3337 | if (ret) { | |
3338 | dev_err(smmu->dev, "failed to enable command queue\n"); | |
3339 | return ret; | |
3340 | } | |
3341 | ||
3342 | /* Invalidate any cached configuration */ | |
3343 | cmd.opcode = CMDQ_OP_CFGI_ALL; | |
4537f6f1 | 3344 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
48ec83bc WD |
3345 | |
3346 | /* Invalidate any stale TLB entries */ | |
3347 | if (smmu->features & ARM_SMMU_FEAT_HYP) { | |
3348 | cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; | |
4537f6f1 | 3349 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
48ec83bc WD |
3350 | } |
3351 | ||
3352 | cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; | |
4537f6f1 | 3353 | arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); |
48ec83bc WD |
3354 | |
3355 | /* Event queue */ | |
3356 | writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); | |
86d2d921 RM |
3357 | writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD); |
3358 | writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS); | |
48ec83bc WD |
3359 | |
3360 | enables |= CR0_EVTQEN; | |
3361 | ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, | |
3362 | ARM_SMMU_CR0ACK); | |
3363 | if (ret) { | |
3364 | dev_err(smmu->dev, "failed to enable event queue\n"); | |
3365 | return ret; | |
3366 | } | |
3367 | ||
3368 | /* PRI queue */ | |
3369 | if (smmu->features & ARM_SMMU_FEAT_PRI) { | |
3370 | writeq_relaxed(smmu->priq.q.q_base, | |
3371 | smmu->base + ARM_SMMU_PRIQ_BASE); | |
52be8637 | 3372 | writel_relaxed(smmu->priq.q.llq.prod, |
86d2d921 | 3373 | smmu->page1 + ARM_SMMU_PRIQ_PROD); |
52be8637 | 3374 | writel_relaxed(smmu->priq.q.llq.cons, |
86d2d921 | 3375 | smmu->page1 + ARM_SMMU_PRIQ_CONS); |
48ec83bc WD |
3376 | |
3377 | enables |= CR0_PRIQEN; | |
3378 | ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, | |
3379 | ARM_SMMU_CR0ACK); | |
3380 | if (ret) { | |
3381 | dev_err(smmu->dev, "failed to enable PRI queue\n"); | |
3382 | return ret; | |
3383 | } | |
3384 | } | |
3385 | ||
9ce27afc JPB |
3386 | if (smmu->features & ARM_SMMU_FEAT_ATS) { |
3387 | enables |= CR0_ATSCHK; | |
3388 | ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, | |
3389 | ARM_SMMU_CR0ACK); | |
3390 | if (ret) { | |
3391 | dev_err(smmu->dev, "failed to enable ATS check\n"); | |
3392 | return ret; | |
3393 | } | |
3394 | } | |
3395 | ||
48ec83bc WD |
3396 | ret = arm_smmu_setup_irqs(smmu); |
3397 | if (ret) { | |
3398 | dev_err(smmu->dev, "failed to setup irqs\n"); | |
3399 | return ret; | |
3400 | } | |
3401 | ||
3f54c447 WD |
3402 | if (is_kdump_kernel()) |
3403 | enables &= ~(CR0_EVTQEN | CR0_PRIQEN); | |
dc87a98d RM |
3404 | |
3405 | /* Enable the SMMU interface, or ensure bypass */ | |
3406 | if (!bypass || disable_bypass) { | |
3407 | enables |= CR0_SMMUEN; | |
3408 | } else { | |
3409 | ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); | |
b63b3439 | 3410 | if (ret) |
dc87a98d | 3411 | return ret; |
dc87a98d | 3412 | } |
48ec83bc WD |
3413 | ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, |
3414 | ARM_SMMU_CR0ACK); | |
3415 | if (ret) { | |
3416 | dev_err(smmu->dev, "failed to enable SMMU interface\n"); | |
3417 | return ret; | |
3418 | } | |
3419 | ||
3420 | return 0; | |
3421 | } | |
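/*
 * Note the strict bring-up order above: the command queue is enabled
 * first so the invalidation commands can be issued, the event/PRI
 * queues and the ATS check follow, and CR0.SMMUEN is only set once
 * interrupts are live, so that faults raised at enable time can be
 * delivered.
 */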
3422 | ||
2985b521 | 3423 | static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) |
48ec83bc WD |
3424 | { |
3425 | u32 reg; | |
2985b521 | 3426 | bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY; |
48ec83bc WD |
3427 | |
3428 | /* IDR0 */ | |
3429 | reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0); | |
3430 | ||
3431 | /* 2-level structures */ | |
cbcee19a | 3432 | if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL) |
48ec83bc WD |
3433 | smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB; |
3434 | ||
3435 | if (reg & IDR0_CD2L) | |
3436 | smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB; | |
3437 | ||
3438 | /* | |
3439 | * Translation table endianness. | |
3440 | * We currently require the same endianness as the CPU, but this | |
3441 | * could be changed later by adding a new IO_PGTABLE_QUIRK. | |
3442 | */ | |
cbcee19a | 3443 | switch (FIELD_GET(IDR0_TTENDIAN, reg)) { |
48ec83bc WD |
3444 | case IDR0_TTENDIAN_MIXED: |
3445 | smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE; | |
3446 | break; | |
3447 | #ifdef __BIG_ENDIAN | |
3448 | case IDR0_TTENDIAN_BE: | |
3449 | smmu->features |= ARM_SMMU_FEAT_TT_BE; | |
3450 | break; | |
3451 | #else | |
3452 | case IDR0_TTENDIAN_LE: | |
3453 | smmu->features |= ARM_SMMU_FEAT_TT_LE; | |
3454 | break; | |
3455 | #endif | |
3456 | default: | |
3457 | dev_err(smmu->dev, "unknown/unsupported TT endianness!\n"); | |
3458 | return -ENXIO; | |
3459 | } | |
3460 | ||
3461 | /* Boolean feature flags */ | |
3462 | if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI) | |
3463 | smmu->features |= ARM_SMMU_FEAT_PRI; | |
3464 | ||
3465 | if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS) | |
3466 | smmu->features |= ARM_SMMU_FEAT_ATS; | |
3467 | ||
3468 | if (reg & IDR0_SEV) | |
3469 | smmu->features |= ARM_SMMU_FEAT_SEV; | |
3470 | ||
bd07a20a | 3471 | if (reg & IDR0_MSI) { |
48ec83bc | 3472 | smmu->features |= ARM_SMMU_FEAT_MSI; |
bd07a20a BS |
3473 | if (coherent && !disable_msipolling) |
3474 | smmu->options |= ARM_SMMU_OPT_MSIPOLL; | |
3475 | } | |
48ec83bc | 3476 | |
9111aebf | 3477 | if (reg & IDR0_HYP) { |
48ec83bc | 3478 | smmu->features |= ARM_SMMU_FEAT_HYP; |
9111aebf JPB |
3479 | if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) |
3480 | smmu->features |= ARM_SMMU_FEAT_E2H; | |
3481 | } | |
48ec83bc WD |
3482 | |
3483 | /* | |
2985b521 | 3484 | * The coherency feature as set by FW is used in preference to the ID |
48ec83bc WD |
3485 | * register, but warn on mismatch. |
3486 | */ | |
48ec83bc | 3487 | if (!!(reg & IDR0_COHACC) != coherent) |
2a22baa2 | 3488 | dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", |
48ec83bc WD |
3489 | coherent ? "true" : "false"); |
3490 | ||
cbcee19a | 3491 | switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { |
6380be05 | 3492 | case IDR0_STALL_MODEL_FORCE: |
9cff86fd | 3493 | smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; |
df561f66 | 3494 | fallthrough; |
9cff86fd | 3495 | case IDR0_STALL_MODEL_STALL: |
48ec83bc | 3496 | smmu->features |= ARM_SMMU_FEAT_STALLS; |
6380be05 | 3497 | } |
48ec83bc WD |
3498 | |
3499 | if (reg & IDR0_S1P) | |
3500 | smmu->features |= ARM_SMMU_FEAT_TRANS_S1; | |
3501 | ||
3502 | if (reg & IDR0_S2P) | |
3503 | smmu->features |= ARM_SMMU_FEAT_TRANS_S2; | |
3504 | ||
3505 | if (!(reg & (IDR0_S1P | IDR0_S2P))) { | |
3506 | dev_err(smmu->dev, "no translation support!\n"); | |
3507 | return -ENXIO; | |
3508 | } | |
3509 | ||
3510 | /* We only support the AArch64 table format at present */ | |
cbcee19a | 3511 | switch (FIELD_GET(IDR0_TTF, reg)) { |
f0c453db WD |
3512 | case IDR0_TTF_AARCH32_64: |
3513 | smmu->ias = 40; | |
df561f66 | 3514 | fallthrough; |
f0c453db WD |
3515 | case IDR0_TTF_AARCH64: |
3516 | break; | |
3517 | default: | |
48ec83bc WD |
3518 | dev_err(smmu->dev, "AArch64 table format not supported!\n"); |
3519 | return -ENXIO; | |
3520 | } | |
3521 | ||
3522 | /* ASID/VMID sizes */ | |
3523 | smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8; | |
3524 | smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8; | |
3525 | ||
3526 | /* IDR1 */ | |
3527 | reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1); | |
3528 | if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { | |
3529 | dev_err(smmu->dev, "embedded implementation not supported\n"); | |
3530 | return -ENXIO; | |
3531 | } | |
3532 | ||
d25f6ead | 3533 | /* Queue sizes, capped to ensure natural alignment */ |
52be8637 WD |
3534 | smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, |
3535 | FIELD_GET(IDR1_CMDQS, reg)); | |
2af2e72b | 3536 | if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) { |
587e6c10 | 3537 | /* |
2af2e72b WD |
3538 | * We don't support splitting up batches, so one batch of |
3539 | * commands plus an extra sync needs to fit inside the command | |
3540 | * queue. There's also no way we can handle the weird alignment | |
3541 | * restrictions on the base pointer for a unit-length queue. | |
587e6c10 | 3542 | */ |
2af2e72b WD |
3543 | dev_err(smmu->dev, "command queue size <= %d entries not supported\n", |
3544 | CMDQ_BATCH_ENTRIES); | |
48ec83bc WD |
3545 | return -ENXIO; |
3546 | } | |
3547 | ||
52be8637 WD |
3548 | smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT, |
3549 | FIELD_GET(IDR1_EVTQS, reg)); | |
3550 | smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT, | |
3551 | FIELD_GET(IDR1_PRIQS, reg)); | |
48ec83bc WD |
3552 | |
3553 | /* SID/SSID sizes */ | |
cbcee19a RM |
3554 | smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); |
3555 | smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); | |
1adf3cc2 | 3556 | smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; |
48ec83bc | 3557 | |
692c4e42 NW |
3558 | /* |
3559 | * If the SMMU supports fewer bits than would fill a single L2 stream | |
3560 | * table, use a linear table instead. | |
3561 | */ | |
3562 | if (smmu->sid_bits <= STRTAB_SPLIT) | |
3563 | smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB; | |
3564 | ||
6a481a95 RH |
3565 | /* IDR3 */ |
3566 | reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); | |
3567 | if (FIELD_GET(IDR3_RIL, reg)) | |
3568 | smmu->features |= ARM_SMMU_FEAT_RANGE_INV; | |
3569 | ||
48ec83bc WD |
3570 | /* IDR5 */ |
3571 | reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); | |
3572 | ||
3573 | /* Maximum number of outstanding stalls */ | |
cbcee19a | 3574 | smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg); |
48ec83bc WD |
3575 | |
3576 | /* Page sizes */ | |
3577 | if (reg & IDR5_GRAN64K) | |
d5466357 | 3578 | smmu->pgsize_bitmap |= SZ_64K | SZ_512M; |
48ec83bc | 3579 | if (reg & IDR5_GRAN16K) |
d5466357 | 3580 | smmu->pgsize_bitmap |= SZ_16K | SZ_32M; |
48ec83bc | 3581 | if (reg & IDR5_GRAN4K) |
d5466357 | 3582 | smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; |
48ec83bc | 3583 | |
dcd189e6 RM |
3584 | /* Input address size */ |
3585 | if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT) | |
3586 | smmu->features |= ARM_SMMU_FEAT_VAX; | |
3587 | ||
48ec83bc | 3588 | /* Output address size */ |
cbcee19a | 3589 | switch (FIELD_GET(IDR5_OAS, reg)) { |
48ec83bc WD |
3590 | case IDR5_OAS_32_BIT: |
3591 | smmu->oas = 32; | |
3592 | break; | |
3593 | case IDR5_OAS_36_BIT: | |
3594 | smmu->oas = 36; | |
3595 | break; | |
3596 | case IDR5_OAS_40_BIT: | |
3597 | smmu->oas = 40; | |
3598 | break; | |
3599 | case IDR5_OAS_42_BIT: | |
3600 | smmu->oas = 42; | |
3601 | break; | |
3602 | case IDR5_OAS_44_BIT: | |
3603 | smmu->oas = 44; | |
3604 | break; | |
6619c913 RM |
3605 | case IDR5_OAS_52_BIT: |
3606 | smmu->oas = 52; | |
3607 | smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */ | |
3608 | break; | |
85430968 WD |
3609 | default: |
3610 | dev_info(smmu->dev, | |
3611 | "unknown output address size. Truncating to 48-bit\n"); | |
df561f66 | 3612 | fallthrough; |
48ec83bc WD |
3613 | case IDR5_OAS_48_BIT: |
3614 | smmu->oas = 48; | |
48ec83bc WD |
3615 | } |
3616 | ||
6619c913 RM |
3617 | if (arm_smmu_ops.pgsize_bitmap == -1UL) |
3618 | arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; | |
3619 | else | |
3620 | arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; | |
3621 | ||
48ec83bc WD |
3622 | /* Set the DMA mask for our table walker */ |
3623 | if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) | |
3624 | dev_warn(smmu->dev, | |
3625 | "failed to set DMA mask for table walker\n"); | |
3626 | ||
f0c453db | 3627 | smmu->ias = max(smmu->ias, smmu->oas); |
48ec83bc | 3628 | |
d744f9e6 JPB |
3629 | if (arm_smmu_sva_supported(smmu)) |
3630 | smmu->features |= ARM_SMMU_FEAT_SVA; | |
3631 | ||
48ec83bc WD |
3632 | dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", |
3633 | smmu->ias, smmu->oas, smmu->features); | |
3634 | return 0; | |
3635 | } | |
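/*
 * As a rough example, a 4K-granule, 48-bit OAS implementation ends up
 * here with ias = oas = 48 and 4K/2M/1G in pgsize_bitmap - the values
 * the io-pgtable code later uses to size the page tables.
 */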
3636 | ||
e4dadfa8 | 3637 | #ifdef CONFIG_ACPI |
e5b829de LC |
3638 | static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) |
3639 | { | |
99caf177 | 3640 | switch (model) { |
3641 | case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: | |
e5b829de | 3642 | smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; |
99caf177 | 3643 | break; |
6948d4a7 | 3644 | case ACPI_IORT_SMMU_V3_HISILICON_HI161X: |
99caf177 | 3645 | smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; |
3646 | break; | |
3647 | } | |
e5b829de LC |
3648 | |
3649 | dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); | |
3650 | } | |
3651 | ||
e4dadfa8 LP |
3652 | static int arm_smmu_device_acpi_probe(struct platform_device *pdev, |
3653 | struct arm_smmu_device *smmu) | |
3654 | { | |
3655 | struct acpi_iort_smmu_v3 *iort_smmu; | |
3656 | struct device *dev = smmu->dev; | |
3657 | struct acpi_iort_node *node; | |
3658 | ||
3659 | node = *(struct acpi_iort_node **)dev_get_platdata(dev); | |
3660 | ||
3661 | /* Retrieve SMMUv3 specific data */ | |
3662 | iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; | |
3663 | ||
e5b829de LC |
3664 | acpi_smmu_get_options(iort_smmu->model, smmu); |
3665 | ||
e4dadfa8 LP |
3666 | if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) |
3667 | smmu->features |= ARM_SMMU_FEAT_COHERENCY; | |
3668 | ||
3669 | return 0; | |
3670 | } | |
3671 | #else | |
3672 | static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, | |
3673 | struct arm_smmu_device *smmu) | |
3674 | { | |
3675 | return -ENODEV; | |
3676 | } | |
3677 | #endif | |
3678 | ||
2985b521 LP |
3679 | static int arm_smmu_device_dt_probe(struct platform_device *pdev, |
3680 | struct arm_smmu_device *smmu) | |
48ec83bc | 3681 | { |
48ec83bc | 3682 | struct device *dev = &pdev->dev; |
dc87a98d | 3683 | u32 cells; |
2985b521 | 3684 | int ret = -EINVAL; |
dc87a98d RM |
3685 | |
3686 | if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) | |
3687 | dev_err(dev, "missing #iommu-cells property\n"); | |
3688 | else if (cells != 1) | |
3689 | dev_err(dev, "invalid #iommu-cells value (%d)\n", cells); | |
3690 | else | |
2985b521 LP |
3691 | ret = 0; |
3692 | ||
3693 | parse_driver_options(smmu); | |
3694 | ||
3695 | if (of_dma_is_coherent(dev->of_node)) | |
3696 | smmu->features |= ARM_SMMU_FEAT_COHERENCY; | |
3697 | ||
3698 | return ret; | |
3699 | } | |
3700 | ||
e5b829de LC |
3701 | static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) |
3702 | { | |
3703 | if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) | |
3704 | return SZ_64K; | |
3705 | else | |
3706 | return SZ_128K; | |
3707 | } | |
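/*
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY (the Cavium CN99xx erratum) collapses
 * the register map to the first 64K page; page-1 offsets are then
 * served from page 0, as arranged in arm_smmu_device_probe() below.
 */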
3708 | ||
52f3fab0 JPB |
3709 | static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, |
3710 | resource_size_t size) | |
3711 | { | |
932bc8c7 | 3712 | struct resource res = DEFINE_RES_MEM(start, size); |
52f3fab0 JPB |
3713 | |
3714 | return devm_ioremap_resource(dev, &res); | |
3715 | } | |
3716 | ||
9bdbdaa3 SK |
3717 | static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) |
3718 | { | |
3719 | struct list_head rmr_list; | |
3720 | struct iommu_resv_region *e; | |
3721 | ||
3722 | INIT_LIST_HEAD(&rmr_list); | |
3723 | iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); | |
3724 | ||
3725 | list_for_each_entry(e, &rmr_list, list) { | |
3726 | __le64 *step; | |
3727 | struct iommu_iort_rmr_data *rmr; | |
3728 | int ret, i; | |
3729 | ||
3730 | rmr = container_of(e, struct iommu_iort_rmr_data, rr); | |
3731 | for (i = 0; i < rmr->num_sids; i++) { | |
3732 | ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]); | |
3733 | if (ret) { | |
3734 | dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n", | |
3735 | rmr->sids[i]); | |
3736 | continue; | |
3737 | } | |
3738 | ||
3739 | step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]); | |
3740 | arm_smmu_init_bypass_stes(step, 1, true); | |
3741 | } | |
3742 | } | |
3743 | ||
3744 | iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); | |
3745 | } | |
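/*
 * RMRs (reserved memory regions described by IORT) cover devices whose
 * DMA is already live at boot, such as engines set up by firmware; the
 * bypass STEs installed above keep that traffic flowing until a driver
 * attaches the device to a real domain.
 */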
3746 | ||
2985b521 LP |
3747 | static int arm_smmu_device_probe(struct platform_device *pdev) |
3748 | { | |
3749 | int irq, ret; | |
3750 | struct resource *res; | |
9648cbc9 | 3751 | resource_size_t ioaddr; |
2985b521 LP |
3752 | struct arm_smmu_device *smmu; |
3753 | struct device *dev = &pdev->dev; | |
3754 | bool bypass; | |
48ec83bc WD |
3755 | |
3756 | smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); | |
affa9095 | 3757 | if (!smmu) |
48ec83bc | 3758 | return -ENOMEM; |
48ec83bc WD |
3759 | smmu->dev = dev; |
3760 | ||
e5b829de LC |
3761 | if (dev->of_node) { |
3762 | ret = arm_smmu_device_dt_probe(pdev, smmu); | |
3763 | } else { | |
3764 | ret = arm_smmu_device_acpi_probe(pdev, smmu); | |
3765 | if (ret == -ENODEV) | |
3766 | return ret; | |
3767 | } | |
3768 | ||
3769 | /* Set bypass mode according to firmware probing result */ | |
3770 | bypass = !!ret; | |
3771 | ||
48ec83bc WD |
3772 | /* Base address */ |
3773 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | |
b131fa8c YY |
3774 | if (!res) |
3775 | return -EINVAL; | |
322a9bbb | 3776 | if (resource_size(res) < arm_smmu_resource_size(smmu)) { |
48ec83bc WD |
3777 | dev_err(dev, "MMIO region too small (%pr)\n", res); |
3778 | return -EINVAL; | |
3779 | } | |
9648cbc9 | 3780 | ioaddr = res->start; |
48ec83bc | 3781 | |
52f3fab0 JPB |
3782 | /* |
3783 | * Don't map the IMPLEMENTATION DEFINED regions, since they may contain | |
3784 | * the PMCG registers which are reserved by the PMU driver. | |
3785 | */ | |
3786 | smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ); | |
48ec83bc WD |
3787 | if (IS_ERR(smmu->base)) |
3788 | return PTR_ERR(smmu->base); | |
3789 | ||
52f3fab0 JPB |
3790 | if (arm_smmu_resource_size(smmu) > SZ_64K) { |
3791 | smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K, | |
3792 | ARM_SMMU_REG_SZ); | |
3793 | if (IS_ERR(smmu->page1)) | |
3794 | return PTR_ERR(smmu->page1); | |
3795 | } else { | |
3796 | smmu->page1 = smmu->base; | |
3797 | } | |
3798 | ||
48ec83bc | 3799 | /* Interrupt lines */ |
48ec83bc | 3800 | |
f7aff1a9 | 3801 | irq = platform_get_irq_byname_optional(pdev, "combined"); |
48ec83bc | 3802 | if (irq > 0) |
f935448a GS |
3803 | smmu->combined_irq = irq; |
3804 | else { | |
f7aff1a9 | 3805 | irq = platform_get_irq_byname_optional(pdev, "eventq"); |
f935448a GS |
3806 | if (irq > 0) |
3807 | smmu->evtq.q.irq = irq; | |
48ec83bc | 3808 | |
f7aff1a9 | 3809 | irq = platform_get_irq_byname_optional(pdev, "priq"); |
f935448a GS |
3810 | if (irq > 0) |
3811 | smmu->priq.q.irq = irq; | |
48ec83bc | 3812 | |
f7aff1a9 | 3813 | irq = platform_get_irq_byname_optional(pdev, "gerror"); |
f935448a GS |
3814 | if (irq > 0) |
3815 | smmu->gerr_irq = irq; | |
3816 | } | |
48ec83bc | 3817 | /* Probe the h/w */ |
2985b521 | 3818 | ret = arm_smmu_device_hw_probe(smmu); |
48ec83bc WD |
3819 | if (ret) |
3820 | return ret; | |
3821 | ||
3822 | /* Initialise in-memory data structures */ | |
3823 | ret = arm_smmu_init_structures(smmu); | |
3824 | if (ret) | |
3825 | return ret; | |
3826 | ||
166bdbd2 MZ |
3827 | /* Record our private device structure */ |
3828 | platform_set_drvdata(pdev, smmu); | |
3829 | ||
9bdbdaa3 SK |
3830 | /* Check for RMRs and install bypass STEs if any */ |
3831 | arm_smmu_rmr_install_bypass_ste(smmu); | |
3832 | ||
48ec83bc | 3833 | /* Reset the device */ |
8f785154 RM |
3834 | ret = arm_smmu_device_reset(smmu, bypass); |
3835 | if (ret) | |
3836 | return ret; | |
3837 | ||
3838 | /* And we're up. Go go go! */ | |
9648cbc9 JR |
3839 | ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, |
3840 | "smmu3.%pa", &ioaddr); | |
08d4ca2a RM |
3841 | if (ret) |
3842 | return ret; | |
9648cbc9 | 3843 | |
2d471b20 | 3844 | ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); |
5c2d0218 AY |
3845 | if (ret) { |
3846 | dev_err(dev, "Failed to register iommu\n"); | |
2efbd29b RM |
3847 | iommu_device_sysfs_remove(&smmu->iommu); |
3848 | return ret; | |
5c2d0218 | 3849 | } |
778de074 | 3850 | |
249c9dc6 | 3851 | return 0; |
48ec83bc WD |
3852 | } |
3853 | ||
6e8fa740 | 3854 | static int arm_smmu_device_remove(struct platform_device *pdev) |
48ec83bc | 3855 | { |
941a802d | 3856 | struct arm_smmu_device *smmu = platform_get_drvdata(pdev); |
48ec83bc | 3857 | |
ab246774 WD |
3858 | iommu_device_unregister(&smmu->iommu); |
3859 | iommu_device_sysfs_remove(&smmu->iommu); | |
48ec83bc | 3860 | arm_smmu_device_disable(smmu); |
395ad89d | 3861 | iopf_queue_free(smmu->evtq.iopf); |
6e8fa740 WD |
3862 | |
3863 | return 0; | |
3864 | } | |
3865 | ||
3866 | static void arm_smmu_device_shutdown(struct platform_device *pdev) | |
3867 | { | |
32ea2c57 VO |
3868 | struct arm_smmu_device *smmu = platform_get_drvdata(pdev); |
3869 | ||
3870 | arm_smmu_device_disable(smmu); | |
7aa8619a NW |
3871 | } |
3872 | ||
ebdd13c9 | 3873 | static const struct of_device_id arm_smmu_of_match[] = { |
48ec83bc WD |
3874 | { .compatible = "arm,smmu-v3", }, |
3875 | { }, | |
3876 | }; | |
6e8fa740 | 3877 | MODULE_DEVICE_TABLE(of, arm_smmu_of_match); |
48ec83bc | 3878 | |
32784a95 JPB |
3879 | static void arm_smmu_driver_unregister(struct platform_driver *drv) |
3880 | { | |
3881 | arm_smmu_sva_notifier_synchronize(); | |
3882 | platform_driver_unregister(drv); | |
3883 | } | |
3884 | ||
48ec83bc WD |
3885 | static struct platform_driver arm_smmu_driver = { |
3886 | .driver = { | |
34debdca | 3887 | .name = "arm-smmu-v3", |
8efda06f | 3888 | .of_match_table = arm_smmu_of_match, |
34debdca | 3889 | .suppress_bind_attrs = true, |
48ec83bc | 3890 | }, |
2985b521 | 3891 | .probe = arm_smmu_device_probe, |
6e8fa740 | 3892 | .remove = arm_smmu_device_remove, |
7aa8619a | 3893 | .shutdown = arm_smmu_device_shutdown, |
48ec83bc | 3894 | }; |
32784a95 JPB |
3895 | module_driver(arm_smmu_driver, platform_driver_register, |
3896 | arm_smmu_driver_unregister); | |
6e8fa740 WD |
3897 | |
3898 | MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); | |
1ea27ee2 | 3899 | MODULE_AUTHOR("Will Deacon <will@kernel.org>"); |
d3daf666 | 3900 | MODULE_ALIAS("platform:arm-smmu-v3"); |
6e8fa740 | 3901 | MODULE_LICENSE("GPL v2"); |