iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure
[linux-block.git] / drivers / iommu / arm-smmu-v3.c
CommitLineData
2d7ca2c4 1// SPDX-License-Identifier: GPL-2.0
48ec83bc
WD
2/*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
48ec83bc
WD
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
e4dadfa8
LP
12#include <linux/acpi.h>
13#include <linux/acpi_iort.h>
cbcee19a 14#include <linux/bitfield.h>
1cf9e54e 15#include <linux/bitops.h>
b63b3439 16#include <linux/crash_dump.h>
48ec83bc 17#include <linux/delay.h>
9adb9594 18#include <linux/dma-iommu.h>
48ec83bc
WD
19#include <linux/err.h>
20#include <linux/interrupt.h>
b77cf11f 21#include <linux/io-pgtable.h>
48ec83bc
WD
22#include <linux/iommu.h>
23#include <linux/iopoll.h>
6e8fa740 24#include <linux/module.h>
166bdbd2 25#include <linux/msi.h>
48ec83bc
WD
26#include <linux/of.h>
27#include <linux/of_address.h>
8f785154 28#include <linux/of_iommu.h>
941a802d 29#include <linux/of_platform.h>
48ec83bc 30#include <linux/pci.h>
9ce27afc 31#include <linux/pci-ats.h>
48ec83bc
WD
32#include <linux/platform_device.h>
33
08d4ca2a
RM
34#include <linux/amba/bus.h>
35
48ec83bc
WD
36/* MMIO registers */
37#define ARM_SMMU_IDR0 0x0
cbcee19a
RM
38#define IDR0_ST_LVL GENMASK(28, 27)
39#define IDR0_ST_LVL_2LVL 1
40#define IDR0_STALL_MODEL GENMASK(25, 24)
41#define IDR0_STALL_MODEL_STALL 0
42#define IDR0_STALL_MODEL_FORCE 2
43#define IDR0_TTENDIAN GENMASK(22, 21)
44#define IDR0_TTENDIAN_MIXED 0
45#define IDR0_TTENDIAN_LE 2
46#define IDR0_TTENDIAN_BE 3
48ec83bc
WD
47#define IDR0_CD2L (1 << 19)
48#define IDR0_VMID16 (1 << 18)
49#define IDR0_PRI (1 << 16)
50#define IDR0_SEV (1 << 14)
51#define IDR0_MSI (1 << 13)
52#define IDR0_ASID16 (1 << 12)
53#define IDR0_ATS (1 << 10)
54#define IDR0_HYP (1 << 9)
55#define IDR0_COHACC (1 << 4)
cbcee19a
RM
56#define IDR0_TTF GENMASK(3, 2)
57#define IDR0_TTF_AARCH64 2
58#define IDR0_TTF_AARCH32_64 3
48ec83bc
WD
59#define IDR0_S1P (1 << 1)
60#define IDR0_S2P (1 << 0)
61
62#define ARM_SMMU_IDR1 0x4
63#define IDR1_TABLES_PRESET (1 << 30)
64#define IDR1_QUEUES_PRESET (1 << 29)
65#define IDR1_REL (1 << 28)
cbcee19a
RM
66#define IDR1_CMDQS GENMASK(25, 21)
67#define IDR1_EVTQS GENMASK(20, 16)
68#define IDR1_PRIQS GENMASK(15, 11)
69#define IDR1_SSIDSIZE GENMASK(10, 6)
70#define IDR1_SIDSIZE GENMASK(5, 0)
48ec83bc
WD
71
72#define ARM_SMMU_IDR5 0x14
cbcee19a 73#define IDR5_STALL_MAX GENMASK(31, 16)
48ec83bc
WD
74#define IDR5_GRAN64K (1 << 6)
75#define IDR5_GRAN16K (1 << 5)
76#define IDR5_GRAN4K (1 << 4)
cbcee19a
RM
77#define IDR5_OAS GENMASK(2, 0)
78#define IDR5_OAS_32_BIT 0
79#define IDR5_OAS_36_BIT 1
80#define IDR5_OAS_40_BIT 2
81#define IDR5_OAS_42_BIT 3
82#define IDR5_OAS_44_BIT 4
83#define IDR5_OAS_48_BIT 5
6619c913 84#define IDR5_OAS_52_BIT 6
dcd189e6
RM
85#define IDR5_VAX GENMASK(11, 10)
86#define IDR5_VAX_52_BIT 1
48ec83bc
WD
87
88#define ARM_SMMU_CR0 0x20
9ce27afc 89#define CR0_ATSCHK (1 << 4)
48ec83bc
WD
90#define CR0_CMDQEN (1 << 3)
91#define CR0_EVTQEN (1 << 2)
92#define CR0_PRIQEN (1 << 1)
93#define CR0_SMMUEN (1 << 0)
94
95#define ARM_SMMU_CR0ACK 0x24
96
97#define ARM_SMMU_CR1 0x28
cbcee19a
RM
98#define CR1_TABLE_SH GENMASK(11, 10)
99#define CR1_TABLE_OC GENMASK(9, 8)
100#define CR1_TABLE_IC GENMASK(7, 6)
101#define CR1_QUEUE_SH GENMASK(5, 4)
102#define CR1_QUEUE_OC GENMASK(3, 2)
103#define CR1_QUEUE_IC GENMASK(1, 0)
104/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
48ec83bc
WD
105#define CR1_CACHE_NC 0
106#define CR1_CACHE_WB 1
107#define CR1_CACHE_WT 2
48ec83bc
WD
108
109#define ARM_SMMU_CR2 0x2c
110#define CR2_PTM (1 << 2)
111#define CR2_RECINVSID (1 << 1)
112#define CR2_E2H (1 << 0)
113
dc87a98d 114#define ARM_SMMU_GBPA 0x44
dc87a98d 115#define GBPA_UPDATE (1 << 31)
cbcee19a 116#define GBPA_ABORT (1 << 20)
dc87a98d 117
48ec83bc
WD
118#define ARM_SMMU_IRQ_CTRL 0x50
119#define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
ccd6385d 120#define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
48ec83bc
WD
121#define IRQ_CTRL_GERROR_IRQEN (1 << 0)
122
123#define ARM_SMMU_IRQ_CTRLACK 0x54
124
125#define ARM_SMMU_GERROR 0x60
126#define GERROR_SFM_ERR (1 << 8)
127#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
128#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
129#define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
130#define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
131#define GERROR_PRIQ_ABT_ERR (1 << 3)
132#define GERROR_EVTQ_ABT_ERR (1 << 2)
133#define GERROR_CMDQ_ERR (1 << 0)
134#define GERROR_ERR_MASK 0xfd
135
136#define ARM_SMMU_GERRORN 0x64
137
138#define ARM_SMMU_GERROR_IRQ_CFG0 0x68
139#define ARM_SMMU_GERROR_IRQ_CFG1 0x70
140#define ARM_SMMU_GERROR_IRQ_CFG2 0x74
141
142#define ARM_SMMU_STRTAB_BASE 0x80
143#define STRTAB_BASE_RA (1UL << 62)
6619c913 144#define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
48ec83bc
WD
145
146#define ARM_SMMU_STRTAB_BASE_CFG 0x88
cbcee19a
RM
147#define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
148#define STRTAB_BASE_CFG_FMT_LINEAR 0
149#define STRTAB_BASE_CFG_FMT_2LVL 1
150#define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
151#define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
48ec83bc
WD
152
153#define ARM_SMMU_CMDQ_BASE 0x90
154#define ARM_SMMU_CMDQ_PROD 0x98
155#define ARM_SMMU_CMDQ_CONS 0x9c
156
157#define ARM_SMMU_EVTQ_BASE 0xa0
158#define ARM_SMMU_EVTQ_PROD 0x100a8
159#define ARM_SMMU_EVTQ_CONS 0x100ac
160#define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
161#define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
162#define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
163
164#define ARM_SMMU_PRIQ_BASE 0xc0
165#define ARM_SMMU_PRIQ_PROD 0x100c8
166#define ARM_SMMU_PRIQ_CONS 0x100cc
167#define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
168#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
169#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
170
171/* Common MSI config fields */
6619c913 172#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
cbcee19a
RM
173#define MSI_CFG2_SH GENMASK(5, 4)
174#define MSI_CFG2_MEMATTR GENMASK(3, 0)
175
176/* Common memory attribute values */
177#define ARM_SMMU_SH_NSH 0
178#define ARM_SMMU_SH_OSH 2
179#define ARM_SMMU_SH_ISH 3
180#define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
7417b99c 181#define ARM_SMMU_MEMATTR_OIWB 0xf
48ec83bc 182
7c288a5b
WD
183#define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
184#define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
587e6c10 185#define Q_OVERFLOW_FLAG (1U << 31)
8a073da0 186#define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
48ec83bc 187#define Q_ENT(q, p) ((q)->base + \
7c288a5b
WD
188 Q_IDX(&((q)->llq), p) * \
189 (q)->ent_dwords)
48ec83bc
WD
190
191#define Q_BASE_RWA (1UL << 62)
6619c913 192#define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
cbcee19a 193#define Q_BASE_LOG2SIZE GENMASK(4, 0)
900a85ca
WD
194
195/* Ensure DMA allocations are naturally aligned */
196#ifdef CONFIG_CMA_ALIGNMENT
d25f6ead 197#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
900a85ca
WD
198#else
199#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
200#endif
48ec83bc
WD
201
202/*
203 * Stream table.
204 *
205 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
e2f4c233
ZL
206 * 2lvl: 128k L1 entries,
207 * 256 lazy entries per table (each table covers a PCI bus)
48ec83bc 208 */
e2f4c233 209#define STRTAB_L1_SZ_SHIFT 20
48ec83bc
WD
210#define STRTAB_SPLIT 8
211
212#define STRTAB_L1_DESC_DWORDS 1
ba08bdcb 213#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
6619c913 214#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
48ec83bc
WD
215
216#define STRTAB_STE_DWORDS 8
217#define STRTAB_STE_0_V (1UL << 0)
ba08bdcb
RM
218#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
219#define STRTAB_STE_0_CFG_ABORT 0
220#define STRTAB_STE_0_CFG_BYPASS 4
221#define STRTAB_STE_0_CFG_S1_TRANS 5
222#define STRTAB_STE_0_CFG_S2_TRANS 6
223
224#define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
225#define STRTAB_STE_0_S1FMT_LINEAR 0
6619c913 226#define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
ba08bdcb 227#define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
48ec83bc 228
87f42391
JPB
229#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0)
230#define STRTAB_STE_1_S1DSS_TERMINATE 0x0
231#define STRTAB_STE_1_S1DSS_BYPASS 0x1
232#define STRTAB_STE_1_S1DSS_SSID0 0x2
233
48ec83bc
WD
234#define STRTAB_STE_1_S1C_CACHE_NC 0UL
235#define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
236#define STRTAB_STE_1_S1C_CACHE_WT 2UL
237#define STRTAB_STE_1_S1C_CACHE_WB 3UL
ba08bdcb
RM
238#define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
239#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
240#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
48ec83bc
WD
241
242#define STRTAB_STE_1_S1STALLD (1UL << 27)
243
ba08bdcb 244#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
48ec83bc
WD
245#define STRTAB_STE_1_EATS_ABT 0UL
246#define STRTAB_STE_1_EATS_TRANS 1UL
247#define STRTAB_STE_1_EATS_S1CHK 2UL
48ec83bc 248
ba08bdcb 249#define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
48ec83bc
WD
250#define STRTAB_STE_1_STRW_NSEL1 0UL
251#define STRTAB_STE_1_STRW_EL2 2UL
48ec83bc 252
ba08bdcb 253#define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
a0eacd89 254#define STRTAB_STE_1_SHCFG_INCOMING 1UL
a0eacd89 255
ba08bdcb
RM
256#define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
257#define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
ac4b80e5
WD
258#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0)
259#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6)
260#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8)
261#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10)
262#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12)
263#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14)
264#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16)
48ec83bc
WD
265#define STRTAB_STE_2_S2AA64 (1UL << 51)
266#define STRTAB_STE_2_S2ENDI (1UL << 52)
267#define STRTAB_STE_2_S2PTW (1UL << 54)
268#define STRTAB_STE_2_S2R (1UL << 58)
269
6619c913 270#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
48ec83bc
WD
271
272/* Context descriptor (stage-1 only) */
273#define CTXDESC_CD_DWORDS 8
ba08bdcb 274#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
ba08bdcb 275#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
ba08bdcb 276#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
ba08bdcb 277#define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
ba08bdcb 278#define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
ba08bdcb 279#define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
ba08bdcb 280#define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
48ec83bc
WD
281
282#define CTXDESC_CD_0_ENDI (1UL << 15)
283#define CTXDESC_CD_0_V (1UL << 31)
284
ba08bdcb 285#define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
ba08bdcb 286#define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
48ec83bc
WD
287
288#define CTXDESC_CD_0_AA64 (1UL << 41)
9cff86fd 289#define CTXDESC_CD_0_S (1UL << 44)
48ec83bc
WD
290#define CTXDESC_CD_0_R (1UL << 45)
291#define CTXDESC_CD_0_A (1UL << 46)
ba08bdcb
RM
292#define CTXDESC_CD_0_ASET (1UL << 47)
293#define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
48ec83bc 294
6619c913 295#define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
48ec83bc 296
89535821
JPB
297/*
298 * When the SMMU only supports linear context descriptor tables, pick a
299 * reasonable size limit (64kB).
300 */
301#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
302
48ec83bc 303/* Command queue */
d25f6ead
WD
304#define CMDQ_ENT_SZ_SHIFT 4
305#define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
306#define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
48ec83bc 307
cbcee19a 308#define CMDQ_CONS_ERR GENMASK(30, 24)
48ec83bc
WD
309#define CMDQ_ERR_CERROR_NONE_IDX 0
310#define CMDQ_ERR_CERROR_ILL_IDX 1
311#define CMDQ_ERR_CERROR_ABT_IDX 2
9ce27afc 312#define CMDQ_ERR_CERROR_ATC_INV_IDX 3
48ec83bc 313
587e6c10
WD
314#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
315
2af2e72b
WD
316/*
317 * This is used to size the command queue and therefore must be at least
318 * BITS_PER_LONG so that the valid_map works correctly (it relies on the
319 * total number of queue entries being a multiple of BITS_PER_LONG).
320 */
321#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
322
7417b99c 323#define CMDQ_0_OP GENMASK_ULL(7, 0)
48ec83bc
WD
324#define CMDQ_0_SSV (1UL << 11)
325
7417b99c
RM
326#define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
327#define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
1cf9e54e 328#define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
48ec83bc 329
87f42391 330#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
7417b99c 331#define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
48ec83bc 332#define CMDQ_CFGI_1_LEAF (1UL << 0)
7417b99c 333#define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
48ec83bc 334
7417b99c
RM
335#define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
336#define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
48ec83bc 337#define CMDQ_TLBI_1_LEAF (1UL << 0)
1cf9e54e 338#define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
6619c913 339#define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
48ec83bc 340
9ce27afc
JPB
341#define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
342#define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
343#define CMDQ_ATC_0_GLOBAL (1UL << 9)
344#define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
345#define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
346
7417b99c
RM
347#define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
348#define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
349#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
350#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
351
352#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
353#define CMDQ_SYNC_0_CS_NONE 0
354#define CMDQ_SYNC_0_CS_IRQ 1
355#define CMDQ_SYNC_0_CS_SEV 2
356#define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
357#define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
358#define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
6619c913 359#define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
48ec83bc
WD
360
361/* Event queue */
d25f6ead
WD
362#define EVTQ_ENT_SZ_SHIFT 5
363#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
364#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
48ec83bc 365
7417b99c 366#define EVTQ_0_ID GENMASK_ULL(7, 0)
48ec83bc
WD
367
368/* PRI queue */
d25f6ead
WD
369#define PRIQ_ENT_SZ_SHIFT 4
370#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
371#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
48ec83bc 372
7417b99c
RM
373#define PRIQ_0_SID GENMASK_ULL(31, 0)
374#define PRIQ_0_SSID GENMASK_ULL(51, 32)
48ec83bc
WD
375#define PRIQ_0_PERM_PRIV (1UL << 58)
376#define PRIQ_0_PERM_EXEC (1UL << 59)
377#define PRIQ_0_PERM_READ (1UL << 60)
378#define PRIQ_0_PERM_WRITE (1UL << 61)
379#define PRIQ_0_PRG_LAST (1UL << 62)
380#define PRIQ_0_SSID_V (1UL << 63)
381
7417b99c 382#define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
1cf9e54e 383#define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
48ec83bc
WD
384
385/* High-level queue structures */
587e6c10
WD
386#define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
387#define ARM_SMMU_POLL_SPIN_COUNT 10
48ec83bc 388
50019f09
EA
389#define MSI_IOVA_BASE 0x8000000
390#define MSI_IOVA_LENGTH 0x100000
391
a71792de 392static bool disable_bypass = 1;
48ec83bc
WD
393module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
394MODULE_PARM_DESC(disable_bypass,
395 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
396
397enum pri_resp {
7417b99c
RM
398 PRI_RESP_DENY = 0,
399 PRI_RESP_FAIL = 1,
400 PRI_RESP_SUCC = 2,
48ec83bc
WD
401};
402
166bdbd2
MZ
403enum arm_smmu_msi_index {
404 EVTQ_MSI_INDEX,
405 GERROR_MSI_INDEX,
406 PRIQ_MSI_INDEX,
407 ARM_SMMU_MAX_MSIS,
408};
409
410static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
411 [EVTQ_MSI_INDEX] = {
412 ARM_SMMU_EVTQ_IRQ_CFG0,
413 ARM_SMMU_EVTQ_IRQ_CFG1,
414 ARM_SMMU_EVTQ_IRQ_CFG2,
415 },
416 [GERROR_MSI_INDEX] = {
417 ARM_SMMU_GERROR_IRQ_CFG0,
418 ARM_SMMU_GERROR_IRQ_CFG1,
419 ARM_SMMU_GERROR_IRQ_CFG2,
420 },
421 [PRIQ_MSI_INDEX] = {
422 ARM_SMMU_PRIQ_IRQ_CFG0,
423 ARM_SMMU_PRIQ_IRQ_CFG1,
424 ARM_SMMU_PRIQ_IRQ_CFG2,
425 },
426};
427
48ec83bc
WD
428struct arm_smmu_cmdq_ent {
429 /* Common fields */
430 u8 opcode;
431 bool substream_valid;
432
433 /* Command-specific fields */
434 union {
435 #define CMDQ_OP_PREFETCH_CFG 0x1
436 struct {
437 u32 sid;
438 u8 size;
439 u64 addr;
440 } prefetch;
441
442 #define CMDQ_OP_CFGI_STE 0x3
443 #define CMDQ_OP_CFGI_ALL 0x4
87f42391
JPB
444 #define CMDQ_OP_CFGI_CD 0x5
445 #define CMDQ_OP_CFGI_CD_ALL 0x6
48ec83bc
WD
446 struct {
447 u32 sid;
87f42391 448 u32 ssid;
48ec83bc
WD
449 union {
450 bool leaf;
451 u8 span;
452 };
453 } cfgi;
454
455 #define CMDQ_OP_TLBI_NH_ASID 0x11
456 #define CMDQ_OP_TLBI_NH_VA 0x12
457 #define CMDQ_OP_TLBI_EL2_ALL 0x20
458 #define CMDQ_OP_TLBI_S12_VMALL 0x28
459 #define CMDQ_OP_TLBI_S2_IPA 0x2a
460 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
461 struct {
462 u16 asid;
463 u16 vmid;
464 bool leaf;
465 u64 addr;
466 } tlbi;
467
9ce27afc
JPB
468 #define CMDQ_OP_ATC_INV 0x40
469 #define ATC_INV_SIZE_ALL 52
470 struct {
471 u32 sid;
472 u32 ssid;
473 u64 addr;
474 u8 size;
475 bool global;
476 } atc;
477
48ec83bc
WD
478 #define CMDQ_OP_PRI_RESP 0x41
479 struct {
480 u32 sid;
481 u32 ssid;
482 u16 grpid;
483 enum pri_resp resp;
484 } pri;
485
486 #define CMDQ_OP_CMD_SYNC 0x46
37de98f8 487 struct {
37de98f8
RM
488 u64 msiaddr;
489 } sync;
48ec83bc
WD
490 };
491};
492
52be8637 493struct arm_smmu_ll_queue {
587e6c10
WD
494 union {
495 u64 val;
496 struct {
497 u32 prod;
498 u32 cons;
499 };
500 struct {
501 atomic_t prod;
502 atomic_t cons;
503 } atomic;
504 u8 __pad[SMP_CACHE_BYTES];
505 } ____cacheline_aligned_in_smp;
52be8637
WD
506 u32 max_n_shift;
507};
508
48ec83bc 509struct arm_smmu_queue {
52be8637 510 struct arm_smmu_ll_queue llq;
48ec83bc
WD
511 int irq; /* Wired interrupt */
512
513 __le64 *base;
514 dma_addr_t base_dma;
515 u64 q_base;
516
517 size_t ent_dwords;
48ec83bc
WD
518
519 u32 __iomem *prod_reg;
520 u32 __iomem *cons_reg;
521};
522
587e6c10
WD
523struct arm_smmu_queue_poll {
524 ktime_t timeout;
525 unsigned int delay;
526 unsigned int spin_cnt;
527 bool wfe;
528};
529
48ec83bc
WD
530struct arm_smmu_cmdq {
531 struct arm_smmu_queue q;
587e6c10
WD
532 atomic_long_t *valid_map;
533 atomic_t owner_prod;
534 atomic_t lock;
48ec83bc
WD
535};
536
537struct arm_smmu_evtq {
538 struct arm_smmu_queue q;
539 u32 max_stalls;
540};
541
542struct arm_smmu_priq {
543 struct arm_smmu_queue q;
544};
545
546/* High-level stream table and context descriptor structures */
547struct arm_smmu_strtab_l1_desc {
548 u8 span;
549
550 __le64 *l2ptr;
551 dma_addr_t l2ptr_dma;
552};
553
7bc4f3fa
JPB
554struct arm_smmu_ctx_desc {
555 u16 asid;
556 u64 ttbr;
557 u64 tcr;
558 u64 mair;
559};
560
561struct arm_smmu_ctx_desc_cfg {
562 __le64 *cdtab;
563 dma_addr_t cdtab_dma;
a557aff0 564 unsigned int num_l1_ents;
7bc4f3fa
JPB
565};
566
48ec83bc 567struct arm_smmu_s1_cfg {
7bc4f3fa
JPB
568 struct arm_smmu_ctx_desc_cfg cdcfg;
569 struct arm_smmu_ctx_desc cd;
87f42391 570 u8 s1fmt;
a557aff0 571 u8 s1cdmax;
48ec83bc
WD
572};
573
574struct arm_smmu_s2_cfg {
575 u16 vmid;
576 u64 vttbr;
577 u64 vtcr;
578};
579
48ec83bc
WD
580struct arm_smmu_strtab_cfg {
581 __le64 *strtab;
582 dma_addr_t strtab_dma;
583 struct arm_smmu_strtab_l1_desc *l1_desc;
584 unsigned int num_l1_ents;
585
586 u64 strtab_base;
587 u32 strtab_base_cfg;
588};
589
590/* An SMMUv3 instance */
591struct arm_smmu_device {
592 struct device *dev;
593 void __iomem *base;
594
595#define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
596#define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
597#define ARM_SMMU_FEAT_TT_LE (1 << 2)
598#define ARM_SMMU_FEAT_TT_BE (1 << 3)
599#define ARM_SMMU_FEAT_PRI (1 << 4)
600#define ARM_SMMU_FEAT_ATS (1 << 5)
601#define ARM_SMMU_FEAT_SEV (1 << 6)
602#define ARM_SMMU_FEAT_MSI (1 << 7)
603#define ARM_SMMU_FEAT_COHERENCY (1 << 8)
604#define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
605#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
606#define ARM_SMMU_FEAT_STALLS (1 << 11)
607#define ARM_SMMU_FEAT_HYP (1 << 12)
9cff86fd 608#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
dcd189e6 609#define ARM_SMMU_FEAT_VAX (1 << 14)
48ec83bc
WD
610 u32 features;
611
5e92946c 612#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
e5b829de 613#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
5e92946c
ZL
614 u32 options;
615
48ec83bc
WD
616 struct arm_smmu_cmdq cmdq;
617 struct arm_smmu_evtq evtq;
618 struct arm_smmu_priq priq;
619
620 int gerr_irq;
f935448a 621 int combined_irq;
48ec83bc
WD
622
623 unsigned long ias; /* IPA */
624 unsigned long oas; /* PA */
d5466357 625 unsigned long pgsize_bitmap;
48ec83bc
WD
626
627#define ARM_SMMU_MAX_ASIDS (1 << 16)
628 unsigned int asid_bits;
629 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
630
631#define ARM_SMMU_MAX_VMIDS (1 << 16)
632 unsigned int vmid_bits;
633 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
634
635 unsigned int ssid_bits;
636 unsigned int sid_bits;
637
638 struct arm_smmu_strtab_cfg strtab_cfg;
9648cbc9
JR
639
640 /* IOMMU core code handle */
641 struct iommu_device iommu;
48ec83bc
WD
642};
643
8f785154 644/* SMMU private data for each master */
b54f4260 645struct arm_smmu_master {
48ec83bc 646 struct arm_smmu_device *smmu;
9ce27afc 647 struct device *dev;
8be39a1a 648 struct arm_smmu_domain *domain;
2a7e62f5 649 struct list_head domain_head;
bcecaee4
JPB
650 u32 *sids;
651 unsigned int num_sids;
f75d8e33 652 bool ats_enabled;
89535821 653 unsigned int ssid_bits;
48ec83bc
WD
654};
655
656/* SMMU private data for an IOMMU domain */
657enum arm_smmu_domain_stage {
658 ARM_SMMU_DOMAIN_S1 = 0,
659 ARM_SMMU_DOMAIN_S2,
660 ARM_SMMU_DOMAIN_NESTED,
beb3c6a0 661 ARM_SMMU_DOMAIN_BYPASS,
48ec83bc
WD
662};
663
664struct arm_smmu_domain {
665 struct arm_smmu_device *smmu;
666 struct mutex init_mutex; /* Protects smmu pointer */
667
668 struct io_pgtable_ops *pgtbl_ops;
9662b99a 669 bool non_strict;
cdb8a3c3 670 atomic_t nr_ats_masters;
48ec83bc
WD
671
672 enum arm_smmu_domain_stage stage;
673 union {
674 struct arm_smmu_s1_cfg s1_cfg;
675 struct arm_smmu_s2_cfg s2_cfg;
676 };
677
678 struct iommu_domain domain;
2a7e62f5
JPB
679
680 struct list_head devices;
681 spinlock_t devices_lock;
48ec83bc
WD
682};
683
5e92946c
ZL
684struct arm_smmu_option_prop {
685 u32 opt;
686 const char *prop;
687};
688
689static struct arm_smmu_option_prop arm_smmu_options[] = {
690 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
e5b829de 691 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
5e92946c
ZL
692 { 0, NULL},
693};
694
e5b829de
LC
695static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
696 struct arm_smmu_device *smmu)
697{
698 if ((offset > SZ_64K) &&
699 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
700 offset -= SZ_64K;
701
702 return smmu->base + offset;
703}
704
48ec83bc
WD
705static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
706{
707 return container_of(dom, struct arm_smmu_domain, domain);
708}
709
5e92946c
ZL
710static void parse_driver_options(struct arm_smmu_device *smmu)
711{
712 int i = 0;
713
714 do {
715 if (of_property_read_bool(smmu->dev->of_node,
716 arm_smmu_options[i].prop)) {
717 smmu->options |= arm_smmu_options[i].opt;
718 dev_notice(smmu->dev, "option %s\n",
719 arm_smmu_options[i].prop);
720 }
721 } while (arm_smmu_options[++i].opt);
722}
723
48ec83bc 724/* Low-level queue manipulation functions */
587e6c10
WD
725static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
726{
727 u32 space, prod, cons;
728
729 prod = Q_IDX(q, q->prod);
730 cons = Q_IDX(q, q->cons);
731
732 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
733 space = (1 << q->max_n_shift) - (prod - cons);
734 else
735 space = cons - prod;
736
737 return space >= n;
738}
739
7c288a5b 740static bool queue_full(struct arm_smmu_ll_queue *q)
48ec83bc
WD
741{
742 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
743 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
744}
745
7c288a5b 746static bool queue_empty(struct arm_smmu_ll_queue *q)
48ec83bc
WD
747{
748 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
749 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
750}
751
587e6c10 752static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
48ec83bc 753{
587e6c10
WD
754 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
755 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
756 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
757 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
48ec83bc
WD
758}
759
2a8868f1 760static void queue_sync_cons_out(struct arm_smmu_queue *q)
48ec83bc 761{
a868e853
WD
762 /*
763 * Ensure that all CPU accesses (reads and writes) to the queue
764 * are complete before we update the cons pointer.
765 */
766 mb();
52be8637 767 writel_relaxed(q->llq.cons, q->cons_reg);
48ec83bc
WD
768}
769
7c288a5b 770static void queue_inc_cons(struct arm_smmu_ll_queue *q)
2a8868f1 771{
7c288a5b
WD
772 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
773 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
48ec83bc
WD
774}
775
2a8868f1 776static int queue_sync_prod_in(struct arm_smmu_queue *q)
48ec83bc
WD
777{
778 int ret = 0;
779 u32 prod = readl_relaxed(q->prod_reg);
780
52be8637 781 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
48ec83bc
WD
782 ret = -EOVERFLOW;
783
52be8637 784 q->llq.prod = prod;
48ec83bc
WD
785 return ret;
786}
787
587e6c10 788static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
48ec83bc 789{
587e6c10
WD
790 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
791 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
48ec83bc
WD
792}
793
587e6c10
WD
794static void queue_poll_init(struct arm_smmu_device *smmu,
795 struct arm_smmu_queue_poll *qp)
48ec83bc 796{
587e6c10
WD
797 qp->delay = 1;
798 qp->spin_cnt = 0;
799 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
800 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
48ec83bc 801}
48ec83bc 802
587e6c10 803static int queue_poll(struct arm_smmu_queue_poll *qp)
48ec83bc 804{
587e6c10
WD
805 if (ktime_compare(ktime_get(), qp->timeout) > 0)
806 return -ETIMEDOUT;
48ec83bc 807
587e6c10
WD
808 if (qp->wfe) {
809 wfe();
810 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
811 cpu_relax();
812 } else {
813 udelay(qp->delay);
814 qp->delay *= 2;
815 qp->spin_cnt = 0;
48ec83bc
WD
816 }
817
818 return 0;
819}
820
821static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
822{
823 int i;
824
825 for (i = 0; i < n_dwords; ++i)
826 *dst++ = cpu_to_le64(*src++);
827}
828
48ec83bc
WD
829static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
830{
831 int i;
832
833 for (i = 0; i < n_dwords; ++i)
834 *dst++ = le64_to_cpu(*src++);
835}
836
837static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
838{
7c288a5b 839 if (queue_empty(&q->llq))
48ec83bc
WD
840 return -EAGAIN;
841
52be8637 842 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
7c288a5b 843 queue_inc_cons(&q->llq);
2a8868f1 844 queue_sync_cons_out(q);
48ec83bc
WD
845 return 0;
846}
847
848/* High-level queue accessors */
849static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
850{
d25f6ead 851 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
7417b99c 852 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
48ec83bc
WD
853
854 switch (ent->opcode) {
855 case CMDQ_OP_TLBI_EL2_ALL:
856 case CMDQ_OP_TLBI_NSNH_ALL:
857 break;
858 case CMDQ_OP_PREFETCH_CFG:
7417b99c
RM
859 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
860 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
48ec83bc
WD
861 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
862 break;
87f42391
JPB
863 case CMDQ_OP_CFGI_CD:
864 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
865 /* Fallthrough */
48ec83bc 866 case CMDQ_OP_CFGI_STE:
7417b99c
RM
867 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
868 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
48ec83bc 869 break;
87f42391
JPB
870 case CMDQ_OP_CFGI_CD_ALL:
871 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
872 break;
48ec83bc
WD
873 case CMDQ_OP_CFGI_ALL:
874 /* Cover the entire SID range */
7417b99c 875 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
48ec83bc
WD
876 break;
877 case CMDQ_OP_TLBI_NH_VA:
935d43ba 878 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
7417b99c
RM
879 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
880 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
1c27df1c
WD
881 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
882 break;
48ec83bc 883 case CMDQ_OP_TLBI_S2_IPA:
7417b99c
RM
884 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
885 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
1c27df1c 886 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
48ec83bc
WD
887 break;
888 case CMDQ_OP_TLBI_NH_ASID:
7417b99c 889 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
48ec83bc
WD
890 /* Fallthrough */
891 case CMDQ_OP_TLBI_S12_VMALL:
7417b99c 892 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
48ec83bc 893 break;
9ce27afc
JPB
894 case CMDQ_OP_ATC_INV:
895 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
896 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
897 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
898 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
899 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
900 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
901 break;
48ec83bc 902 case CMDQ_OP_PRI_RESP:
7417b99c
RM
903 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
904 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
905 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
906 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
48ec83bc
WD
907 switch (ent->pri.resp) {
908 case PRI_RESP_DENY:
48ec83bc 909 case PRI_RESP_FAIL:
48ec83bc 910 case PRI_RESP_SUCC:
48ec83bc
WD
911 break;
912 default:
913 return -EINVAL;
914 }
7417b99c 915 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
48ec83bc
WD
916 break;
917 case CMDQ_OP_CMD_SYNC:
587e6c10 918 if (ent->sync.msiaddr) {
7417b99c 919 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
587e6c10
WD
920 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
921 } else {
7417b99c 922 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
587e6c10 923 }
7417b99c
RM
924 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
925 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
48ec83bc
WD
926 break;
927 default:
928 return -ENOENT;
929 }
930
931 return 0;
932}
933
587e6c10
WD
934static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
935 u32 prod)
936{
937 struct arm_smmu_queue *q = &smmu->cmdq.q;
938 struct arm_smmu_cmdq_ent ent = {
939 .opcode = CMDQ_OP_CMD_SYNC,
940 };
941
942 /*
943 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
944 * payload, so the write will zero the entire command on that platform.
945 */
946 if (smmu->features & ARM_SMMU_FEAT_MSI &&
947 smmu->features & ARM_SMMU_FEAT_COHERENCY) {
948 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
949 q->ent_dwords * 8;
950 }
951
952 arm_smmu_cmdq_build_cmd(cmd, &ent);
953}
954
48ec83bc
WD
955static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
956{
957 static const char *cerror_str[] = {
958 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
959 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
960 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
9ce27afc 961 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
48ec83bc
WD
962 };
963
964 int i;
965 u64 cmd[CMDQ_ENT_DWORDS];
966 struct arm_smmu_queue *q = &smmu->cmdq.q;
967 u32 cons = readl_relaxed(q->cons_reg);
cbcee19a 968 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
48ec83bc
WD
969 struct arm_smmu_cmdq_ent cmd_sync = {
970 .opcode = CMDQ_OP_CMD_SYNC,
971 };
972
973 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
a0d5c04c 974 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
48ec83bc
WD
975
976 switch (idx) {
48ec83bc
WD
977 case CMDQ_ERR_CERROR_ABT_IDX:
978 dev_err(smmu->dev, "retrying command fetch\n");
979 case CMDQ_ERR_CERROR_NONE_IDX:
980 return;
9ce27afc
JPB
981 case CMDQ_ERR_CERROR_ATC_INV_IDX:
982 /*
983 * ATC Invalidation Completion timeout. CONS is still pointing
984 * at the CMD_SYNC. Attempt to complete other pending commands
985 * by repeating the CMD_SYNC, though we might well end up back
986 * here since the ATC invalidation may still be pending.
987 */
988 return;
a0d5c04c
WD
989 case CMDQ_ERR_CERROR_ILL_IDX:
990 /* Fallthrough */
991 default:
992 break;
48ec83bc
WD
993 }
994
995 /*
996 * We may have concurrent producers, so we need to be careful
997 * not to touch any of the shadow cmdq state.
998 */
aea2037e 999 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
48ec83bc
WD
1000 dev_err(smmu->dev, "skipping command in error state:\n");
1001 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1002 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1003
1004 /* Convert the erroneous command into a CMD_SYNC */
1005 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1006 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1007 return;
1008 }
1009
aea2037e 1010 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
48ec83bc
WD
1011}
1012
587e6c10
WD
1013/*
1014 * Command queue locking.
1015 * This is a form of bastardised rwlock with the following major changes:
1016 *
1017 * - The only LOCK routines are exclusive_trylock() and shared_lock().
1018 * Neither have barrier semantics, and instead provide only a control
1019 * dependency.
1020 *
1021 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1022 * fails if the caller appears to be the last lock holder (yes, this is
1023 * racy). All successful UNLOCK routines have RELEASE semantics.
1024 */
1025static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
2f657add 1026{
587e6c10 1027 int val;
2f657add 1028
587e6c10
WD
1029 /*
1030 * We can try to avoid the cmpxchg() loop by simply incrementing the
1031 * lock counter. When held in exclusive state, the lock counter is set
1032 * to INT_MIN so these increments won't hurt as the value will remain
1033 * negative.
1034 */
1035 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1036 return;
1037
1038 do {
1039 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1040 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1041}
1042
1043static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1044{
1045 (void)atomic_dec_return_release(&cmdq->lock);
1046}
1047
1048static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1049{
1050 if (atomic_read(&cmdq->lock) == 1)
1051 return false;
1052
1053 arm_smmu_cmdq_shared_unlock(cmdq);
1054 return true;
1055}
1056
1057#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
1058({ \
1059 bool __ret; \
1060 local_irq_save(flags); \
1061 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
1062 if (!__ret) \
1063 local_irq_restore(flags); \
1064 __ret; \
1065})
1066
1067#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
1068({ \
1069 atomic_set_release(&cmdq->lock, 0); \
1070 local_irq_restore(flags); \
1071})
1072
1073
1074/*
1075 * Command queue insertion.
1076 * This is made fiddly by our attempts to achieve some sort of scalability
1077 * since there is one queue shared amongst all of the CPUs in the system. If
1078 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1079 * then you'll *love* this monstrosity.
1080 *
1081 * The basic idea is to split the queue up into ranges of commands that are
1082 * owned by a given CPU; the owner may not have written all of the commands
1083 * itself, but is responsible for advancing the hardware prod pointer when
1084 * the time comes. The algorithm is roughly:
1085 *
1086 * 1. Allocate some space in the queue. At this point we also discover
1087 * whether the head of the queue is currently owned by another CPU,
1088 * or whether we are the owner.
1089 *
1090 * 2. Write our commands into our allocated slots in the queue.
1091 *
1092 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1093 *
1094 * 4. If we are an owner:
1095 * a. Wait for the previous owner to finish.
1096 * b. Mark the queue head as unowned, which tells us the range
1097 * that we are responsible for publishing.
1098 * c. Wait for all commands in our owned range to become valid.
1099 * d. Advance the hardware prod pointer.
1100 * e. Tell the next owner we've finished.
1101 *
1102 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
1103 * owner), then we need to stick around until it has completed:
1104 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1105 * to clear the first 4 bytes.
1106 * b. Otherwise, we spin waiting for the hardware cons pointer to
1107 * advance past our command.
1108 *
1109 * The devil is in the details, particularly the use of locking for handling
1110 * SYNC completion and freeing up space in the queue before we think that it is
1111 * full.
1112 */
1113static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1114 u32 sprod, u32 eprod, bool set)
1115{
1116 u32 swidx, sbidx, ewidx, ebidx;
1117 struct arm_smmu_ll_queue llq = {
1118 .max_n_shift = cmdq->q.llq.max_n_shift,
1119 .prod = sprod,
1120 };
901510ee 1121
587e6c10
WD
1122 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1123 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
2f657add 1124
587e6c10
WD
1125 while (llq.prod != eprod) {
1126 unsigned long mask;
1127 atomic_long_t *ptr;
1128 u32 limit = BITS_PER_LONG;
901510ee 1129
587e6c10
WD
1130 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1131 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1132
1133 ptr = &cmdq->valid_map[swidx];
1134
1135 if ((swidx == ewidx) && (sbidx < ebidx))
1136 limit = ebidx;
1137
1138 mask = GENMASK(limit - 1, sbidx);
1139
1140 /*
1141 * The valid bit is the inverse of the wrap bit. This means
1142 * that a zero-initialised queue is invalid and, after marking
1143 * all entries as valid, they become invalid again when we
1144 * wrap.
1145 */
1146 if (set) {
1147 atomic_long_xor(mask, ptr);
1148 } else { /* Poll */
1149 unsigned long valid;
1150
1151 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1152 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1153 }
1154
1155 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
2f657add
RM
1156 }
1157}
1158
587e6c10
WD
1159/* Mark all entries in the range [sprod, eprod) as valid */
1160static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1161 u32 sprod, u32 eprod)
1162{
1163 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1164}
1165
1166/* Wait for all entries in the range [sprod, eprod) to become valid */
1167static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1168 u32 sprod, u32 eprod)
1169{
1170 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
1171}
1172
1173/* Wait for the command queue to become non-full */
1174static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1175 struct arm_smmu_ll_queue *llq)
48ec83bc 1176{
8ded2909 1177 unsigned long flags;
587e6c10
WD
1178 struct arm_smmu_queue_poll qp;
1179 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1180 int ret = 0;
48ec83bc 1181
587e6c10
WD
1182 /*
1183 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1184 * that fails, spin until somebody else updates it for us.
1185 */
1186 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1187 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1188 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1189 llq->val = READ_ONCE(cmdq->q.llq.val);
1190 return 0;
48ec83bc
WD
1191 }
1192
587e6c10
WD
1193 queue_poll_init(smmu, &qp);
1194 do {
1195 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1196 if (!queue_full(llq))
1197 break;
1198
1199 ret = queue_poll(&qp);
1200 } while (!ret);
1201
1202 return ret;
2f657add 1203}
48ec83bc 1204
37de98f8 1205/*
587e6c10
WD
1206 * Wait until the SMMU signals a CMD_SYNC completion MSI.
1207 * Must be called with the cmdq lock held in some capacity.
37de98f8 1208 */
587e6c10
WD
1209static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1210 struct arm_smmu_ll_queue *llq)
37de98f8 1211{
587e6c10
WD
1212 int ret = 0;
1213 struct arm_smmu_queue_poll qp;
1214 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1215 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
a529ea19 1216
587e6c10 1217 queue_poll_init(smmu, &qp);
37de98f8 1218
587e6c10
WD
1219 /*
1220 * The MSI won't generate an event, since it's being written back
1221 * into the command queue.
1222 */
1223 qp.wfe = false;
1224 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1225 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1226 return ret;
37de98f8
RM
1227}
1228
587e6c10
WD
1229/*
1230 * Wait until the SMMU cons index passes llq->prod.
1231 * Must be called with the cmdq lock held in some capacity.
1232 */
1233static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1234 struct arm_smmu_ll_queue *llq)
49806599 1235{
587e6c10
WD
1236 struct arm_smmu_queue_poll qp;
1237 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1238 u32 prod = llq->prod;
1239 int ret = 0;
49806599 1240
587e6c10
WD
1241 queue_poll_init(smmu, &qp);
1242 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1243 do {
1244 if (queue_consumed(llq, prod))
1245 break;
901510ee 1246
587e6c10 1247 ret = queue_poll(&qp);
901510ee 1248
587e6c10
WD
1249 /*
1250 * This needs to be a readl() so that our subsequent call
1251 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1252 *
1253 * Specifically, we need to ensure that we observe all
1254 * shared_lock()s by other CMD_SYNCs that share our owner,
1255 * so that a failing call to tryunlock() means that we're
1256 * the last one out and therefore we can safely advance
1257 * cmdq->q.llq.cons. Roughly speaking:
1258 *
1259 * CPU 0 CPU1 CPU2 (us)
1260 *
1261 * if (sync)
1262 * shared_lock();
1263 *
1264 * dma_wmb();
1265 * set_valid_map();
1266 *
1267 * if (owner) {
1268 * poll_valid_map();
1269 * <control dependency>
1270 * writel(prod_reg);
1271 *
1272 * readl(cons_reg);
1273 * tryunlock();
1274 *
1275 * Requires us to see CPU 0's shared_lock() acquisition.
1276 */
1277 llq->cons = readl(cmdq->q.cons_reg);
1278 } while (!ret);
49806599 1279
587e6c10 1280 return ret;
49806599
WD
1281}
1282
587e6c10
WD
1283static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1284 struct arm_smmu_ll_queue *llq)
2f657add 1285{
587e6c10
WD
1286 if (smmu->features & ARM_SMMU_FEAT_MSI &&
1287 smmu->features & ARM_SMMU_FEAT_COHERENCY)
1288 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1289
1290 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1291}
1292
1293static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1294 u32 prod, int n)
1295{
1296 int i;
1297 struct arm_smmu_ll_queue llq = {
1298 .max_n_shift = cmdq->q.llq.max_n_shift,
1299 .prod = prod,
1300 };
1301
1302 for (i = 0; i < n; ++i) {
1303 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1304
1305 prod = queue_inc_prod_n(&llq, i);
1306 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1307 }
1308}
1309
05cbaf4d
WD
1310/*
1311 * This is the actual insertion function, and provides the following
1312 * ordering guarantees to callers:
1313 *
1314 * - There is a dma_wmb() before publishing any commands to the queue.
1315 * This can be relied upon to order prior writes to data structures
1316 * in memory (such as a CD or an STE) before the command.
1317 *
1318 * - On completion of a CMD_SYNC, there is a control dependency.
1319 * This can be relied upon to order subsequent writes to memory (e.g.
1320 * freeing an IOVA) after completion of the CMD_SYNC.
1321 *
1322 * - Command insertion is totally ordered, so if two CPUs each race to
1323 * insert their own list of commands then all of the commands from one
1324 * CPU will appear before any of the commands from the other CPU.
1325 */
587e6c10
WD
1326static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1327 u64 *cmds, int n, bool sync)
1328{
1329 u64 cmd_sync[CMDQ_ENT_DWORDS];
1330 u32 prod;
2f657add 1331 unsigned long flags;
587e6c10
WD
1332 bool owner;
1333 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1334 struct arm_smmu_ll_queue llq = {
1335 .max_n_shift = cmdq->q.llq.max_n_shift,
1336 }, head = llq;
1337 int ret = 0;
2f657add 1338
587e6c10
WD
1339 /* 1. Allocate some space in the queue */
1340 local_irq_save(flags);
1341 llq.val = READ_ONCE(cmdq->q.llq.val);
1342 do {
1343 u64 old;
1344
1345 while (!queue_has_space(&llq, n + sync)) {
1346 local_irq_restore(flags);
1347 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1348 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1349 local_irq_save(flags);
1350 }
1351
1352 head.cons = llq.cons;
1353 head.prod = queue_inc_prod_n(&llq, n + sync) |
1354 CMDQ_PROD_OWNED_FLAG;
1355
1356 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1357 if (old == llq.val)
1358 break;
1359
1360 llq.val = old;
1361 } while (1);
1362 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1363 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1364 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1365
1366 /*
1367 * 2. Write our commands into the queue
1368 * Dependency ordering from the cmpxchg() loop above.
1369 */
1370 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1371 if (sync) {
1372 prod = queue_inc_prod_n(&llq, n);
1373 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1374 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1375
1376 /*
1377 * In order to determine completion of our CMD_SYNC, we must
1378 * ensure that the queue can't wrap twice without us noticing.
1379 * We achieve that by taking the cmdq lock as shared before
1380 * marking our slot as valid.
1381 */
1382 arm_smmu_cmdq_shared_lock(cmdq);
1383 }
2f657add 1384
587e6c10
WD
1385 /* 3. Mark our slots as valid, ensuring commands are visible first */
1386 dma_wmb();
1387 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
2f657add 1388
587e6c10
WD
1389 /* 4. If we are the owner, take control of the SMMU hardware */
1390 if (owner) {
1391 /* a. Wait for previous owner to finish */
1392 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1393
1394 /* b. Stop gathering work by clearing the owned flag */
1395 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1396 &cmdq->q.llq.atomic.prod);
1397 prod &= ~CMDQ_PROD_OWNED_FLAG;
1398
1399 /*
1400 * c. Wait for any gathered work to be written to the queue.
1401 * Note that we read our own entries so that we have the control
1402 * dependency required by (d).
1403 */
1404 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1405
1406 /*
1407 * d. Advance the hardware prod pointer
1408 * Control dependency ordering from the entries becoming valid.
1409 */
1410 writel_relaxed(prod, cmdq->q.prod_reg);
1411
1412 /*
1413 * e. Tell the next owner we're done
1414 * Make sure we've updated the hardware first, so that we don't
1415 * race to update prod and potentially move it backwards.
1416 */
1417 atomic_set_release(&cmdq->owner_prod, prod);
1418 }
1419
1420 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1421 if (sync) {
1422 llq.prod = queue_inc_prod_n(&llq, n);
1423 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1424 if (ret) {
1425 dev_err_ratelimited(smmu->dev,
1426 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1427 llq.prod,
1428 readl_relaxed(cmdq->q.prod_reg),
1429 readl_relaxed(cmdq->q.cons_reg));
1430 }
2f657add 1431
587e6c10
WD
1432 /*
1433 * Try to unlock the cmq lock. This will fail if we're the last
1434 * reader, in which case we can safely update cmdq->q.llq.cons
1435 */
1436 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1437 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1438 arm_smmu_cmdq_shared_unlock(cmdq);
1439 }
1440 }
2f657add 1441
587e6c10 1442 local_irq_restore(flags);
49806599
WD
1443 return ret;
1444}
1445
587e6c10
WD
1446static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1447 struct arm_smmu_cmdq_ent *ent)
49806599 1448{
587e6c10 1449 u64 cmd[CMDQ_ENT_DWORDS];
49806599 1450
587e6c10
WD
1451 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1452 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1453 ent->opcode);
1454 return -EINVAL;
1455 }
1456
1457 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1458}
1459
1460static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1461{
1462 return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
48ec83bc
WD
1463}
1464
1465/* Context descriptor manipulation functions */
87f42391
JPB
1466static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1467 int ssid, bool leaf)
48ec83bc 1468{
87f42391
JPB
1469 size_t i;
1470 unsigned long flags;
1471 struct arm_smmu_master *master;
1472 struct arm_smmu_device *smmu = smmu_domain->smmu;
1473 struct arm_smmu_cmdq_ent cmd = {
1474 .opcode = CMDQ_OP_CFGI_CD,
1475 .cfgi = {
1476 .ssid = ssid,
1477 .leaf = leaf,
1478 },
1479 };
1480
1481 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1482 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1483 for (i = 0; i < master->num_sids; i++) {
1484 cmd.cfgi.sid = master->sids[i];
1485 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1486 }
1487 }
1488 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
48ec83bc 1489
87f42391
JPB
1490 arm_smmu_cmdq_issue_sync(smmu);
1491}
1492
1493static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1494 int ssid, struct arm_smmu_ctx_desc *cd)
1495{
48ec83bc 1496 /*
87f42391
JPB
1497 * This function handles the following cases:
1498 *
1499 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1500 * (2) Install a secondary CD, for SID+SSID traffic.
1501 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1502 * CD, then invalidate the old entry and mappings.
1503 * (4) Remove a secondary CD.
48ec83bc 1504 */
87f42391
JPB
1505 u64 val;
1506 bool cd_live;
1507 struct arm_smmu_device *smmu = smmu_domain->smmu;
1508 __le64 *cdptr = smmu_domain->s1_cfg.cdcfg.cdtab + ssid *
1509 CTXDESC_CD_DWORDS;
1510
1511 val = le64_to_cpu(cdptr[0]);
1512 cd_live = !!(val & CTXDESC_CD_0_V);
9cff86fd 1513
87f42391
JPB
1514 if (!cd) { /* (4) */
1515 val = 0;
1516 } else if (cd_live) { /* (3) */
1517 val &= ~CTXDESC_CD_0_ASID;
1518 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1519 /*
1520 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1521 * this substream's traffic
1522 */
1523 } else { /* (1) and (2) */
1524 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1525 cdptr[2] = 0;
1526 cdptr[3] = cpu_to_le64(cd->mair);
9cff86fd 1527
87f42391
JPB
1528 /*
1529 * STE is live, and the SMMU might read dwords of this CD in any
1530 * order. Ensure that it observes valid values before reading
1531 * V=1.
1532 */
1533 arm_smmu_sync_cd(smmu_domain, ssid, true);
48ec83bc 1534
87f42391
JPB
1535 val = cd->tcr |
1536#ifdef __BIG_ENDIAN
1537 CTXDESC_CD_0_ENDI |
1538#endif
1539 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1540 CTXDESC_CD_0_AA64 |
1541 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1542 CTXDESC_CD_0_V;
1543
1544 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1545 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1546 val |= CTXDESC_CD_0_S;
1547 }
48ec83bc 1548
87f42391
JPB
1549 /*
1550 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1551 * "Configuration structures and configuration invalidation completion"
1552 *
1553 * The size of single-copy atomic reads made by the SMMU is
1554 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1555 * field within an aligned 64-bit span of a structure can be altered
1556 * without first making the structure invalid.
1557 */
1558 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1559 arm_smmu_sync_cd(smmu_domain, ssid, true);
1560 return 0;
48ec83bc
WD
1561}
1562
a557aff0
JPB
1563static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1564{
1565 size_t l1size;
1566 struct arm_smmu_device *smmu = smmu_domain->smmu;
1567 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1568 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1569
87f42391
JPB
1570 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1571
a557aff0
JPB
1572 cdcfg->num_l1_ents = 1UL << cfg->s1cdmax;
1573 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1574 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1575 GFP_KERNEL);
1576 if (!cdcfg->cdtab) {
1577 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1578 return -ENOMEM;
1579 }
1580 return 0;
1581}
1582
1583static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1584{
1585 struct arm_smmu_device *smmu = smmu_domain->smmu;
1586 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1587 size_t l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1588
1589 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1590 cdcfg->cdtab_dma = 0;
1591 cdcfg->cdtab = NULL;
1592}
1593
48ec83bc
WD
1594/* Stream table manipulation functions */
1595static void
1596arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1597{
1598 u64 val = 0;
1599
ba08bdcb 1600 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1cf9e54e 1601 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
48ec83bc
WD
1602
1603 *dst = cpu_to_le64(val);
1604}
1605
1606static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1607{
1608 struct arm_smmu_cmdq_ent cmd = {
1609 .opcode = CMDQ_OP_CFGI_STE,
1610 .cfgi = {
1611 .sid = sid,
1612 .leaf = true,
1613 },
1614 };
1615
1616 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2f657add 1617 arm_smmu_cmdq_issue_sync(smmu);
48ec83bc
WD
1618}
1619
8be39a1a
JPB
1620static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1621 __le64 *dst)
48ec83bc
WD
1622{
1623 /*
1624 * This is hideously complicated, but we only really care about
1625 * three cases at the moment:
1626 *
beb3c6a0
WD
1627 * 1. Invalid (all zero) -> bypass/fault (init)
1628 * 2. Bypass/fault -> translation/bypass (attach)
1629 * 3. Translation/bypass -> bypass/fault (detach)
48ec83bc
WD
1630 *
1631 * Given that we can't update the STE atomically and the SMMU
1632 * doesn't read the thing in a defined order, that leaves us
1633 * with the following maintenance requirements:
1634 *
1635 * 1. Update Config, return (init time STEs aren't live)
1636 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1637 * 3. Update Config, sync
1638 */
1639 u64 val = le64_to_cpu(dst[0]);
1640 bool ste_live = false;
8be39a1a
JPB
1641 struct arm_smmu_device *smmu = NULL;
1642 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1643 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1644 struct arm_smmu_domain *smmu_domain = NULL;
48ec83bc
WD
1645 struct arm_smmu_cmdq_ent prefetch_cmd = {
1646 .opcode = CMDQ_OP_PREFETCH_CFG,
1647 .prefetch = {
1648 .sid = sid,
1649 },
1650 };
1651
8be39a1a
JPB
1652 if (master) {
1653 smmu_domain = master->domain;
1654 smmu = master->smmu;
1655 }
1656
1657 if (smmu_domain) {
1658 switch (smmu_domain->stage) {
1659 case ARM_SMMU_DOMAIN_S1:
1660 s1_cfg = &smmu_domain->s1_cfg;
1661 break;
1662 case ARM_SMMU_DOMAIN_S2:
1663 case ARM_SMMU_DOMAIN_NESTED:
1664 s2_cfg = &smmu_domain->s2_cfg;
1665 break;
1666 default:
1667 break;
1668 }
1669 }
1670
48ec83bc 1671 if (val & STRTAB_STE_0_V) {
ba08bdcb 1672 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
48ec83bc
WD
1673 case STRTAB_STE_0_CFG_BYPASS:
1674 break;
1675 case STRTAB_STE_0_CFG_S1_TRANS:
1676 case STRTAB_STE_0_CFG_S2_TRANS:
1677 ste_live = true;
1678 break;
5bc0a116 1679 case STRTAB_STE_0_CFG_ABORT:
11f4fe9b
AR
1680 BUG_ON(!disable_bypass);
1681 break;
48ec83bc
WD
1682 default:
1683 BUG(); /* STE corruption */
1684 }
1685 }
1686
810871c5 1687 /* Nuke the existing STE_0 value, as we're going to rewrite it */
beb3c6a0
WD
1688 val = STRTAB_STE_0_V;
1689
1690 /* Bypass/fault */
8be39a1a
JPB
1691 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1692 if (!smmu_domain && disable_bypass)
ba08bdcb 1693 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
beb3c6a0 1694 else
ba08bdcb 1695 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
48ec83bc 1696
48ec83bc 1697 dst[0] = cpu_to_le64(val);
ba08bdcb
RM
1698 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1699 STRTAB_STE_1_SHCFG_INCOMING));
48ec83bc 1700 dst[2] = 0; /* Nuke the VMID */
704c0382
WD
1701 /*
1702 * The SMMU can perform negative caching, so we must sync
1703 * the STE regardless of whether the old value was live.
1704 */
1705 if (smmu)
48ec83bc
WD
1706 arm_smmu_sync_ste_for_sid(smmu, sid);
1707 return;
1708 }
1709
8be39a1a 1710 if (s1_cfg) {
48ec83bc
WD
1711 BUG_ON(ste_live);
1712 dst[1] = cpu_to_le64(
87f42391 1713 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
ba08bdcb
RM
1714 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1715 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1716 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
ba08bdcb 1717 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
48ec83bc 1718
9cff86fd
YX
1719 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1720 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
6380be05
PM
1721 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1722
7bc4f3fa 1723 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
87f42391
JPB
1724 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1725 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1726 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
48ec83bc
WD
1727 }
1728
8be39a1a 1729 if (s2_cfg) {
48ec83bc
WD
1730 BUG_ON(ste_live);
1731 dst[2] = cpu_to_le64(
8be39a1a
JPB
1732 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1733 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
48ec83bc
WD
1734#ifdef __BIG_ENDIAN
1735 STRTAB_STE_2_S2ENDI |
1736#endif
1737 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1738 STRTAB_STE_2_S2R);
1739
8be39a1a 1740 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
48ec83bc 1741
ba08bdcb 1742 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
48ec83bc
WD
1743 }
1744
9ce27afc
JPB
1745 if (master->ats_enabled)
1746 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1747 STRTAB_STE_1_EATS_TRANS));
1748
48ec83bc
WD
1749 arm_smmu_sync_ste_for_sid(smmu, sid);
1750 dst[0] = cpu_to_le64(val);
1751 arm_smmu_sync_ste_for_sid(smmu, sid);
1752
1753 /* It's likely that we'll want to use the new STE soon */
5e92946c
ZL
1754 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1755 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
48ec83bc
WD
1756}
1757
1758static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1759{
1760 unsigned int i;
48ec83bc
WD
1761
1762 for (i = 0; i < nent; ++i) {
8be39a1a 1763 arm_smmu_write_strtab_ent(NULL, -1, strtab);
48ec83bc
WD
1764 strtab += STRTAB_STE_DWORDS;
1765 }
1766}
1767
1768static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1769{
1770 size_t size;
1771 void *strtab;
1772 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1773 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1774
1775 if (desc->l2ptr)
1776 return 0;
1777
1778 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
69146e7b 1779 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
48ec83bc
WD
1780
1781 desc->span = STRTAB_SPLIT + 1;
04fa26c7 1782 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
9bb9069c 1783 GFP_KERNEL);
48ec83bc
WD
1784 if (!desc->l2ptr) {
1785 dev_err(smmu->dev,
1786 "failed to allocate l2 stream table for SID %u\n",
1787 sid);
1788 return -ENOMEM;
1789 }
1790
1791 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1792 arm_smmu_write_strtab_l1_desc(strtab, desc);
1793 return 0;
1794}
1795
1796/* IRQ and event handlers */
1797static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1798{
1799 int i;
1800 struct arm_smmu_device *smmu = dev;
1801 struct arm_smmu_queue *q = &smmu->evtq.q;
7c288a5b 1802 struct arm_smmu_ll_queue *llq = &q->llq;
48ec83bc
WD
1803 u64 evt[EVTQ_ENT_DWORDS];
1804
b4163fb3
JPB
1805 do {
1806 while (!queue_remove_raw(q, evt)) {
7417b99c 1807 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
48ec83bc 1808
b4163fb3
JPB
1809 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1810 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1811 dev_info(smmu->dev, "\t0x%016llx\n",
1812 (unsigned long long)evt[i]);
1813
1814 }
1815
1816 /*
1817 * Not much we can do on overflow, so scream and pretend we're
1818 * trying harder.
1819 */
2a8868f1 1820 if (queue_sync_prod_in(q) == -EOVERFLOW)
b4163fb3 1821 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
7c288a5b 1822 } while (!queue_empty(llq));
48ec83bc
WD
1823
1824 /* Sync our overflow flag, as we believe we're up to speed */
7c288a5b
WD
1825 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1826 Q_IDX(llq, llq->cons);
48ec83bc
WD
1827 return IRQ_HANDLED;
1828}
1829
b4163fb3
JPB
1830static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1831{
1832 u32 sid, ssid;
1833 u16 grpid;
1834 bool ssv, last;
1835
7417b99c
RM
1836 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1837 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1838 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1839 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1840 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
b4163fb3
JPB
1841
1842 dev_info(smmu->dev, "unexpected PRI request received:\n");
1843 dev_info(smmu->dev,
1844 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1845 sid, ssid, grpid, last ? "L" : "",
1846 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1847 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1848 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1849 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1cf9e54e 1850 evt[1] & PRIQ_1_ADDR_MASK);
b4163fb3
JPB
1851
1852 if (last) {
1853 struct arm_smmu_cmdq_ent cmd = {
1854 .opcode = CMDQ_OP_PRI_RESP,
1855 .substream_valid = ssv,
1856 .pri = {
1857 .sid = sid,
1858 .ssid = ssid,
1859 .grpid = grpid,
1860 .resp = PRI_RESP_DENY,
1861 },
1862 };
48ec83bc 1863
b4163fb3
JPB
1864 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1865 }
48ec83bc
WD
1866}
1867
1868static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1869{
1870 struct arm_smmu_device *smmu = dev;
1871 struct arm_smmu_queue *q = &smmu->priq.q;
7c288a5b 1872 struct arm_smmu_ll_queue *llq = &q->llq;
48ec83bc
WD
1873 u64 evt[PRIQ_ENT_DWORDS];
1874
b4163fb3
JPB
1875 do {
1876 while (!queue_remove_raw(q, evt))
1877 arm_smmu_handle_ppr(smmu, evt);
48ec83bc 1878
2a8868f1 1879 if (queue_sync_prod_in(q) == -EOVERFLOW)
b4163fb3 1880 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
7c288a5b 1881 } while (!queue_empty(llq));
48ec83bc
WD
1882
1883 /* Sync our overflow flag, as we believe we're up to speed */
7c288a5b
WD
1884 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1885 Q_IDX(llq, llq->cons);
1886 queue_sync_cons_out(q);
48ec83bc
WD
1887 return IRQ_HANDLED;
1888}
1889
48ec83bc
WD
1890static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1891
1892static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1893{
324ba108 1894 u32 gerror, gerrorn, active;
48ec83bc
WD
1895 struct arm_smmu_device *smmu = dev;
1896
1897 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1898 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1899
324ba108
PM
1900 active = gerror ^ gerrorn;
1901 if (!(active & GERROR_ERR_MASK))
48ec83bc
WD
1902 return IRQ_NONE; /* No errors pending */
1903
1904 dev_warn(smmu->dev,
1905 "unexpected global error reported (0x%08x), this could be serious\n",
324ba108 1906 active);
48ec83bc 1907
324ba108 1908 if (active & GERROR_SFM_ERR) {
48ec83bc
WD
1909 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1910 arm_smmu_device_disable(smmu);
1911 }
1912
324ba108 1913 if (active & GERROR_MSI_GERROR_ABT_ERR)
48ec83bc
WD
1914 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1915
b4163fb3 1916 if (active & GERROR_MSI_PRIQ_ABT_ERR)
48ec83bc 1917 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
48ec83bc 1918
b4163fb3 1919 if (active & GERROR_MSI_EVTQ_ABT_ERR)
48ec83bc 1920 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
48ec83bc 1921
dce032a1 1922 if (active & GERROR_MSI_CMDQ_ABT_ERR)
48ec83bc 1923 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
48ec83bc 1924
324ba108 1925 if (active & GERROR_PRIQ_ABT_ERR)
48ec83bc
WD
1926 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1927
324ba108 1928 if (active & GERROR_EVTQ_ABT_ERR)
48ec83bc
WD
1929 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1930
324ba108 1931 if (active & GERROR_CMDQ_ERR)
48ec83bc
WD
1932 arm_smmu_cmdq_skip_err(smmu);
1933
1934 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1935 return IRQ_HANDLED;
1936}
1937
f935448a
GS
1938static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1939{
1940 struct arm_smmu_device *smmu = dev;
1941
1942 arm_smmu_evtq_thread(irq, dev);
1943 if (smmu->features & ARM_SMMU_FEAT_PRI)
1944 arm_smmu_priq_thread(irq, dev);
1945
1946 return IRQ_HANDLED;
1947}
1948
1949static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1950{
1951 arm_smmu_gerror_handler(irq, dev);
f935448a
GS
1952 return IRQ_WAKE_THREAD;
1953}
1954
9ce27afc
JPB
1955static void
1956arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1957 struct arm_smmu_cmdq_ent *cmd)
1958{
1959 size_t log2_span;
1960 size_t span_mask;
1961 /* ATC invalidates are always on 4096-bytes pages */
1962 size_t inval_grain_shift = 12;
1963 unsigned long page_start, page_end;
1964
1965 *cmd = (struct arm_smmu_cmdq_ent) {
1966 .opcode = CMDQ_OP_ATC_INV,
1967 .substream_valid = !!ssid,
1968 .atc.ssid = ssid,
1969 };
1970
1971 if (!size) {
1972 cmd->atc.size = ATC_INV_SIZE_ALL;
1973 return;
1974 }
1975
1976 page_start = iova >> inval_grain_shift;
1977 page_end = (iova + size - 1) >> inval_grain_shift;
1978
1979 /*
1980 * In an ATS Invalidate Request, the address must be aligned on the
1981 * range size, which must be a power of two number of page sizes. We
1982 * thus have to choose between grossly over-invalidating the region, or
1983 * splitting the invalidation into multiple commands. For simplicity
1984 * we'll go with the first solution, but should refine it in the future
1985 * if multiple commands are shown to be more efficient.
1986 *
1987 * Find the smallest power of two that covers the range. The most
1988 * significant differing bit between the start and end addresses,
1989 * fls(start ^ end), indicates the required span. For example:
1990 *
1991 * We want to invalidate pages [8; 11]. This is already the ideal range:
1992 * x = 0b1000 ^ 0b1011 = 0b11
1993 * span = 1 << fls(x) = 4
1994 *
1995 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1996 * x = 0b0111 ^ 0b1010 = 0b1101
1997 * span = 1 << fls(x) = 16
1998 */
1999 log2_span = fls_long(page_start ^ page_end);
2000 span_mask = (1ULL << log2_span) - 1;
2001
2002 page_start &= ~span_mask;
2003
2004 cmd->atc.addr = page_start << inval_grain_shift;
2005 cmd->atc.size = log2_span;
2006}
2007
2008static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
2009 struct arm_smmu_cmdq_ent *cmd)
2010{
2011 int i;
2012
2013 if (!master->ats_enabled)
2014 return 0;
2015
2016 for (i = 0; i < master->num_sids; i++) {
2017 cmd->atc.sid = master->sids[i];
2018 arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
2019 }
2020
2021 return arm_smmu_cmdq_issue_sync(master->smmu);
2022}
2023
2024static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2025 int ssid, unsigned long iova, size_t size)
2026{
2027 int ret = 0;
2028 unsigned long flags;
2029 struct arm_smmu_cmdq_ent cmd;
2030 struct arm_smmu_master *master;
2031
2032 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2033 return 0;
2034
cdb8a3c3
WD
2035 /*
2036 * Ensure that we've completed prior invalidation of the main TLBs
2037 * before we read 'nr_ats_masters' in case of a concurrent call to
2038 * arm_smmu_enable_ats():
2039 *
2040 * // unmap() // arm_smmu_enable_ats()
2041 * TLBI+SYNC atomic_inc(&nr_ats_masters);
2042 * smp_mb(); [...]
2043 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
2044 *
2045 * Ensures that we always see the incremented 'nr_ats_masters' count if
2046 * ATS was enabled at the PCI device before completion of the TLBI.
2047 */
2048 smp_mb();
2049 if (!atomic_read(&smmu_domain->nr_ats_masters))
2050 return 0;
2051
9ce27afc
JPB
2052 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2053
2054 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2055 list_for_each_entry(master, &smmu_domain->devices, domain_head)
2056 ret |= arm_smmu_atc_inv_master(master, &cmd);
2057 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2058
2059 return ret ? -ETIMEDOUT : 0;
2060}
2061
48ec83bc 2062/* IO_PGTABLE API */
48ec83bc
WD
2063static void arm_smmu_tlb_inv_context(void *cookie)
2064{
2065 struct arm_smmu_domain *smmu_domain = cookie;
2066 struct arm_smmu_device *smmu = smmu_domain->smmu;
2067 struct arm_smmu_cmdq_ent cmd;
2068
2069 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2070 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
2071 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
2072 cmd.tlbi.vmid = 0;
2073 } else {
2074 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
2075 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2076 }
2077
9662b99a
ZL
2078 /*
2079 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2080 * PTEs previously cleared by unmaps on the current CPU not yet visible
587e6c10
WD
2081 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2082 * insertion to guarantee those are observed before the TLBI. Do be
2083 * careful, 007.
9662b99a 2084 */
48ec83bc 2085 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
5e731073 2086 arm_smmu_cmdq_issue_sync(smmu);
353e3cf8 2087 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
48ec83bc
WD
2088}
2089
2af2e72b
WD
2090static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
2091 size_t granule, bool leaf,
2092 struct arm_smmu_domain *smmu_domain)
48ec83bc 2093{
2af2e72b 2094 u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
48ec83bc 2095 struct arm_smmu_device *smmu = smmu_domain->smmu;
353e3cf8 2096 unsigned long start = iova, end = iova + size;
2af2e72b 2097 int i = 0;
48ec83bc
WD
2098 struct arm_smmu_cmdq_ent cmd = {
2099 .tlbi = {
2100 .leaf = leaf,
48ec83bc
WD
2101 },
2102 };
2103
7314ca86
WD
2104 if (!size)
2105 return;
2106
48ec83bc
WD
2107 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2108 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
2109 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
2110 } else {
2111 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
2112 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2113 }
2114
2af2e72b
WD
2115 while (iova < end) {
2116 if (i == CMDQ_BATCH_ENTRIES) {
2117 arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
2118 i = 0;
2119 }
2120
2121 cmd.tlbi.addr = iova;
2122 arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
2123 iova += granule;
2124 i++;
2125 }
2126
2127 arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
353e3cf8
WD
2128
2129 /*
2130 * Unfortunately, this can't be leaf-only since we may have
2131 * zapped an entire table.
2132 */
2133 arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
48ec83bc
WD
2134}
2135
3951c41a
WD
2136static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2137 unsigned long iova, size_t granule,
abfd6fe0
WD
2138 void *cookie)
2139{
2af2e72b
WD
2140 struct arm_smmu_domain *smmu_domain = cookie;
2141 struct iommu_domain *domain = &smmu_domain->domain;
2142
2143 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
abfd6fe0
WD
2144}
2145
05aed941
WD
2146static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2147 size_t granule, void *cookie)
2148{
2af2e72b 2149 arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
05aed941
WD
2150}
2151
2152static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
2153 size_t granule, void *cookie)
2154{
2af2e72b 2155 arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
48ec83bc
WD
2156}
2157
298f7889 2158static const struct iommu_flush_ops arm_smmu_flush_ops = {
48ec83bc 2159 .tlb_flush_all = arm_smmu_tlb_inv_context,
05aed941
WD
2160 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2161 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
abfd6fe0 2162 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
48ec83bc
WD
2163};
2164
2165/* IOMMU API */
2166static bool arm_smmu_capable(enum iommu_cap cap)
2167{
2168 switch (cap) {
2169 case IOMMU_CAP_CACHE_COHERENCY:
2170 return true;
48ec83bc
WD
2171 case IOMMU_CAP_NOEXEC:
2172 return true;
2173 default:
2174 return false;
2175 }
2176}
2177
2178static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2179{
2180 struct arm_smmu_domain *smmu_domain;
2181
beb3c6a0
WD
2182 if (type != IOMMU_DOMAIN_UNMANAGED &&
2183 type != IOMMU_DOMAIN_DMA &&
2184 type != IOMMU_DOMAIN_IDENTITY)
48ec83bc
WD
2185 return NULL;
2186
2187 /*
2188 * Allocate the domain and initialise some of its data structures.
2189 * We can't really do anything meaningful until we've added a
2190 * master.
2191 */
2192 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2193 if (!smmu_domain)
2194 return NULL;
2195
9adb9594
RM
2196 if (type == IOMMU_DOMAIN_DMA &&
2197 iommu_get_dma_cookie(&smmu_domain->domain)) {
2198 kfree(smmu_domain);
2199 return NULL;
2200 }
2201
48ec83bc 2202 mutex_init(&smmu_domain->init_mutex);
2a7e62f5
JPB
2203 INIT_LIST_HEAD(&smmu_domain->devices);
2204 spin_lock_init(&smmu_domain->devices_lock);
2205
48ec83bc
WD
2206 return &smmu_domain->domain;
2207}
2208
2209static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2210{
2211 int idx, size = 1 << span;
2212
2213 do {
2214 idx = find_first_zero_bit(map, size);
2215 if (idx == size)
2216 return -ENOSPC;
2217 } while (test_and_set_bit(idx, map));
2218
2219 return idx;
2220}
2221
2222static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2223{
2224 clear_bit(idx, map);
2225}
2226
2227static void arm_smmu_domain_free(struct iommu_domain *domain)
2228{
2229 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2230 struct arm_smmu_device *smmu = smmu_domain->smmu;
2231
9adb9594 2232 iommu_put_dma_cookie(domain);
a6e08fb2 2233 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
48ec83bc
WD
2234
2235 /* Free the CD and ASID, if we allocated them */
2236 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2237 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2238
7bc4f3fa 2239 if (cfg->cdcfg.cdtab) {
a557aff0 2240 arm_smmu_free_cd_tables(smmu_domain);
48ec83bc
WD
2241 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
2242 }
2243 } else {
2244 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2245 if (cfg->vmid)
2246 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2247 }
2248
2249 kfree(smmu_domain);
2250}
2251
2252static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2505ec6f 2253 struct arm_smmu_master *master,
48ec83bc
WD
2254 struct io_pgtable_cfg *pgtbl_cfg)
2255{
2256 int ret;
c0733a2c 2257 int asid;
48ec83bc
WD
2258 struct arm_smmu_device *smmu = smmu_domain->smmu;
2259 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
fb485eb1 2260 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
48ec83bc
WD
2261
2262 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
287980e4 2263 if (asid < 0)
48ec83bc
WD
2264 return asid;
2265
2505ec6f
JPB
2266 cfg->s1cdmax = master->ssid_bits;
2267
a557aff0
JPB
2268 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2269 if (ret)
48ec83bc 2270 goto out_free_asid;
48ec83bc 2271
c0733a2c 2272 cfg->cd.asid = (u16)asid;
d1e5f26f 2273 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
fb485eb1
RM
2274 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2275 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2276 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2277 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2278 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2279 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2280 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
205577ab 2281 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
492ddc79
JPB
2282
2283 /*
2284 * Note that this will end up calling arm_smmu_sync_cd() before
2285 * the master has been added to the devices list for this domain.
2286 * This isn't an issue because the STE hasn't been installed yet.
2287 */
2288 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2289 if (ret)
2290 goto out_free_cd_tables;
2291
48ec83bc
WD
2292 return 0;
2293
492ddc79
JPB
2294out_free_cd_tables:
2295 arm_smmu_free_cd_tables(smmu_domain);
48ec83bc
WD
2296out_free_asid:
2297 arm_smmu_bitmap_free(smmu->asid_map, asid);
2298 return ret;
2299}
2300
2301static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2505ec6f 2302 struct arm_smmu_master *master,
48ec83bc
WD
2303 struct io_pgtable_cfg *pgtbl_cfg)
2304{
c0733a2c 2305 int vmid;
48ec83bc
WD
2306 struct arm_smmu_device *smmu = smmu_domain->smmu;
2307 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
ac4b80e5 2308 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
48ec83bc
WD
2309
2310 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
287980e4 2311 if (vmid < 0)
48ec83bc
WD
2312 return vmid;
2313
ac4b80e5 2314 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
c0733a2c 2315 cfg->vmid = (u16)vmid;
48ec83bc 2316 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
ac4b80e5
WD
2317 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2318 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2319 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2320 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2321 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2322 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2323 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
48ec83bc
WD
2324 return 0;
2325}
2326
2505ec6f
JPB
2327static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2328 struct arm_smmu_master *master)
48ec83bc
WD
2329{
2330 int ret;
2331 unsigned long ias, oas;
2332 enum io_pgtable_fmt fmt;
2333 struct io_pgtable_cfg pgtbl_cfg;
2334 struct io_pgtable_ops *pgtbl_ops;
2335 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2505ec6f 2336 struct arm_smmu_master *,
48ec83bc
WD
2337 struct io_pgtable_cfg *);
2338 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2339 struct arm_smmu_device *smmu = smmu_domain->smmu;
2340
beb3c6a0
WD
2341 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2342 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2343 return 0;
2344 }
2345
48ec83bc
WD
2346 /* Restrict the stage to what we can actually support */
2347 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2348 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2349 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2350 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2351
2352 switch (smmu_domain->stage) {
2353 case ARM_SMMU_DOMAIN_S1:
dcd189e6
RM
2354 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2355 ias = min_t(unsigned long, ias, VA_BITS);
48ec83bc
WD
2356 oas = smmu->ias;
2357 fmt = ARM_64_LPAE_S1;
2358 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2359 break;
2360 case ARM_SMMU_DOMAIN_NESTED:
2361 case ARM_SMMU_DOMAIN_S2:
2362 ias = smmu->ias;
2363 oas = smmu->oas;
2364 fmt = ARM_64_LPAE_S2;
2365 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2366 break;
2367 default:
2368 return -EINVAL;
2369 }
2370
2371 pgtbl_cfg = (struct io_pgtable_cfg) {
d5466357 2372 .pgsize_bitmap = smmu->pgsize_bitmap,
48ec83bc
WD
2373 .ias = ias,
2374 .oas = oas,
4f41845b 2375 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
298f7889 2376 .tlb = &arm_smmu_flush_ops,
bdc6d973 2377 .iommu_dev = smmu->dev,
48ec83bc
WD
2378 };
2379
9662b99a
ZL
2380 if (smmu_domain->non_strict)
2381 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2382
48ec83bc
WD
2383 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2384 if (!pgtbl_ops)
2385 return -ENOMEM;
2386
d5466357 2387 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
6619c913 2388 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
455eb7d3 2389 domain->geometry.force_aperture = true;
48ec83bc 2390
2505ec6f 2391 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
57d72e15 2392 if (ret < 0) {
48ec83bc 2393 free_io_pgtable_ops(pgtbl_ops);
57d72e15
JPB
2394 return ret;
2395 }
48ec83bc 2396
57d72e15
JPB
2397 smmu_domain->pgtbl_ops = pgtbl_ops;
2398 return 0;
48ec83bc
WD
2399}
2400
48ec83bc
WD
2401static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2402{
2403 __le64 *step;
2404 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2405
2406 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2407 struct arm_smmu_strtab_l1_desc *l1_desc;
2408 int idx;
2409
2410 /* Two-level walk */
2411 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2412 l1_desc = &cfg->l1_desc[idx];
2413 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2414 step = &l1_desc->l2ptr[idx];
2415 } else {
2416 /* Simple linear lookup */
2417 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2418 }
2419
2420 return step;
2421}
2422
bcecaee4 2423static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
48ec83bc 2424{
563b5cbe 2425 int i, j;
8f785154 2426 struct arm_smmu_device *smmu = master->smmu;
48ec83bc 2427
bcecaee4
JPB
2428 for (i = 0; i < master->num_sids; ++i) {
2429 u32 sid = master->sids[i];
48ec83bc
WD
2430 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2431
563b5cbe
RM
2432 /* Bridged PCI devices may end up with duplicated IDs */
2433 for (j = 0; j < i; j++)
bcecaee4 2434 if (master->sids[j] == sid)
563b5cbe
RM
2435 break;
2436 if (j < i)
2437 continue;
2438
8be39a1a 2439 arm_smmu_write_strtab_ent(master, sid, step);
48ec83bc 2440 }
48ec83bc
WD
2441}
2442
097a7df2 2443#ifdef CONFIG_PCI_ATS
bfff88ec 2444static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
9ce27afc 2445{
9ce27afc
JPB
2446 struct pci_dev *pdev;
2447 struct arm_smmu_device *smmu = master->smmu;
2448 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2449
2450 if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
2451 !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
bfff88ec 2452 return false;
9ce27afc
JPB
2453
2454 pdev = to_pci_dev(master->dev);
bfff88ec
WD
2455 return !pdev->untrusted && pdev->ats_cap;
2456}
097a7df2
Y
2457#else
2458static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2459{
2460 return false;
2461}
2462#endif
9ce27afc 2463
bfff88ec
WD
2464static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2465{
2466 size_t stu;
2467 struct pci_dev *pdev;
2468 struct arm_smmu_device *smmu = master->smmu;
cdb8a3c3 2469 struct arm_smmu_domain *smmu_domain = master->domain;
9ce27afc 2470
bfff88ec
WD
2471 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2472 if (!master->ats_enabled)
2473 return;
9ce27afc
JPB
2474
2475 /* Smallest Translation Unit: log2 of the smallest supported granule */
2476 stu = __ffs(smmu->pgsize_bitmap);
bfff88ec 2477 pdev = to_pci_dev(master->dev);
9ce27afc 2478
cdb8a3c3
WD
2479 atomic_inc(&smmu_domain->nr_ats_masters);
2480 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
bfff88ec
WD
2481 if (pci_enable_ats(pdev, stu))
2482 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
9ce27afc
JPB
2483}
2484
2485static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2486{
8dd8f005 2487 struct arm_smmu_cmdq_ent cmd;
cdb8a3c3 2488 struct arm_smmu_domain *smmu_domain = master->domain;
8dd8f005 2489
bfff88ec 2490 if (!master->ats_enabled)
9ce27afc
JPB
2491 return;
2492
bfff88ec
WD
2493 pci_disable_ats(to_pci_dev(master->dev));
2494 /*
2495 * Ensure ATS is disabled at the endpoint before we issue the
2496 * ATC invalidation via the SMMU.
2497 */
2498 wmb();
8dd8f005
JPB
2499 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2500 arm_smmu_atc_inv_master(master, &cmd);
cdb8a3c3 2501 atomic_dec(&smmu_domain->nr_ats_masters);
9ce27afc
JPB
2502}
2503
bcecaee4 2504static void arm_smmu_detach_dev(struct arm_smmu_master *master)
bc7f2ce0 2505{
2a7e62f5
JPB
2506 unsigned long flags;
2507 struct arm_smmu_domain *smmu_domain = master->domain;
2508
2509 if (!smmu_domain)
8be39a1a
JPB
2510 return;
2511
cdb8a3c3
WD
2512 arm_smmu_disable_ats(master);
2513
2a7e62f5
JPB
2514 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2515 list_del(&master->domain_head);
2516 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2517
8be39a1a 2518 master->domain = NULL;
bfff88ec 2519 master->ats_enabled = false;
bcecaee4 2520 arm_smmu_install_ste_for_dev(master);
bc7f2ce0
WD
2521}
2522
48ec83bc
WD
2523static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2524{
2525 int ret = 0;
2a7e62f5 2526 unsigned long flags;
9b468f7d 2527 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
48ec83bc
WD
2528 struct arm_smmu_device *smmu;
2529 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
b54f4260 2530 struct arm_smmu_master *master;
48ec83bc 2531
9b468f7d 2532 if (!fwspec)
48ec83bc
WD
2533 return -ENOENT;
2534
9b468f7d 2535 master = fwspec->iommu_priv;
8f785154 2536 smmu = master->smmu;
8f785154 2537
8be39a1a 2538 arm_smmu_detach_dev(master);
48ec83bc 2539
48ec83bc
WD
2540 mutex_lock(&smmu_domain->init_mutex);
2541
2542 if (!smmu_domain->smmu) {
2543 smmu_domain->smmu = smmu;
2505ec6f 2544 ret = arm_smmu_domain_finalise(domain, master);
48ec83bc
WD
2545 if (ret) {
2546 smmu_domain->smmu = NULL;
2547 goto out_unlock;
2548 }
2549 } else if (smmu_domain->smmu != smmu) {
2550 dev_err(dev,
2551 "cannot attach to SMMU %s (upstream of %s)\n",
2552 dev_name(smmu_domain->smmu->dev),
2553 dev_name(smmu->dev));
2554 ret = -ENXIO;
2555 goto out_unlock;
2505ec6f
JPB
2556 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2557 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2558 dev_err(dev,
2559 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2560 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2561 ret = -EINVAL;
2562 goto out_unlock;
48ec83bc
WD
2563 }
2564
8be39a1a 2565 master->domain = smmu_domain;
cbf8277e 2566
9ce27afc 2567 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
bfff88ec 2568 master->ats_enabled = arm_smmu_ats_supported(master);
9ce27afc 2569
bcecaee4 2570 arm_smmu_install_ste_for_dev(master);
cdb8a3c3
WD
2571
2572 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2573 list_add(&master->domain_head, &smmu_domain->devices);
2574 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2575
bfff88ec 2576 arm_smmu_enable_ats(master);
cdb8a3c3 2577
48ec83bc
WD
2578out_unlock:
2579 mutex_unlock(&smmu_domain->init_mutex);
2580 return ret;
2581}
2582
48ec83bc 2583static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
781ca2de 2584 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
48ec83bc 2585{
58188afe 2586 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
48ec83bc
WD
2587
2588 if (!ops)
2589 return -ENODEV;
2590
58188afe 2591 return ops->map(ops, iova, paddr, size, prot);
48ec83bc
WD
2592}
2593
56f8af5e
WD
2594static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2595 size_t size, struct iommu_iotlb_gather *gather)
48ec83bc 2596{
9ce27afc
JPB
2597 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2598 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
48ec83bc
WD
2599
2600 if (!ops)
2601 return 0;
2602
353e3cf8 2603 return ops->unmap(ops, iova, size, gather);
48ec83bc
WD
2604}
2605
07fdef34
ZL
2606static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2607{
2608 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2609
2610 if (smmu_domain->smmu)
2611 arm_smmu_tlb_inv_context(smmu_domain);
2612}
2613
56f8af5e
WD
2614static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2615 struct iommu_iotlb_gather *gather)
32b12449 2616{
2af2e72b 2617 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
32b12449 2618
2af2e72b
WD
2619 arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2620 gather->pgsize, true, smmu_domain);
32b12449
RM
2621}
2622
48ec83bc
WD
2623static phys_addr_t
2624arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2625{
58188afe 2626 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
48ec83bc 2627
bdf95923
SG
2628 if (domain->type == IOMMU_DOMAIN_IDENTITY)
2629 return iova;
2630
48ec83bc
WD
2631 if (!ops)
2632 return 0;
2633
58188afe 2634 return ops->iova_to_phys(ops, iova);
48ec83bc
WD
2635}
2636
8f785154 2637static struct platform_driver arm_smmu_driver;
48ec83bc 2638
778de074
LP
2639static
2640struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
48ec83bc 2641{
67843bba
SP
2642 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2643 fwnode);
8f785154
RM
2644 put_device(dev);
2645 return dev ? dev_get_drvdata(dev) : NULL;
48ec83bc
WD
2646}
2647
2648static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2649{
2650 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2651
2652 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2653 limit *= 1UL << STRTAB_SPLIT;
2654
2655 return sid < limit;
2656}
2657
8f785154
RM
2658static struct iommu_ops arm_smmu_ops;
2659
48ec83bc
WD
2660static int arm_smmu_add_device(struct device *dev)
2661{
2662 int i, ret;
48ec83bc 2663 struct arm_smmu_device *smmu;
b54f4260 2664 struct arm_smmu_master *master;
9b468f7d 2665 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
8f785154 2666 struct iommu_group *group;
48ec83bc 2667
8f785154 2668 if (!fwspec || fwspec->ops != &arm_smmu_ops)
48ec83bc 2669 return -ENODEV;
8f785154
RM
2670 /*
2671 * We _can_ actually withstand dodgy bus code re-calling add_device()
2672 * without an intervening remove_device()/of_xlate() sequence, but
2673 * we're not going to do so quietly...
2674 */
2675 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
2676 master = fwspec->iommu_priv;
2677 smmu = master->smmu;
48ec83bc 2678 } else {
778de074 2679 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
8f785154
RM
2680 if (!smmu)
2681 return -ENODEV;
2682 master = kzalloc(sizeof(*master), GFP_KERNEL);
2683 if (!master)
2684 return -ENOMEM;
2685
9ce27afc 2686 master->dev = dev;
8f785154 2687 master->smmu = smmu;
bcecaee4
JPB
2688 master->sids = fwspec->ids;
2689 master->num_sids = fwspec->num_ids;
8f785154 2690 fwspec->iommu_priv = master;
48ec83bc
WD
2691 }
2692
8f785154 2693 /* Check the SIDs are in range of the SMMU and our stream table */
bcecaee4
JPB
2694 for (i = 0; i < master->num_sids; i++) {
2695 u32 sid = master->sids[i];
48ec83bc 2696
8f785154
RM
2697 if (!arm_smmu_sid_in_range(smmu, sid))
2698 return -ERANGE;
48ec83bc 2699
8f785154
RM
2700 /* Ensure l2 strtab is initialised */
2701 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2702 ret = arm_smmu_init_l2_strtab(smmu, sid);
2703 if (ret)
2704 return ret;
2705 }
48ec83bc
WD
2706 }
2707
89535821
JPB
2708 master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2709
2710 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2711 master->ssid_bits = min_t(u8, master->ssid_bits,
2712 CTXDESC_LINEAR_CDMAX);
2713
8f785154 2714 group = iommu_group_get_for_dev(dev);
9648cbc9 2715 if (!IS_ERR(group)) {
8f785154 2716 iommu_group_put(group);
9648cbc9
JR
2717 iommu_device_link(&smmu->iommu, dev);
2718 }
9a4a9d8c 2719
8f785154 2720 return PTR_ERR_OR_ZERO(group);
48ec83bc
WD
2721}
2722
2723static void arm_smmu_remove_device(struct device *dev)
2724{
9b468f7d 2725 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
b54f4260 2726 struct arm_smmu_master *master;
9648cbc9 2727 struct arm_smmu_device *smmu;
8f785154
RM
2728
2729 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2730 return;
2731
2732 master = fwspec->iommu_priv;
9648cbc9 2733 smmu = master->smmu;
8be39a1a 2734 arm_smmu_detach_dev(master);
48ec83bc 2735 iommu_group_remove_device(dev);
9648cbc9 2736 iommu_device_unlink(&smmu->iommu, dev);
8f785154
RM
2737 kfree(master);
2738 iommu_fwspec_free(dev);
48ec83bc
WD
2739}
2740
08d4ca2a
RM
2741static struct iommu_group *arm_smmu_device_group(struct device *dev)
2742{
2743 struct iommu_group *group;
2744
2745 /*
2746 * We don't support devices sharing stream IDs other than PCI RID
2747 * aliases, since the necessary ID-to-device lookup becomes rather
2748 * impractical given a potential sparse 32-bit stream ID space.
2749 */
2750 if (dev_is_pci(dev))
2751 group = pci_device_group(dev);
2752 else
2753 group = generic_device_group(dev);
2754
2755 return group;
2756}
2757
48ec83bc
WD
2758static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2759 enum iommu_attr attr, void *data)
2760{
2761 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2762
9662b99a
ZL
2763 switch (domain->type) {
2764 case IOMMU_DOMAIN_UNMANAGED:
2765 switch (attr) {
2766 case DOMAIN_ATTR_NESTING:
2767 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2768 return 0;
2769 default:
2770 return -ENODEV;
2771 }
2772 break;
2773 case IOMMU_DOMAIN_DMA:
2774 switch (attr) {
2775 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2776 *(int *)data = smmu_domain->non_strict;
2777 return 0;
2778 default:
2779 return -ENODEV;
2780 }
2781 break;
48ec83bc 2782 default:
9662b99a 2783 return -EINVAL;
48ec83bc
WD
2784 }
2785}
2786
2787static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2788 enum iommu_attr attr, void *data)
2789{
2790 int ret = 0;
2791 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2792
2793 mutex_lock(&smmu_domain->init_mutex);
2794
9662b99a
ZL
2795 switch (domain->type) {
2796 case IOMMU_DOMAIN_UNMANAGED:
2797 switch (attr) {
2798 case DOMAIN_ATTR_NESTING:
2799 if (smmu_domain->smmu) {
2800 ret = -EPERM;
2801 goto out_unlock;
2802 }
2803
2804 if (*(int *)data)
2805 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2806 else
2807 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2808 break;
2809 default:
2810 ret = -ENODEV;
2811 }
2812 break;
2813 case IOMMU_DOMAIN_DMA:
2814 switch(attr) {
2815 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2816 smmu_domain->non_strict = *(int *)data;
2817 break;
2818 default:
2819 ret = -ENODEV;
48ec83bc 2820 }
48ec83bc
WD
2821 break;
2822 default:
9662b99a 2823 ret = -EINVAL;
48ec83bc
WD
2824 }
2825
2826out_unlock:
2827 mutex_unlock(&smmu_domain->init_mutex);
2828 return ret;
2829}
2830
8f785154
RM
2831static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2832{
8f785154
RM
2833 return iommu_fwspec_add_ids(dev, args->args, 1);
2834}
2835
50019f09
EA
2836static void arm_smmu_get_resv_regions(struct device *dev,
2837 struct list_head *head)
2838{
2839 struct iommu_resv_region *region;
2840 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2841
2842 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
9d3a4de4 2843 prot, IOMMU_RESV_SW_MSI);
50019f09
EA
2844 if (!region)
2845 return;
2846
2847 list_add_tail(&region->list, head);
273df963
RM
2848
2849 iommu_dma_get_resv_regions(dev, head);
50019f09
EA
2850}
2851
2852static void arm_smmu_put_resv_regions(struct device *dev,
2853 struct list_head *head)
2854{
2855 struct iommu_resv_region *entry, *next;
2856
2857 list_for_each_entry_safe(entry, next, head, list)
2858 kfree(entry);
2859}
2860
48ec83bc
WD
2861static struct iommu_ops arm_smmu_ops = {
2862 .capable = arm_smmu_capable,
2863 .domain_alloc = arm_smmu_domain_alloc,
2864 .domain_free = arm_smmu_domain_free,
2865 .attach_dev = arm_smmu_attach_dev,
48ec83bc
WD
2866 .map = arm_smmu_map,
2867 .unmap = arm_smmu_unmap,
07fdef34 2868 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
32b12449 2869 .iotlb_sync = arm_smmu_iotlb_sync,
48ec83bc
WD
2870 .iova_to_phys = arm_smmu_iova_to_phys,
2871 .add_device = arm_smmu_add_device,
2872 .remove_device = arm_smmu_remove_device,
08d4ca2a 2873 .device_group = arm_smmu_device_group,
48ec83bc
WD
2874 .domain_get_attr = arm_smmu_domain_get_attr,
2875 .domain_set_attr = arm_smmu_domain_set_attr,
8f785154 2876 .of_xlate = arm_smmu_of_xlate,
50019f09
EA
2877 .get_resv_regions = arm_smmu_get_resv_regions,
2878 .put_resv_regions = arm_smmu_put_resv_regions,
48ec83bc
WD
2879 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2880};
2881
2882/* Probing and initialisation functions */
2883static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2884 struct arm_smmu_queue *q,
2885 unsigned long prod_off,
2886 unsigned long cons_off,
d25f6ead 2887 size_t dwords, const char *name)
48ec83bc 2888{
d25f6ead
WD
2889 size_t qsz;
2890
2891 do {
52be8637 2892 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
d25f6ead
WD
2893 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2894 GFP_KERNEL);
2895 if (q->base || qsz < PAGE_SIZE)
2896 break;
2897
52be8637 2898 q->llq.max_n_shift--;
d25f6ead 2899 } while (1);
48ec83bc 2900
48ec83bc 2901 if (!q->base) {
d25f6ead
WD
2902 dev_err(smmu->dev,
2903 "failed to allocate queue (0x%zx bytes) for %s\n",
2904 qsz, name);
48ec83bc
WD
2905 return -ENOMEM;
2906 }
2907
d25f6ead
WD
2908 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2909 dev_info(smmu->dev, "allocated %u entries for %s\n",
52be8637 2910 1 << q->llq.max_n_shift, name);
d25f6ead
WD
2911 }
2912
e5b829de
LC
2913 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2914 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
48ec83bc
WD
2915 q->ent_dwords = dwords;
2916
2917 q->q_base = Q_BASE_RWA;
1cf9e54e 2918 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
52be8637 2919 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
48ec83bc 2920
52be8637 2921 q->llq.prod = q->llq.cons = 0;
48ec83bc
WD
2922 return 0;
2923}
2924
587e6c10
WD
2925static void arm_smmu_cmdq_free_bitmap(void *data)
2926{
2927 unsigned long *bitmap = data;
2928 bitmap_free(bitmap);
2929}
2930
2931static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2932{
2933 int ret = 0;
2934 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2935 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2936 atomic_long_t *bitmap;
2937
2938 atomic_set(&cmdq->owner_prod, 0);
2939 atomic_set(&cmdq->lock, 0);
2940
2941 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2942 if (!bitmap) {
2943 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2944 ret = -ENOMEM;
2945 } else {
2946 cmdq->valid_map = bitmap;
2947 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2948 }
2949
2950 return ret;
2951}
2952
48ec83bc
WD
2953static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2954{
2955 int ret;
2956
2957 /* cmdq */
48ec83bc 2958 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
d25f6ead
WD
2959 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2960 "cmdq");
48ec83bc 2961 if (ret)
04fa26c7 2962 return ret;
48ec83bc 2963
587e6c10
WD
2964 ret = arm_smmu_cmdq_init(smmu);
2965 if (ret)
2966 return ret;
2967
48ec83bc
WD
2968 /* evtq */
2969 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
d25f6ead
WD
2970 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2971 "evtq");
48ec83bc 2972 if (ret)
04fa26c7 2973 return ret;
48ec83bc
WD
2974
2975 /* priq */
2976 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2977 return 0;
2978
04fa26c7 2979 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
d25f6ead
WD
2980 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2981 "priq");
48ec83bc
WD
2982}
2983
2984static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2985{
2986 unsigned int i;
2987 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2988 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2989 void *strtab = smmu->strtab_cfg.strtab;
2990
2991 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2992 if (!cfg->l1_desc) {
2993 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2994 return -ENOMEM;
2995 }
2996
2997 for (i = 0; i < cfg->num_l1_ents; ++i) {
2998 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2999 strtab += STRTAB_L1_DESC_DWORDS << 3;
3000 }
3001
3002 return 0;
3003}
3004
3005static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3006{
3007 void *strtab;
3008 u64 reg;
d2e88e7c 3009 u32 size, l1size;
48ec83bc
WD
3010 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3011
692c4e42
NW
3012 /* Calculate the L1 size, capped to the SIDSIZE. */
3013 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3014 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
d2e88e7c
WD
3015 cfg->num_l1_ents = 1 << size;
3016
3017 size += STRTAB_SPLIT;
3018 if (size < smmu->sid_bits)
48ec83bc
WD
3019 dev_warn(smmu->dev,
3020 "2-level strtab only covers %u/%u bits of SID\n",
d2e88e7c 3021 size, smmu->sid_bits);
48ec83bc 3022
d2e88e7c 3023 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
04fa26c7 3024 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
9bb9069c 3025 GFP_KERNEL);
48ec83bc
WD
3026 if (!strtab) {
3027 dev_err(smmu->dev,
3028 "failed to allocate l1 stream table (%u bytes)\n",
3029 size);
3030 return -ENOMEM;
3031 }
3032 cfg->strtab = strtab;
3033
3034 /* Configure strtab_base_cfg for 2 levels */
cbcee19a
RM
3035 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3036 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3037 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
48ec83bc
WD
3038 cfg->strtab_base_cfg = reg;
3039
04fa26c7 3040 return arm_smmu_init_l1_strtab(smmu);
48ec83bc
WD
3041}
3042
3043static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3044{
3045 void *strtab;
3046 u64 reg;
3047 u32 size;
3048 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3049
3050 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
04fa26c7 3051 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
9bb9069c 3052 GFP_KERNEL);
48ec83bc
WD
3053 if (!strtab) {
3054 dev_err(smmu->dev,
3055 "failed to allocate linear stream table (%u bytes)\n",
3056 size);
3057 return -ENOMEM;
3058 }
3059 cfg->strtab = strtab;
3060 cfg->num_l1_ents = 1 << smmu->sid_bits;
3061
3062 /* Configure strtab_base_cfg for a linear table covering all SIDs */
cbcee19a
RM
3063 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3064 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
48ec83bc
WD
3065 cfg->strtab_base_cfg = reg;
3066
3067 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3068 return 0;
3069}
3070
3071static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3072{
3073 u64 reg;
3074 int ret;
3075
3076 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3077 ret = arm_smmu_init_strtab_2lvl(smmu);
3078 else
3079 ret = arm_smmu_init_strtab_linear(smmu);
3080
3081 if (ret)
3082 return ret;
3083
3084 /* Set the strtab base address */
1cf9e54e 3085 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
48ec83bc
WD
3086 reg |= STRTAB_BASE_RA;
3087 smmu->strtab_cfg.strtab_base = reg;
3088
3089 /* Allocate the first VMID for stage-2 bypass STEs */
3090 set_bit(0, smmu->vmid_map);
3091 return 0;
3092}
3093
48ec83bc
WD
3094static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3095{
3096 int ret;
3097
3098 ret = arm_smmu_init_queues(smmu);
3099 if (ret)
3100 return ret;
3101
04fa26c7 3102 return arm_smmu_init_strtab(smmu);
48ec83bc
WD
3103}
3104
3105static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3106 unsigned int reg_off, unsigned int ack_off)
3107{
3108 u32 reg;
3109
3110 writel_relaxed(val, smmu->base + reg_off);
3111 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3112 1, ARM_SMMU_POLL_TIMEOUT_US);
3113}
3114
dc87a98d
RM
3115/* GBPA is "special" */
3116static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3117{
3118 int ret;
3119 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3120
3121 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3122 1, ARM_SMMU_POLL_TIMEOUT_US);
3123 if (ret)
3124 return ret;
3125
3126 reg &= ~clr;
3127 reg |= set;
3128 writel_relaxed(reg | GBPA_UPDATE, gbpa);
b63b3439
WD
3129 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3130 1, ARM_SMMU_POLL_TIMEOUT_US);
3131
3132 if (ret)
3133 dev_err(smmu->dev, "GBPA not responding to update\n");
3134 return ret;
dc87a98d
RM
3135}
3136
166bdbd2
MZ
3137static void arm_smmu_free_msis(void *data)
3138{
3139 struct device *dev = data;
3140 platform_msi_domain_free_irqs(dev);
3141}
3142
3143static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3144{
3145 phys_addr_t doorbell;
3146 struct device *dev = msi_desc_to_dev(desc);
3147 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3148 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3149
3150 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
1cf9e54e 3151 doorbell &= MSI_CFG0_ADDR_MASK;
166bdbd2
MZ
3152
3153 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3154 writel_relaxed(msg->data, smmu->base + cfg[1]);
cbcee19a 3155 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
166bdbd2
MZ
3156}
3157
3158static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3159{
3160 struct msi_desc *desc;
3161 int ret, nvec = ARM_SMMU_MAX_MSIS;
3162 struct device *dev = smmu->dev;
3163
3164 /* Clear the MSI address regs */
3165 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3166 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3167
3168 if (smmu->features & ARM_SMMU_FEAT_PRI)
3169 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3170 else
3171 nvec--;
3172
3173 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3174 return;
3175
940ded9c
NW
3176 if (!dev->msi_domain) {
3177 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3178 return;
3179 }
3180
166bdbd2
MZ
3181 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3182 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3183 if (ret) {
940ded9c 3184 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
166bdbd2
MZ
3185 return;
3186 }
3187
3188 for_each_msi_entry(desc, dev) {
3189 switch (desc->platform.msi_index) {
3190 case EVTQ_MSI_INDEX:
3191 smmu->evtq.q.irq = desc->irq;
3192 break;
3193 case GERROR_MSI_INDEX:
3194 smmu->gerr_irq = desc->irq;
3195 break;
3196 case PRIQ_MSI_INDEX:
3197 smmu->priq.q.irq = desc->irq;
3198 break;
3199 default: /* Unknown */
3200 continue;
3201 }
3202 }
3203
3204 /* Add callback to free MSIs on teardown */
3205 devm_add_action(dev, arm_smmu_free_msis, dev);
3206}
3207
f935448a 3208static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
48ec83bc 3209{
f935448a 3210 int irq, ret;
48ec83bc 3211
166bdbd2 3212 arm_smmu_setup_msis(smmu);
48ec83bc 3213
166bdbd2 3214 /* Request interrupt lines */
48ec83bc
WD
3215 irq = smmu->evtq.q.irq;
3216 if (irq) {
b4163fb3 3217 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
48ec83bc 3218 arm_smmu_evtq_thread,
b4163fb3
JPB
3219 IRQF_ONESHOT,
3220 "arm-smmu-v3-evtq", smmu);
287980e4 3221 if (ret < 0)
48ec83bc 3222 dev_warn(smmu->dev, "failed to enable evtq irq\n");
4c8996d7
RM
3223 } else {
3224 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
48ec83bc
WD
3225 }
3226
48ec83bc
WD
3227 irq = smmu->gerr_irq;
3228 if (irq) {
3229 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3230 0, "arm-smmu-v3-gerror", smmu);
287980e4 3231 if (ret < 0)
48ec83bc 3232 dev_warn(smmu->dev, "failed to enable gerror irq\n");
4c8996d7
RM
3233 } else {
3234 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
48ec83bc
WD
3235 }
3236
3237 if (smmu->features & ARM_SMMU_FEAT_PRI) {
48ec83bc
WD
3238 irq = smmu->priq.q.irq;
3239 if (irq) {
b4163fb3 3240 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
48ec83bc 3241 arm_smmu_priq_thread,
b4163fb3
JPB
3242 IRQF_ONESHOT,
3243 "arm-smmu-v3-priq",
48ec83bc 3244 smmu);
287980e4 3245 if (ret < 0)
48ec83bc
WD
3246 dev_warn(smmu->dev,
3247 "failed to enable priq irq\n");
4c8996d7
RM
3248 } else {
3249 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
48ec83bc
WD
3250 }
3251 }
f935448a
GS
3252}
3253
3254static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3255{
3256 int ret, irq;
3257 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3258
3259 /* Disable IRQs first */
3260 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3261 ARM_SMMU_IRQ_CTRLACK);
3262 if (ret) {
3263 dev_err(smmu->dev, "failed to disable irqs\n");
3264 return ret;
3265 }
3266
3267 irq = smmu->combined_irq;
3268 if (irq) {
3269 /*
657135f3
JG
3270 * Cavium ThunderX2 implementation doesn't support unique irq
3271 * lines. Use a single irq line for all the SMMUv3 interrupts.
f935448a
GS
3272 */
3273 ret = devm_request_threaded_irq(smmu->dev, irq,
3274 arm_smmu_combined_irq_handler,
3275 arm_smmu_combined_irq_thread,
3276 IRQF_ONESHOT,
3277 "arm-smmu-v3-combined-irq", smmu);
3278 if (ret < 0)
3279 dev_warn(smmu->dev, "failed to enable combined irq\n");
3280 } else
3281 arm_smmu_setup_unique_irqs(smmu);
3282
3283 if (smmu->features & ARM_SMMU_FEAT_PRI)
3284 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
48ec83bc
WD
3285
3286 /* Enable interrupt generation on the SMMU */
ccd6385d 3287 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
48ec83bc
WD
3288 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3289 if (ret)
3290 dev_warn(smmu->dev, "failed to enable irqs\n");
3291
3292 return 0;
3293}
3294
3295static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3296{
3297 int ret;
3298
3299 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3300 if (ret)
3301 dev_err(smmu->dev, "failed to clear cr0\n");
3302
3303 return ret;
3304}
3305
dc87a98d 3306static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
48ec83bc
WD
3307{
3308 int ret;
3309 u32 reg, enables;
3310 struct arm_smmu_cmdq_ent cmd;
3311
3312 /* Clear CR0 and sync (disables SMMU and queue processing) */
3313 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
b63b3439 3314 if (reg & CR0_SMMUEN) {
48ec83bc 3315 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3f54c447
WD
3316 WARN_ON(is_kdump_kernel() && !disable_bypass);
3317 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
b63b3439 3318 }
48ec83bc
WD
3319
3320 ret = arm_smmu_device_disable(smmu);
3321 if (ret)
3322 return ret;
3323
3324 /* CR1 (table and queue memory attributes) */
cbcee19a
RM
3325 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3326 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3327 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3328 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3329 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3330 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
48ec83bc
WD
3331 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3332
3333 /* CR2 (random crap) */
3334 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3335 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3336
3337 /* Stream table */
3338 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3339 smmu->base + ARM_SMMU_STRTAB_BASE);
3340 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3341 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3342
3343 /* Command queue */
3344 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
52be8637
WD
3345 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3346 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
48ec83bc
WD
3347
3348 enables = CR0_CMDQEN;
3349 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3350 ARM_SMMU_CR0ACK);
3351 if (ret) {
3352 dev_err(smmu->dev, "failed to enable command queue\n");
3353 return ret;
3354 }
3355
3356 /* Invalidate any cached configuration */
3357 cmd.opcode = CMDQ_OP_CFGI_ALL;
3358 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2f657add 3359 arm_smmu_cmdq_issue_sync(smmu);
48ec83bc
WD
3360
3361 /* Invalidate any stale TLB entries */
3362 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3363 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3364 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3365 }
3366
3367 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3368 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2f657add 3369 arm_smmu_cmdq_issue_sync(smmu);
48ec83bc
WD
3370
3371 /* Event queue */
3372 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
52be8637 3373 writel_relaxed(smmu->evtq.q.llq.prod,
e5b829de 3374 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
52be8637 3375 writel_relaxed(smmu->evtq.q.llq.cons,
e5b829de 3376 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
48ec83bc
WD
3377
3378 enables |= CR0_EVTQEN;
3379 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3380 ARM_SMMU_CR0ACK);
3381 if (ret) {
3382 dev_err(smmu->dev, "failed to enable event queue\n");
3383 return ret;
3384 }
3385
3386 /* PRI queue */
3387 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3388 writeq_relaxed(smmu->priq.q.q_base,
3389 smmu->base + ARM_SMMU_PRIQ_BASE);
52be8637 3390 writel_relaxed(smmu->priq.q.llq.prod,
e5b829de 3391 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
52be8637 3392 writel_relaxed(smmu->priq.q.llq.cons,
e5b829de 3393 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
48ec83bc
WD
3394
3395 enables |= CR0_PRIQEN;
3396 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3397 ARM_SMMU_CR0ACK);
3398 if (ret) {
3399 dev_err(smmu->dev, "failed to enable PRI queue\n");
3400 return ret;
3401 }
3402 }
3403
9ce27afc
JPB
3404 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3405 enables |= CR0_ATSCHK;
3406 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3407 ARM_SMMU_CR0ACK);
3408 if (ret) {
3409 dev_err(smmu->dev, "failed to enable ATS check\n");
3410 return ret;
3411 }
3412 }
3413
48ec83bc
WD
3414 ret = arm_smmu_setup_irqs(smmu);
3415 if (ret) {
3416 dev_err(smmu->dev, "failed to setup irqs\n");
3417 return ret;
3418 }
3419
3f54c447
WD
3420 if (is_kdump_kernel())
3421 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
dc87a98d
RM
3422
3423 /* Enable the SMMU interface, or ensure bypass */
3424 if (!bypass || disable_bypass) {
3425 enables |= CR0_SMMUEN;
3426 } else {
3427 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
b63b3439 3428 if (ret)
dc87a98d 3429 return ret;
dc87a98d 3430 }
48ec83bc
WD
3431 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3432 ARM_SMMU_CR0ACK);
3433 if (ret) {
3434 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3435 return ret;
3436 }
3437
3438 return 0;
3439}
3440
2985b521 3441static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
48ec83bc
WD
3442{
3443 u32 reg;
2985b521 3444 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
48ec83bc
WD
3445
3446 /* IDR0 */
3447 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3448
3449 /* 2-level structures */
cbcee19a 3450 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
48ec83bc
WD
3451 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3452
3453 if (reg & IDR0_CD2L)
3454 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3455
3456 /*
3457 * Translation table endianness.
3458 * We currently require the same endianness as the CPU, but this
3459 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3460 */
cbcee19a 3461 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
48ec83bc
WD
3462 case IDR0_TTENDIAN_MIXED:
3463 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3464 break;
3465#ifdef __BIG_ENDIAN
3466 case IDR0_TTENDIAN_BE:
3467 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3468 break;
3469#else
3470 case IDR0_TTENDIAN_LE:
3471 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3472 break;
3473#endif
3474 default:
3475 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3476 return -ENXIO;
3477 }
3478
3479 /* Boolean feature flags */
3480 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3481 smmu->features |= ARM_SMMU_FEAT_PRI;
3482
3483 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3484 smmu->features |= ARM_SMMU_FEAT_ATS;
3485
3486 if (reg & IDR0_SEV)
3487 smmu->features |= ARM_SMMU_FEAT_SEV;
3488
3489 if (reg & IDR0_MSI)
3490 smmu->features |= ARM_SMMU_FEAT_MSI;
3491
3492 if (reg & IDR0_HYP)
3493 smmu->features |= ARM_SMMU_FEAT_HYP;
3494
3495 /*
2985b521 3496 * The coherency feature as set by FW is used in preference to the ID
48ec83bc
WD
3497 * register, but warn on mismatch.
3498 */
48ec83bc 3499 if (!!(reg & IDR0_COHACC) != coherent)
2a22baa2 3500 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
48ec83bc
WD
3501 coherent ? "true" : "false");
3502
cbcee19a 3503 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
6380be05 3504 case IDR0_STALL_MODEL_FORCE:
9cff86fd
YX
3505 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3506 /* Fallthrough */
3507 case IDR0_STALL_MODEL_STALL:
48ec83bc 3508 smmu->features |= ARM_SMMU_FEAT_STALLS;
6380be05 3509 }
48ec83bc
WD
3510
3511 if (reg & IDR0_S1P)
3512 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3513
3514 if (reg & IDR0_S2P)
3515 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3516
3517 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3518 dev_err(smmu->dev, "no translation support!\n");
3519 return -ENXIO;
3520 }
3521
3522 /* We only support the AArch64 table format at present */
cbcee19a 3523 switch (FIELD_GET(IDR0_TTF, reg)) {
f0c453db
WD
3524 case IDR0_TTF_AARCH32_64:
3525 smmu->ias = 40;
3526 /* Fallthrough */
3527 case IDR0_TTF_AARCH64:
3528 break;
3529 default:
48ec83bc
WD
3530 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3531 return -ENXIO;
3532 }
3533
3534 /* ASID/VMID sizes */
3535 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3536 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3537
3538 /* IDR1 */
3539 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3540 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3541 dev_err(smmu->dev, "embedded implementation not supported\n");
3542 return -ENXIO;
3543 }
3544
d25f6ead 3545 /* Queue sizes, capped to ensure natural alignment */
52be8637
WD
3546 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3547 FIELD_GET(IDR1_CMDQS, reg));
2af2e72b 3548 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
587e6c10 3549 /*
2af2e72b
WD
3550 * We don't support splitting up batches, so one batch of
3551 * commands plus an extra sync needs to fit inside the command
3552 * queue. There's also no way we can handle the weird alignment
3553 * restrictions on the base pointer for a unit-length queue.
587e6c10 3554 */
2af2e72b
WD
3555 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3556 CMDQ_BATCH_ENTRIES);
48ec83bc
WD
3557 return -ENXIO;
3558 }
3559
52be8637
WD
3560 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3561 FIELD_GET(IDR1_EVTQS, reg));
3562 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3563 FIELD_GET(IDR1_PRIQS, reg));
48ec83bc
WD
3564
3565 /* SID/SSID sizes */
cbcee19a
RM
3566 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3567 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
48ec83bc 3568
692c4e42
NW
3569 /*
3570 * If the SMMU supports fewer bits than would fill a single L2 stream
3571 * table, use a linear table instead.
3572 */
3573 if (smmu->sid_bits <= STRTAB_SPLIT)
3574 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3575
48ec83bc
WD
3576 /* IDR5 */
3577 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3578
3579 /* Maximum number of outstanding stalls */
cbcee19a 3580 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
48ec83bc
WD
3581
3582 /* Page sizes */
3583 if (reg & IDR5_GRAN64K)
d5466357 3584 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
48ec83bc 3585 if (reg & IDR5_GRAN16K)
d5466357 3586 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
48ec83bc 3587 if (reg & IDR5_GRAN4K)
d5466357 3588 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
48ec83bc 3589
dcd189e6
RM
3590 /* Input address size */
3591 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3592 smmu->features |= ARM_SMMU_FEAT_VAX;
3593
48ec83bc 3594 /* Output address size */
cbcee19a 3595 switch (FIELD_GET(IDR5_OAS, reg)) {
48ec83bc
WD
3596 case IDR5_OAS_32_BIT:
3597 smmu->oas = 32;
3598 break;
3599 case IDR5_OAS_36_BIT:
3600 smmu->oas = 36;
3601 break;
3602 case IDR5_OAS_40_BIT:
3603 smmu->oas = 40;
3604 break;
3605 case IDR5_OAS_42_BIT:
3606 smmu->oas = 42;
3607 break;
3608 case IDR5_OAS_44_BIT:
3609 smmu->oas = 44;
3610 break;
6619c913
RM
3611 case IDR5_OAS_52_BIT:
3612 smmu->oas = 52;
3613 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3614 break;
85430968
WD
3615 default:
3616 dev_info(smmu->dev,
3617 "unknown output address size. Truncating to 48-bit\n");
3618 /* Fallthrough */
48ec83bc
WD
3619 case IDR5_OAS_48_BIT:
3620 smmu->oas = 48;
48ec83bc
WD
3621 }
3622
6619c913
RM
3623 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3624 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3625 else
3626 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3627
48ec83bc
WD
3628 /* Set the DMA mask for our table walker */
3629 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3630 dev_warn(smmu->dev,
3631 "failed to set DMA mask for table walker\n");
3632
f0c453db 3633 smmu->ias = max(smmu->ias, smmu->oas);
48ec83bc
WD
3634
3635 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3636 smmu->ias, smmu->oas, smmu->features);
3637 return 0;
3638}
3639
e4dadfa8 3640#ifdef CONFIG_ACPI
e5b829de
LC
3641static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3642{
99caf177 3643 switch (model) {
3644 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
e5b829de 3645 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
99caf177 3646 break;
6948d4a7 3647 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
99caf177 3648 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3649 break;
3650 }
e5b829de
LC
3651
3652 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3653}
3654
e4dadfa8
LP
3655static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3656 struct arm_smmu_device *smmu)
3657{
3658 struct acpi_iort_smmu_v3 *iort_smmu;
3659 struct device *dev = smmu->dev;
3660 struct acpi_iort_node *node;
3661
3662 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3663
3664 /* Retrieve SMMUv3 specific data */
3665 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3666
e5b829de
LC
3667 acpi_smmu_get_options(iort_smmu->model, smmu);
3668
e4dadfa8
LP
3669 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3670 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3671
3672 return 0;
3673}
3674#else
3675static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3676 struct arm_smmu_device *smmu)
3677{
3678 return -ENODEV;
3679}
3680#endif
3681
2985b521
LP
3682static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3683 struct arm_smmu_device *smmu)
48ec83bc 3684{
48ec83bc 3685 struct device *dev = &pdev->dev;
dc87a98d 3686 u32 cells;
2985b521 3687 int ret = -EINVAL;
dc87a98d
RM
3688
3689 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3690 dev_err(dev, "missing #iommu-cells property\n");
3691 else if (cells != 1)
3692 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3693 else
2985b521
LP
3694 ret = 0;
3695
3696 parse_driver_options(smmu);
3697
3698 if (of_dma_is_coherent(dev->of_node))
3699 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3700
3701 return ret;
3702}
3703
e5b829de
LC
3704static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3705{
3706 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3707 return SZ_64K;
3708 else
3709 return SZ_128K;
3710}
3711
ab246774
WD
3712static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3713{
3714 int err;
3715
3716#ifdef CONFIG_PCI
3717 if (pci_bus_type.iommu_ops != ops) {
ab246774
WD
3718 err = bus_set_iommu(&pci_bus_type, ops);
3719 if (err)
3720 return err;
3721 }
3722#endif
3723#ifdef CONFIG_ARM_AMBA
3724 if (amba_bustype.iommu_ops != ops) {
3725 err = bus_set_iommu(&amba_bustype, ops);
3726 if (err)
3727 goto err_reset_pci_ops;
3728 }
3729#endif
3730 if (platform_bus_type.iommu_ops != ops) {
3731 err = bus_set_iommu(&platform_bus_type, ops);
3732 if (err)
3733 goto err_reset_amba_ops;
3734 }
3735
3736 return 0;
3737
3738err_reset_amba_ops:
3739#ifdef CONFIG_ARM_AMBA
3740 bus_set_iommu(&amba_bustype, NULL);
3741#endif
3742err_reset_pci_ops: __maybe_unused;
3743#ifdef CONFIG_PCI
3744 bus_set_iommu(&pci_bus_type, NULL);
3745#endif
3746 return err;
3747}
3748
2985b521
LP
3749static int arm_smmu_device_probe(struct platform_device *pdev)
3750{
3751 int irq, ret;
3752 struct resource *res;
9648cbc9 3753 resource_size_t ioaddr;
2985b521
LP
3754 struct arm_smmu_device *smmu;
3755 struct device *dev = &pdev->dev;
3756 bool bypass;
48ec83bc
WD
3757
3758 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3759 if (!smmu) {
3760 dev_err(dev, "failed to allocate arm_smmu_device\n");
3761 return -ENOMEM;
3762 }
3763 smmu->dev = dev;
3764
e5b829de
LC
3765 if (dev->of_node) {
3766 ret = arm_smmu_device_dt_probe(pdev, smmu);
3767 } else {
3768 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3769 if (ret == -ENODEV)
3770 return ret;
3771 }
3772
3773 /* Set bypass mode according to firmware probing result */
3774 bypass = !!ret;
3775
48ec83bc
WD
3776 /* Base address */
3777 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
322a9bbb 3778 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
48ec83bc
WD
3779 dev_err(dev, "MMIO region too small (%pr)\n", res);
3780 return -EINVAL;
3781 }
9648cbc9 3782 ioaddr = res->start;
48ec83bc
WD
3783
3784 smmu->base = devm_ioremap_resource(dev, res);
3785 if (IS_ERR(smmu->base))
3786 return PTR_ERR(smmu->base);
3787
3788 /* Interrupt lines */
48ec83bc 3789
f7aff1a9 3790 irq = platform_get_irq_byname_optional(pdev, "combined");
48ec83bc 3791 if (irq > 0)
f935448a
GS
3792 smmu->combined_irq = irq;
3793 else {
f7aff1a9 3794 irq = platform_get_irq_byname_optional(pdev, "eventq");
f935448a
GS
3795 if (irq > 0)
3796 smmu->evtq.q.irq = irq;
48ec83bc 3797
f7aff1a9 3798 irq = platform_get_irq_byname_optional(pdev, "priq");
f935448a
GS
3799 if (irq > 0)
3800 smmu->priq.q.irq = irq;
48ec83bc 3801
f7aff1a9 3802 irq = platform_get_irq_byname_optional(pdev, "gerror");
f935448a
GS
3803 if (irq > 0)
3804 smmu->gerr_irq = irq;
3805 }
48ec83bc 3806 /* Probe the h/w */
2985b521 3807 ret = arm_smmu_device_hw_probe(smmu);
48ec83bc
WD
3808 if (ret)
3809 return ret;
3810
3811 /* Initialise in-memory data structures */
3812 ret = arm_smmu_init_structures(smmu);
3813 if (ret)
3814 return ret;
3815
166bdbd2
MZ
3816 /* Record our private device structure */
3817 platform_set_drvdata(pdev, smmu);
3818
48ec83bc 3819 /* Reset the device */
8f785154
RM
3820 ret = arm_smmu_device_reset(smmu, bypass);
3821 if (ret)
3822 return ret;
3823
3824 /* And we're up. Go go go! */
9648cbc9
JR
3825 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3826 "smmu3.%pa", &ioaddr);
08d4ca2a
RM
3827 if (ret)
3828 return ret;
9648cbc9
JR
3829
3830 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3831 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3832
3833 ret = iommu_device_register(&smmu->iommu);
5c2d0218
AY
3834 if (ret) {
3835 dev_err(dev, "Failed to register iommu\n");
3836 return ret;
3837 }
778de074 3838
ab246774 3839 return arm_smmu_set_bus_ops(&arm_smmu_ops);
48ec83bc
WD
3840}
3841
6e8fa740 3842static int arm_smmu_device_remove(struct platform_device *pdev)
48ec83bc 3843{
941a802d 3844 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
48ec83bc 3845
ab246774
WD
3846 arm_smmu_set_bus_ops(NULL);
3847 iommu_device_unregister(&smmu->iommu);
3848 iommu_device_sysfs_remove(&smmu->iommu);
48ec83bc 3849 arm_smmu_device_disable(smmu);
6e8fa740
WD
3850
3851 return 0;
3852}
3853
3854static void arm_smmu_device_shutdown(struct platform_device *pdev)
3855{
3856 arm_smmu_device_remove(pdev);
7aa8619a
NW
3857}
3858
ebdd13c9 3859static const struct of_device_id arm_smmu_of_match[] = {
48ec83bc
WD
3860 { .compatible = "arm,smmu-v3", },
3861 { },
3862};
6e8fa740 3863MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
48ec83bc
WD
3864
3865static struct platform_driver arm_smmu_driver = {
3866 .driver = {
34debdca 3867 .name = "arm-smmu-v3",
8efda06f 3868 .of_match_table = arm_smmu_of_match,
34debdca 3869 .suppress_bind_attrs = true,
48ec83bc 3870 },
2985b521 3871 .probe = arm_smmu_device_probe,
6e8fa740 3872 .remove = arm_smmu_device_remove,
7aa8619a 3873 .shutdown = arm_smmu_device_shutdown,
48ec83bc 3874};
6e8fa740
WD
3875module_platform_driver(arm_smmu_driver);
3876
3877MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
1ea27ee2 3878MODULE_AUTHOR("Will Deacon <will@kernel.org>");
d3daf666 3879MODULE_ALIAS("platform:arm-smmu-v3");
6e8fa740 3880MODULE_LICENSE("GPL v2");