Commit | Line | Data |
---|---|---|
99b9d7b4 OG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | /* | |
4 | * Copyright 2016-2019 HabanaLabs, Ltd. | |
5 | * All Rights Reserved. | |
6 | */ | |
7 | ||
8 | #include "goyaP.h" | |
0feaf86d OS |
9 | #include "include/hw_ip/mmu/mmu_general.h" |
10 | #include "include/hw_ip/mmu/mmu_v1_0.h" | |
99b9d7b4 | 11 | #include "include/goya/asic_reg/goya_masks.h" |
4095a176 | 12 | #include "include/goya/goya_reg_map.h" |
99b9d7b4 OG |
13 | |
14 | #include <linux/pci.h> | |
99b9d7b4 | 15 | #include <linux/genalloc.h> |
d91389bc | 16 | #include <linux/hwmon.h> |
839c4803 | 17 | #include <linux/io-64-nonatomic-lo-hi.h> |
4a0ce776 | 18 | #include <linux/iommu.h> |
06deb86a | 19 | #include <linux/seq_file.h> |
99b9d7b4 OG |
20 | |
21 | /* | |
22 | * GOYA security scheme: | |
23 | * | |
24 | * 1. Host is protected by: | |
25 | * - Range registers (When MMU is enabled, DMA RR does NOT protect host) | |
26 | * - MMU | |
27 | * | |
28 | * 2. DRAM is protected by: | |
29 | * - Range registers (protect the first 512MB) | |
30 | * - MMU (isolation between users) | |
31 | * | |
32 | * 3. Configuration is protected by: | |
33 | * - Range registers | |
34 | * - Protection bits | |
35 | * | |
36 | * When MMU is disabled: | |
37 | * | |
38 | * QMAN DMA: PQ, CQ, CP, DMA are secured. | |
39 | * PQ, CB and the data are on the host. | |
40 | * | |
41 | * QMAN TPC/MME: | |
42 | * PQ, CQ and CP are not secured. | |
43 | * PQ, CB and the data are on the SRAM/DRAM. | |
44 | * | |
4c172bbf OG |
45 | * Since QMAN DMA is secured, the driver is parsing the DMA CB: |
46 | * - checks DMA pointer | |
99b9d7b4 OG |
47 | * - WREG, MSG_PROT are not allowed. |
48 | * - MSG_LONG/SHORT are allowed. | |
49 | * | |
50 | * A read/write transaction by the QMAN to a protected area will succeed if | |
51 | * and only if the QMAN's CP is secured and MSG_PROT is used | |
52 | * | |
53 | * | |
54 | * When MMU is enabled: | |
55 | * | |
56 | * QMAN DMA: PQ, CQ and CP are secured. | |
57 | * MMU is set to bypass on the Secure props register of the QMAN. | |
58 | * The reasons we don't enable MMU for PQ, CQ and CP are: | |
4c172bbf | 59 | * - PQ entry is in kernel address space and the driver doesn't map it. |
99b9d7b4 OG |
60 | * - CP writes to MSIX register and to kernel address space (completion |
61 | * queue). | |
62 | * | |
4c172bbf OG |
63 | * DMA is not secured but because CP is secured, the driver still needs to parse |
64 | * the CB, but doesn't need to check the DMA addresses. | |
99b9d7b4 | 65 | * |
4c172bbf OG |
66 | * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and |
67 | * the driver doesn't map memory in MMU. | |
99b9d7b4 OG |
68 | * |
69 | * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) | |
70 | * | |
71 | * DMA RR does NOT protect host because DMA is not secured | |
72 | * | |
73 | */ | |
74 | ||
47f6b41c | 75 | #define GOYA_BOOT_FIT_FILE "habanalabs/goya/goya-boot-fit.itb" |
da1342a0 OG |
76 | #define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb" |
77 | ||
8ba2876d | 78 | #define GOYA_MMU_REGS_NUM 63 |
99b9d7b4 OG |
79 | |
80 | #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ | |
81 | ||
82 | #define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */ | |
83 | #define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */ | |
84 | #define GOYA_RESET_WAIT_MSEC 1 /* 1ms */ | |
85 | #define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */ | |
86 | #define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ | |
99b9d7b4 | 87 | #define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ |
0feaf86d | 88 | #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) |
3dccd187 | 89 | #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) |
47f6b41c | 90 | #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ |
788cacf3 | 91 | #define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ |
99b9d7b4 OG |
92 | |
93 | #define GOYA_QMAN0_FENCE_VAL 0xD169B243 | |
94 | ||
1251f23a OG |
95 | #define GOYA_MAX_STRING_LEN 20 |
96 | ||
be5d926b OG |
97 | #define GOYA_CB_POOL_CB_CNT 512 |
98 | #define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */ | |
99 | ||
/*
 * Idle-state helper macros.
 *
 * A QMAN/CMDQ/TPC/MME unit is considered idle only when ALL bits of its
 * idle mask are set in the corresponding status register; the DMA channel
 * is idle when its single busy bit is clear.
 */
#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
123 | ||
/*
 * Human-readable names for the MSI-X vectors: five completion queues
 * followed by the CPU event queue (GOYA_MSIX_ENTRIES total).
 */
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
	"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
	"goya cq 4", "goya cpu eq"
};
128 | ||
/*
 * Size in bytes of each QMAN packet, indexed by packet ID.
 * IDs without an explicit entry default to 0 (invalid/unknown packet).
 */
static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};
141 | ||
/*
 * Registers carrying per-context MMU bypass/ASID configuration
 * (presumably reprogrammed by goya_mmu_prepare() when a new ASID takes
 * over the engines — verify against its definition). Layout: the 5 DMA
 * QMANs, then for each of TPC0-7 its QM/CMDQ secure and non-secure props
 * plus CFG ARUSER/AWUSER, then the MME QM/CMDQ props and SB/WB control,
 * and finally the PCIE wrap AxUSER registers. GOYA_MMU_REGS_NUM == 63.
 */
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
207 | ||
/*
 * Exhaustive list of GOYA async event IDs known to the driver
 * (ECC errors, SRAM/PLL faults, engine decode/kernel errors, QM/CH
 * events, BMON/SPMU profiling events and environment fixes).
 * NOTE(review): presumably used to unmask/iterate all events — confirm
 * against the callers of this table.
 */
static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
335 | ||
95b5a8b8 OG |
336 | static int goya_mmu_clear_pgt_range(struct hl_device *hdev); |
337 | static int goya_mmu_set_dram_default_page(struct hl_device *hdev); | |
338 | static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); | |
339 | static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); | |
340 | ||
b2377e03 | 341 | void goya_get_fixed_properties(struct hl_device *hdev) |
99b9d7b4 OG |
342 | { |
343 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
9494a8dd OG |
344 | int i; |
345 | ||
346 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { | |
347 | prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; | |
4c172bbf | 348 | prop->hw_queues_props[i].driver_only = 0; |
df762375 | 349 | prop->hw_queues_props[i].requires_kernel_cb = 1; |
9494a8dd OG |
350 | } |
351 | ||
352 | for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { | |
353 | prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; | |
4c172bbf | 354 | prop->hw_queues_props[i].driver_only = 1; |
df762375 | 355 | prop->hw_queues_props[i].requires_kernel_cb = 0; |
9494a8dd OG |
356 | } |
357 | ||
358 | for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + | |
359 | NUMBER_OF_INT_HW_QUEUES; i++) { | |
360 | prop->hw_queues_props[i].type = QUEUE_TYPE_INT; | |
4c172bbf | 361 | prop->hw_queues_props[i].driver_only = 0; |
df762375 | 362 | prop->hw_queues_props[i].requires_kernel_cb = 0; |
9494a8dd OG |
363 | } |
364 | ||
365 | for (; i < HL_MAX_QUEUES; i++) | |
366 | prop->hw_queues_props[i].type = QUEUE_TYPE_NA; | |
99b9d7b4 OG |
367 | |
368 | prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; | |
369 | ||
370 | prop->dram_base_address = DRAM_PHYS_BASE; | |
371 | prop->dram_size = DRAM_PHYS_DEFAULT_SIZE; | |
372 | prop->dram_end_address = prop->dram_base_address + prop->dram_size; | |
373 | prop->dram_user_base_address = DRAM_BASE_ADDR_USER; | |
374 | ||
375 | prop->sram_base_address = SRAM_BASE_ADDR; | |
376 | prop->sram_size = SRAM_SIZE; | |
377 | prop->sram_end_address = prop->sram_base_address + prop->sram_size; | |
378 | prop->sram_user_base_address = prop->sram_base_address + | |
379 | SRAM_USER_BASE_OFFSET; | |
380 | ||
0feaf86d | 381 | prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; |
27ca384c | 382 | prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR; |
0feaf86d OS |
383 | if (hdev->pldm) |
384 | prop->mmu_pgt_size = 0x800000; /* 8MB */ | |
385 | else | |
386 | prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; | |
387 | prop->mmu_pte_size = HL_PTE_SIZE; | |
388 | prop->mmu_hop_table_size = HOP_TABLE_SIZE; | |
389 | prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; | |
390 | prop->dram_page_size = PAGE_SIZE_2MB; | |
391 | ||
54bb6744 OS |
392 | prop->dmmu.hop0_shift = HOP0_SHIFT; |
393 | prop->dmmu.hop1_shift = HOP1_SHIFT; | |
394 | prop->dmmu.hop2_shift = HOP2_SHIFT; | |
395 | prop->dmmu.hop3_shift = HOP3_SHIFT; | |
396 | prop->dmmu.hop4_shift = HOP4_SHIFT; | |
397 | prop->dmmu.hop0_mask = HOP0_MASK; | |
398 | prop->dmmu.hop1_mask = HOP1_MASK; | |
399 | prop->dmmu.hop2_mask = HOP2_MASK; | |
400 | prop->dmmu.hop3_mask = HOP3_MASK; | |
401 | prop->dmmu.hop4_mask = HOP4_MASK; | |
64a7e295 OS |
402 | prop->dmmu.start_addr = VA_DDR_SPACE_START; |
403 | prop->dmmu.end_addr = VA_DDR_SPACE_END; | |
404 | prop->dmmu.page_size = PAGE_SIZE_2MB; | |
54bb6744 | 405 | |
64a7e295 | 406 | /* shifts and masks are the same in PMMU and DMMU */ |
54bb6744 | 407 | memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); |
64a7e295 OS |
408 | prop->pmmu.start_addr = VA_HOST_SPACE_START; |
409 | prop->pmmu.end_addr = VA_HOST_SPACE_END; | |
54bb6744 OS |
410 | prop->pmmu.page_size = PAGE_SIZE_4KB; |
411 | ||
64a7e295 OS |
412 | /* PMMU and HPMMU are the same except of page size */ |
413 | memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); | |
414 | prop->pmmu_huge.page_size = PAGE_SIZE_2MB; | |
415 | ||
416 | prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END; | |
99b9d7b4 OG |
417 | prop->cfg_size = CFG_SIZE; |
418 | prop->max_asid = MAX_ASID; | |
1251f23a | 419 | prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; |
b6f897d7 | 420 | prop->high_pll = PLL_HIGH_DEFAULT; |
839c4803 OG |
421 | prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT; |
422 | prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE; | |
d91389bc | 423 | prop->max_power_default = MAX_POWER_DEFAULT; |
99b9d7b4 | 424 | prop->tpc_enabled_mask = TPC_ENABLED_MASK; |
b6f897d7 TT |
425 | prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; |
426 | prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; | |
91edbf2c OG |
427 | |
428 | strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, | |
429 | CARD_NAME_MAX_LEN); | |
99b9d7b4 OG |
430 | } |
431 | ||
432 | /* | |
433 | * goya_pci_bars_map - Map PCI BARS of Goya device | |
434 | * | |
435 | * @hdev: pointer to hl_device structure | |
436 | * | |
437 | * Request PCI regions and map them to kernel virtual addresses. | |
438 | * Returns 0 on success | |
439 | * | |
440 | */ | |
5e6e0239 | 441 | static int goya_pci_bars_map(struct hl_device *hdev) |
99b9d7b4 | 442 | { |
b6f897d7 TT |
443 | static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"}; |
444 | bool is_wc[3] = {false, false, true}; | |
99b9d7b4 OG |
445 | int rc; |
446 | ||
b6f897d7 TT |
447 | rc = hl_pci_bars_map(hdev, name, is_wc); |
448 | if (rc) | |
99b9d7b4 | 449 | return rc; |
99b9d7b4 OG |
450 | |
451 | hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
b6f897d7 | 452 | (CFG_BASE - SRAM_BASE_ADDR); |
99b9d7b4 OG |
453 | |
454 | return 0; | |
455 | } | |
456 | ||
a38693d7 | 457 | static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) |
99b9d7b4 OG |
458 | { |
459 | struct goya_device *goya = hdev->asic_specific; | |
a38693d7 | 460 | u64 old_addr = addr; |
99b9d7b4 OG |
461 | int rc; |
462 | ||
463 | if ((goya) && (goya->ddr_bar_cur_addr == addr)) | |
a38693d7 | 464 | return old_addr; |
99b9d7b4 OG |
465 | |
466 | /* Inbound Region 1 - Bar 4 - Point to DDR */ | |
b6f897d7 TT |
467 | rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr); |
468 | if (rc) | |
a38693d7 | 469 | return U64_MAX; |
99b9d7b4 | 470 | |
a38693d7 OG |
471 | if (goya) { |
472 | old_addr = goya->ddr_bar_cur_addr; | |
99b9d7b4 | 473 | goya->ddr_bar_cur_addr = addr; |
a38693d7 | 474 | } |
99b9d7b4 | 475 | |
a38693d7 | 476 | return old_addr; |
99b9d7b4 OG |
477 | } |
478 | ||
/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU.
 * Delegates to the common helper with Goya's fixed SRAM/DRAM bases and
 * host physical window; returns its error code.
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
				HOST_PHYS_BASE, HOST_PHYS_SIZE);
}
492 | ||
/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 * Returns 0 on success, -ENODEV when a BAR size doesn't match the
 * expected Goya layout (probably not a Goya device), or the error from
 * hl_pci_init().
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	goya_get_fixed_properties(hdev);

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		return -ENODEV;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		return -ENODEV;
	}

	/* Record the DDR BAR size actually provided by this device */
	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		return rc;

	/* Warn about a bad SR-IOV strap; skipped on pldm (simulation) */
	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;
}
549 | ||
/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 * Counterpart of goya_early_init(); all PCI teardown is delegated to the
 * common hl_pci_fini() helper. Always returns 0.
 */
static int goya_early_fini(struct hl_device *hdev)
{
	hl_pci_fini(hdev);

	return 0;
}
564 | ||
/*
 * goya_mmu_prepare_reg - Program an engine's user register with an ASID
 *
 * @hdev: pointer to hl_device structure
 * @reg: config-space address of the register to update
 * @asid: address-space ID to set (must fit in the low bits cleared below)
 */
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
571 | ||
572 | static void goya_qman0_set_security(struct hl_device *hdev, bool secure) | |
573 | { | |
574 | struct goya_device *goya = hdev->asic_specific; | |
575 | ||
576 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) | |
577 | return; | |
578 | ||
579 | if (secure) | |
580 | WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED); | |
581 | else | |
582 | WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED); | |
583 | ||
584 | RREG32(mmDMA_QM_0_GLBL_PROT); | |
585 | } | |
586 | ||
/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * Caches the PCI PLL divider registers (NR, NF, OD and the extra
 * division factor) in the fixed properties for later use.
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}
602 | ||
/*
 * goya_late_init - late (post-MMU/queues) initialization
 *
 * @hdev: pointer to hl_device structure
 *
 * Ordered bring-up after the basic HW init: fetch PLL values, prepare
 * the MMU (clear page tables, DRAM default page, device-CPU mappings),
 * bring up and test the CPU queues, read the ArmCP info, configure the
 * DDR wrap protection and finally let the device CPU access PCI and
 * register for async event interrupts. Stops and returns the error code
 * of the first step that fails.
 */
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* Tell the device CPU where to post async event interrupts */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}
659 | ||
660 | /* | |
661 | * goya_late_fini - GOYA late tear-down code | |
662 | * | |
663 | * @hdev: pointer to hl_device structure | |
664 | * | |
665 | * Free sensors allocated structures | |
666 | */ | |
667 | void goya_late_fini(struct hl_device *hdev) | |
668 | { | |
669 | const struct hwmon_channel_info **channel_info_arr; | |
670 | int i = 0; | |
671 | ||
672 | if (!hdev->hl_chip_info->info) | |
673 | return; | |
674 | ||
675 | channel_info_arr = hdev->hl_chip_info->info; | |
676 | ||
677 | while (channel_info_arr[i]) { | |
678 | kfree(channel_info_arr[i]->config); | |
679 | kfree(channel_info_arr[i]); | |
680 | i++; | |
681 | } | |
682 | ||
683 | kfree(channel_info_arr); | |
684 | ||
685 | hdev->hl_chip_info->info = NULL; | |
686 | } | |
687 | ||
/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates the goya device structure and the driver's DMA resources:
 * a pool for small coherent allocations, the CPU-accessible coherent
 * memory region and a gen_pool allocator on top of it. On failure every
 * resource acquired so far is released via the goto cleanup chain, in
 * reverse order of acquisition.
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	/* Engines start at the low PLL frequency until boosted later */
	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	/* Sub-allocator over the CPU-accessible region, 32-byte granularity */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
774 | ||
775 | /* | |
776 | * goya_sw_fini - Goya software tear-down code | |
777 | * | |
778 | * @hdev: pointer to hl_device structure | |
779 | * | |
780 | */ | |
5e6e0239 | 781 | static int goya_sw_fini(struct hl_device *hdev) |
99b9d7b4 OG |
782 | { |
783 | struct goya_device *goya = hdev->asic_specific; | |
784 | ||
785 | gen_pool_destroy(hdev->cpu_accessible_dma_pool); | |
786 | ||
d9c3aa80 OG |
787 | hdev->asic_funcs->asic_dma_free_coherent(hdev, |
788 | HL_CPU_ACCESSIBLE_MEM_SIZE, | |
99b9d7b4 OG |
789 | hdev->cpu_accessible_dma_mem, |
790 | hdev->cpu_accessible_dma_address); | |
791 | ||
792 | dma_pool_destroy(hdev->dma_pool); | |
793 | ||
794 | kfree(goya); | |
795 | ||
796 | return 0; | |
797 | } | |
798 | ||
/*
 * goya_init_dma_qman - Configure and enable one DMA queue manager
 *
 * @hdev: pointer to hl_device structure
 * @dma_id: DMA channel index; selects the per-channel register block
 * @bus_address: bus address of the queue's PQ buffer
 *
 * Programs the PQ base/size/indices, the CP message bases (sync manager
 * monitor payload and sync objects), the error-reporting path into the
 * GIC, the trust level (depending on whether the MMU is up) and finally
 * enables the QMAN.
 */
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
				dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* Register offset of this channel relative to channel 0 */
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* Point the QMAN at the PQ buffer and reset producer/consumer */
	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	/* With the MMU up, user DMA QMANs drop to partly trusted */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
850 | ||
851 | static void goya_init_dma_ch(struct hl_device *hdev, int dma_id) | |
852 | { | |
853 | u32 gic_base_lo, gic_base_hi; | |
854 | u64 sob_addr; | |
855 | u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1); | |
856 | ||
857 | gic_base_lo = | |
858 | lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
859 | gic_base_hi = | |
860 | upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
861 | ||
862 | WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo); | |
863 | WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi); | |
864 | WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off, | |
865 | GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id); | |
866 | ||
887f7d38 | 867 | if (dma_id) |
9494a8dd OG |
868 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 + |
869 | (dma_id - 1) * 4; | |
887f7d38 OG |
870 | else |
871 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; | |
872 | ||
887f7d38 OG |
873 | WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr)); |
874 | WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001); | |
9494a8dd OG |
875 | } |
876 | ||
877 | /* | |
878 | * goya_init_dma_qmans - Initialize QMAN DMA registers | |
879 | * | |
880 | * @hdev: pointer to hl_device structure | |
881 | * | |
882 | * Initialize the H/W registers of the QMAN DMA channels | |
883 | * | |
884 | */ | |
b2377e03 | 885 | void goya_init_dma_qmans(struct hl_device *hdev) |
9494a8dd OG |
886 | { |
887 | struct goya_device *goya = hdev->asic_specific; | |
888 | struct hl_hw_queue *q; | |
9494a8dd OG |
889 | int i; |
890 | ||
891 | if (goya->hw_cap_initialized & HW_CAP_DMA) | |
892 | return; | |
893 | ||
894 | q = &hdev->kernel_queues[0]; | |
895 | ||
896 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) { | |
1fa185c6 | 897 | q->cq_id = q->msi_vec = i; |
94cb669c | 898 | goya_init_dma_qman(hdev, i, q->bus_address); |
9494a8dd OG |
899 | goya_init_dma_ch(hdev, i); |
900 | } | |
901 | ||
902 | goya->hw_cap_initialized |= HW_CAP_DMA; | |
903 | } | |
904 | ||
/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Clears GLBL_CFG0 of all five DMA QMANs so they stop fetching new work.
 * No-op if the DMA engines were never initialized.
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}
924 | ||
/*
 * goya_stop_queue - Gracefully stop a single QMAN
 *
 * @hdev: pointer to hl_device structure
 * @cfg_reg: the QMAN's GLBL_CFG1 register (CP stop request)
 * @cp_sts_reg: the QMAN's CP_STS register (fence-in-progress indication)
 * @glbl_sts0_reg: the QMAN's GLBL_STS0 register (CP-is-stopped indication)
 *
 * Requests a CP stop, waits for any in-flight fence to drain, then polls
 * until the CP reports it has stopped.
 *
 * Returns 0 on success (including the stuck-in-fence case, which is
 * deliberately not treated as a failure here), -EINVAL if the QMAN did not
 * stop within QMAN_STOP_TIMEOUT_USEC.
 */
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same*/

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
966 | ||
967 | /* | |
968 | * goya_stop_external_queues - Stop external queues | |
969 | * | |
970 | * @hdev: pointer to hl_device structure | |
971 | * | |
972 | * Returns 0 on success | |
973 | * | |
974 | */ | |
975 | static int goya_stop_external_queues(struct hl_device *hdev) | |
976 | { | |
977 | int rc, retval = 0; | |
978 | ||
908087ff OG |
979 | struct goya_device *goya = hdev->asic_specific; |
980 | ||
981 | if (!(goya->hw_cap_initialized & HW_CAP_DMA)) | |
982 | return retval; | |
983 | ||
9494a8dd OG |
984 | rc = goya_stop_queue(hdev, |
985 | mmDMA_QM_0_GLBL_CFG1, | |
986 | mmDMA_QM_0_CP_STS, | |
987 | mmDMA_QM_0_GLBL_STS0); | |
988 | ||
989 | if (rc) { | |
990 | dev_err(hdev->dev, "failed to stop DMA QMAN 0\n"); | |
991 | retval = -EIO; | |
992 | } | |
993 | ||
994 | rc = goya_stop_queue(hdev, | |
995 | mmDMA_QM_1_GLBL_CFG1, | |
996 | mmDMA_QM_1_CP_STS, | |
997 | mmDMA_QM_1_GLBL_STS0); | |
998 | ||
999 | if (rc) { | |
1000 | dev_err(hdev->dev, "failed to stop DMA QMAN 1\n"); | |
1001 | retval = -EIO; | |
1002 | } | |
1003 | ||
1004 | rc = goya_stop_queue(hdev, | |
1005 | mmDMA_QM_2_GLBL_CFG1, | |
1006 | mmDMA_QM_2_CP_STS, | |
1007 | mmDMA_QM_2_GLBL_STS0); | |
1008 | ||
1009 | if (rc) { | |
1010 | dev_err(hdev->dev, "failed to stop DMA QMAN 2\n"); | |
1011 | retval = -EIO; | |
1012 | } | |
1013 | ||
1014 | rc = goya_stop_queue(hdev, | |
1015 | mmDMA_QM_3_GLBL_CFG1, | |
1016 | mmDMA_QM_3_CP_STS, | |
1017 | mmDMA_QM_3_GLBL_STS0); | |
1018 | ||
1019 | if (rc) { | |
1020 | dev_err(hdev->dev, "failed to stop DMA QMAN 3\n"); | |
1021 | retval = -EIO; | |
1022 | } | |
1023 | ||
1024 | rc = goya_stop_queue(hdev, | |
1025 | mmDMA_QM_4_GLBL_CFG1, | |
1026 | mmDMA_QM_4_CP_STS, | |
1027 | mmDMA_QM_4_GLBL_STS0); | |
1028 | ||
1029 | if (rc) { | |
1030 | dev_err(hdev->dev, "failed to stop DMA QMAN 4\n"); | |
1031 | retval = -EIO; | |
1032 | } | |
1033 | ||
1034 | return retval; | |
1035 | } | |
1036 | ||
/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Hands the device CPU the bus addresses and sizes of the PQ, EQ and the
 * CPU-accessible memory region, signals readiness via PQ_INIT_STATUS and a
 * GIC doorbell, then polls until the device CPU acknowledges.
 *
 * Returns 0 on success, -EIO if the device CPU does not respond within
 * GOYA_CPU_TIMEOUT_USEC.
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	/* CQ lives in the CPU-accessible region, addressed by device VA */
	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
		lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
		upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	/* Kick the device CPU to start the handshake */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
1103 | ||
/*
 * goya_set_pll_refclk - Clear the DIV_SEL registers of all on-chip PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes 0 to DIV_SEL_0..3 of every PLL (CPU, IC, MC, PSOC MME/PCI/EMMC,
 * TPC). NOTE(review): presumably value 0 selects the reference clock as
 * the function name implies - confirm against the PLL register spec.
 */
static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}
1141 | ||
/*
 * goya_disable_clk_rlx - Disable clock relaxation on the MME and IC PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes 0x100010 to the CLK_RLX_0 register of the PSOC MME and IC PLLs.
 * NOTE(review): the exact bit semantics of 0x100010 are not visible here -
 * confirm against the PLL register spec.
 */
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
1147 | ||
/*
 * _goya_tpc_mbist_workaround - Run the MBIST workaround on one TPC
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC to initialize (0-based)
 *
 * Workaround for Bug H2 #2443 ("TPC SB is not initialized on chip reset"):
 * triggers an MBIST run on the TPC's memories, resets the TPC core through
 * its EML debug register, then zero-fills 256 words of the SLM region.
 * The statement order and the sleeps around the core reset are part of the
 * H/W sequence - do not reorder.
 */
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	/* Per-TPC strides for the CFG and EML register blocks */
	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
			tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	/* Kick off the MBIST run and wait for completion */
	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	/* Pulse the TPC core reset (assert, wait, deassert, wait) */
	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	/* Read back to flush the posted writes before returning */
	val = RREG32(tpc_slm_offset);
}
1212 | ||
1213 | static void goya_tpc_mbist_workaround(struct hl_device *hdev) | |
1214 | { | |
1215 | struct goya_device *goya = hdev->asic_specific; | |
1216 | int i; | |
1217 | ||
1218 | if (hdev->pldm) | |
1219 | return; | |
1220 | ||
1221 | if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST) | |
1222 | return; | |
1223 | ||
1224 | /* Workaround for H2 #2443 */ | |
1225 | ||
1226 | for (i = 0 ; i < TPC_MAX_NUM ; i++) | |
1227 | _goya_tpc_mbist_workaround(hdev, i); | |
1228 | ||
1229 | goya->hw_cap_initialized |= HW_CAP_TPC_MBIST; | |
1230 | } | |
1231 | ||
/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Programs the "golden" (vendor-tuned) values into the SRAM, MME and TPC
 * router arbitration registers, the router split coefficients and address
 * scrambling, plus a handful of chip workarounds. Values come from H/W
 * characterization; treat them as opaque. Applied once per device init,
 * tracked via HW_CAP_GOLDEN.
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	/* Split-coefficient polynomials, shifted by 7 before programming */
	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all arithmetic interrupts from TPC */
	tpc_intr_mask = 0x7FFF;

	/* SRAM router arbitration - 6 rows of 5 routers, 0x20000 apart */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);


		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	/* MME router arbitration */
	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	/*
	 * NOTE(review): MME1 WR_RQ_N_ARB is never written, and WR_RQ_S_ARB
	 * is written here and again below with a different value - looks
	 * like a possible typo in the golden values; confirm against the
	 * H/W characterization tables before changing.
	 */
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	/* TPC router arbitration */
	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	/* Router split coefficients (10 per router, 4 bytes apart) */
	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	/* Enable address scrambling on the 6 MME routers (0x40000 apart) */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		/*
		 * Workaround for Bug H2 #2441 :
		 * "ST.NOP set trace event illegal opcode"
		 */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	/*
	 * Workaround for H2 #HW-23 bug
	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
	 * This limitation is still large enough to not affect Gen4 bandwidth.
	 * We need to only limit that DMA channel because the user can only read
	 * from Host using DMA CH 1
	 */
	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}
1534 | ||
/*
 * goya_init_mme_qman - Initialize the H/W registers of the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Unlike the DMA QMANs, the MME QMAN's PQ lives in device SRAM (at
 * MME_QMAN_BASE_OFFSET). Programs PQ base/size, LDMA offsets, message
 * bases, GIC error reporting and protection bits, then enables the QMAN.
 * Register write order follows the H/W bring-up sequence.
 */
static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* MME PQ is located in SRAM, not host memory */
	qman_base_addr = hdev->asic_prop.sram_base_address +
			MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	/* LDMA register offsets used by the CP when executing LDMA packets */
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}
1584 | ||
/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ (command queue)
 *
 * @hdev: pointer to hl_device structure
 *
 * Unlike the QMAN, the CMDQ has no PQ in SRAM to configure; only the
 * sync-manager message bases, CQ sizing, error reporting and enable bit
 * are programmed here.
 */
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;

	/* Sync manager monitor payload address (MSG_BASE0) */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	/* Sync object base address (MSG_BASE1) */
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell address - CMDQ errors are reported by writing here */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	/* Enable last - the CMDQ must be fully configured at this point */
	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}
1620 | ||
/*
 * goya_init_mme_qmans - Initialize the MME engine's QMAN and CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 * Idempotent - returns immediately if HW_CAP_MME is already set.
 */
void goya_init_mme_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;

	if (goya->hw_cap_initialized & HW_CAP_MME)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* Point the MME engine itself at the sync-object base address */
	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);

	goya_init_mme_qman(hdev);
	goya_init_mme_cmdq(hdev);

	goya->hw_cap_initialized |= HW_CAP_MME;
}
1640 | ||
/*
 * goya_init_tpc_qman - Initialize a single TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: offset of this TPC's PQ from the SRAM base
 * @tpc_id: index of the TPC engine (0..TPC_MAX_NUM-1)
 *
 * All TPC QMAN register blocks are assumed to be evenly strided; reg_off
 * translates TPC0 register addresses to the tpc_id'th instance.
 */
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	/* Per-TPC register stride, derived from two adjacent instances */
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	/* Sync manager monitor payload address (MSG_BASE0) */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	/* Sync object base address (MSG_BASE1) */
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell address - QMAN errors are reported by writing here */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* This TPC's PQ resides in SRAM at the caller-supplied offset */
	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	/* CP shadow-register offsets used when executing LDMA packets */
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	/* TPC QM event IDs are consecutive, starting at TPC0's */
	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	/* Enable last - the QMAN must be fully configured at this point */
	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}
1690 | ||
/*
 * goya_init_tpc_cmdq - Initialize a single TPC CMDQ
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine (0..TPC_MAX_NUM-1)
 *
 * Note the CMDQ register stride is derived independently of the QMAN
 * stride, since the two register blocks are laid out separately.
 */
static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* Per-TPC CMDQ register stride */
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	/* Sync manager monitor payload address (MSG_BASE0) */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	/* Sync object base address (MSG_BASE1) */
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC doorbell address - CMDQ errors are reported by writing here */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	/* TPC CMDQ event IDs are consecutive, starting at TPC0's */
	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	/* Enable last - the CMDQ must be fully configured at this point */
	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
1727 | ||
b2377e03 | 1728 | void goya_init_tpc_qmans(struct hl_device *hdev) |
9494a8dd OG |
1729 | { |
1730 | struct goya_device *goya = hdev->asic_specific; | |
1731 | u32 so_base_lo, so_base_hi; | |
1732 | u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW - | |
1733 | mmTPC0_CFG_SM_BASE_ADDRESS_LOW; | |
1734 | int i; | |
1735 | ||
1736 | if (goya->hw_cap_initialized & HW_CAP_TPC) | |
1737 | return; | |
1738 | ||
1739 | so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
1740 | so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
1741 | ||
1742 | for (i = 0 ; i < TPC_MAX_NUM ; i++) { | |
1743 | WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off, | |
1744 | so_base_lo); | |
1745 | WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off, | |
1746 | so_base_hi); | |
1747 | } | |
1748 | ||
1749 | goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0); | |
1750 | goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1); | |
1751 | goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2); | |
1752 | goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3); | |
1753 | goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4); | |
1754 | goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5); | |
1755 | goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6); | |
1756 | goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7); | |
1757 | ||
1758 | for (i = 0 ; i < TPC_MAX_NUM ; i++) | |
1759 | goya_init_tpc_cmdq(hdev, i); | |
1760 | ||
1761 | goya->hw_cap_initialized |= HW_CAP_TPC; | |
1762 | } | |
1763 | ||
1764 | /* | |
1765 | * goya_disable_internal_queues - Disable internal queues | |
1766 | * | |
1767 | * @hdev: pointer to hl_device structure | |
1768 | * | |
1769 | */ | |
1770 | static void goya_disable_internal_queues(struct hl_device *hdev) | |
1771 | { | |
908087ff OG |
1772 | struct goya_device *goya = hdev->asic_specific; |
1773 | ||
1774 | if (!(goya->hw_cap_initialized & HW_CAP_MME)) | |
1775 | goto disable_tpc; | |
1776 | ||
9494a8dd OG |
1777 | WREG32(mmMME_QM_GLBL_CFG0, 0); |
1778 | WREG32(mmMME_CMDQ_GLBL_CFG0, 0); | |
1779 | ||
908087ff OG |
1780 | disable_tpc: |
1781 | if (!(goya->hw_cap_initialized & HW_CAP_TPC)) | |
1782 | return; | |
1783 | ||
9494a8dd OG |
1784 | WREG32(mmTPC0_QM_GLBL_CFG0, 0); |
1785 | WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0); | |
1786 | ||
1787 | WREG32(mmTPC1_QM_GLBL_CFG0, 0); | |
1788 | WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0); | |
1789 | ||
1790 | WREG32(mmTPC2_QM_GLBL_CFG0, 0); | |
1791 | WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0); | |
1792 | ||
1793 | WREG32(mmTPC3_QM_GLBL_CFG0, 0); | |
1794 | WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0); | |
1795 | ||
1796 | WREG32(mmTPC4_QM_GLBL_CFG0, 0); | |
1797 | WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0); | |
1798 | ||
1799 | WREG32(mmTPC5_QM_GLBL_CFG0, 0); | |
1800 | WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0); | |
1801 | ||
1802 | WREG32(mmTPC6_QM_GLBL_CFG0, 0); | |
1803 | WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0); | |
1804 | ||
1805 | WREG32(mmTPC7_QM_GLBL_CFG0, 0); | |
1806 | WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0); | |
1807 | } | |
1808 | ||
1809 | /* | |
1810 | * goya_stop_internal_queues - Stop internal queues | |
1811 | * | |
1812 | * @hdev: pointer to hl_device structure | |
1813 | * | |
1814 | * Returns 0 on success | |
1815 | * | |
1816 | */ | |
1817 | static int goya_stop_internal_queues(struct hl_device *hdev) | |
1818 | { | |
908087ff | 1819 | struct goya_device *goya = hdev->asic_specific; |
9494a8dd OG |
1820 | int rc, retval = 0; |
1821 | ||
908087ff OG |
1822 | if (!(goya->hw_cap_initialized & HW_CAP_MME)) |
1823 | goto stop_tpc; | |
1824 | ||
9494a8dd OG |
1825 | /* |
1826 | * Each queue (QMAN) is a separate H/W logic. That means that each | |
1827 | * QMAN can be stopped independently and failure to stop one does NOT | |
1828 | * mandate we should not try to stop other QMANs | |
1829 | */ | |
1830 | ||
1831 | rc = goya_stop_queue(hdev, | |
1832 | mmMME_QM_GLBL_CFG1, | |
1833 | mmMME_QM_CP_STS, | |
1834 | mmMME_QM_GLBL_STS0); | |
1835 | ||
1836 | if (rc) { | |
1837 | dev_err(hdev->dev, "failed to stop MME QMAN\n"); | |
1838 | retval = -EIO; | |
1839 | } | |
1840 | ||
1841 | rc = goya_stop_queue(hdev, | |
1842 | mmMME_CMDQ_GLBL_CFG1, | |
1843 | mmMME_CMDQ_CP_STS, | |
1844 | mmMME_CMDQ_GLBL_STS0); | |
1845 | ||
1846 | if (rc) { | |
1847 | dev_err(hdev->dev, "failed to stop MME CMDQ\n"); | |
1848 | retval = -EIO; | |
1849 | } | |
1850 | ||
908087ff OG |
1851 | stop_tpc: |
1852 | if (!(goya->hw_cap_initialized & HW_CAP_TPC)) | |
1853 | return retval; | |
1854 | ||
9494a8dd OG |
1855 | rc = goya_stop_queue(hdev, |
1856 | mmTPC0_QM_GLBL_CFG1, | |
1857 | mmTPC0_QM_CP_STS, | |
1858 | mmTPC0_QM_GLBL_STS0); | |
1859 | ||
1860 | if (rc) { | |
1861 | dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n"); | |
1862 | retval = -EIO; | |
1863 | } | |
1864 | ||
1865 | rc = goya_stop_queue(hdev, | |
1866 | mmTPC0_CMDQ_GLBL_CFG1, | |
1867 | mmTPC0_CMDQ_CP_STS, | |
1868 | mmTPC0_CMDQ_GLBL_STS0); | |
1869 | ||
1870 | if (rc) { | |
1871 | dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n"); | |
1872 | retval = -EIO; | |
1873 | } | |
1874 | ||
1875 | rc = goya_stop_queue(hdev, | |
1876 | mmTPC1_QM_GLBL_CFG1, | |
1877 | mmTPC1_QM_CP_STS, | |
1878 | mmTPC1_QM_GLBL_STS0); | |
1879 | ||
1880 | if (rc) { | |
1881 | dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n"); | |
1882 | retval = -EIO; | |
1883 | } | |
1884 | ||
1885 | rc = goya_stop_queue(hdev, | |
1886 | mmTPC1_CMDQ_GLBL_CFG1, | |
1887 | mmTPC1_CMDQ_CP_STS, | |
1888 | mmTPC1_CMDQ_GLBL_STS0); | |
1889 | ||
1890 | if (rc) { | |
1891 | dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n"); | |
1892 | retval = -EIO; | |
1893 | } | |
1894 | ||
1895 | rc = goya_stop_queue(hdev, | |
1896 | mmTPC2_QM_GLBL_CFG1, | |
1897 | mmTPC2_QM_CP_STS, | |
1898 | mmTPC2_QM_GLBL_STS0); | |
1899 | ||
1900 | if (rc) { | |
1901 | dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n"); | |
1902 | retval = -EIO; | |
1903 | } | |
1904 | ||
1905 | rc = goya_stop_queue(hdev, | |
1906 | mmTPC2_CMDQ_GLBL_CFG1, | |
1907 | mmTPC2_CMDQ_CP_STS, | |
1908 | mmTPC2_CMDQ_GLBL_STS0); | |
1909 | ||
1910 | if (rc) { | |
1911 | dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n"); | |
1912 | retval = -EIO; | |
1913 | } | |
1914 | ||
1915 | rc = goya_stop_queue(hdev, | |
1916 | mmTPC3_QM_GLBL_CFG1, | |
1917 | mmTPC3_QM_CP_STS, | |
1918 | mmTPC3_QM_GLBL_STS0); | |
1919 | ||
1920 | if (rc) { | |
1921 | dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n"); | |
1922 | retval = -EIO; | |
1923 | } | |
1924 | ||
1925 | rc = goya_stop_queue(hdev, | |
1926 | mmTPC3_CMDQ_GLBL_CFG1, | |
1927 | mmTPC3_CMDQ_CP_STS, | |
1928 | mmTPC3_CMDQ_GLBL_STS0); | |
1929 | ||
1930 | if (rc) { | |
1931 | dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n"); | |
1932 | retval = -EIO; | |
1933 | } | |
1934 | ||
1935 | rc = goya_stop_queue(hdev, | |
1936 | mmTPC4_QM_GLBL_CFG1, | |
1937 | mmTPC4_QM_CP_STS, | |
1938 | mmTPC4_QM_GLBL_STS0); | |
1939 | ||
1940 | if (rc) { | |
1941 | dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n"); | |
1942 | retval = -EIO; | |
1943 | } | |
1944 | ||
1945 | rc = goya_stop_queue(hdev, | |
1946 | mmTPC4_CMDQ_GLBL_CFG1, | |
1947 | mmTPC4_CMDQ_CP_STS, | |
1948 | mmTPC4_CMDQ_GLBL_STS0); | |
1949 | ||
1950 | if (rc) { | |
1951 | dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n"); | |
1952 | retval = -EIO; | |
1953 | } | |
1954 | ||
1955 | rc = goya_stop_queue(hdev, | |
1956 | mmTPC5_QM_GLBL_CFG1, | |
1957 | mmTPC5_QM_CP_STS, | |
1958 | mmTPC5_QM_GLBL_STS0); | |
1959 | ||
1960 | if (rc) { | |
1961 | dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n"); | |
1962 | retval = -EIO; | |
1963 | } | |
1964 | ||
1965 | rc = goya_stop_queue(hdev, | |
1966 | mmTPC5_CMDQ_GLBL_CFG1, | |
1967 | mmTPC5_CMDQ_CP_STS, | |
1968 | mmTPC5_CMDQ_GLBL_STS0); | |
1969 | ||
1970 | if (rc) { | |
1971 | dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n"); | |
1972 | retval = -EIO; | |
1973 | } | |
1974 | ||
1975 | rc = goya_stop_queue(hdev, | |
1976 | mmTPC6_QM_GLBL_CFG1, | |
1977 | mmTPC6_QM_CP_STS, | |
1978 | mmTPC6_QM_GLBL_STS0); | |
1979 | ||
1980 | if (rc) { | |
1981 | dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n"); | |
1982 | retval = -EIO; | |
1983 | } | |
1984 | ||
1985 | rc = goya_stop_queue(hdev, | |
1986 | mmTPC6_CMDQ_GLBL_CFG1, | |
1987 | mmTPC6_CMDQ_CP_STS, | |
1988 | mmTPC6_CMDQ_GLBL_STS0); | |
1989 | ||
1990 | if (rc) { | |
1991 | dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n"); | |
1992 | retval = -EIO; | |
1993 | } | |
1994 | ||
1995 | rc = goya_stop_queue(hdev, | |
1996 | mmTPC7_QM_GLBL_CFG1, | |
1997 | mmTPC7_QM_CP_STS, | |
1998 | mmTPC7_QM_GLBL_STS0); | |
1999 | ||
2000 | if (rc) { | |
2001 | dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n"); | |
2002 | retval = -EIO; | |
2003 | } | |
2004 | ||
2005 | rc = goya_stop_queue(hdev, | |
2006 | mmTPC7_CMDQ_GLBL_CFG1, | |
2007 | mmTPC7_CMDQ_CP_STS, | |
2008 | mmTPC7_CMDQ_GLBL_STS0); | |
2009 | ||
2010 | if (rc) { | |
2011 | dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n"); | |
2012 | retval = -EIO; | |
2013 | } | |
2014 | ||
2015 | return retval; | |
2016 | } | |
2017 | ||
/*
 * goya_dma_stall - Stall all five DMA channels
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op if the DMA engines were never initialized (HW_CAP_DMA not set).
 */
static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	/* Set the DMA_STOP bit in each channel's GLBL_CFG1 register */
	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}
2031 | ||
/*
 * goya_tpc_stall - Stall all eight TPC engines
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op if the TPC engines were never initialized (HW_CAP_TPC not set).
 */
static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	/* Set the stall bit in each TPC's CFG_TPC_STALL register */
	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}
2048 | ||
/*
 * goya_mme_stall - Stall the MME engine
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op if the MME was never initialized (HW_CAP_MME not set).
 */
static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME_STALL, 0xFFFFFFFF);
}
2058 | ||
/*
 * goya_enable_msix - Enable MSI-X and request all interrupt vectors
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates exactly GOYA_MSIX_ENTRIES vectors, attaches a CQ handler to
 * each completion queue's vector and the EQ handler to the event-queue
 * vector. On any failure, all vectors requested so far are released.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	/* min == max: we need every vector or none */
	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	/*
	 * irq_cnt_init tracks how many IRQs were successfully requested;
	 * on failure only those are released in the cleanup path.
	 */
	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	/* The event queue uses its own dedicated vector */
	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
2108 | ||
/*
 * goya_sync_irqs - Wait until all in-flight interrupt handlers finish
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op if MSI-X was never enabled.
 */
static void goya_sync_irqs(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	/* Wait for all pending IRQs to be finished */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		synchronize_irq(pci_irq_vector(hdev->pdev, i));

	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}
2123 | ||
/*
 * goya_disable_msix - Free all MSI-X vectors and disable MSI-X
 *
 * @hdev: pointer to hl_device structure
 *
 * Synchronizes outstanding handlers first, then releases the EQ vector,
 * the CQ vectors and finally the vector allocation itself.
 */
static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
2146 | ||
/*
 * goya_enable_timestamp - Reset and start the PSOC free-running timestamp
 *
 * @hdev: pointer to hl_device structure
 *
 * NOTE(review): CFG_BASE is subtracted because mmPSOC_TIMESTAMP_BASE
 * appears to be an absolute address while WREG32 takes a CFG-relative
 * offset - confirm against the register map.
 */
static void goya_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}
2159 | ||
/*
 * goya_disable_timestamp - Stop the PSOC free-running timestamp counter
 *
 * @hdev: pointer to hl_device structure
 */
static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
2165 | ||
/*
 * goya_halt_engines - Halt all compute/DMA engines before a reset
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true when the whole chip (including CPU) is about to be reset
 *
 * Ordering matters: queues are stopped first, then the engines are
 * stalled, then the queues are disabled, with settle delays in between.
 * For hard reset the embedded CPU is halted first and MSI-X is torn down
 * at the end; for soft reset only IRQ synchronization is done.
 */
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms, cpu_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	/* Simulation platform (pldm) needs much longer settle times */
	if (hdev->pldm) {
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/*
		 * I don't know what is the state of the CPU so make sure it is
		 * stopped in any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
		msleep(cpu_timeout_ms);
	}

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}
9494a8dd OG |
2215 | |
/*
 * goya_load_firmware_to_device() - Load LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX fw code from firmware file to HBM BAR.
 * Assumes the DDR BAR currently maps the region containing LINUX_FW_OFFSET.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
}
9494a8dd | 2232 | |
/*
 * goya_load_boot_fit_to_device() - Load boot fit to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy boot fit file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
}
2249 | ||
/*
 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string should be located by that offset.
 *
 * @hdev: pointer to hl_device structure
 * @fwc: which firmware component's version to read (U-Boot or Preboot)
 *
 * On a bad offset the destination is set to "unavailable" instead.
 */
static void goya_read_device_fw_version(struct hl_device *hdev,
					enum hl_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmUBOOT_VER_OFFSET);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return;
	}

	/* The scratchpad may hold an absolute address; keep only the offset */
	ver_off &= ~((u32)SRAM_BASE_ADDR);

	/* Copy only if the whole VERSION_MAX_LEN window fits inside SRAM */
	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
							VERSION_MAX_LEN);
	} else {
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
								name, ver_off);
		strcpy(dest, "unavailable");
	}
}
2288 | ||
/*
 * goya_init_cpu - Boot the embedded CPU (boot-fit + Linux FW)
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op when CPU usage is disabled or the CPU was already brought up.
 * Remaps the DDR BAR to the DRAM base first, since the FW images are
 * pushed to the device through that BAR.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to device, need to set the ddr bar to
	 * base address of dram
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
			mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
			false, GOYA_CPU_TIMEOUT_USEC,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}
2323 | ||
/*
 * goya_mmu_update_asid_hop0_addr - Program the hop-0 page-table address
 *                                  of a single ASID into the MMU
 *
 * @hdev: pointer to hl_device structure
 * @asid: address-space ID to configure
 * @phys_addr: physical address of this ASID's hop-0 page table
 *
 * Writes the address split across the PA43_12/PA49_44 registers, kicks
 * the MMU via the ASID_BUSY register and polls until the busy bit (bit
 * 31) clears.
 *
 * Returns 0 on success, negative errno on poll timeout.
 */
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
						u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	/* Bit 31 = busy/trigger, low bits = the ASID being configured */
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
2355 | ||
/*
 * goya_mmu_init - Initialize the device MMU
 *
 * @hdev: pointer to hl_device structure
 *
 * Programs a hop-0 page-table address for every ASID, configures the
 * MMU cache-management page, disables the STLB follower feature (known
 * performance bug), invalidates the MMU cache and enables the MMU.
 * No-op when the MMU is disabled or already initialized.
 *
 * Returns 0 on success, negative errno on failure.
 */
int goya_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	hdev->dram_supports_virtual_memory = true;
	hdev->dram_default_page_mapping = true;

	/* Each ASID's hop-0 table sits in a fixed slot after mmu_pgt_addr */
	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	goya->hw_cap_initialized |= HW_CAP_MMU;

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
		lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	/* Remove follower feature due to performance bug */
	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
					VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);

	WREG32(mmMMU_MMU_ENABLE, 1);
	WREG32(mmMMU_SPI_MASK, 0xF);

	return 0;

err:
	return rc;
}
2406 | ||
/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Brings the whole ASIC up in dependency order: CPU boot, golden
 * registers, MMU, security, DMA/MME/TPC QMANs, timestamp counter and
 * finally MSI-X + CPU queues.
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	dev_info(hdev->dev, "Starting initialization of H/W\n");

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	/* Security must come after MMU, before the engines are enabled */
	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}
2484 | ||
2485 | /* | |
2486 | * goya_hw_fini - Goya hardware tear-down code | |
2487 | * | |
2488 | * @hdev: pointer to hl_device structure | |
2489 | * @hard_reset: should we do hard reset to all engines or just reset the | |
2490 | * compute/dma engines | |
2491 | */ | |
2492 | static void goya_hw_fini(struct hl_device *hdev, bool hard_reset) | |
2493 | { | |
2494 | struct goya_device *goya = hdev->asic_specific; | |
2495 | u32 reset_timeout_ms, status; | |
2496 | ||
2497 | if (hdev->pldm) | |
2498 | reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC; | |
2499 | else | |
2500 | reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC; | |
2501 | ||
2502 | if (hard_reset) { | |
2503 | goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE); | |
2504 | goya_disable_clk_rlx(hdev); | |
2505 | goya_set_pll_refclk(hdev); | |
2506 | ||
2507 | WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL); | |
2508 | dev_info(hdev->dev, | |
2509 | "Issued HARD reset command, going to wait %dms\n", | |
2510 | reset_timeout_ms); | |
2511 | } else { | |
2512 | WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET); | |
2513 | dev_info(hdev->dev, | |
2514 | "Issued SOFT reset command, going to wait %dms\n", | |
2515 | reset_timeout_ms); | |
2516 | } | |
2517 | ||
2518 | /* | |
2519 | * After hard reset, we can't poll the BTM_FSM register because the PSOC | |
2520 | * itself is in reset. In either reset we need to wait until the reset | |
2521 | * is deasserted | |
2522 | */ | |
2523 | msleep(reset_timeout_ms); | |
2524 | ||
2525 | status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); | |
2526 | if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) | |
2527 | dev_err(hdev->dev, | |
2528 | "Timeout while waiting for device to reset 0x%x\n", | |
2529 | status); | |
2530 | ||
f8c8c7d5 OG |
2531 | if (!hard_reset) { |
2532 | goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME | | |
2533 | HW_CAP_GOLDEN | HW_CAP_TPC); | |
2534 | WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, | |
2535 | GOYA_ASYNC_EVENT_ID_SOFT_RESET); | |
2536 | return; | |
2537 | } | |
2538 | ||
839c4803 OG |
2539 | /* Chicken bit to re-initiate boot sequencer flow */ |
2540 | WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, | |
2541 | 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT); | |
2542 | /* Move boot manager FSM to pre boot sequencer init state */ | |
2543 | WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM, | |
2544 | 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT); | |
2545 | ||
2546 | goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | | |
2547 | HW_CAP_DDR_0 | HW_CAP_DDR_1 | | |
2548 | HW_CAP_DMA | HW_CAP_MME | | |
2549 | HW_CAP_MMU | HW_CAP_TPC_MBIST | | |
2550 | HW_CAP_GOLDEN | HW_CAP_TPC); | |
1251f23a | 2551 | memset(goya->events_stat, 0, sizeof(goya->events_stat)); |
839c4803 OG |
2552 | } |
2553 | ||
99b9d7b4 OG |
2554 | int goya_suspend(struct hl_device *hdev) |
2555 | { | |
9494a8dd OG |
2556 | int rc; |
2557 | ||
3110c60f | 2558 | rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); |
9494a8dd OG |
2559 | if (rc) |
2560 | dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); | |
2561 | ||
2562 | return rc; | |
99b9d7b4 OG |
2563 | } |
2564 | ||
/*
 * goya_resume - restore device state after suspend
 *
 * Re-initializes the PCI iATU address translation.
 * NOTE(review): presumably the iATU configuration is lost across suspend so
 * it must be re-programmed here - confirm against the suspend flow.
 */
int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}
2569 | ||
/*
 * goya_cb_mmap - map a command buffer into user space
 *
 * @hdev: pointer to hl_device structure
 * @vma: user VMA to populate
 * @kaddress: kernel address of the CB (unused here; the mapping is built
 *            from the physical address directly)
 * @paddress: physical address backing the CB
 * @size: size of the mapping in bytes
 *
 * Returns 0 on success or the remap_pfn_range() error code.
 */
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u64 kaddress, phys_addr_t paddress, u32 size)
{
	int rc;

	/*
	 * Device-style mapping: no fork inheritance, no expansion, no core
	 * dump and no swap accounting for these pages
	 */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}
2585 | ||
/*
 * goya_ring_doorbell - publish a new producer index to a H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: queue whose doorbell should be rung
 * @pi: new producer index
 *
 * Maps the queue ID to its QMAN PI register and writes @pi to it. For the
 * CPU queue, an additional GIC event is raised so the embedded CPU notices
 * the update. Unknown queue IDs are logged and ignored.
 */
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	/* Wake the embedded CPU so it polls its queue for the new PI */
	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
2667 | ||
/*
 * goya_pqe_write - write a buffer descriptor into a queue's PQ entry
 *
 * @pqe: pointer to the PQ entry (resides on device SRAM)
 * @bd: buffer descriptor to copy in
 */
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMANs are on the SRAM so need to copy to IO space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}
2673 | ||
5e6e0239 | 2674 | static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, |
99b9d7b4 OG |
2675 | dma_addr_t *dma_handle, gfp_t flags) |
2676 | { | |
94cb669c TT |
2677 | void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, |
2678 | dma_handle, flags); | |
2679 | ||
2680 | /* Shift to the device's base physical address of host memory */ | |
2681 | if (kernel_addr) | |
2682 | *dma_handle += HOST_PHYS_BASE; | |
2683 | ||
2684 | return kernel_addr; | |
99b9d7b4 OG |
2685 | } |
2686 | ||
5e6e0239 OG |
2687 | static void goya_dma_free_coherent(struct hl_device *hdev, size_t size, |
2688 | void *cpu_addr, dma_addr_t dma_handle) | |
99b9d7b4 | 2689 | { |
94cb669c TT |
2690 | /* Cancel the device's base physical address of host memory */ |
2691 | dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; | |
2692 | ||
2693 | dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); | |
99b9d7b4 OG |
2694 | } |
2695 | ||
9494a8dd OG |
/*
 * goya_get_int_queue_base - get base address and length of an internal queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: internal H/W queue ID (MME or TPC0-7)
 * @dma_handle: filled with the queue's device-side (SRAM) address
 * @queue_len: filled with the queue's length
 *
 * Returns the host-mapped (SRAM/CFG PCI BAR) address of the queue base, or
 * NULL for an unknown queue ID.
 */
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	/* Same offset applies to both the BAR mapping and the device address */
	base += offset;
	*dma_handle += offset;

	return base;
}
2753 | ||
/*
 * goya_send_job_on_qman0 - submit a driver-generated job on DMA QMAN 0
 *
 * @hdev: pointer to hl_device structure
 * @job: job whose patched CB should be executed
 *
 * The patched CB ends with a MSG_PROT fence packet; the job is submitted on
 * QMAN 0 and completion is detected by polling host memory for the fence
 * value written by that packet.
 *
 * Returns 0 on success, -EBUSY if the device is not idle, -ENOMEM on
 * allocation failure, or the polling error (e.g. -ETIMEDOUT).
 */
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	/* Temporarily secure QMAN 0 so MSG_PROT is honored */
	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	/* The fence packet occupies the last slot of the patched CB */
	fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot));

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	/* The goto is redundant (falls into the label), kept for symmetry */
	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	/* Restore the normal (non-secured) QMAN 0 mode */
	goya_qman0_set_security(hdev, false);

	return rc;
}
2822 | ||
9494a8dd OG |
2823 | int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, |
2824 | u32 timeout, long *result) | |
2825 | { | |
2826 | struct goya_device *goya = hdev->asic_specific; | |
9494a8dd OG |
2827 | |
2828 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) { | |
2829 | if (result) | |
2830 | *result = 0; | |
2831 | return 0; | |
2832 | } | |
2833 | ||
788cacf3 OG |
2834 | if (!timeout) |
2835 | timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; | |
2836 | ||
3110c60f TT |
2837 | return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, |
2838 | timeout, result); | |
9494a8dd OG |
2839 | } |
2840 | ||
/*
 * goya_test_queue - sanity-test a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: queue to test
 *
 * Sends a MSG_PROT fence packet through the queue and polls host memory for
 * the fence value, proving the queue can fetch and execute packets.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO on timeout or
 * the submission error code.
 */
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for queue testing\n");
		return -ENOMEM;
	}

	/* Make sure we poll a fresh value, not a stale fence */
	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for queue testing\n");
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet\n");
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
2908 | ||
/*
 * goya_test_cpu_queue - sanity-test the CPU queue via the common FW helper
 *
 * Returns 0 when the CPU queue is not yet up (nothing to test) or the
 * result of hl_fw_test_cpu_queue() otherwise.
 */
int goya_test_cpu_queue(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}
2922 | ||
bedd1442 | 2923 | int goya_test_queues(struct hl_device *hdev) |
9494a8dd | 2924 | { |
9494a8dd OG |
2925 | int i, rc, ret_val = 0; |
2926 | ||
2927 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { | |
2928 | rc = goya_test_queue(hdev, i); | |
2929 | if (rc) | |
2930 | ret_val = -EINVAL; | |
2931 | } | |
2932 | ||
9494a8dd OG |
2933 | return ret_val; |
2934 | } | |
2935 | ||
5e6e0239 OG |
2936 | static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size, |
2937 | gfp_t mem_flags, dma_addr_t *dma_handle) | |
9494a8dd | 2938 | { |
94cb669c TT |
2939 | void *kernel_addr; |
2940 | ||
9494a8dd OG |
2941 | if (size > GOYA_DMA_POOL_BLK_SIZE) |
2942 | return NULL; | |
2943 | ||
94cb669c TT |
2944 | kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); |
2945 | ||
2946 | /* Shift to the device's base physical address of host memory */ | |
2947 | if (kernel_addr) | |
2948 | *dma_handle += HOST_PHYS_BASE; | |
2949 | ||
2950 | return kernel_addr; | |
9494a8dd OG |
2951 | } |
2952 | ||
5e6e0239 OG |
2953 | static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, |
2954 | dma_addr_t dma_addr) | |
9494a8dd | 2955 | { |
94cb669c TT |
2956 | /* Cancel the device's base physical address of host memory */ |
2957 | dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; | |
2958 | ||
2959 | dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); | |
9494a8dd OG |
2960 | } |
2961 | ||
bedd1442 OG |
/*
 * goya_cpu_accessible_dma_pool_alloc - allocate from the CPU-accessible pool
 *
 * Allocates via the common FW helper, then translates the returned bus
 * address into the device VA range reserved for CPU-accessible memory
 * (VA_CPU_ACCESSIBLE_MEM_ADDR based).
 */
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle)
{
	void *vaddr;

	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
			VA_CPU_ACCESSIBLE_MEM_ADDR;

	return vaddr;
}
2973 | ||
bedd1442 OG |
/*
 * goya_cpu_accessible_dma_pool_free - free a CPU-accessible pool allocation
 *
 * Thin wrapper; the common FW code manages the CPU-accessible pool.
 */
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
2979 | ||
94cb669c | 2980 | static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, |
5e6e0239 | 2981 | int nents, enum dma_data_direction dir) |
eff6f4a0 | 2982 | { |
94cb669c TT |
2983 | struct scatterlist *sg; |
2984 | int i; | |
2985 | ||
2986 | if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) | |
eff6f4a0 OG |
2987 | return -ENOMEM; |
2988 | ||
94cb669c TT |
2989 | /* Shift to the device's base physical address of host memory */ |
2990 | for_each_sg(sgl, sg, nents, i) | |
2991 | sg->dma_address += HOST_PHYS_BASE; | |
2992 | ||
eff6f4a0 OG |
2993 | return 0; |
2994 | } | |
2995 | ||
94cb669c | 2996 | static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, |
5e6e0239 | 2997 | int nents, enum dma_data_direction dir) |
eff6f4a0 | 2998 | { |
94cb669c TT |
2999 | struct scatterlist *sg; |
3000 | int i; | |
3001 | ||
3002 | /* Cancel the device's base physical address of host memory */ | |
3003 | for_each_sg(sgl, sg, nents, i) | |
3004 | sg->dma_address -= HOST_PHYS_BASE; | |
3005 | ||
3006 | dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); | |
eff6f4a0 OG |
3007 | } |
3008 | ||
/*
 * goya_get_dma_desc_list_size - size of the LIN_DMA packets for a sg table
 *
 * @hdev: pointer to hl_device structure
 * @sgt: DMA-mapped scatter-gather table
 *
 * Counts how many LIN_DMA descriptors are needed: physically contiguous
 * entries are merged into one descriptor as long as the combined length
 * stays within DMA_MAX_TRANSFER_SIZE. A zero-length entry terminates the
 * scan. Returns the total size in bytes of the resulting packets.
 */
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Greedily merge adjacent entries into one descriptor */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
3049 | ||
/*
 * goya_pin_memory_before_cs - pin and DMA-map host memory used by a DMA packet
 *
 * @hdev: pointer to hl_device structure
 * @parser: CS parser context; patched_cb_size is grown by the descriptor size
 * @user_dma_pkt: the user's LIN_DMA packet
 * @addr: host address referenced by the packet
 * @dir: DMA direction for the mapping
 *
 * If the range is already pinned for this job, only the patched CB size is
 * updated; otherwise the memory is pinned, added to the job's userptr list
 * and DMA-mapped. Returns 0 on success or a negative error code.
 */
static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	/* GFP_ATOMIC: this path runs in a context that must not sleep here */
	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				userptr->sgt->nents, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
3095 | ||
3096 | static int goya_validate_dma_pkt_host(struct hl_device *hdev, | |
3097 | struct hl_cs_parser *parser, | |
3098 | struct packet_lin_dma *user_dma_pkt) | |
3099 | { | |
3100 | u64 device_memory_addr, addr; | |
3101 | enum dma_data_direction dir; | |
3102 | enum goya_dma_direction user_dir; | |
3103 | bool sram_addr = true; | |
3104 | bool skip_host_mem_pin = false; | |
3105 | bool user_memset; | |
df697bce | 3106 | u32 ctl; |
eff6f4a0 OG |
3107 | int rc = 0; |
3108 | ||
df697bce TT |
3109 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3110 | ||
3111 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3112 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3113 | ||
df697bce | 3114 | user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >> |
eff6f4a0 OG |
3115 | GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT; |
3116 | ||
3117 | switch (user_dir) { | |
3118 | case DMA_HOST_TO_DRAM: | |
3119 | dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n"); | |
3120 | dir = DMA_TO_DEVICE; | |
3121 | sram_addr = false; | |
df697bce TT |
3122 | addr = le64_to_cpu(user_dma_pkt->src_addr); |
3123 | device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3124 | if (user_memset) |
3125 | skip_host_mem_pin = true; | |
3126 | break; | |
3127 | ||
3128 | case DMA_DRAM_TO_HOST: | |
3129 | dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n"); | |
3130 | dir = DMA_FROM_DEVICE; | |
3131 | sram_addr = false; | |
df697bce TT |
3132 | addr = le64_to_cpu(user_dma_pkt->dst_addr); |
3133 | device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); | |
eff6f4a0 OG |
3134 | break; |
3135 | ||
3136 | case DMA_HOST_TO_SRAM: | |
3137 | dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n"); | |
3138 | dir = DMA_TO_DEVICE; | |
df697bce TT |
3139 | addr = le64_to_cpu(user_dma_pkt->src_addr); |
3140 | device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3141 | if (user_memset) |
3142 | skip_host_mem_pin = true; | |
3143 | break; | |
3144 | ||
3145 | case DMA_SRAM_TO_HOST: | |
3146 | dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n"); | |
3147 | dir = DMA_FROM_DEVICE; | |
df697bce TT |
3148 | addr = le64_to_cpu(user_dma_pkt->dst_addr); |
3149 | device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); | |
eff6f4a0 OG |
3150 | break; |
3151 | default: | |
3152 | dev_err(hdev->dev, "DMA direction is undefined\n"); | |
3153 | return -EFAULT; | |
3154 | } | |
3155 | ||
f0539fb0 DBZ |
3156 | if (sram_addr) { |
3157 | if (!hl_mem_area_inside_range(device_memory_addr, | |
3158 | le32_to_cpu(user_dma_pkt->tsize), | |
3159 | hdev->asic_prop.sram_user_base_address, | |
3160 | hdev->asic_prop.sram_end_address)) { | |
3161 | ||
3162 | dev_err(hdev->dev, | |
3163 | "SRAM address 0x%llx + 0x%x is invalid\n", | |
3164 | device_memory_addr, | |
3165 | user_dma_pkt->tsize); | |
3166 | return -EFAULT; | |
3167 | } | |
3168 | } else { | |
3169 | if (!hl_mem_area_inside_range(device_memory_addr, | |
3170 | le32_to_cpu(user_dma_pkt->tsize), | |
3171 | hdev->asic_prop.dram_user_base_address, | |
3172 | hdev->asic_prop.dram_end_address)) { | |
3173 | ||
3174 | dev_err(hdev->dev, | |
3175 | "DRAM address 0x%llx + 0x%x is invalid\n", | |
3176 | device_memory_addr, | |
3177 | user_dma_pkt->tsize); | |
3178 | return -EFAULT; | |
eff6f4a0 OG |
3179 | } |
3180 | } | |
3181 | ||
3182 | if (skip_host_mem_pin) | |
3183 | parser->patched_cb_size += sizeof(*user_dma_pkt); | |
3184 | else { | |
3185 | if ((dir == DMA_TO_DEVICE) && | |
3186 | (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) { | |
3187 | dev_err(hdev->dev, | |
3188 | "Can't DMA from host on queue other then 1\n"); | |
3189 | return -EFAULT; | |
3190 | } | |
3191 | ||
3192 | rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt, | |
3193 | addr, dir); | |
3194 | } | |
3195 | ||
3196 | return rc; | |
3197 | } | |
3198 | ||
3199 | static int goya_validate_dma_pkt_no_host(struct hl_device *hdev, | |
3200 | struct hl_cs_parser *parser, | |
3201 | struct packet_lin_dma *user_dma_pkt) | |
3202 | { | |
3203 | u64 sram_memory_addr, dram_memory_addr; | |
3204 | enum goya_dma_direction user_dir; | |
df697bce | 3205 | u32 ctl; |
eff6f4a0 | 3206 | |
df697bce TT |
3207 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3208 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3209 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3210 | ||
3211 | if (user_dir == DMA_DRAM_TO_SRAM) { | |
3212 | dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n"); | |
df697bce TT |
3213 | dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); |
3214 | sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3215 | } else { |
3216 | dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n"); | |
df697bce TT |
3217 | sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); |
3218 | dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3219 | } |
3220 | ||
df697bce TT |
3221 | if (!hl_mem_area_inside_range(sram_memory_addr, |
3222 | le32_to_cpu(user_dma_pkt->tsize), | |
eff6f4a0 OG |
3223 | hdev->asic_prop.sram_user_base_address, |
3224 | hdev->asic_prop.sram_end_address)) { | |
3225 | dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n", | |
3226 | sram_memory_addr, user_dma_pkt->tsize); | |
3227 | return -EFAULT; | |
3228 | } | |
3229 | ||
df697bce TT |
3230 | if (!hl_mem_area_inside_range(dram_memory_addr, |
3231 | le32_to_cpu(user_dma_pkt->tsize), | |
eff6f4a0 OG |
3232 | hdev->asic_prop.dram_user_base_address, |
3233 | hdev->asic_prop.dram_end_address)) { | |
3234 | dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n", | |
3235 | dram_memory_addr, user_dma_pkt->tsize); | |
3236 | return -EFAULT; | |
3237 | } | |
3238 | ||
3239 | parser->patched_cb_size += sizeof(*user_dma_pkt); | |
3240 | ||
3241 | return 0; | |
3242 | } | |
3243 | ||
/*
 * goya_validate_dma_pkt_no_mmu - validate a LIN_DMA packet when MMU is off
 *
 * Dispatches to the device-only or host-touching validator according to the
 * packet's direction field. Zero-size DMA is rejected up front because of a
 * H/W bug that can stall the QMAN DMA.
 *
 * Returns 0 on success or a negative error code.
 */
static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Special handling for DMA with size 0. The H/W has a bug where
	 * this can cause the QMAN DMA to get stuck, so block it here.
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}
3280 | ||
/*
 * goya_validate_dma_pkt_mmu - validate a LIN_DMA packet when MMU is on
 *
 * With the MMU enabled, addresses are virtual so only two checks remain:
 * the HW-23 workaround (host reads allowed on queue 1 only) and the
 * zero-size H/W bug. Grows patched_cb_size by one packet on success.
 *
 * Returns 0 on success or a negative error code.
 */
static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * WA for HW-23.
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other then 1\n");
		return -EFAULT;
	}

	/* Zero-size DMA can stall the QMAN DMA (H/W bug) - block it */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}
3317 | ||
3318 | static int goya_validate_wreg32(struct hl_device *hdev, | |
3319 | struct hl_cs_parser *parser, | |
3320 | struct packet_wreg32 *wreg_pkt) | |
3321 | { | |
3322 | struct goya_device *goya = hdev->asic_specific; | |
3323 | u32 sob_start_addr, sob_end_addr; | |
3324 | u16 reg_offset; | |
3325 | ||
df697bce TT |
3326 | reg_offset = le32_to_cpu(wreg_pkt->ctl) & |
3327 | GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK; | |
eff6f4a0 OG |
3328 | |
3329 | dev_dbg(hdev->dev, "WREG32 packet details:\n"); | |
3330 | dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset); | |
b421d83a BS |
3331 | dev_dbg(hdev->dev, "value == 0x%x\n", |
3332 | le32_to_cpu(wreg_pkt->value)); | |
eff6f4a0 | 3333 | |
6765fda0 | 3334 | if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) { |
eff6f4a0 OG |
3335 | dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n", |
3336 | reg_offset); | |
3337 | return -EPERM; | |
3338 | } | |
3339 | ||
3340 | /* | |
3341 | * With MMU, DMA channels are not secured, so it doesn't matter where | |
3342 | * the WR COMP will be written to because it will go out with | |
3343 | * non-secured property | |
3344 | */ | |
3345 | if (goya->hw_cap_initialized & HW_CAP_MMU) | |
3346 | return 0; | |
3347 | ||
3348 | sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
3349 | sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023); | |
3350 | ||
df697bce TT |
3351 | if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) || |
3352 | (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) { | |
eff6f4a0 OG |
3353 | |
3354 | dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n", | |
3355 | wreg_pkt->value); | |
3356 | return -EPERM; | |
3357 | } | |
3358 | ||
3359 | return 0; | |
3360 | } | |
3361 | ||
/*
 * goya_validate_cb() - security-validate a user command buffer.
 * @hdev: pointer to hl_device structure.
 * @parser: the command-submission parser state.
 * @is_mmu: true when the device MMU is enabled (selects LIN_DMA validation).
 *
 * Walks the user CB packet by packet, rejects packet types users may not
 * submit, and accumulates parser->patched_cb_size so the caller knows how
 * large the patched CB must be.
 *
 * Return: 0 on success, negative errno on an invalid/forbidden packet.
 */
static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = (struct goya_packet *) (uintptr_t)
			(parser->user_cb->kernel_address + cb_parsed_length);

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		/* A packet must not extend past the end of the user CB */
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			/*
			 * LIN_DMA is the only packet whose validation (and
			 * therefore its contribution to patched_cb_size)
			 * depends on whether the MMU is enabled
			 */
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			/* Allowed as-is; copied verbatim into the patched CB */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
3463 | ||
3464 | static int goya_patch_dma_packet(struct hl_device *hdev, | |
3465 | struct hl_cs_parser *parser, | |
3466 | struct packet_lin_dma *user_dma_pkt, | |
3467 | struct packet_lin_dma *new_dma_pkt, | |
3468 | u32 *new_dma_pkt_size) | |
3469 | { | |
3470 | struct hl_userptr *userptr; | |
3471 | struct scatterlist *sg, *sg_next_iter; | |
e99f1683 OG |
3472 | u32 count, dma_desc_cnt; |
3473 | u64 len, len_next; | |
eff6f4a0 OG |
3474 | dma_addr_t dma_addr, dma_addr_next; |
3475 | enum goya_dma_direction user_dir; | |
3476 | u64 device_memory_addr, addr; | |
3477 | enum dma_data_direction dir; | |
3478 | struct sg_table *sgt; | |
3479 | bool skip_host_mem_pin = false; | |
3480 | bool user_memset; | |
df697bce | 3481 | u32 user_rdcomp_mask, user_wrcomp_mask, ctl; |
eff6f4a0 | 3482 | |
df697bce TT |
3483 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3484 | ||
3485 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3486 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3487 | ||
df697bce | 3488 | user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >> |
eff6f4a0 OG |
3489 | GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT; |
3490 | ||
3491 | if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) || | |
3492 | (user_dma_pkt->tsize == 0)) { | |
3493 | memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt)); | |
3494 | *new_dma_pkt_size = sizeof(*new_dma_pkt); | |
3495 | return 0; | |
3496 | } | |
3497 | ||
3498 | if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) { | |
df697bce TT |
3499 | addr = le64_to_cpu(user_dma_pkt->src_addr); |
3500 | device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3501 | dir = DMA_TO_DEVICE; |
3502 | if (user_memset) | |
3503 | skip_host_mem_pin = true; | |
3504 | } else { | |
df697bce TT |
3505 | addr = le64_to_cpu(user_dma_pkt->dst_addr); |
3506 | device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); | |
eff6f4a0 OG |
3507 | dir = DMA_FROM_DEVICE; |
3508 | } | |
3509 | ||
3510 | if ((!skip_host_mem_pin) && | |
df697bce TT |
3511 | (hl_userptr_is_pinned(hdev, addr, |
3512 | le32_to_cpu(user_dma_pkt->tsize), | |
eff6f4a0 OG |
3513 | parser->job_userptr_list, &userptr) == false)) { |
3514 | dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", | |
3515 | addr, user_dma_pkt->tsize); | |
3516 | return -EFAULT; | |
3517 | } | |
3518 | ||
3519 | if ((user_memset) && (dir == DMA_TO_DEVICE)) { | |
3520 | memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); | |
3521 | *new_dma_pkt_size = sizeof(*user_dma_pkt); | |
3522 | return 0; | |
3523 | } | |
3524 | ||
df697bce | 3525 | user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK; |
eff6f4a0 | 3526 | |
df697bce | 3527 | user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK; |
eff6f4a0 OG |
3528 | |
3529 | sgt = userptr->sgt; | |
3530 | dma_desc_cnt = 0; | |
3531 | ||
3532 | for_each_sg(sgt->sgl, sg, sgt->nents, count) { | |
3533 | len = sg_dma_len(sg); | |
3534 | dma_addr = sg_dma_address(sg); | |
3535 | ||
3536 | if (len == 0) | |
3537 | break; | |
3538 | ||
3539 | while ((count + 1) < sgt->nents) { | |
3540 | sg_next_iter = sg_next(sg); | |
3541 | len_next = sg_dma_len(sg_next_iter); | |
3542 | dma_addr_next = sg_dma_address(sg_next_iter); | |
3543 | ||
3544 | if (len_next == 0) | |
3545 | break; | |
3546 | ||
3547 | if ((dma_addr + len == dma_addr_next) && | |
3548 | (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { | |
3549 | len += len_next; | |
3550 | count++; | |
3551 | sg = sg_next_iter; | |
3552 | } else { | |
3553 | break; | |
3554 | } | |
3555 | } | |
3556 | ||
df697bce | 3557 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
eff6f4a0 | 3558 | if (likely(dma_desc_cnt)) |
df697bce TT |
3559 | ctl &= ~GOYA_PKT_CTL_EB_MASK; |
3560 | ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK | | |
3561 | GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK); | |
3562 | new_dma_pkt->ctl = cpu_to_le32(ctl); | |
3563 | new_dma_pkt->tsize = cpu_to_le32((u32) len); | |
eff6f4a0 | 3564 | |
eff6f4a0 | 3565 | if (dir == DMA_TO_DEVICE) { |
df697bce TT |
3566 | new_dma_pkt->src_addr = cpu_to_le64(dma_addr); |
3567 | new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); | |
eff6f4a0 | 3568 | } else { |
df697bce TT |
3569 | new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); |
3570 | new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); | |
eff6f4a0 OG |
3571 | } |
3572 | ||
3573 | if (!user_memset) | |
3574 | device_memory_addr += len; | |
3575 | dma_desc_cnt++; | |
3576 | new_dma_pkt++; | |
3577 | } | |
3578 | ||
3579 | if (!dma_desc_cnt) { | |
3580 | dev_err(hdev->dev, | |
3581 | "Error of 0 SG entries when patching DMA packet\n"); | |
3582 | return -EFAULT; | |
3583 | } | |
3584 | ||
3585 | /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */ | |
3586 | new_dma_pkt--; | |
df697bce | 3587 | new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask); |
eff6f4a0 OG |
3588 | |
3589 | *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); | |
3590 | ||
3591 | return 0; | |
3592 | } | |
3593 | ||
/*
 * goya_patch_cb() - copy a user CB into the patched CB, rewriting DMA packets.
 * @hdev: pointer to hl_device structure.
 * @parser: the command-submission parser state; patched_cb must already be
 *          allocated and sized by goya_validate_cb().
 *
 * Walks the user CB and emits each packet into the patched CB: LIN_DMA
 * packets are rewritten via goya_patch_dma_packet() (possibly expanding to
 * several packets), WREG32 is copied then re-validated, other allowed
 * packets are copied verbatim, and forbidden packets abort the parse.
 *
 * Return: 0 on success, negative errno on an invalid/forbidden packet.
 */
static int goya_patch_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		/* Current read position in the user CB ... */
		user_pkt = (struct goya_packet *) (uintptr_t)
			(parser->user_cb->kernel_address + cb_parsed_length);
		/* ... and current write position in the patched CB */
		kernel_pkt = (struct goya_packet *) (uintptr_t)
			(parser->patched_cb->kernel_address +
					cb_patched_cur_length);

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		/* A packet must not extend past the end of the user CB */
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand into several packets; helper reports size */
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			/* Copy first, then validate the kernel-side copy */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			/* Allowed packet types are copied verbatim */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
3687 | ||
/*
 * goya_parse_cb_mmu() - parse a user CB for an external queue, MMU enabled.
 * @hdev: pointer to hl_device structure.
 * @parser: the command-submission parser state.
 *
 * With the MMU on, the user CB is copied wholesale into a kernel-owned
 * patched CB (sized for two trailing MSG_PROT packets) and then validated
 * in place - no per-packet patching is needed.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
			parser->patched_cb_size,
			&patched_cb_handle, HL_KERNEL_ASID_ID);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
		(void *) (uintptr_t) parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation must not have changed the size computed above */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}
3766 | ||
5e6e0239 OG |
3767 | static int goya_parse_cb_no_mmu(struct hl_device *hdev, |
3768 | struct hl_cs_parser *parser) | |
eff6f4a0 OG |
3769 | { |
3770 | u64 patched_cb_handle; | |
3771 | int rc; | |
3772 | ||
3773 | rc = goya_validate_cb(hdev, parser, false); | |
3774 | ||
3775 | if (rc) | |
3776 | goto free_userptr; | |
3777 | ||
3778 | rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, | |
3779 | parser->patched_cb_size, | |
3780 | &patched_cb_handle, HL_KERNEL_ASID_ID); | |
3781 | if (rc) { | |
3782 | dev_err(hdev->dev, | |
3783 | "Failed to allocate patched CB for DMA CS %d\n", rc); | |
3784 | goto free_userptr; | |
3785 | } | |
3786 | ||
3787 | patched_cb_handle >>= PAGE_SHIFT; | |
3788 | parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, | |
3789 | (u32) patched_cb_handle); | |
3790 | /* hl_cb_get should never fail here so use kernel WARN */ | |
3791 | WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n", | |
3792 | (u32) patched_cb_handle); | |
3793 | if (!parser->patched_cb) { | |
3794 | rc = -EFAULT; | |
3795 | goto out; | |
3796 | } | |
3797 | ||
3798 | rc = goya_patch_cb(hdev, parser); | |
3799 | ||
3800 | if (rc) | |
3801 | hl_cb_put(parser->patched_cb); | |
3802 | ||
3803 | out: | |
3804 | /* | |
3805 | * Always call cb destroy here because we still have 1 reference | |
3806 | * to it by calling cb_get earlier. After the job will be completed, | |
3807 | * cb_put will release it, but here we want to remove it from the | |
3808 | * idr | |
3809 | */ | |
3810 | hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, | |
3811 | patched_cb_handle << PAGE_SHIFT); | |
3812 | ||
3813 | free_userptr: | |
3814 | if (rc) | |
3815 | hl_userptr_delete_list(hdev, parser->job_userptr_list); | |
3816 | return rc; | |
3817 | } | |
3818 | ||
883c2459 | 3819 | static int goya_parse_cb_no_ext_queue(struct hl_device *hdev, |
5e6e0239 | 3820 | struct hl_cs_parser *parser) |
eff6f4a0 OG |
3821 | { |
3822 | struct asic_fixed_properties *asic_prop = &hdev->asic_prop; | |
3823 | struct goya_device *goya = hdev->asic_specific; | |
3824 | ||
883c2459 OG |
3825 | if (goya->hw_cap_initialized & HW_CAP_MMU) |
3826 | return 0; | |
eff6f4a0 | 3827 | |
883c2459 OG |
3828 | /* For internal queue jobs, just check if CB address is valid */ |
3829 | if (hl_mem_area_inside_range( | |
3830 | (u64) (uintptr_t) parser->user_cb, | |
3831 | parser->user_cb_size, | |
3832 | asic_prop->sram_user_base_address, | |
3833 | asic_prop->sram_end_address)) | |
3834 | return 0; | |
eff6f4a0 | 3835 | |
883c2459 OG |
3836 | if (hl_mem_area_inside_range( |
3837 | (u64) (uintptr_t) parser->user_cb, | |
3838 | parser->user_cb_size, | |
3839 | asic_prop->dram_user_base_address, | |
3840 | asic_prop->dram_end_address)) | |
3841 | return 0; | |
eff6f4a0 | 3842 | |
883c2459 | 3843 | dev_err(hdev->dev, |
7f74d4d3 | 3844 | "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n", |
883c2459 | 3845 | parser->user_cb, parser->user_cb_size); |
eff6f4a0 | 3846 | |
883c2459 | 3847 | return -EFAULT; |
eff6f4a0 OG |
3848 | } |
3849 | ||
3850 | int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) | |
3851 | { | |
3852 | struct goya_device *goya = hdev->asic_specific; | |
3853 | ||
cb596aee | 3854 | if (parser->queue_type == QUEUE_TYPE_INT) |
883c2459 | 3855 | return goya_parse_cb_no_ext_queue(hdev, parser); |
eff6f4a0 | 3856 | |
5809e18e | 3857 | if (goya->hw_cap_initialized & HW_CAP_MMU) |
eff6f4a0 OG |
3858 | return goya_parse_cb_mmu(hdev, parser); |
3859 | else | |
3860 | return goya_parse_cb_no_mmu(hdev, parser); | |
3861 | } | |
3862 | ||
/*
 * goya_add_end_of_cb_packets() - write the two trailing MSG_PROT packets.
 * @hdev: pointer to hl_device structure.
 * @kernel_address: kernel VA of the CB.
 * @len: total CB length in bytes; the last two packet slots are overwritten.
 * @cq_addr: address the completion value is written to.
 * @cq_val: completion value.
 * @msix_vec: MSI-X vector number (masked to 11 bits).
 * @eb: engine-barrier request. NOTE(review): ignored here - Goya always
 *      sets EB on the first packet; confirm against the common-code callers.
 */
void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
				bool eb)
{
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	/* Point at the two reserved MSG_PROT slots at the end of the CB */
	cq_pkt = (struct packet_msg_prot *) (uintptr_t)
		(kernel_address + len - (sizeof(struct packet_msg_prot) * 2));

	/* First MSG_PROT: write the completion value to the CQ address */
	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	/* Second MSG_PROT: ring the PCIe MSI-X doorbell */
	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}
3888 | ||
/* Publish the event-queue consumer index to the device (CPU_EQ_CI register) */
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}
3893 | ||
/* Intentionally empty - Goya has no phase topology state to restore */
void goya_restore_phase_topology(struct hl_device *hdev)
{

}
3898 | ||
3899 | static void goya_clear_sm_regs(struct hl_device *hdev) | |
eff6f4a0 OG |
3900 | { |
3901 | int i, num_of_sob_in_longs, num_of_mon_in_longs; | |
3902 | ||
3903 | num_of_sob_in_longs = | |
3904 | ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4); | |
3905 | ||
3906 | num_of_mon_in_longs = | |
3907 | ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4); | |
3908 | ||
3909 | for (i = 0 ; i < num_of_sob_in_longs ; i += 4) | |
3910 | WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0); | |
3911 | ||
3912 | for (i = 0 ; i < num_of_mon_in_longs ; i += 4) | |
3913 | WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0); | |
3914 | ||
3915 | /* Flush all WREG to prevent race */ | |
3916 | i = RREG32(mmSYNC_MNGR_SOB_OBJ_0); | |
3917 | } | |
3918 | ||
/*
 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
 *                       address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: returned value
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows reading from the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space - read via the register interface */
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is always reachable through its PCI BAR */
		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
			(addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {

		/* Align the BAR window down to the aperture size */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			/* Restore the previous BAR base after the read */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* -EIO if either the move or the restore of the BAR failed */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical address - only valid without an IOMMU */
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
3975 | ||
/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
 *                        address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: value to write
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space - write via the register interface */
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is always reachable through its PCI BAR */
		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
			(addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {

		/* Align the BAR window down to the aperture size */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			/* Restore the previous BAR base after the write */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* -EIO if either the move or the restore of the BAR failed */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical address - only valid without an IOMMU */
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4032 | ||
5cce5146 MH |
4033 | static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) |
4034 | { | |
4035 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
4036 | u64 ddr_bar_addr; | |
4037 | int rc = 0; | |
4038 | ||
4039 | if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { | |
4040 | u32 val_l = RREG32(addr - CFG_BASE); | |
4041 | u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); | |
4042 | ||
4043 | *val = (((u64) val_h) << 32) | val_l; | |
4044 | ||
4045 | } else if ((addr >= SRAM_BASE_ADDR) && | |
4046 | (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { | |
4047 | ||
4048 | *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
4049 | (addr - SRAM_BASE_ADDR)); | |
4050 | ||
4051 | } else if ((addr >= DRAM_PHYS_BASE) && | |
4052 | (addr <= | |
4053 | DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { | |
4054 | ||
4055 | u64 bar_base_addr = DRAM_PHYS_BASE + | |
4056 | (addr & ~(prop->dram_pci_bar_size - 0x1ull)); | |
4057 | ||
4058 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); | |
4059 | if (ddr_bar_addr != U64_MAX) { | |
4060 | *val = readq(hdev->pcie_bar[DDR_BAR_ID] + | |
4061 | (addr - bar_base_addr)); | |
4062 | ||
4063 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, | |
4064 | ddr_bar_addr); | |
4065 | } | |
4066 | if (ddr_bar_addr == U64_MAX) | |
4067 | rc = -EIO; | |
4068 | ||
4069 | } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { | |
4070 | *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); | |
4071 | ||
4072 | } else { | |
4073 | rc = -EFAULT; | |
4074 | } | |
4075 | ||
4076 | return rc; | |
4077 | } | |
4078 | ||
4079 | static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) | |
4080 | { | |
4081 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
4082 | u64 ddr_bar_addr; | |
4083 | int rc = 0; | |
4084 | ||
4085 | if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { | |
4086 | WREG32(addr - CFG_BASE, lower_32_bits(val)); | |
4087 | WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); | |
4088 | ||
4089 | } else if ((addr >= SRAM_BASE_ADDR) && | |
4090 | (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { | |
4091 | ||
4092 | writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
4093 | (addr - SRAM_BASE_ADDR)); | |
4094 | ||
4095 | } else if ((addr >= DRAM_PHYS_BASE) && | |
4096 | (addr <= | |
4097 | DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { | |
4098 | ||
4099 | u64 bar_base_addr = DRAM_PHYS_BASE + | |
4100 | (addr & ~(prop->dram_pci_bar_size - 0x1ull)); | |
4101 | ||
4102 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); | |
4103 | if (ddr_bar_addr != U64_MAX) { | |
4104 | writeq(val, hdev->pcie_bar[DDR_BAR_ID] + | |
4105 | (addr - bar_base_addr)); | |
4106 | ||
4107 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, | |
4108 | ddr_bar_addr); | |
4109 | } | |
4110 | if (ddr_bar_addr == U64_MAX) | |
4111 | rc = -EIO; | |
4112 | ||
4113 | } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { | |
4114 | *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; | |
4115 | ||
4116 | } else { | |
4117 | rc = -EFAULT; | |
4118 | } | |
4119 | ||
4120 | return rc; | |
4121 | } | |
4122 | ||
/*
 * goya_read_pte() - read a page-table entry from device DRAM.
 * @hdev: pointer to hl_device structure.
 * @addr: DRAM address of the PTE; assumed to be inside the window currently
 *        mapped by the DDR BAR (tracked in goya->ddr_bar_cur_addr).
 *
 * Return: the PTE value, or U64_MAX while a hard reset is pending.
 */
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	/* The DDR BAR mapping is unreliable during hard reset - bail out */
	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4133 | ||
/*
 * goya_write_pte() - write a page-table entry to device DRAM.
 * @hdev: pointer to hl_device structure.
 * @addr: DRAM address of the PTE; assumed to be inside the window currently
 *        mapped by the DDR BAR (tracked in goya->ddr_bar_cur_addr).
 * @val: PTE value to write.
 *
 * Silently does nothing while a hard reset is pending.
 */
static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	/* The DDR BAR mapping is unreliable during hard reset - bail out */
	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4144 | ||
/*
 * _goya_get_event_desc() - map an event ID to its description template.
 * @event_type: H/W event ID received from the device.
 *
 * Return: a static string describing the event. For per-engine events the
 * string contains a single %d placeholder that the caller
 * (goya_get_event_desc()) fills with the engine index; fixed events return
 * a plain string, and unknown IDs return "N/A".
 */
static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
}
4254 | ||
60b7dcca | 4255 | static void goya_get_event_desc(u16 event_type, char *desc, size_t size) |
1251f23a | 4256 | { |
60b7dcca TT |
4257 | u8 index; |
4258 | ||
4259 | switch (event_type) { | |
460696ed OS |
4260 | case GOYA_ASYNC_EVENT_ID_TPC0_ECC: |
4261 | case GOYA_ASYNC_EVENT_ID_TPC1_ECC: | |
4262 | case GOYA_ASYNC_EVENT_ID_TPC2_ECC: | |
4263 | case GOYA_ASYNC_EVENT_ID_TPC3_ECC: | |
4264 | case GOYA_ASYNC_EVENT_ID_TPC4_ECC: | |
4265 | case GOYA_ASYNC_EVENT_ID_TPC5_ECC: | |
4266 | case GOYA_ASYNC_EVENT_ID_TPC6_ECC: | |
4267 | case GOYA_ASYNC_EVENT_ID_TPC7_ECC: | |
4268 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3; | |
4269 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4270 | break; | |
4271 | case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: | |
4272 | index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0; | |
4273 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4274 | break; | |
4275 | case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: | |
4276 | index = event_type - GOYA_ASYNC_EVENT_ID_PLL0; | |
4277 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4278 | break; | |
60b7dcca TT |
4279 | case GOYA_ASYNC_EVENT_ID_TPC0_DEC: |
4280 | case GOYA_ASYNC_EVENT_ID_TPC1_DEC: | |
4281 | case GOYA_ASYNC_EVENT_ID_TPC2_DEC: | |
4282 | case GOYA_ASYNC_EVENT_ID_TPC3_DEC: | |
4283 | case GOYA_ASYNC_EVENT_ID_TPC4_DEC: | |
4284 | case GOYA_ASYNC_EVENT_ID_TPC5_DEC: | |
4285 | case GOYA_ASYNC_EVENT_ID_TPC6_DEC: | |
4286 | case GOYA_ASYNC_EVENT_ID_TPC7_DEC: | |
4287 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3; | |
4288 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4289 | break; | |
4290 | case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR: | |
4291 | case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR: | |
4292 | case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR: | |
4293 | case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR: | |
4294 | case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR: | |
4295 | case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR: | |
4296 | case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR: | |
4297 | case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR: | |
4298 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10; | |
4299 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4300 | break; | |
4301 | case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ: | |
4302 | index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ; | |
4303 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4304 | break; | |
4305 | case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM: | |
4306 | index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM; | |
4307 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4308 | break; | |
4309 | case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: | |
4310 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM; | |
4311 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4312 | break; | |
4313 | case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: | |
4314 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH; | |
4315 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4316 | break; | |
460696ed OS |
4317 | case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: |
4318 | case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: | |
4319 | case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: | |
4320 | case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: | |
4321 | case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: | |
4322 | case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: | |
4323 | case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: | |
4324 | case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: | |
4325 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10; | |
4326 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4327 | break; | |
4328 | case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: | |
4329 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0; | |
4330 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4331 | break; | |
60b7dcca TT |
4332 | default: |
4333 | snprintf(desc, size, _goya_get_event_desc(event_type)); | |
4334 | break; | |
1251f23a OG |
4335 | } |
4336 | } | |
4337 | ||
60b7dcca | 4338 | static void goya_print_razwi_info(struct hl_device *hdev) |
1251f23a | 4339 | { |
1251f23a | 4340 | if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) { |
e5509d52 | 4341 | dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n"); |
1251f23a | 4342 | WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0); |
1251f23a | 4343 | } |
60b7dcca | 4344 | |
1251f23a | 4345 | if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) { |
e5509d52 | 4346 | dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n"); |
1251f23a | 4347 | WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0); |
1251f23a | 4348 | } |
60b7dcca | 4349 | |
1251f23a | 4350 | if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) { |
e5509d52 | 4351 | dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n"); |
1251f23a | 4352 | WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0); |
1251f23a | 4353 | } |
60b7dcca | 4354 | |
1251f23a | 4355 | if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) { |
e5509d52 | 4356 | dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n"); |
1251f23a | 4357 | WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0); |
1251f23a | 4358 | } |
60b7dcca | 4359 | } |
1251f23a | 4360 | |
/*
 * goya_print_mmu_error_info() - report a captured MMU page fault, if any.
 * @hdev: habanalabs device structure.
 *
 * Reads the MMU page-error capture registers; if the entry-valid bit is
 * set, reconstructs the faulting virtual address (bits 49:32 from the
 * capture register, bits 31:0 from the VA register), logs it, and clears
 * the capture register to re-arm it. No-op when the MMU was never
 * initialized.
 */
static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		/* High part of the VA lives in the capture register itself */
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		/* Clear to allow the H/W to capture the next fault */
		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}
4382 | ||
460696ed OS |
4383 | static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, |
4384 | bool razwi) | |
60b7dcca TT |
4385 | { |
4386 | char desc[20] = ""; | |
4387 | ||
4388 | goya_get_event_desc(event_type, desc, sizeof(desc)); | |
e5509d52 | 4389 | dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", |
60b7dcca TT |
4390 | event_type, desc); |
4391 | ||
460696ed OS |
4392 | if (razwi) { |
4393 | goya_print_razwi_info(hdev); | |
4394 | goya_print_mmu_error_info(hdev); | |
4395 | } | |
60b7dcca TT |
4396 | } |
4397 | ||
/*
 * goya_unmask_irq_arr() - ask the ArmCP F/W to unmask an array of IRQs.
 * @hdev: habanalabs device structure.
 * @irq_arr: array of event IDs to unmask, in CPU endianness.
 * @irq_arr_size: size of @irq_arr in bytes.
 *
 * Builds a variable-length ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet,
 * converts every entry to little-endian for the device, and sends it to
 * the device CPU.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
				size_t irq_arr_size)
{
	struct armcp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	long result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes in order to ArmCP to copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is casted to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversation on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						ARMCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}
4448 | ||
/*
 * goya_soft_reset_late_init() - post-soft-reset fixups.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}
4458 | ||
1251f23a OG |
4459 | static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) |
4460 | { | |
4461 | struct armcp_packet pkt; | |
4462 | long result; | |
4463 | int rc; | |
4464 | ||
4465 | memset(&pkt, 0, sizeof(pkt)); | |
4466 | ||
df697bce TT |
4467 | pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << |
4468 | ARMCP_PKT_CTL_OPCODE_SHIFT); | |
4469 | pkt.value = cpu_to_le64(event_type); | |
1251f23a | 4470 | |
788cacf3 OG |
4471 | rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), |
4472 | 0, &result); | |
1251f23a OG |
4473 | |
4474 | if (rc) | |
4475 | dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); | |
4476 | ||
4477 | return rc; | |
4478 | } | |
4479 | ||
/*
 * goya_print_clk_change_info() - log a clock-throttling state change.
 * @hdev: habanalabs device structure.
 * @event_type: one of the FIX_POWER/THERMAL_ENV start/end event IDs.
 *
 * _S events mark the start of throttling (power or thermal envelope
 * exceeded); _E events mark the return to the optimal clock. Any other
 * ID reaching here is a caller bug and is reported as invalid.
 */
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
4506 | ||
1251f23a OG |
4507 | void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) |
4508 | { | |
df697bce TT |
4509 | u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); |
4510 | u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) | |
4511 | >> EQ_CTL_EVENT_TYPE_SHIFT); | |
1251f23a OG |
4512 | struct goya_device *goya = hdev->asic_specific; |
4513 | ||
4514 | goya->events_stat[event_type]++; | |
e9730763 | 4515 | goya->events_stat_aggregate[event_type]++; |
1251f23a OG |
4516 | |
4517 | switch (event_type) { | |
4518 | case GOYA_ASYNC_EVENT_ID_PCIE_IF: | |
4519 | case GOYA_ASYNC_EVENT_ID_TPC0_ECC: | |
4520 | case GOYA_ASYNC_EVENT_ID_TPC1_ECC: | |
4521 | case GOYA_ASYNC_EVENT_ID_TPC2_ECC: | |
4522 | case GOYA_ASYNC_EVENT_ID_TPC3_ECC: | |
4523 | case GOYA_ASYNC_EVENT_ID_TPC4_ECC: | |
4524 | case GOYA_ASYNC_EVENT_ID_TPC5_ECC: | |
4525 | case GOYA_ASYNC_EVENT_ID_TPC6_ECC: | |
4526 | case GOYA_ASYNC_EVENT_ID_TPC7_ECC: | |
4527 | case GOYA_ASYNC_EVENT_ID_MME_ECC: | |
4528 | case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT: | |
4529 | case GOYA_ASYNC_EVENT_ID_MMU_ECC: | |
4530 | case GOYA_ASYNC_EVENT_ID_DMA_MACRO: | |
4531 | case GOYA_ASYNC_EVENT_ID_DMA_ECC: | |
4532 | case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC: | |
4533 | case GOYA_ASYNC_EVENT_ID_PSOC_MEM: | |
4534 | case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: | |
4535 | case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: | |
4536 | case GOYA_ASYNC_EVENT_ID_GIC500: | |
460696ed | 4537 | case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: |
1251f23a OG |
4538 | case GOYA_ASYNC_EVENT_ID_AXI_ECC: |
4539 | case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: | |
4540 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: | |
460696ed | 4541 | goya_print_irq_info(hdev, event_type, false); |
f8c8c7d5 | 4542 | hl_device_reset(hdev, true, false); |
1251f23a OG |
4543 | break; |
4544 | ||
4545 | case GOYA_ASYNC_EVENT_ID_PCIE_DEC: | |
4546 | case GOYA_ASYNC_EVENT_ID_TPC0_DEC: | |
4547 | case GOYA_ASYNC_EVENT_ID_TPC1_DEC: | |
4548 | case GOYA_ASYNC_EVENT_ID_TPC2_DEC: | |
4549 | case GOYA_ASYNC_EVENT_ID_TPC3_DEC: | |
4550 | case GOYA_ASYNC_EVENT_ID_TPC4_DEC: | |
4551 | case GOYA_ASYNC_EVENT_ID_TPC5_DEC: | |
4552 | case GOYA_ASYNC_EVENT_ID_TPC6_DEC: | |
4553 | case GOYA_ASYNC_EVENT_ID_TPC7_DEC: | |
4554 | case GOYA_ASYNC_EVENT_ID_MME_WACS: | |
4555 | case GOYA_ASYNC_EVENT_ID_MME_WACSD: | |
4556 | case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER: | |
4557 | case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC: | |
4558 | case GOYA_ASYNC_EVENT_ID_PSOC: | |
4559 | case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR: | |
4560 | case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR: | |
4561 | case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR: | |
4562 | case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR: | |
4563 | case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR: | |
4564 | case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR: | |
4565 | case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR: | |
4566 | case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR: | |
4567 | case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM: | |
4568 | case GOYA_ASYNC_EVENT_ID_MME_QM: | |
4569 | case GOYA_ASYNC_EVENT_ID_MME_CMDQ: | |
4570 | case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: | |
4571 | case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: | |
460696ed | 4572 | goya_print_irq_info(hdev, event_type, true); |
1251f23a OG |
4573 | goya_unmask_irq(hdev, event_type); |
4574 | break; | |
4575 | ||
717261e1 | 4576 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: |
1251f23a OG |
4577 | case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: |
4578 | case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: | |
4579 | case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: | |
4580 | case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: | |
4581 | case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: | |
4582 | case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: | |
4583 | case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: | |
4584 | case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: | |
460696ed OS |
4585 | case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: |
4586 | goya_print_irq_info(hdev, event_type, false); | |
4587 | goya_unmask_irq(hdev, event_type); | |
1251f23a OG |
4588 | break; |
4589 | ||
4f0e6ab7 OS |
4590 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: |
4591 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: | |
4592 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: | |
4593 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: | |
4594 | goya_print_clk_change_info(hdev, event_type); | |
4595 | goya_unmask_irq(hdev, event_type); | |
4596 | break; | |
4597 | ||
1251f23a OG |
4598 | default: |
4599 | dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", | |
4600 | event_type); | |
4601 | break; | |
4602 | } | |
4603 | } | |
4604 | ||
e9730763 | 4605 | void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) |
1251f23a OG |
4606 | { |
4607 | struct goya_device *goya = hdev->asic_specific; | |
4608 | ||
e9730763 OG |
4609 | if (aggregate) { |
4610 | *size = (u32) sizeof(goya->events_stat_aggregate); | |
4611 | return goya->events_stat_aggregate; | |
4612 | } | |
1251f23a | 4613 | |
e9730763 | 4614 | *size = (u32) sizeof(goya->events_stat); |
1251f23a OG |
4615 | return goya->events_stat; |
4616 | } | |
4617 | ||
/*
 * goya_memset_device_memory() - fill a device memory range using QMAN0 DMA.
 * @hdev: habanalabs device structure.
 * @addr: device address (SRAM or DRAM) to start filling at.
 * @size: number of bytes to fill.
 * @val: 64-bit pattern used as the memset source.
 * @is_dram: true when @addr is in DRAM, false for SRAM (selects the DMA
 *           direction encoding).
 *
 * Builds a kernel CB containing one LIN_DMA memset packet per 2GB chunk
 * (a single packet's tsize is limited to 32 bits) plus room for a
 * trailing MSG_PROT, and submits it as a job on DMA queue 0.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
					u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		/* MEMSET: src_addr is a value, not an address; WO + RB + MB */
		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		/* Last packet carries the (possibly partial) remainder */
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		/*
		 * size may wrap below zero on the final iteration, but the
		 * loop is bounded by lin_dma_pkts_cnt so the stale value is
		 * never used.
		 */
		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	/* Kernel CB needs no parsing - it is its own patched CB */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
4689 | ||
/*
 * goya_context_switch() - prepare the device for a new user context.
 * @hdev: habanalabs device structure.
 * @asid: ASID of the incoming context.
 *
 * Clears the SRAM with a recognizable pattern (a much smaller range on
 * Palladium to keep runtime sane), restores the DMA write-completion
 * addresses to their default sync-manager SOBs (registers the previous
 * user was allowed to change), reprograms the MMU registers for @asid,
 * and clears the sync-manager registers.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	/* Channels 1..N complete onto consecutive SOBs starting at 1000 */
	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
				lower_32_bits(sob_addr));
	}

	/* NOTE(review): restores a default TPC PLL relax value - confirm */
	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}
4725 | ||
/*
 * goya_mmu_clear_pgt_range() - zero the MMU page-table area in DRAM.
 * @hdev: habanalabs device structure.
 *
 * Clears the page tables plus the DRAM default page and the MMU cache
 * management area that follow them. No-op if the MMU was never
 * initialized.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}
4739 | ||
/*
 * goya_mmu_set_dram_default_page() - fill the DRAM default page.
 * @hdev: habanalabs device structure.
 *
 * Writes a recognizable pattern into the page that unmapped DRAM
 * accesses are redirected to, so stray reads are identifiable. No-op if
 * the MMU was never initialized.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}
4752 | ||
/*
 * goya_mmu_add_mappings_for_device_cpu() - create MMU mappings the device
 * CPU needs after the MMU is enabled.
 * @hdev: habanalabs device structure.
 *
 * Identity-maps the F/W image area in DRAM with 2MB pages, then maps the
 * host-resident CPU-accessible memory at VA_CPU_ACCESSIBLE_MEM_ADDR -
 * as one 2MB page when the DMA address is 2MB-aligned, otherwise as 4KB
 * pages. Finally points the CPU interface AXI user bits at the kernel
 * ASID and enables the override. On any failure, every mapping created
 * so far is unwound. No-op if the MMU was never initialized.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			/* Rewind so the unmap loop ends on a mapped page */
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
4830 | ||
/*
 * goya_mmu_remove_device_cpu_mappings() - tear down the device-CPU MMU
 * mappings created by goya_mmu_add_mappings_for_device_cpu().
 * @hdev: habanalabs device structure.
 *
 * Disables the CPU interface AXI user-bit overrides first, then unmaps
 * the CPU-accessible memory (2MB page or 4KB pages, mirroring how it was
 * mapped) and the F/W image identity mappings. No-op if the MMU was
 * never initialized or the mappings were never created.
 */
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}
4872 | ||
/*
 * goya_mmu_prepare() - program the given ASID into all MMU-related QMAN
 * registers.
 * @hdev: habanalabs device structure.
 * @asid: ASID to program; must fit in the H/W ASID field.
 *
 * No-op if the MMU was never initialized. An out-of-range ASID is a
 * driver bug and triggers a WARN instead of corrupting adjacent fields.
 */
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		WARN(1, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}
4890 | ||
/*
 * goya_mmu_invalidate_cache() - invalidate the entire MMU STLB cache.
 * @hdev: habanalabs device structure.
 * @is_hard: true for a full (L0 + L1) invalidation; Goya has no L1-only
 *           invalidation, so a soft request is a no-op.
 * @flags: unused on Goya.
 *
 * Triggers a full invalidation via STLB_INV_ALL_START and polls until
 * the H/W clears the bit. On timeout the device is hard-reset, since an
 * incoherent MMU cache cannot be tolerated.
 *
 * Return: 0 on success (or when skipped), negative errno on timeout.
 */
static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
4934 | ||
/*
 * goya_mmu_invalidate_cache_range() - invalidate MMU cache for a range.
 * @hdev: habanalabs device structure.
 * @is_hard: true for a full invalidation; soft requests are a no-op on
 *           Goya (no L1-only invalidation).
 * @asid: ASID of the range (currently unused - see TODO below).
 * @va: start virtual address of the range (currently unused).
 * @size: size of the range in bytes (currently unused).
 *
 * Uses the STLB_CACHE_INV producer/consumer-index mechanism: bumps the
 * 8-bit producer index and polls the consumer index until it catches up.
 * On timeout the device is hard-reset. Today this still invalidates the
 * entire cache rather than only the requested range.
 *
 * Return: 0 on success (or when skipped), negative errno on timeout.
 */
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines with
	 * mask of ASID & VA & size.
	 * Note that L1 with be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
4989 | ||
f8c8c7d5 OG |
4990 | int goya_send_heartbeat(struct hl_device *hdev) |
4991 | { | |
4992 | struct goya_device *goya = hdev->asic_specific; | |
f8c8c7d5 OG |
4993 | |
4994 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
4995 | return 0; | |
4996 | ||
3110c60f | 4997 | return hl_fw_send_heartbeat(hdev); |
f8c8c7d5 OG |
4998 | } |
4999 | ||
393e5b55 | 5000 | int goya_armcp_info_get(struct hl_device *hdev) |
d91389bc OG |
5001 | { |
5002 | struct goya_device *goya = hdev->asic_specific; | |
5003 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
d91389bc | 5004 | u64 dram_size; |
d91389bc OG |
5005 | int rc; |
5006 | ||
5007 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
5008 | return 0; | |
5009 | ||
3110c60f TT |
5010 | rc = hl_fw_armcp_info_get(hdev); |
5011 | if (rc) | |
5012 | return rc; | |
d91389bc | 5013 | |
df697bce | 5014 | dram_size = le64_to_cpu(prop->armcp_info.dram_size); |
d91389bc OG |
5015 | if (dram_size) { |
5016 | if ((!is_power_of_2(dram_size)) || | |
5017 | (dram_size < DRAM_PHYS_DEFAULT_SIZE)) { | |
5018 | dev_err(hdev->dev, | |
5019 | "F/W reported invalid DRAM size %llu. Trying to use default size\n", | |
5020 | dram_size); | |
5021 | dram_size = DRAM_PHYS_DEFAULT_SIZE; | |
5022 | } | |
5023 | ||
5024 | prop->dram_size = dram_size; | |
5025 | prop->dram_end_address = prop->dram_base_address + dram_size; | |
5026 | } | |
5027 | ||
0996bd1c OG |
5028 | if (!strlen(prop->armcp_info.card_name)) |
5029 | strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, | |
5030 | CARD_NAME_MAX_LEN); | |
5031 | ||
3110c60f | 5032 | return 0; |
d91389bc OG |
5033 | } |
5034 | ||
/* Empty implementation for the asic_funcs dispatch table - Goya has no
 * clock-gating control.
 */
static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5039 | ||
/* Empty implementation for the asic_funcs dispatch table - Goya has no
 * clock-gating control, so there is nothing to disable.
 */
static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5044 | ||
/*
 * goya_is_device_idle - check whether all Goya engines are idle
 *
 * @hdev: pointer to hl_device structure
 * @mask: optional out-parameter; bit (GOYA_ENGINE_ID_xxx) is set for every
 *        engine found busy
 * @s: optional seq_file; when given, a human-readable per-engine status
 *     table is printed into it (used by debugfs)
 *
 * Return: true when all DMA, TPC and MME engines are idle, false otherwise.
 */
static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
			"--- ------- ------------ -------------\n");

	/* Register stride between consecutive DMA engine instances */
	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		/* A DMA engine is idle only if both its QMAN and its core
		 * report idle.
		 */
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
			"--- ------- ------------ -------------- ----------\n");

	/* Register stride between consecutive TPC instances */
	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		/* A TPC is idle only if its QMAN, CMDQ and core config
		 * status all report idle.
		 */
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
			"--- ------- ------------ -------------- -----------\n");

	/* Goya has a single MME engine */
	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}
5122 | ||
/* Take the ASIC-wide H/W queues spinlock (sparse-annotated for lockdep
 * style checking; the lock lives in the Goya-specific device struct).
 */
static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}
5130 | ||
/* Release the ASIC-wide H/W queues spinlock taken by goya_hw_queues_lock */
static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}
5138 | ||
d8dd7b0a OG |
5139 | static u32 goya_get_pci_id(struct hl_device *hdev) |
5140 | { | |
5141 | return hdev->pdev->device; | |
5142 | } | |
5143 | ||
5e6e0239 OG |
5144 | static int goya_get_eeprom_data(struct hl_device *hdev, void *data, |
5145 | size_t max_size) | |
d91389bc OG |
5146 | { |
5147 | struct goya_device *goya = hdev->asic_specific; | |
d91389bc OG |
5148 | |
5149 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
5150 | return 0; | |
5151 | ||
3110c60f | 5152 | return hl_fw_get_eeprom_data(hdev, data, max_size); |
d91389bc OG |
5153 | } |
5154 | ||
/* Report the current device H/W state as exposed in the mmHW_STATE
 * register.
 */
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
5159 | ||
/* On Goya, completion queues map 1:1 to queue IDs, so the CQ index is
 * returned as-is.
 */
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}
5164 | ||
/* No-op: Goya needs no per-queue init beyond the common code's setup */
static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
{

}
5169 | ||
/* No-op: Goya needs no per-queue reset beyond the common code's handling */
static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
{

}
5174 | ||
/* Signal command buffers are not supported on Goya, hence size 0 */
static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}
5179 | ||
/* Wait command buffers are not supported on Goya, hence size 0 */
static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}
5184 | ||
/* No-op: signal CB generation is not supported on Goya (size reports 0) */
static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
{

}
5189 | ||
/* No-op: wait CB generation is not supported on Goya (size reports 0) */
static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u16 sob_val, u16 mon_id, u32 q_idx)
{

}
5195 | ||
/* No-op: sync objects (SOB) are not used on Goya, nothing to reset */
static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}
5200 | ||
cb056b9f OG |
5201 | static void goya_set_dma_mask_from_fw(struct hl_device *hdev) |
5202 | { | |
5203 | if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == | |
5204 | HL_POWER9_HOST_MAGIC) { | |
5205 | dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n"); | |
5206 | hdev->power9_64bit_dma_enable = 1; | |
5207 | hdev->dma_mask = 64; | |
5208 | } else { | |
5209 | dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n"); | |
5210 | hdev->power9_64bit_dma_enable = 0; | |
5211 | hdev->dma_mask = 48; | |
5212 | } | |
5213 | } | |
5214 | ||
25e7aeba TT |
5215 | u64 goya_get_device_time(struct hl_device *hdev) |
5216 | { | |
5217 | u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; | |
5218 | ||
5219 | return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); | |
5220 | } | |
5221 | ||
/*
 * Goya implementations of the common driver's ASIC-specific operations.
 * Installed on the device by goya_set_asic_funcs().
 */
static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.send_heartbeat = goya_send_heartbeat,
	/* clock gating callbacks are stubs - not supported on Goya */
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.get_hw_state = goya_get_hw_state,
	.pci_bars_map = goya_pci_bars_map,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.init_iatu = goya_init_iatu,
	/* plain register accessors - no Goya-specific translation needed */
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	/* sync-stream (signal/wait) callbacks are stubs on Goya */
	.ext_queue_init = goya_ext_queue_init,
	.ext_queue_reset = goya_ext_queue_reset,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time
};
5298 | ||
/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 * Install the Goya dispatch table so the common driver code routes all
 * ASIC-specific operations to the goya_* implementations.
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}