Commit | Line | Data |
---|---|---|
99b9d7b4 OG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | /* | |
4 | * Copyright 2016-2019 HabanaLabs, Ltd. | |
5 | * All Rights Reserved. | |
6 | */ | |
7 | ||
8 | #include "goyaP.h" | |
7b16a155 GKH |
9 | #include "../include/hw_ip/mmu/mmu_general.h" |
10 | #include "../include/hw_ip/mmu/mmu_v1_0.h" | |
11 | #include "../include/goya/asic_reg/goya_masks.h" | |
12 | #include "../include/goya/goya_reg_map.h" | |
99b9d7b4 OG |
13 | |
14 | #include <linux/pci.h> | |
d91389bc | 15 | #include <linux/hwmon.h> |
4a0ce776 | 16 | #include <linux/iommu.h> |
06deb86a | 17 | #include <linux/seq_file.h> |
99b9d7b4 OG |
18 | |
19 | /* | |
20 | * GOYA security scheme: | |
21 | * | |
22 | * 1. Host is protected by: | |
23 | * - Range registers (When MMU is enabled, DMA RR does NOT protect host) | |
24 | * - MMU | |
25 | * | |
26 | * 2. DRAM is protected by: | |
27 | * - Range registers (protect the first 512MB) | |
28 | * - MMU (isolation between users) | |
29 | * | |
30 | * 3. Configuration is protected by: | |
31 | * - Range registers | |
32 | * - Protection bits | |
33 | * | |
34 | * When MMU is disabled: | |
35 | * | |
36 | * QMAN DMA: PQ, CQ, CP, DMA are secured. | |
37 | * PQ, CB and the data are on the host. | |
38 | * | |
39 | * QMAN TPC/MME: | |
40 | * PQ, CQ and CP are not secured. | |
41 | * PQ, CB and the data are on the SRAM/DRAM. | |
42 | * | |
4c172bbf OG |
43 | * Since QMAN DMA is secured, the driver is parsing the DMA CB: |
44 | * - checks DMA pointer | |
99b9d7b4 OG |
45 | * - WREG, MSG_PROT are not allowed. |
46 | * - MSG_LONG/SHORT are allowed. | |
47 | * | |
48 | * A read/write transaction by the QMAN to a protected area will succeed if | |
49 | * and only if the QMAN's CP is secured and MSG_PROT is used | |
50 | * | |
51 | * | |
52 | * When MMU is enabled: | |
53 | * | |
54 | * QMAN DMA: PQ, CQ and CP are secured. | |
55 | * MMU is set to bypass on the Secure props register of the QMAN. | |
56 | * The reasons we don't enable MMU for PQ, CQ and CP are: | |
4c172bbf | 57 | * - PQ entry is in kernel address space and the driver doesn't map it. |
99b9d7b4 OG |
58 | * - CP writes to MSIX register and to kernel address space (completion |
59 | * queue). | |
60 | * | |
4c172bbf OG |
61 | * DMA is not secured but because CP is secured, the driver still needs to parse |
62 | * the CB, but doesn't need to check the DMA addresses. | |
99b9d7b4 | 63 | * |
4c172bbf OG |
64 | * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and |
65 | * the driver doesn't map memory in MMU. | |
99b9d7b4 OG |
66 | * |
67 | * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode) | |
68 | * | |
69 | * DMA RR does NOT protect host because DMA is not secured | |
70 | * | |
71 | */ | |
72 | ||
47f6b41c | 73 | #define GOYA_BOOT_FIT_FILE "habanalabs/goya/goya-boot-fit.itb" |
da1342a0 OG |
74 | #define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb" |
75 | ||
8ba2876d | 76 | #define GOYA_MMU_REGS_NUM 63 |
99b9d7b4 OG |
77 | |
78 | #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ | |
79 | ||
80 | #define GOYA_RESET_TIMEOUT_MSEC 500 /* 500ms */ | |
81 | #define GOYA_PLDM_RESET_TIMEOUT_MSEC 20000 /* 20s */ | |
82 | #define GOYA_RESET_WAIT_MSEC 1 /* 1ms */ | |
83 | #define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */ | |
84 | #define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ | |
99b9d7b4 | 85 | #define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ |
0feaf86d | 86 | #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) |
3dccd187 | 87 | #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) |
47f6b41c | 88 | #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ |
788cacf3 | 89 | #define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ |
99b9d7b4 OG |
90 | |
91 | #define GOYA_QMAN0_FENCE_VAL 0xD169B243 | |
92 | ||
1251f23a OG |
93 | #define GOYA_MAX_STRING_LEN 20 |
94 | ||
be5d926b OG |
95 | #define GOYA_CB_POOL_CB_CNT 512 |
96 | #define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */ | |
97 | ||
06deb86a TT |
/*
 * Engine idleness checks, evaluated against the H/W status registers.
 * A unit is reported idle only when every bit of its idle mask is set
 * (except the DMA channel, which exposes a single busy flag).
 */
#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

/* DMA channel status is a busy flag, hence the inverted test */
#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
120 | ||
121 | ||
1251f23a OG |
/* Names registered for the MSI-X interrupts: 5 completion queues + CPU EQ */
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
	"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
	"goya cq 4", "goya cpu eq"
};
126 | ||
eff6f4a0 OG |
/*
 * Size in bytes of each QMAN packet, indexed by packet ID. Entries not
 * listed here are implicitly zero (unknown/invalid packet types).
 */
static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32] = sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
	[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
	[PACKET_FENCE] = sizeof(struct packet_fence),
	[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
	[PACKET_NOP] = sizeof(struct packet_nop),
	[PACKET_STOP] = sizeof(struct packet_stop)
};
139 | ||
bc75be24 OB |
/*
 * validate_packet_id - check whether a packet ID is a known Goya packet type.
 *
 * Returns true only for the explicitly whitelisted packet types; any other
 * value (including holes inside the enum's numeric range) is rejected.
 */
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}
158 | ||
0feaf86d OS |
/*
 * QMAN/CMDQ secure-props and ARUSER/AWUSER registers of all engines.
 * NOTE(review): presumably programmed per-ASID by goya_mmu_prepare()
 * (forward-declared above) via goya_mmu_prepare_reg() - confirm at call site.
 */
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
224 | ||
/* Full list of Goya async H/W event IDs known to the driver */
static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
352 | ||
95b5a8b8 OG |
353 | static int goya_mmu_clear_pgt_range(struct hl_device *hdev); |
354 | static int goya_mmu_set_dram_default_page(struct hl_device *hdev); | |
355 | static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); | |
356 | static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); | |
357 | ||
3abc99bb | 358 | int goya_get_fixed_properties(struct hl_device *hdev) |
99b9d7b4 OG |
359 | { |
360 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
9494a8dd OG |
361 | int i; |
362 | ||
3abc99bb OB |
363 | prop->max_queues = GOYA_QUEUE_ID_SIZE; |
364 | prop->hw_queues_props = kcalloc(prop->max_queues, | |
365 | sizeof(struct hw_queue_properties), | |
366 | GFP_KERNEL); | |
367 | ||
368 | if (!prop->hw_queues_props) | |
369 | return -ENOMEM; | |
370 | ||
9494a8dd OG |
371 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { |
372 | prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; | |
4c172bbf | 373 | prop->hw_queues_props[i].driver_only = 0; |
4bb1f2f3 | 374 | prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL; |
9494a8dd OG |
375 | } |
376 | ||
377 | for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { | |
378 | prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; | |
4c172bbf | 379 | prop->hw_queues_props[i].driver_only = 1; |
4bb1f2f3 | 380 | prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL; |
9494a8dd OG |
381 | } |
382 | ||
383 | for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + | |
384 | NUMBER_OF_INT_HW_QUEUES; i++) { | |
385 | prop->hw_queues_props[i].type = QUEUE_TYPE_INT; | |
4c172bbf | 386 | prop->hw_queues_props[i].driver_only = 0; |
4bb1f2f3 | 387 | prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER; |
9494a8dd OG |
388 | } |
389 | ||
99b9d7b4 OG |
390 | prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; |
391 | ||
392 | prop->dram_base_address = DRAM_PHYS_BASE; | |
393 | prop->dram_size = DRAM_PHYS_DEFAULT_SIZE; | |
394 | prop->dram_end_address = prop->dram_base_address + prop->dram_size; | |
395 | prop->dram_user_base_address = DRAM_BASE_ADDR_USER; | |
396 | ||
397 | prop->sram_base_address = SRAM_BASE_ADDR; | |
398 | prop->sram_size = SRAM_SIZE; | |
399 | prop->sram_end_address = prop->sram_base_address + prop->sram_size; | |
400 | prop->sram_user_base_address = prop->sram_base_address + | |
401 | SRAM_USER_BASE_OFFSET; | |
402 | ||
0feaf86d | 403 | prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; |
27ca384c | 404 | prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR; |
0feaf86d OS |
405 | if (hdev->pldm) |
406 | prop->mmu_pgt_size = 0x800000; /* 8MB */ | |
407 | else | |
408 | prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; | |
409 | prop->mmu_pte_size = HL_PTE_SIZE; | |
410 | prop->mmu_hop_table_size = HOP_TABLE_SIZE; | |
411 | prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; | |
412 | prop->dram_page_size = PAGE_SIZE_2MB; | |
7f070c91 | 413 | prop->dram_supports_virtual_memory = true; |
0feaf86d | 414 | |
54bb6744 OS |
415 | prop->dmmu.hop0_shift = HOP0_SHIFT; |
416 | prop->dmmu.hop1_shift = HOP1_SHIFT; | |
417 | prop->dmmu.hop2_shift = HOP2_SHIFT; | |
418 | prop->dmmu.hop3_shift = HOP3_SHIFT; | |
419 | prop->dmmu.hop4_shift = HOP4_SHIFT; | |
420 | prop->dmmu.hop0_mask = HOP0_MASK; | |
421 | prop->dmmu.hop1_mask = HOP1_MASK; | |
422 | prop->dmmu.hop2_mask = HOP2_MASK; | |
423 | prop->dmmu.hop3_mask = HOP3_MASK; | |
424 | prop->dmmu.hop4_mask = HOP4_MASK; | |
64a7e295 OS |
425 | prop->dmmu.start_addr = VA_DDR_SPACE_START; |
426 | prop->dmmu.end_addr = VA_DDR_SPACE_END; | |
427 | prop->dmmu.page_size = PAGE_SIZE_2MB; | |
7edf341b | 428 | prop->dmmu.num_hops = MMU_ARCH_5_HOPS; |
54bb6744 | 429 | |
64a7e295 | 430 | /* shifts and masks are the same in PMMU and DMMU */ |
54bb6744 | 431 | memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); |
64a7e295 OS |
432 | prop->pmmu.start_addr = VA_HOST_SPACE_START; |
433 | prop->pmmu.end_addr = VA_HOST_SPACE_END; | |
54bb6744 | 434 | prop->pmmu.page_size = PAGE_SIZE_4KB; |
7edf341b | 435 | prop->pmmu.num_hops = MMU_ARCH_5_HOPS; |
54bb6744 | 436 | |
64a7e295 OS |
437 | /* PMMU and HPMMU are the same except of page size */ |
438 | memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); | |
439 | prop->pmmu_huge.page_size = PAGE_SIZE_2MB; | |
440 | ||
441 | prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END; | |
99b9d7b4 OG |
442 | prop->cfg_size = CFG_SIZE; |
443 | prop->max_asid = MAX_ASID; | |
1251f23a | 444 | prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; |
b6f897d7 | 445 | prop->high_pll = PLL_HIGH_DEFAULT; |
839c4803 OG |
446 | prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT; |
447 | prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE; | |
d91389bc | 448 | prop->max_power_default = MAX_POWER_DEFAULT; |
99b9d7b4 | 449 | prop->tpc_enabled_mask = TPC_ENABLED_MASK; |
b6f897d7 TT |
450 | prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; |
451 | prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; | |
91edbf2c | 452 | |
2f55342c | 453 | strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, |
91edbf2c | 454 | CARD_NAME_MAX_LEN); |
c16d45f4 OB |
455 | |
456 | prop->max_pending_cs = GOYA_MAX_PENDING_CS; | |
3abc99bb | 457 | |
323b7267 OB |
458 | /* disable fw security for now, set it in a later stage */ |
459 | prop->fw_security_disabled = true; | |
460 | prop->fw_security_status_valid = false; | |
d611b9f0 | 461 | prop->hard_reset_done_by_fw = false; |
323b7267 | 462 | |
3abc99bb | 463 | return 0; |
99b9d7b4 OG |
464 | } |
465 | ||
466 | /* | |
467 | * goya_pci_bars_map - Map PCI BARS of Goya device | |
468 | * | |
469 | * @hdev: pointer to hl_device structure | |
470 | * | |
471 | * Request PCI regions and map them to kernel virtual addresses. | |
472 | * Returns 0 on success | |
473 | * | |
474 | */ | |
5e6e0239 | 475 | static int goya_pci_bars_map(struct hl_device *hdev) |
99b9d7b4 | 476 | { |
b6f897d7 TT |
477 | static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"}; |
478 | bool is_wc[3] = {false, false, true}; | |
99b9d7b4 OG |
479 | int rc; |
480 | ||
b6f897d7 TT |
481 | rc = hl_pci_bars_map(hdev, name, is_wc); |
482 | if (rc) | |
99b9d7b4 | 483 | return rc; |
99b9d7b4 OG |
484 | |
485 | hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
b6f897d7 | 486 | (CFG_BASE - SRAM_BASE_ADDR); |
99b9d7b4 OG |
487 | |
488 | return 0; | |
489 | } | |
490 | ||
a38693d7 | 491 | static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr) |
99b9d7b4 OG |
492 | { |
493 | struct goya_device *goya = hdev->asic_specific; | |
f4cbfd24 | 494 | struct hl_inbound_pci_region pci_region; |
a38693d7 | 495 | u64 old_addr = addr; |
99b9d7b4 OG |
496 | int rc; |
497 | ||
498 | if ((goya) && (goya->ddr_bar_cur_addr == addr)) | |
a38693d7 | 499 | return old_addr; |
99b9d7b4 OG |
500 | |
501 | /* Inbound Region 1 - Bar 4 - Point to DDR */ | |
f4cbfd24 OB |
502 | pci_region.mode = PCI_BAR_MATCH_MODE; |
503 | pci_region.bar = DDR_BAR_ID; | |
504 | pci_region.addr = addr; | |
505 | rc = hl_pci_set_inbound_region(hdev, 1, &pci_region); | |
b6f897d7 | 506 | if (rc) |
a38693d7 | 507 | return U64_MAX; |
99b9d7b4 | 508 | |
a38693d7 OG |
509 | if (goya) { |
510 | old_addr = goya->ddr_bar_cur_addr; | |
99b9d7b4 | 511 | goya->ddr_bar_cur_addr = addr; |
a38693d7 | 512 | } |
99b9d7b4 | 513 | |
a38693d7 | 514 | return old_addr; |
99b9d7b4 OG |
515 | } |
516 | ||
517 | /* | |
518 | * goya_init_iatu - Initialize the iATU unit inside the PCI controller | |
519 | * | |
520 | * @hdev: pointer to hl_device structure | |
521 | * | |
522 | * This is needed in case the firmware doesn't initialize the iATU | |
523 | * | |
524 | */ | |
525 | static int goya_init_iatu(struct hl_device *hdev) | |
526 | { | |
f4cbfd24 OB |
527 | struct hl_inbound_pci_region inbound_region; |
528 | struct hl_outbound_pci_region outbound_region; | |
529 | int rc; | |
530 | ||
531 | /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */ | |
532 | inbound_region.mode = PCI_BAR_MATCH_MODE; | |
533 | inbound_region.bar = SRAM_CFG_BAR_ID; | |
534 | inbound_region.addr = SRAM_BASE_ADDR; | |
535 | rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); | |
536 | if (rc) | |
537 | goto done; | |
538 | ||
539 | /* Inbound Region 1 - Bar 4 - Point to DDR */ | |
540 | inbound_region.mode = PCI_BAR_MATCH_MODE; | |
541 | inbound_region.bar = DDR_BAR_ID; | |
542 | inbound_region.addr = DRAM_PHYS_BASE; | |
543 | rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); | |
544 | if (rc) | |
545 | goto done; | |
546 | ||
547 | hdev->asic_funcs->set_dma_mask_from_fw(hdev); | |
548 | ||
549 | /* Outbound Region 0 - Point to Host */ | |
550 | outbound_region.addr = HOST_PHYS_BASE; | |
551 | outbound_region.size = HOST_PHYS_SIZE; | |
552 | rc = hl_pci_set_outbound_region(hdev, &outbound_region); | |
553 | ||
554 | done: | |
555 | return rc; | |
99b9d7b4 OG |
556 | } |
557 | ||
d1ddd905 OB |
558 | static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev) |
559 | { | |
560 | return RREG32(mmHW_STATE); | |
561 | } | |
562 | ||
99b9d7b4 OG |
563 | /* |
564 | * goya_early_init - GOYA early initialization code | |
565 | * | |
566 | * @hdev: pointer to hl_device structure | |
567 | * | |
568 | * Verify PCI bars | |
569 | * Set DMA masks | |
570 | * PCI controller initialization | |
571 | * Map PCI bars | |
572 | * | |
573 | */ | |
574 | static int goya_early_init(struct hl_device *hdev) | |
575 | { | |
576 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
577 | struct pci_dev *pdev = hdev->pdev; | |
578 | u32 val; | |
579 | int rc; | |
580 | ||
3abc99bb OB |
581 | rc = goya_get_fixed_properties(hdev); |
582 | if (rc) { | |
583 | dev_err(hdev->dev, "Failed to get fixed properties\n"); | |
584 | return rc; | |
585 | } | |
99b9d7b4 OG |
586 | |
587 | /* Check BAR sizes */ | |
588 | if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) { | |
589 | dev_err(hdev->dev, | |
590 | "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", | |
591 | SRAM_CFG_BAR_ID, | |
592 | (unsigned long long) pci_resource_len(pdev, | |
593 | SRAM_CFG_BAR_ID), | |
594 | CFG_BAR_SIZE); | |
3abc99bb OB |
595 | rc = -ENODEV; |
596 | goto free_queue_props; | |
99b9d7b4 OG |
597 | } |
598 | ||
599 | if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) { | |
600 | dev_err(hdev->dev, | |
601 | "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", | |
602 | MSIX_BAR_ID, | |
603 | (unsigned long long) pci_resource_len(pdev, | |
604 | MSIX_BAR_ID), | |
605 | MSIX_BAR_SIZE); | |
3abc99bb OB |
606 | rc = -ENODEV; |
607 | goto free_queue_props; | |
99b9d7b4 OG |
608 | } |
609 | ||
610 | prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); | |
611 | ||
d1ddd905 | 612 | rc = hl_pci_init(hdev); |
b6f897d7 | 613 | if (rc) |
3abc99bb | 614 | goto free_queue_props; |
99b9d7b4 | 615 | |
d1ddd905 OB |
616 | /* Before continuing in the initialization, we need to read the preboot |
617 | * version to determine whether we run with a security-enabled firmware | |
618 | */ | |
619 | rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, | |
620 | mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0, | |
621 | GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); | |
622 | if (rc) { | |
623 | if (hdev->reset_on_preboot_fail) | |
624 | hdev->asic_funcs->hw_fini(hdev, true); | |
625 | goto pci_fini; | |
626 | } | |
627 | ||
9c9013cb OB |
628 | if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { |
629 | dev_info(hdev->dev, | |
630 | "H/W state is dirty, must reset before initializing\n"); | |
631 | hdev->asic_funcs->hw_fini(hdev, true); | |
632 | } | |
633 | ||
839c4803 OG |
634 | if (!hdev->pldm) { |
635 | val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); | |
636 | if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) | |
637 | dev_warn(hdev->dev, | |
638 | "PCI strap is not configured correctly, PCI bus errors may occur\n"); | |
639 | } | |
99b9d7b4 OG |
640 | |
641 | return 0; | |
3abc99bb | 642 | |
d1ddd905 OB |
643 | pci_fini: |
644 | hl_pci_fini(hdev); | |
3abc99bb OB |
645 | free_queue_props: |
646 | kfree(hdev->asic_prop.hw_queues_props); | |
647 | return rc; | |
99b9d7b4 OG |
648 | } |
649 | ||
/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 * Always returns 0.
 */
static int goya_early_fini(struct hl_device *hdev)
{
	/* Release the queue properties allocated by goya_get_fixed_properties */
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
665 | ||
bedd1442 OG |
/*
 * goya_mmu_prepare_reg - program @reg to work with @asid through the MMU.
 * Clearing the low 11 bits removes the MMBP and ASID fields, then the
 * new ASID is OR-ed in.
 */
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
672 | ||
673 | static void goya_qman0_set_security(struct hl_device *hdev, bool secure) | |
674 | { | |
675 | struct goya_device *goya = hdev->asic_specific; | |
676 | ||
677 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) | |
678 | return; | |
679 | ||
680 | if (secure) | |
681 | WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED); | |
682 | else | |
683 | WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED); | |
684 | ||
685 | RREG32(mmDMA_QM_0_GLBL_PROT); | |
686 | } | |
687 | ||
d91389bc OG |
/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * When f/w security is disabled the PCI PLL registers are read directly
 * and the timestamp frequency is derived from the divider selection.
 * Otherwise the values are requested from the CPUCP f/w; in that case a
 * request failure leaves the properties untouched.
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		/* Reference clock path: either raw or divided by div_fctr+1 */
		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			/* PLL path: ref * (nf+1) / ((nr+1) * (od+1)) */
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
743 | ||
/*
 * goya_late_init - GOYA late initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Runs after the basic H/W bring-up: fetches the PSOC frequency, prepares
 * the MMU (page tables, DRAM default page, device CPU mappings), brings up
 * and tests the CPU queues, reads CPUCP info, configures the DDR wrap
 * protection and enables PCI access from the device CPU.
 *
 * Returns 0 on success, negative error code on the first failing step.
 */
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	/* Device CPU mappings must exist before the CPU queues are used */
	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* Let the device CPU know where to post async event interrupts */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
		GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}
800 | ||
801 | /* | |
802 | * goya_late_fini - GOYA late tear-down code | |
803 | * | |
804 | * @hdev: pointer to hl_device structure | |
805 | * | |
806 | * Free sensors allocated structures | |
807 | */ | |
808 | void goya_late_fini(struct hl_device *hdev) | |
809 | { | |
810 | const struct hwmon_channel_info **channel_info_arr; | |
811 | int i = 0; | |
812 | ||
813 | if (!hdev->hl_chip_info->info) | |
814 | return; | |
815 | ||
816 | channel_info_arr = hdev->hl_chip_info->info; | |
817 | ||
818 | while (channel_info_arr[i]) { | |
819 | kfree(channel_info_arr[i]->config); | |
820 | kfree(channel_info_arr[i]); | |
821 | i++; | |
822 | } | |
823 | ||
824 | kfree(channel_info_arr); | |
825 | ||
826 | hdev->hl_chip_info->info = NULL; | |
827 | } | |
828 | ||
/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates the ASIC-specific device structure, the DMA pool for small
 * allocations, the coherent buffer backing the CPU-accessible memory
 * region and the gen_pool allocator that manages it.
 *
 * Returns 0 on success, negative errno on failure. On failure, all
 * resources acquired so far are released in reverse order via the
 * goto-cleanup chain at the bottom.
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	/* All clocks start at the low PLL frequency */
	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	/* Coherent buffer that backs the CPU-accessible memory region */
	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	/* Sub-allocator over that buffer: 32-byte minimum chunk (order 5),
	 * no NUMA node affinity (-1)
	 */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

	/* Error path: unwind in reverse order of acquisition */
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
915 | ||
/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Releases everything goya_sw_init() allocated, in reverse order of
 * acquisition. Always returns 0.
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Destroy the sub-allocator before freeing its backing memory */
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}
939 | ||
/*
 * goya_init_dma_qman - Initialize the H/W registers of one DMA QMAN
 *
 * @hdev: pointer to hl_device structure
 * @dma_id: index of the DMA engine to initialize
 * @bus_address: DMA address of the queue's PQ buffer on the host
 */
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
				dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* The per-engine QMAN register blocks are laid out at a fixed stride */
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* Program PQ base, size (log2 of entries) and reset both indices */
	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	/* CP message bases point at the sync manager monitors and SOBs */
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	/* On error, the QMAN writes this engine's event ID to the GIC */
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	/* QMAN trust level depends on whether the MMU is in use */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	/* Optionally make the DMA engine stop on error */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
991 | ||
992 | static void goya_init_dma_ch(struct hl_device *hdev, int dma_id) | |
993 | { | |
994 | u32 gic_base_lo, gic_base_hi; | |
995 | u64 sob_addr; | |
996 | u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1); | |
997 | ||
998 | gic_base_lo = | |
999 | lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
1000 | gic_base_hi = | |
1001 | upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
1002 | ||
1003 | WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo); | |
1004 | WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi); | |
1005 | WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off, | |
1006 | GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id); | |
1007 | ||
887f7d38 | 1008 | if (dma_id) |
9494a8dd OG |
1009 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 + |
1010 | (dma_id - 1) * 4; | |
887f7d38 OG |
1011 | else |
1012 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; | |
1013 | ||
887f7d38 OG |
1014 | WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr)); |
1015 | WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001); | |
9494a8dd OG |
1016 | } |
1017 | ||
1018 | /* | |
1019 | * goya_init_dma_qmans - Initialize QMAN DMA registers | |
1020 | * | |
1021 | * @hdev: pointer to hl_device structure | |
1022 | * | |
1023 | * Initialize the H/W registers of the QMAN DMA channels | |
1024 | * | |
1025 | */ | |
b2377e03 | 1026 | void goya_init_dma_qmans(struct hl_device *hdev) |
9494a8dd OG |
1027 | { |
1028 | struct goya_device *goya = hdev->asic_specific; | |
1029 | struct hl_hw_queue *q; | |
9494a8dd OG |
1030 | int i; |
1031 | ||
1032 | if (goya->hw_cap_initialized & HW_CAP_DMA) | |
1033 | return; | |
1034 | ||
1035 | q = &hdev->kernel_queues[0]; | |
1036 | ||
1037 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) { | |
1fa185c6 | 1038 | q->cq_id = q->msi_vec = i; |
94cb669c | 1039 | goya_init_dma_qman(hdev, i, q->bus_address); |
9494a8dd OG |
1040 | goya_init_dma_ch(hdev, i); |
1041 | } | |
1042 | ||
1043 | goya->hw_cap_initialized |= HW_CAP_DMA; | |
1044 | } | |
1045 | ||
1046 | /* | |
1047 | * goya_disable_external_queues - Disable external queues | |
1048 | * | |
1049 | * @hdev: pointer to hl_device structure | |
1050 | * | |
1051 | */ | |
1052 | static void goya_disable_external_queues(struct hl_device *hdev) | |
1053 | { | |
908087ff OG |
1054 | struct goya_device *goya = hdev->asic_specific; |
1055 | ||
1056 | if (!(goya->hw_cap_initialized & HW_CAP_DMA)) | |
1057 | return; | |
1058 | ||
9494a8dd OG |
1059 | WREG32(mmDMA_QM_0_GLBL_CFG0, 0); |
1060 | WREG32(mmDMA_QM_1_GLBL_CFG0, 0); | |
1061 | WREG32(mmDMA_QM_2_GLBL_CFG0, 0); | |
1062 | WREG32(mmDMA_QM_3_GLBL_CFG0, 0); | |
1063 | WREG32(mmDMA_QM_4_GLBL_CFG0, 0); | |
1064 | } | |
1065 | ||
/*
 * goya_stop_queue - Gracefully stop a single QMAN
 *
 * @hdev: pointer to hl_device structure
 * @cfg_reg: QMAN GLBL_CFG1 register, used to issue the CP stop request
 * @cp_sts_reg: QMAN CP status register, polled for an in-progress fence
 * @glbl_sts0_reg: QMAN global status register, polled for the stop ack
 *
 * Returns 0 on success (also when the QMAN is stuck on a fence, see
 * below), -EINVAL if the QMAN did not acknowledge the stop in time.
 */
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same*/

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* If the CP is waiting on a fence, give it time to get released */
	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	/* Wait for the QMAN to report that the CP has actually stopped */
	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
1107 | ||
1108 | /* | |
1109 | * goya_stop_external_queues - Stop external queues | |
1110 | * | |
1111 | * @hdev: pointer to hl_device structure | |
1112 | * | |
1113 | * Returns 0 on success | |
1114 | * | |
1115 | */ | |
1116 | static int goya_stop_external_queues(struct hl_device *hdev) | |
1117 | { | |
1118 | int rc, retval = 0; | |
1119 | ||
908087ff OG |
1120 | struct goya_device *goya = hdev->asic_specific; |
1121 | ||
1122 | if (!(goya->hw_cap_initialized & HW_CAP_DMA)) | |
1123 | return retval; | |
1124 | ||
9494a8dd OG |
1125 | rc = goya_stop_queue(hdev, |
1126 | mmDMA_QM_0_GLBL_CFG1, | |
1127 | mmDMA_QM_0_CP_STS, | |
1128 | mmDMA_QM_0_GLBL_STS0); | |
1129 | ||
1130 | if (rc) { | |
1131 | dev_err(hdev->dev, "failed to stop DMA QMAN 0\n"); | |
1132 | retval = -EIO; | |
1133 | } | |
1134 | ||
1135 | rc = goya_stop_queue(hdev, | |
1136 | mmDMA_QM_1_GLBL_CFG1, | |
1137 | mmDMA_QM_1_CP_STS, | |
1138 | mmDMA_QM_1_GLBL_STS0); | |
1139 | ||
1140 | if (rc) { | |
1141 | dev_err(hdev->dev, "failed to stop DMA QMAN 1\n"); | |
1142 | retval = -EIO; | |
1143 | } | |
1144 | ||
1145 | rc = goya_stop_queue(hdev, | |
1146 | mmDMA_QM_2_GLBL_CFG1, | |
1147 | mmDMA_QM_2_CP_STS, | |
1148 | mmDMA_QM_2_GLBL_STS0); | |
1149 | ||
1150 | if (rc) { | |
1151 | dev_err(hdev->dev, "failed to stop DMA QMAN 2\n"); | |
1152 | retval = -EIO; | |
1153 | } | |
1154 | ||
1155 | rc = goya_stop_queue(hdev, | |
1156 | mmDMA_QM_3_GLBL_CFG1, | |
1157 | mmDMA_QM_3_CP_STS, | |
1158 | mmDMA_QM_3_GLBL_STS0); | |
1159 | ||
1160 | if (rc) { | |
1161 | dev_err(hdev->dev, "failed to stop DMA QMAN 3\n"); | |
1162 | retval = -EIO; | |
1163 | } | |
1164 | ||
1165 | rc = goya_stop_queue(hdev, | |
1166 | mmDMA_QM_4_GLBL_CFG1, | |
1167 | mmDMA_QM_4_CP_STS, | |
1168 | mmDMA_QM_4_GLBL_STS0); | |
1169 | ||
1170 | if (rc) { | |
1171 | dev_err(hdev->dev, "failed to stop DMA QMAN 4\n"); | |
1172 | retval = -EIO; | |
1173 | } | |
1174 | ||
1175 | return retval; | |
1176 | } | |
1177 | ||
/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Publishes the host-side queue addresses and lengths to the device
 * CPU, then performs the ready handshake through CPU_PQ_INIT_STATUS.
 *
 * Returns 0 on success, -EIO if the device CPU did not respond in time
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	/* Idempotent: skip if the CPU queues were already set up */
	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Tell the device CPU where the PQ, EQ and CQ memory lives */
	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	/* CQ lives inside the CPU-accessible memory region (device VA) */
	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	/* Interrupt the device CPU so it picks up the new configuration */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	/* Wait for the device CPU to flip the status to READY_FOR_HOST */
	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
1244 | ||
/*
 * goya_set_pll_refclk - Zero the DIV_SEL registers of all PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes 0 to every DIV_SEL register of the CPU, IC, MC, PSOC
 * MME/PCI/EMMC and TPC PLLs. NOTE(review): per the function name this
 * selects the PLL reference clock — confirm against the PLL block spec.
 */
static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}
1282 | ||
/*
 * goya_disable_clk_rlx - Disable clock relaxation on the MME and IC PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * NOTE(review): the magic value 0x100010 comes from the H/W spec of the
 * CLK_RLX registers — confirm its exact meaning there.
 */
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
1288 | ||
/*
 * _goya_tpc_mbist_workaround - Apply the MBIST workaround on a single TPC
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine (0 .. TPC_MAX_NUM - 1)
 *
 * Workaround for Bug H2 #2443 ("TPC SB is not initialized on chip
 * reset"): run MBIST on the TPC memories, pulse the TPC core reset and
 * zero the first 256 words of the TPC SLM.
 */
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	/* Per-TPC strides of the CFG and EML register blocks */
	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	/* MBIST memory configuration values */
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	/* Start MBIST and wait until the H/W reports it is done */
	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	/* Pulse the TPC core reset: assert, wait, de-assert, wait */
	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	/* Zero the first 256 words of the TPC SLM */
	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	/* Read back — presumably to flush the posted writes; confirm */
	val = RREG32(tpc_slm_offset);
}
1353 | ||
1354 | static void goya_tpc_mbist_workaround(struct hl_device *hdev) | |
1355 | { | |
1356 | struct goya_device *goya = hdev->asic_specific; | |
1357 | int i; | |
1358 | ||
1359 | if (hdev->pldm) | |
1360 | return; | |
1361 | ||
1362 | if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST) | |
1363 | return; | |
1364 | ||
1365 | /* Workaround for H2 #2443 */ | |
1366 | ||
1367 | for (i = 0 ; i < TPC_MAX_NUM ; i++) | |
1368 | _goya_tpc_mbist_workaround(hdev, i); | |
1369 | ||
1370 | goya->hw_cap_initialized |= HW_CAP_TPC_MBIST; | |
1371 | } | |
1372 | ||
1373 | /* | |
1374 | * goya_init_golden_registers - Initialize golden registers | |
1375 | * | |
1376 | * @hdev: pointer to hl_device structure | |
1377 | * | |
1378 | * Initialize the H/W registers of the device | |
1379 | * | |
1380 | */ | |
1381 | static void goya_init_golden_registers(struct hl_device *hdev) | |
1382 | { | |
1383 | struct goya_device *goya = hdev->asic_specific; | |
1384 | u32 polynom[10], tpc_intr_mask, offset; | |
1385 | int i; | |
1386 | ||
1387 | if (goya->hw_cap_initialized & HW_CAP_GOLDEN) | |
1388 | return; | |
1389 | ||
1390 | polynom[0] = 0x00020080; | |
1391 | polynom[1] = 0x00401000; | |
1392 | polynom[2] = 0x00200800; | |
1393 | polynom[3] = 0x00002000; | |
1394 | polynom[4] = 0x00080200; | |
1395 | polynom[5] = 0x00040100; | |
1396 | polynom[6] = 0x00100400; | |
1397 | polynom[7] = 0x00004000; | |
1398 | polynom[8] = 0x00010000; | |
1399 | polynom[9] = 0x00008000; | |
1400 | ||
1401 | /* Mask all arithmetic interrupts from TPC */ | |
1402 | tpc_intr_mask = 0x7FFF; | |
1403 | ||
1404 | for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) { | |
1405 | WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302); | |
1406 | WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302); | |
1407 | WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302); | |
1408 | WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302); | |
1409 | WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302); | |
1410 | ||
1411 | WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204); | |
1412 | WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204); | |
1413 | WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204); | |
1414 | WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204); | |
1415 | WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204); | |
1416 | ||
1417 | ||
1418 | WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206); | |
1419 | WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206); | |
1420 | WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206); | |
1421 | WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207); | |
1422 | WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207); | |
1423 | ||
1424 | WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207); | |
1425 | WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207); | |
1426 | WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206); | |
1427 | WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206); | |
1428 | WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206); | |
1429 | ||
1430 | WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101); | |
1431 | WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102); | |
1432 | WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103); | |
1433 | WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104); | |
1434 | WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105); | |
1435 | ||
1436 | WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105); | |
1437 | WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104); | |
1438 | WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103); | |
1439 | WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102); | |
1440 | WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101); | |
1441 | } | |
1442 | ||
1443 | WREG32(mmMME_STORE_MAX_CREDIT, 0x21); | |
1444 | WREG32(mmMME_AGU, 0x0f0f0f10); | |
1445 | WREG32(mmMME_SEI_MASK, ~0x0); | |
1446 | ||
1447 | WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101); | |
1448 | WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101); | |
1449 | WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101); | |
1450 | WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101); | |
1451 | WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101); | |
1452 | WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701); | |
1453 | WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401); | |
1454 | WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401); | |
1455 | WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301); | |
1456 | WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101); | |
1457 | WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101); | |
1458 | WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105); | |
1459 | WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501); | |
1460 | WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501); | |
1461 | WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301); | |
1462 | WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401); | |
1463 | WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101); | |
1464 | WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101); | |
1465 | WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202); | |
1466 | WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101); | |
1467 | WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201); | |
1468 | WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701); | |
1469 | WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101); | |
1470 | WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101); | |
1471 | WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101); | |
1472 | WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101); | |
1473 | WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701); | |
1474 | WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201); | |
1475 | WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101); | |
1476 | WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102); | |
1477 | WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701); | |
1478 | WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701); | |
1479 | WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707); | |
1480 | WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201); | |
1481 | WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201); | |
1482 | WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201); | |
1483 | WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102); | |
1484 | WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102); | |
1485 | WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102); | |
1486 | WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102); | |
1487 | WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102); | |
1488 | WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107); | |
1489 | WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106); | |
1490 | WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102); | |
1491 | WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102); | |
1492 | WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102); | |
1493 | WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102); | |
1494 | WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102); | |
1495 | WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702); | |
1496 | WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702); | |
1497 | WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602); | |
1498 | WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402); | |
1499 | WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202); | |
1500 | WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102); | |
1501 | WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1502 | WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1503 | WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1504 | WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1505 | WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1506 | WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401); | |
1507 | WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101); | |
1508 | WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101); | |
1509 | WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101); | |
1510 | WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101); | |
1511 | WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101); | |
1512 | WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107); | |
1513 | WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107); | |
1514 | WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101); | |
1515 | WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101); | |
1516 | WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101); | |
1517 | WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101); | |
1518 | WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101); | |
1519 | WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501); | |
1520 | WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501); | |
1521 | WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301); | |
1522 | WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401); | |
1523 | WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101); | |
1524 | WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101); | |
1525 | WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1526 | WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1527 | WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1528 | WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1529 | WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1530 | WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101); | |
1531 | ||
1532 | WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101); | |
1533 | WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101); | |
1534 | WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101); | |
1535 | WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102); | |
1536 | WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101); | |
1537 | WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202); | |
1538 | WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201); | |
1539 | WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201); | |
1540 | WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202); | |
1541 | WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101); | |
1542 | WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101); | |
1543 | WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101); | |
1544 | ||
1545 | WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101); | |
1546 | WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101); | |
1547 | WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201); | |
1548 | WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102); | |
1549 | WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101); | |
1550 | WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202); | |
1551 | WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201); | |
1552 | WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201); | |
1553 | WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202); | |
1554 | WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101); | |
1555 | WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101); | |
1556 | WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101); | |
1557 | ||
1558 | WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101); | |
1559 | WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101); | |
1560 | WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301); | |
1561 | WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102); | |
1562 | WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101); | |
1563 | WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301); | |
1564 | WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201); | |
1565 | WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201); | |
1566 | WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402); | |
1567 | WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101); | |
1568 | WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101); | |
1569 | WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401); | |
1570 | ||
1571 | WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101); | |
1572 | WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101); | |
1573 | WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401); | |
1574 | WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102); | |
1575 | WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101); | |
1576 | WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702); | |
1577 | WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201); | |
1578 | WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201); | |
1579 | WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602); | |
1580 | WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101); | |
1581 | WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101); | |
1582 | WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301); | |
1583 | ||
1584 | WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101); | |
1585 | WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101); | |
1586 | WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501); | |
1587 | WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102); | |
1588 | WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101); | |
1589 | WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602); | |
1590 | WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201); | |
1591 | WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201); | |
1592 | WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702); | |
1593 | WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101); | |
1594 | WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101); | |
1595 | WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501); | |
1596 | ||
1597 | WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101); | |
1598 | WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101); | |
1599 | WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601); | |
1600 | WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101); | |
1601 | WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101); | |
1602 | WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702); | |
1603 | WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101); | |
1604 | WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101); | |
1605 | WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702); | |
1606 | WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101); | |
1607 | WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101); | |
1608 | WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501); | |
1609 | ||
1610 | for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) { | |
1611 | WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1612 | WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1613 | WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1614 | WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1615 | WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1616 | WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1617 | ||
1618 | WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1619 | WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1620 | WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1621 | WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1622 | WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1623 | WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1624 | WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1625 | WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1626 | ||
1627 | WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1628 | WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7); | |
1629 | } | |
1630 | ||
1631 | for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) { | |
1632 | WREG32(mmMME1_RTR_SCRAMB_EN + offset, | |
1633 | 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT); | |
1634 | WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset, | |
1635 | 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT); | |
1636 | } | |
1637 | ||
1638 | for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) { | |
1639 | /* | |
1640 | * Workaround for Bug H2 #2441 : | |
1641 | * "ST.NOP set trace event illegal opcode" | |
1642 | */ | |
1643 | WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask); | |
1644 | ||
1645 | WREG32(mmTPC0_NRTR_SCRAMB_EN + offset, | |
1646 | 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT); | |
1647 | WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset, | |
1648 | 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT); | |
8fdacf2a OG |
1649 | |
1650 | WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset, | |
1651 | ICACHE_FETCH_LINE_NUM, 2); | |
839c4803 OG |
1652 | } |
1653 | ||
1654 | WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT); | |
1655 | WREG32(mmDMA_NRTR_NON_LIN_SCRAMB, | |
1656 | 1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT); | |
1657 | ||
1658 | WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT); | |
1659 | WREG32(mmPCI_NRTR_NON_LIN_SCRAMB, | |
1660 | 1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT); | |
1661 | ||
1662 | /* | |
1663 | * Workaround for H2 #HW-23 bug | |
bfb57a91 OG |
1664 | * Set DMA max outstanding read requests to 240 on DMA CH 1. |
1665 | * This limitation is still large enough to not affect Gen4 bandwidth. | |
1666 | * We need to only limit that DMA channel because the user can only read | |
839c4803 OG |
1667 | * from Host using DMA CH 1 |
1668 | */ | |
839c4803 | 1669 | WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0); |
8ba2876d | 1670 | |
a691a1eb | 1671 | WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); |
839c4803 OG |
1672 | |
1673 | goya->hw_cap_initialized |= HW_CAP_GOLDEN; | |
1674 | } | |
1675 | ||
9494a8dd | 1676 | static void goya_init_mme_qman(struct hl_device *hdev) |
839c4803 | 1677 | { |
9494a8dd OG |
1678 | u32 mtr_base_lo, mtr_base_hi; |
1679 | u32 so_base_lo, so_base_hi; | |
1680 | u32 gic_base_lo, gic_base_hi; | |
1681 | u64 qman_base_addr; | |
839c4803 | 1682 | |
9494a8dd OG |
1683 | mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); |
1684 | mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); | |
1685 | so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
1686 | so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
839c4803 | 1687 | |
9494a8dd OG |
1688 | gic_base_lo = |
1689 | lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
1690 | gic_base_hi = | |
1691 | upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); | |
839c4803 | 1692 | |
9494a8dd OG |
1693 | qman_base_addr = hdev->asic_prop.sram_base_address + |
1694 | MME_QMAN_BASE_OFFSET; | |
839c4803 | 1695 | |
9494a8dd OG |
1696 | WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr)); |
1697 | WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr)); | |
1698 | WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH)); | |
1699 | WREG32(mmMME_QM_PQ_PI, 0); | |
1700 | WREG32(mmMME_QM_PQ_CI, 0); | |
1701 | WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0); | |
1702 | WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4); | |
1703 | WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8); | |
1704 | WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC); | |
839c4803 | 1705 | |
9494a8dd OG |
1706 | WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); |
1707 | WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); | |
1708 | WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo); | |
1709 | WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi); | |
839c4803 | 1710 | |
9494a8dd OG |
1711 | /* QMAN CQ has 8 cache lines */ |
1712 | WREG32(mmMME_QM_CQ_CFG1, 0x00080008); | |
839c4803 | 1713 | |
9494a8dd OG |
1714 | WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo); |
1715 | WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi); | |
839c4803 | 1716 | |
9494a8dd | 1717 | WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM); |
839c4803 | 1718 | |
9494a8dd | 1719 | WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN); |
839c4803 | 1720 | |
9494a8dd OG |
1721 | WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT); |
1722 | ||
1723 | WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE); | |
839c4803 OG |
1724 | } |
1725 | ||
/*
 * goya_init_mme_cmdq - Configure the MME command queue (CMDQ).
 * @hdev: pointer to hl_device structure
 *
 * Same flow as goya_init_mme_qman() minus the PQ setup: only the CP
 * message bases, the CQ size and the error-reporting path are programmed
 * before the enable bit is set.
 */
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;

	/* Sync-manager monitor payload and sync object addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC SETSPI register - target of CMDQ error messages */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* MSG_BASE0 -> monitor payload, MSG_BASE1 -> sync objects */
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	/* Enable last, after the queue is fully configured */
	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}
1761 | ||
b2377e03 | 1762 | void goya_init_mme_qmans(struct hl_device *hdev) |
839c4803 | 1763 | { |
9494a8dd OG |
1764 | struct goya_device *goya = hdev->asic_specific; |
1765 | u32 so_base_lo, so_base_hi; | |
839c4803 | 1766 | |
9494a8dd | 1767 | if (goya->hw_cap_initialized & HW_CAP_MME) |
839c4803 | 1768 | return; |
839c4803 | 1769 | |
9494a8dd OG |
1770 | so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); |
1771 | so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
839c4803 | 1772 | |
9494a8dd OG |
1773 | WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo); |
1774 | WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi); | |
1775 | ||
1776 | goya_init_mme_qman(hdev); | |
1777 | goya_init_mme_cmdq(hdev); | |
1778 | ||
1779 | goya->hw_cap_initialized |= HW_CAP_MME; | |
1780 | } | |
1781 | ||
/*
 * goya_init_tpc_qman - Configure a single TPC queue manager (QMAN).
 * @hdev: pointer to hl_device structure
 * @base_off: offset of this TPC's PQ inside SRAM
 * @tpc_id: index of the TPC engine (0 .. TPC_MAX_NUM - 1)
 *
 * The TPC QMAN register blocks are laid out with a fixed stride, so the
 * TPC0 register names plus reg_off address any TPC's QMAN.
 */
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	/* Distance between consecutive TPC QMAN register blocks */
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	/* Sync-manager monitor payload and sync object addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC SETSPI register - target of QMAN error messages */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* This TPC's PQ resides in on-device SRAM at the given offset */
	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	/* PQ base/size, and reset both producer and consumer indices */
	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	/* CP LDMA offsets - same constants used by the MME QMAN init */
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	/* MSG_BASE0 -> monitor payload, MSG_BASE1 -> sync objects */
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	/* Route errors to the GIC, tagged with this TPC's QM event ID */
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	/* Enable last, after the queue is fully configured */
	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}
1831 | ||
/*
 * goya_init_tpc_cmdq - Configure a single TPC command queue (CMDQ).
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine (0 .. TPC_MAX_NUM - 1)
 *
 * Same flow as goya_init_tpc_qman() minus the PQ setup. The CMDQ
 * register blocks are laid out with a fixed stride, hence reg_off.
 */
static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* Distance between consecutive TPC CMDQ register blocks */
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	/* Sync-manager monitor payload and sync object addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* GIC SETSPI register - target of CMDQ error messages */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* MSG_BASE0 -> monitor payload, MSG_BASE1 -> sync objects */
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	/* Enable last, after the queue is fully configured */
	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
1868 | ||
b2377e03 | 1869 | void goya_init_tpc_qmans(struct hl_device *hdev) |
9494a8dd OG |
1870 | { |
1871 | struct goya_device *goya = hdev->asic_specific; | |
1872 | u32 so_base_lo, so_base_hi; | |
1873 | u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW - | |
1874 | mmTPC0_CFG_SM_BASE_ADDRESS_LOW; | |
1875 | int i; | |
1876 | ||
1877 | if (goya->hw_cap_initialized & HW_CAP_TPC) | |
1878 | return; | |
1879 | ||
1880 | so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
1881 | so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
1882 | ||
1883 | for (i = 0 ; i < TPC_MAX_NUM ; i++) { | |
1884 | WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off, | |
1885 | so_base_lo); | |
1886 | WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off, | |
1887 | so_base_hi); | |
1888 | } | |
1889 | ||
1890 | goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0); | |
1891 | goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1); | |
1892 | goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2); | |
1893 | goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3); | |
1894 | goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4); | |
1895 | goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5); | |
1896 | goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6); | |
1897 | goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7); | |
1898 | ||
1899 | for (i = 0 ; i < TPC_MAX_NUM ; i++) | |
1900 | goya_init_tpc_cmdq(hdev, i); | |
1901 | ||
1902 | goya->hw_cap_initialized |= HW_CAP_TPC; | |
1903 | } | |
1904 | ||
1905 | /* | |
1906 | * goya_disable_internal_queues - Disable internal queues | |
1907 | * | |
1908 | * @hdev: pointer to hl_device structure | |
1909 | * | |
1910 | */ | |
1911 | static void goya_disable_internal_queues(struct hl_device *hdev) | |
1912 | { | |
908087ff OG |
1913 | struct goya_device *goya = hdev->asic_specific; |
1914 | ||
1915 | if (!(goya->hw_cap_initialized & HW_CAP_MME)) | |
1916 | goto disable_tpc; | |
1917 | ||
9494a8dd OG |
1918 | WREG32(mmMME_QM_GLBL_CFG0, 0); |
1919 | WREG32(mmMME_CMDQ_GLBL_CFG0, 0); | |
1920 | ||
908087ff OG |
1921 | disable_tpc: |
1922 | if (!(goya->hw_cap_initialized & HW_CAP_TPC)) | |
1923 | return; | |
1924 | ||
9494a8dd OG |
1925 | WREG32(mmTPC0_QM_GLBL_CFG0, 0); |
1926 | WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0); | |
1927 | ||
1928 | WREG32(mmTPC1_QM_GLBL_CFG0, 0); | |
1929 | WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0); | |
1930 | ||
1931 | WREG32(mmTPC2_QM_GLBL_CFG0, 0); | |
1932 | WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0); | |
1933 | ||
1934 | WREG32(mmTPC3_QM_GLBL_CFG0, 0); | |
1935 | WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0); | |
1936 | ||
1937 | WREG32(mmTPC4_QM_GLBL_CFG0, 0); | |
1938 | WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0); | |
1939 | ||
1940 | WREG32(mmTPC5_QM_GLBL_CFG0, 0); | |
1941 | WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0); | |
1942 | ||
1943 | WREG32(mmTPC6_QM_GLBL_CFG0, 0); | |
1944 | WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0); | |
1945 | ||
1946 | WREG32(mmTPC7_QM_GLBL_CFG0, 0); | |
1947 | WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0); | |
1948 | } | |
1949 | ||
1950 | /* | |
1951 | * goya_stop_internal_queues - Stop internal queues | |
1952 | * | |
1953 | * @hdev: pointer to hl_device structure | |
1954 | * | |
1955 | * Returns 0 on success | |
1956 | * | |
1957 | */ | |
1958 | static int goya_stop_internal_queues(struct hl_device *hdev) | |
1959 | { | |
908087ff | 1960 | struct goya_device *goya = hdev->asic_specific; |
9494a8dd OG |
1961 | int rc, retval = 0; |
1962 | ||
908087ff OG |
1963 | if (!(goya->hw_cap_initialized & HW_CAP_MME)) |
1964 | goto stop_tpc; | |
1965 | ||
9494a8dd OG |
1966 | /* |
1967 | * Each queue (QMAN) is a separate H/W logic. That means that each | |
1968 | * QMAN can be stopped independently and failure to stop one does NOT | |
1969 | * mandate we should not try to stop other QMANs | |
1970 | */ | |
1971 | ||
1972 | rc = goya_stop_queue(hdev, | |
1973 | mmMME_QM_GLBL_CFG1, | |
1974 | mmMME_QM_CP_STS, | |
1975 | mmMME_QM_GLBL_STS0); | |
1976 | ||
1977 | if (rc) { | |
1978 | dev_err(hdev->dev, "failed to stop MME QMAN\n"); | |
1979 | retval = -EIO; | |
1980 | } | |
1981 | ||
1982 | rc = goya_stop_queue(hdev, | |
1983 | mmMME_CMDQ_GLBL_CFG1, | |
1984 | mmMME_CMDQ_CP_STS, | |
1985 | mmMME_CMDQ_GLBL_STS0); | |
1986 | ||
1987 | if (rc) { | |
1988 | dev_err(hdev->dev, "failed to stop MME CMDQ\n"); | |
1989 | retval = -EIO; | |
1990 | } | |
1991 | ||
908087ff OG |
1992 | stop_tpc: |
1993 | if (!(goya->hw_cap_initialized & HW_CAP_TPC)) | |
1994 | return retval; | |
1995 | ||
9494a8dd OG |
1996 | rc = goya_stop_queue(hdev, |
1997 | mmTPC0_QM_GLBL_CFG1, | |
1998 | mmTPC0_QM_CP_STS, | |
1999 | mmTPC0_QM_GLBL_STS0); | |
2000 | ||
2001 | if (rc) { | |
2002 | dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n"); | |
2003 | retval = -EIO; | |
2004 | } | |
2005 | ||
2006 | rc = goya_stop_queue(hdev, | |
2007 | mmTPC0_CMDQ_GLBL_CFG1, | |
2008 | mmTPC0_CMDQ_CP_STS, | |
2009 | mmTPC0_CMDQ_GLBL_STS0); | |
2010 | ||
2011 | if (rc) { | |
2012 | dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n"); | |
2013 | retval = -EIO; | |
2014 | } | |
2015 | ||
2016 | rc = goya_stop_queue(hdev, | |
2017 | mmTPC1_QM_GLBL_CFG1, | |
2018 | mmTPC1_QM_CP_STS, | |
2019 | mmTPC1_QM_GLBL_STS0); | |
2020 | ||
2021 | if (rc) { | |
2022 | dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n"); | |
2023 | retval = -EIO; | |
2024 | } | |
2025 | ||
2026 | rc = goya_stop_queue(hdev, | |
2027 | mmTPC1_CMDQ_GLBL_CFG1, | |
2028 | mmTPC1_CMDQ_CP_STS, | |
2029 | mmTPC1_CMDQ_GLBL_STS0); | |
2030 | ||
2031 | if (rc) { | |
2032 | dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n"); | |
2033 | retval = -EIO; | |
2034 | } | |
2035 | ||
2036 | rc = goya_stop_queue(hdev, | |
2037 | mmTPC2_QM_GLBL_CFG1, | |
2038 | mmTPC2_QM_CP_STS, | |
2039 | mmTPC2_QM_GLBL_STS0); | |
2040 | ||
2041 | if (rc) { | |
2042 | dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n"); | |
2043 | retval = -EIO; | |
2044 | } | |
2045 | ||
2046 | rc = goya_stop_queue(hdev, | |
2047 | mmTPC2_CMDQ_GLBL_CFG1, | |
2048 | mmTPC2_CMDQ_CP_STS, | |
2049 | mmTPC2_CMDQ_GLBL_STS0); | |
2050 | ||
2051 | if (rc) { | |
2052 | dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n"); | |
2053 | retval = -EIO; | |
2054 | } | |
2055 | ||
2056 | rc = goya_stop_queue(hdev, | |
2057 | mmTPC3_QM_GLBL_CFG1, | |
2058 | mmTPC3_QM_CP_STS, | |
2059 | mmTPC3_QM_GLBL_STS0); | |
2060 | ||
2061 | if (rc) { | |
2062 | dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n"); | |
2063 | retval = -EIO; | |
2064 | } | |
2065 | ||
2066 | rc = goya_stop_queue(hdev, | |
2067 | mmTPC3_CMDQ_GLBL_CFG1, | |
2068 | mmTPC3_CMDQ_CP_STS, | |
2069 | mmTPC3_CMDQ_GLBL_STS0); | |
2070 | ||
2071 | if (rc) { | |
2072 | dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n"); | |
2073 | retval = -EIO; | |
2074 | } | |
2075 | ||
2076 | rc = goya_stop_queue(hdev, | |
2077 | mmTPC4_QM_GLBL_CFG1, | |
2078 | mmTPC4_QM_CP_STS, | |
2079 | mmTPC4_QM_GLBL_STS0); | |
2080 | ||
2081 | if (rc) { | |
2082 | dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n"); | |
2083 | retval = -EIO; | |
2084 | } | |
2085 | ||
2086 | rc = goya_stop_queue(hdev, | |
2087 | mmTPC4_CMDQ_GLBL_CFG1, | |
2088 | mmTPC4_CMDQ_CP_STS, | |
2089 | mmTPC4_CMDQ_GLBL_STS0); | |
2090 | ||
2091 | if (rc) { | |
2092 | dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n"); | |
2093 | retval = -EIO; | |
2094 | } | |
2095 | ||
2096 | rc = goya_stop_queue(hdev, | |
2097 | mmTPC5_QM_GLBL_CFG1, | |
2098 | mmTPC5_QM_CP_STS, | |
2099 | mmTPC5_QM_GLBL_STS0); | |
2100 | ||
2101 | if (rc) { | |
2102 | dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n"); | |
2103 | retval = -EIO; | |
2104 | } | |
2105 | ||
2106 | rc = goya_stop_queue(hdev, | |
2107 | mmTPC5_CMDQ_GLBL_CFG1, | |
2108 | mmTPC5_CMDQ_CP_STS, | |
2109 | mmTPC5_CMDQ_GLBL_STS0); | |
2110 | ||
2111 | if (rc) { | |
2112 | dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n"); | |
2113 | retval = -EIO; | |
2114 | } | |
2115 | ||
2116 | rc = goya_stop_queue(hdev, | |
2117 | mmTPC6_QM_GLBL_CFG1, | |
2118 | mmTPC6_QM_CP_STS, | |
2119 | mmTPC6_QM_GLBL_STS0); | |
2120 | ||
2121 | if (rc) { | |
2122 | dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n"); | |
2123 | retval = -EIO; | |
2124 | } | |
2125 | ||
2126 | rc = goya_stop_queue(hdev, | |
2127 | mmTPC6_CMDQ_GLBL_CFG1, | |
2128 | mmTPC6_CMDQ_CP_STS, | |
2129 | mmTPC6_CMDQ_GLBL_STS0); | |
2130 | ||
2131 | if (rc) { | |
2132 | dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n"); | |
2133 | retval = -EIO; | |
2134 | } | |
2135 | ||
2136 | rc = goya_stop_queue(hdev, | |
2137 | mmTPC7_QM_GLBL_CFG1, | |
2138 | mmTPC7_QM_CP_STS, | |
2139 | mmTPC7_QM_GLBL_STS0); | |
2140 | ||
2141 | if (rc) { | |
2142 | dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n"); | |
2143 | retval = -EIO; | |
2144 | } | |
2145 | ||
2146 | rc = goya_stop_queue(hdev, | |
2147 | mmTPC7_CMDQ_GLBL_CFG1, | |
2148 | mmTPC7_CMDQ_CP_STS, | |
2149 | mmTPC7_CMDQ_GLBL_STS0); | |
2150 | ||
2151 | if (rc) { | |
2152 | dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n"); | |
2153 | retval = -EIO; | |
2154 | } | |
2155 | ||
2156 | return retval; | |
2157 | } | |
2158 | ||
1251f23a OG |
/*
 * goya_dma_stall - Set the STOP bit in all five DMA QMAN channels.
 * @hdev: pointer to hl_device structure
 *
 * No-op if the DMA QMANs were never initialized (HW_CAP_DMA not set).
 */
static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}
2172 | ||
/*
 * goya_tpc_stall - Set the STALL bit in all eight TPC engines.
 * @hdev: pointer to hl_device structure
 *
 * No-op if the TPCs were never initialized (HW_CAP_TPC not set).
 */
static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}
2189 | ||
2190 | static void goya_mme_stall(struct hl_device *hdev) | |
2191 | { | |
908087ff OG |
2192 | struct goya_device *goya = hdev->asic_specific; |
2193 | ||
2194 | if (!(goya->hw_cap_initialized & HW_CAP_MME)) | |
2195 | return; | |
2196 | ||
1251f23a OG |
2197 | WREG32(mmMME_STALL, 0xFFFFFFFF); |
2198 | } | |
2199 | ||
/*
 * goya_enable_msix - Enable MSI-X and request IRQs for all queues.
 * @hdev: pointer to hl_device structure
 *
 * Allocates exactly GOYA_MSIX_ENTRIES vectors (min == max), then requests
 * one IRQ per completion queue plus one for the event queue. On any
 * request_irq() failure, every completion-queue IRQ requested so far is
 * freed and the vectors are released.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	/* Already enabled (e.g. on re-init) - nothing to do */
	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	/* irq_cnt_init tracks how many CQ IRQs must be undone on failure */
	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	/* The event queue uses a dedicated vector and handler */
	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
2249 | ||
2250 | static void goya_sync_irqs(struct hl_device *hdev) | |
2251 | { | |
2252 | struct goya_device *goya = hdev->asic_specific; | |
2253 | int i; | |
2254 | ||
2255 | if (!(goya->hw_cap_initialized & HW_CAP_MSIX)) | |
2256 | return; | |
2257 | ||
2258 | /* Wait for all pending IRQs to be finished */ | |
2259 | for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) | |
2260 | synchronize_irq(pci_irq_vector(hdev->pdev, i)); | |
2261 | ||
c535bfdd | 2262 | synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX)); |
1251f23a OG |
2263 | } |
2264 | ||
2265 | static void goya_disable_msix(struct hl_device *hdev) | |
2266 | { | |
2267 | struct goya_device *goya = hdev->asic_specific; | |
2268 | int i, irq; | |
2269 | ||
2270 | if (!(goya->hw_cap_initialized & HW_CAP_MSIX)) | |
2271 | return; | |
2272 | ||
2273 | goya_sync_irqs(hdev); | |
2274 | ||
c535bfdd | 2275 | irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX); |
1251f23a OG |
2276 | free_irq(irq, &hdev->event_queue); |
2277 | ||
2278 | for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { | |
2279 | irq = pci_irq_vector(hdev->pdev, i); | |
2280 | free_irq(irq, &hdev->completion_queue[i]); | |
2281 | } | |
2282 | ||
2283 | pci_free_irq_vectors(hdev->pdev); | |
2284 | ||
2285 | goya->hw_cap_initialized &= ~HW_CAP_MSIX; | |
2286 | } | |
2287 | ||
413cf576 TT |
2288 | static void goya_enable_timestamp(struct hl_device *hdev) |
2289 | { | |
2290 | /* Disable the timestamp counter */ | |
2291 | WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); | |
2292 | ||
2293 | /* Zero the lower/upper parts of the 64-bit counter */ | |
2294 | WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); | |
2295 | WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); | |
2296 | ||
2297 | /* Enable the counter */ | |
2298 | WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); | |
2299 | } | |
2300 | ||
/*
 * goya_disable_timestamp - Stop the PSOC timestamp counter.
 * @hdev: pointer to hl_device structure
 */
static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
2306 | ||
1251f23a OG |
/*
 * goya_halt_engines - Quiesce all compute engines and disable interrupts.
 * @hdev: pointer to hl_device structure
 * @hard_reset: true when called on the hard-reset path
 *
 * The ordering is deliberate: stop queue fetching, wait, stall the
 * engines, wait again, then disable the queues. On hard reset, MSI-X is
 * torn down and the device CPU mappings are removed; on soft reset only
 * in-flight IRQ handlers are drained.
 */
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	/* pldm platforms get a much longer grace period between stages */
	if (hdev->pldm)
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

	/* Stop fetching new jobs first... */
	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	/* ...then stall the engines themselves... */
	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	/* ...and only then disable the queues */
	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}
9494a8dd OG |
2342 | |
2343 | /* | |
47f6b41c | 2344 | * goya_load_firmware_to_device() - Load LINUX FW code to device. |
3110c60f | 2345 | * @hdev: Pointer to hl_device structure. |
9494a8dd | 2346 | * |
47f6b41c | 2347 | * Copy LINUX fw code from firmware file to HBM BAR. |
9494a8dd | 2348 | * |
3110c60f | 2349 | * Return: 0 on success, non-zero for failure. |
9494a8dd | 2350 | */ |
47f6b41c | 2351 | static int goya_load_firmware_to_device(struct hl_device *hdev) |
9494a8dd | 2352 | { |
3110c60f | 2353 | void __iomem *dst; |
9494a8dd | 2354 | |
47f6b41c | 2355 | dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; |
9494a8dd | 2356 | |
9bb86b63 | 2357 | return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0); |
3110c60f | 2358 | } |
9494a8dd | 2359 | |
3110c60f | 2360 | /* |
47f6b41c | 2361 | * goya_load_boot_fit_to_device() - Load boot fit to device. |
3110c60f TT |
2362 | * @hdev: Pointer to hl_device structure. |
2363 | * | |
47f6b41c | 2364 | * Copy boot fit file to SRAM BAR. |
3110c60f TT |
2365 | * |
2366 | * Return: 0 on success, non-zero for failure. | |
2367 | */ | |
47f6b41c | 2368 | static int goya_load_boot_fit_to_device(struct hl_device *hdev) |
3110c60f | 2369 | { |
3110c60f | 2370 | void __iomem *dst; |
9494a8dd | 2371 | |
47f6b41c | 2372 | dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET; |
9494a8dd | 2373 | |
9bb86b63 | 2374 | return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0); |
9494a8dd OG |
2375 | } |
2376 | ||
/*
 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string should be located by that offset.
 *
 * goya_read_device_fw_version - Copy a FW component's version string.
 * @hdev: pointer to hl_device structure
 * @fwc: which firmware component to read (U-Boot or Preboot)
 *
 * Return: 0 on success, -EIO on an unknown component or an out-of-range
 * offset (in which case the destination is set to "unavailable").
 */
static int goya_read_device_fw_version(struct hl_device *hdev,
					enum hl_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	/* Pick the scratchpad register and destination buffer per component */
	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmUBOOT_VER_OFFSET);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return -EIO;
	}

	/* Strip the SRAM base bits to get a plain offset into SRAM */
	ver_off &= ~((u32)SRAM_BASE_ADDR);

	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
							VERSION_MAX_LEN);
	} else {
		/* Bogus offset from FW - refuse to read outside the BAR */
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
								name, ver_off);
		strcpy(dest, "unavailable");

		return -EIO;
	}

	return 0;
}
2419 | ||
/*
 * goya_init_cpu - Initialize the embedded device CPU.
 * @hdev: pointer to hl_device structure
 *
 * Maps the DDR BAR to the DRAM base and runs the common FW boot flow
 * (hl_fw_init_cpu). Skipped entirely when the device CPU is disabled
 * (hdev->cpu_enable cleared) or already initialized.
 *
 * Return: 0 on success (or when skipped), negative errno on failure.
 */
static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to device, need to set the ddr bar to
	 * base address of dram
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	/* Common FW boot flow: polls boot status until CPU is up */
	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
			mmCPU_CMD_STATUS_TO_HOST,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			false, GOYA_CPU_TIMEOUT_USEC,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}
2455 | ||
bedd1442 OG |
2456 | static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, |
2457 | u64 phys_addr) | |
2458 | { | |
2459 | u32 status, timeout_usec; | |
2460 | int rc; | |
2461 | ||
2462 | if (hdev->pldm) | |
2463 | timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC; | |
2464 | else | |
2465 | timeout_usec = MMU_CONFIG_TIMEOUT_USEC; | |
2466 | ||
2467 | WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); | |
2468 | WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); | |
2469 | WREG32(MMU_ASID_BUSY, 0x80000000 | asid); | |
2470 | ||
2471 | rc = hl_poll_timeout( | |
2472 | hdev, | |
2473 | MMU_ASID_BUSY, | |
2474 | status, | |
2475 | !(status & 0x80000000), | |
2476 | 1000, | |
2477 | timeout_usec); | |
2478 | ||
2479 | if (rc) { | |
2480 | dev_err(hdev->dev, | |
2481 | "Timeout during MMU hop0 config of asid %d\n", asid); | |
2482 | return rc; | |
2483 | } | |
2484 | ||
2485 | return 0; | |
2486 | } | |
2487 | ||
b2377e03 | 2488 | int goya_mmu_init(struct hl_device *hdev) |
0feaf86d OS |
2489 | { |
2490 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
2491 | struct goya_device *goya = hdev->asic_specific; | |
2492 | u64 hop0_addr; | |
2493 | int rc, i; | |
2494 | ||
2495 | if (!hdev->mmu_enable) | |
2496 | return 0; | |
2497 | ||
2498 | if (goya->hw_cap_initialized & HW_CAP_MMU) | |
2499 | return 0; | |
2500 | ||
27ca384c | 2501 | hdev->dram_default_page_mapping = true; |
0feaf86d OS |
2502 | |
2503 | for (i = 0 ; i < prop->max_asid ; i++) { | |
2504 | hop0_addr = prop->mmu_pgt_addr + | |
2505 | (i * prop->mmu_hop_table_size); | |
2506 | ||
2507 | rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); | |
2508 | if (rc) { | |
2509 | dev_err(hdev->dev, | |
2510 | "failed to set hop0 addr for asid %d\n", i); | |
2511 | goto err; | |
2512 | } | |
2513 | } | |
2514 | ||
2515 | goya->hw_cap_initialized |= HW_CAP_MMU; | |
2516 | ||
2517 | /* init MMU cache manage page */ | |
1e7c1ec1 OG |
2518 | WREG32(mmSTLB_CACHE_INV_BASE_39_8, |
2519 | lower_32_bits(MMU_CACHE_MNG_ADDR >> 8)); | |
2520 | WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40); | |
0feaf86d OS |
2521 | |
2522 | /* Remove follower feature due to performance bug */ | |
2523 | WREG32_AND(mmSTLB_STLB_FEATURE_EN, | |
2524 | (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); | |
2525 | ||
7b6e4ea0 OS |
2526 | hdev->asic_funcs->mmu_invalidate_cache(hdev, true, |
2527 | VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); | |
0feaf86d OS |
2528 | |
2529 | WREG32(mmMMU_MMU_ENABLE, 1); | |
2530 | WREG32(mmMMU_SPI_MASK, 0xF); | |
2531 | ||
2532 | return 0; | |
2533 | ||
2534 | err: | |
2535 | return rc; | |
2536 | } | |
2537 | ||
839c4803 OG |
/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialization order matters: CPU boot, golden registers, DDR-BAR
 * remap to the MMU page tables, MMU, security, QMANs, and finally
 * MSI-X before the CPU queues come up.
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}
2613 | ||
/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	/* pldm (emulation platform) is much slower - use longer timeouts */
	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/* I don't know what is the state of the CPU so make sure it is
		 * stopped in any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		msleep(cpu_timeout_ms);

		/* Restore BAR mapping and clock config before the full reset */
		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_info(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. In either reset we need to wait until the reset
	 * is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset && goya) {
		/* Soft reset only took down the compute engines */
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	if (goya) {
		/* Hard reset wipes everything, including the embedded CPU */
		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
				HW_CAP_DMA | HW_CAP_MME |
				HW_CAP_MMU | HW_CAP_TPC_MBIST |
				HW_CAP_GOLDEN | HW_CAP_TPC);

		memset(goya->events_stat, 0, sizeof(goya->events_stat));
	}
}
2697 | ||
99b9d7b4 OG |
2698 | int goya_suspend(struct hl_device *hdev) |
2699 | { | |
9494a8dd OG |
2700 | int rc; |
2701 | ||
2f55342c | 2702 | rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); |
9494a8dd OG |
2703 | if (rc) |
2704 | dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); | |
2705 | ||
2706 | return rc; | |
99b9d7b4 OG |
2707 | } |
2708 | ||
/* Resume entry point: re-program the PCIe iATU address translation. */
int goya_resume(struct hl_device *hdev)
{
	int rc;

	rc = goya_init_iatu(hdev);

	return rc;
}
2713 | ||
5e6e0239 | 2714 | static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, |
0db57535 | 2715 | void *cpu_addr, dma_addr_t dma_addr, size_t size) |
be5d926b OG |
2716 | { |
2717 | int rc; | |
2718 | ||
2719 | vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | | |
2720 | VM_DONTCOPY | VM_NORESERVE; | |
2721 | ||
a9d4ef64 OG |
2722 | rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, |
2723 | (dma_addr - HOST_PHYS_BASE), size); | |
be5d926b | 2724 | if (rc) |
0db57535 | 2725 | dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); |
be5d926b OG |
2726 | |
2727 | return rc; | |
2728 | } | |
2729 | ||
/*
 * goya_ring_doorbell - Update a H/W queue's producer index on the device
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue's doorbell to ring
 * @pi: new producer index value
 *
 * Writes the PI into the per-queue doorbell register. For the CPU queue,
 * additionally raises a GIC interrupt so the embedded CPU samples the
 * new PI.
 */
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	/* Translate the logical queue id to its doorbell register */
	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	/* The embedded CPU polls its PQ only upon interrupt - kick it */
	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
2811 | ||
b9040c99 | 2812 | void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) |
9494a8dd | 2813 | { |
b9040c99 OG |
2814 | /* The QMANs are on the SRAM so need to copy to IO space */ |
2815 | memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd)); | |
9494a8dd OG |
2816 | } |
2817 | ||
5e6e0239 | 2818 | static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, |
99b9d7b4 OG |
2819 | dma_addr_t *dma_handle, gfp_t flags) |
2820 | { | |
94cb669c TT |
2821 | void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, |
2822 | dma_handle, flags); | |
2823 | ||
2824 | /* Shift to the device's base physical address of host memory */ | |
2825 | if (kernel_addr) | |
2826 | *dma_handle += HOST_PHYS_BASE; | |
2827 | ||
2828 | return kernel_addr; | |
99b9d7b4 OG |
2829 | } |
2830 | ||
5e6e0239 OG |
2831 | static void goya_dma_free_coherent(struct hl_device *hdev, size_t size, |
2832 | void *cpu_addr, dma_addr_t dma_handle) | |
99b9d7b4 | 2833 | { |
94cb669c TT |
2834 | /* Cancel the device's base physical address of host memory */ |
2835 | dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; | |
2836 | ||
2837 | dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); | |
99b9d7b4 OG |
2838 | } |
2839 | ||
/*
 * goya_scrub_device_mem - Scrub (clear) a device memory range
 *
 * Goya implements no memory scrubbing; this stub always reports success
 * so common code that requires the asic_funcs hook keeps working.
 */
int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
	return 0;
}
2844 | ||
9494a8dd OG |
/*
 * goya_get_int_queue_base - Locate an internal H/W queue on the SRAM
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: internal queue (MME or TPC0-7)
 * @dma_handle: filled with the queue's device-side address
 * @queue_len: filled with the queue length
 *
 * Returns the host (BAR-mapped) virtual address of the queue, or NULL
 * for an invalid queue id.
 */
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	/* Per-queue offset from the SRAM base */
	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	/* Apply the same offset to both the host and device views */
	base += offset;
	*dma_handle += offset;

	return base;
}
2902 | ||
/*
 * goya_send_job_on_qman0 - Execute a driver-internal job on DMA QMAN 0
 *
 * @hdev: pointer to hl_device structure
 * @job: CS job whose patched CB ends with a MSG_PROT fence packet slot
 *
 * Temporarily opens QMAN0 security, submits the patched CB and busy-waits
 * on a host fence value written by the trailing MSG_PROT packet. The
 * device must be idle. Returns 0 on success, negative errno on failure
 * (-ETIMEDOUT if the fence never arrived).
 */
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	/* 4-byte host buffer the fence packet will write into */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	/* The fence packet occupies the last slot of the patched CB */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	/* Advance the kernel CI regardless of success to keep the queue sane */
	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	/* Always re-lock QMAN0 before returning */
	goya_qman0_set_security(hdev, false);

	return rc;
}
2971 | ||
9494a8dd | 2972 | int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, |
439bc47b | 2973 | u32 timeout, u64 *result) |
9494a8dd OG |
2974 | { |
2975 | struct goya_device *goya = hdev->asic_specific; | |
9494a8dd OG |
2976 | |
2977 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) { | |
2978 | if (result) | |
2979 | *result = 0; | |
2980 | return 0; | |
2981 | } | |
2982 | ||
788cacf3 OG |
2983 | if (!timeout) |
2984 | timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; | |
2985 | ||
3110c60f TT |
2986 | return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, |
2987 | timeout, result); | |
9494a8dd OG |
2988 | } |
2989 | ||
/*
 * goya_test_queue - Sanity-test one external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the queue to test
 *
 * Submits a MSG_PROT fence packet through the queue and polls a host
 * buffer for the fence value. Returns 0 on success, -EIO if the value
 * never arrived, negative errno on allocation/submission failure.
 */
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	/* Host buffer the fence packet will write into */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	/* The packet itself must also be DMA-able - queue reads it */
	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	/* Advance the kernel CI regardless of success to keep the queue sane */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
3060 | ||
3061 | int goya_test_cpu_queue(struct hl_device *hdev) | |
3062 | { | |
3110c60f | 3063 | struct goya_device *goya = hdev->asic_specific; |
9494a8dd | 3064 | |
3110c60f TT |
3065 | /* |
3066 | * check capability here as send_cpu_message() won't update the result | |
3067 | * value if no capability | |
3068 | */ | |
3069 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
3070 | return 0; | |
9494a8dd | 3071 | |
3110c60f | 3072 | return hl_fw_test_cpu_queue(hdev); |
9494a8dd OG |
3073 | } |
3074 | ||
bedd1442 | 3075 | int goya_test_queues(struct hl_device *hdev) |
9494a8dd | 3076 | { |
9494a8dd OG |
3077 | int i, rc, ret_val = 0; |
3078 | ||
3079 | for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { | |
3080 | rc = goya_test_queue(hdev, i); | |
3081 | if (rc) | |
3082 | ret_val = -EINVAL; | |
3083 | } | |
3084 | ||
9494a8dd OG |
3085 | return ret_val; |
3086 | } | |
3087 | ||
5e6e0239 OG |
3088 | static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size, |
3089 | gfp_t mem_flags, dma_addr_t *dma_handle) | |
9494a8dd | 3090 | { |
94cb669c TT |
3091 | void *kernel_addr; |
3092 | ||
9494a8dd OG |
3093 | if (size > GOYA_DMA_POOL_BLK_SIZE) |
3094 | return NULL; | |
3095 | ||
94cb669c TT |
3096 | kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); |
3097 | ||
3098 | /* Shift to the device's base physical address of host memory */ | |
3099 | if (kernel_addr) | |
3100 | *dma_handle += HOST_PHYS_BASE; | |
3101 | ||
3102 | return kernel_addr; | |
9494a8dd OG |
3103 | } |
3104 | ||
5e6e0239 OG |
3105 | static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr, |
3106 | dma_addr_t dma_addr) | |
9494a8dd | 3107 | { |
94cb669c TT |
3108 | /* Cancel the device's base physical address of host memory */ |
3109 | dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; | |
3110 | ||
3111 | dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); | |
9494a8dd OG |
3112 | } |
3113 | ||
bedd1442 OG |
3114 | void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, |
3115 | dma_addr_t *dma_handle) | |
9494a8dd | 3116 | { |
f09415f5 OG |
3117 | void *vaddr; |
3118 | ||
3119 | vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); | |
3120 | *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address + | |
3121 | VA_CPU_ACCESSIBLE_MEM_ADDR; | |
3122 | ||
3123 | return vaddr; | |
9494a8dd OG |
3124 | } |
3125 | ||
bedd1442 OG |
/*
 * goya_cpu_accessible_dma_pool_free - Free a CPU-accessible pool buffer
 *
 * Thin pass-through to the common firmware-pool free helper.
 */
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
3131 | ||
94cb669c | 3132 | static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, |
5e6e0239 | 3133 | int nents, enum dma_data_direction dir) |
eff6f4a0 | 3134 | { |
94cb669c TT |
3135 | struct scatterlist *sg; |
3136 | int i; | |
3137 | ||
3138 | if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) | |
eff6f4a0 OG |
3139 | return -ENOMEM; |
3140 | ||
94cb669c TT |
3141 | /* Shift to the device's base physical address of host memory */ |
3142 | for_each_sg(sgl, sg, nents, i) | |
3143 | sg->dma_address += HOST_PHYS_BASE; | |
3144 | ||
eff6f4a0 OG |
3145 | return 0; |
3146 | } | |
3147 | ||
94cb669c | 3148 | static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, |
5e6e0239 | 3149 | int nents, enum dma_data_direction dir) |
eff6f4a0 | 3150 | { |
94cb669c TT |
3151 | struct scatterlist *sg; |
3152 | int i; | |
3153 | ||
3154 | /* Cancel the device's base physical address of host memory */ | |
3155 | for_each_sg(sgl, sg, nents, i) | |
3156 | sg->dma_address -= HOST_PHYS_BASE; | |
3157 | ||
3158 | dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); | |
eff6f4a0 OG |
3159 | } |
3160 | ||
/*
 * goya_get_dma_desc_list_size - Size of the LIN_DMA packets for an sg table
 *
 * @hdev: pointer to hl_device structure
 * @sgt: DMA-mapped scatter-gather table of the host buffer
 *
 * Counts how many LIN_DMA packets are needed to move the buffer, merging
 * physically-contiguous sg entries as long as the combined length stays
 * within DMA_MAX_TRANSFER_SIZE. Returns the total byte size of those
 * packets (used to size the patched CB).
 */
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry terminates the mapped list */
		if (len == 0)
			break;

		/* Greedily swallow following entries that extend this one */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			/* Merge only if contiguous and under the HW limit */
			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
3201 | ||
3202 | static int goya_pin_memory_before_cs(struct hl_device *hdev, | |
3203 | struct hl_cs_parser *parser, | |
3204 | struct packet_lin_dma *user_dma_pkt, | |
3205 | u64 addr, enum dma_data_direction dir) | |
3206 | { | |
3207 | struct hl_userptr *userptr; | |
3208 | int rc; | |
3209 | ||
df697bce | 3210 | if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), |
eff6f4a0 OG |
3211 | parser->job_userptr_list, &userptr)) |
3212 | goto already_pinned; | |
3213 | ||
3214 | userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); | |
3215 | if (!userptr) | |
3216 | return -ENOMEM; | |
3217 | ||
df697bce TT |
3218 | rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), |
3219 | userptr); | |
eff6f4a0 OG |
3220 | if (rc) |
3221 | goto free_userptr; | |
3222 | ||
3223 | list_add_tail(&userptr->job_node, parser->job_userptr_list); | |
3224 | ||
3225 | rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, | |
3226 | userptr->sgt->nents, dir); | |
3227 | if (rc) { | |
3228 | dev_err(hdev->dev, "failed to map sgt with DMA region\n"); | |
3229 | goto unpin_memory; | |
3230 | } | |
3231 | ||
3232 | userptr->dma_mapped = true; | |
3233 | userptr->dir = dir; | |
3234 | ||
3235 | already_pinned: | |
3236 | parser->patched_cb_size += | |
3237 | goya_get_dma_desc_list_size(hdev, userptr->sgt); | |
3238 | ||
3239 | return 0; | |
3240 | ||
3241 | unpin_memory: | |
3242 | hl_unpin_host_memory(hdev, userptr); | |
3243 | free_userptr: | |
3244 | kfree(userptr); | |
3245 | return rc; | |
3246 | } | |
3247 | ||
3248 | static int goya_validate_dma_pkt_host(struct hl_device *hdev, | |
3249 | struct hl_cs_parser *parser, | |
3250 | struct packet_lin_dma *user_dma_pkt) | |
3251 | { | |
3252 | u64 device_memory_addr, addr; | |
3253 | enum dma_data_direction dir; | |
3254 | enum goya_dma_direction user_dir; | |
3255 | bool sram_addr = true; | |
3256 | bool skip_host_mem_pin = false; | |
3257 | bool user_memset; | |
df697bce | 3258 | u32 ctl; |
eff6f4a0 OG |
3259 | int rc = 0; |
3260 | ||
df697bce TT |
3261 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3262 | ||
3263 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3264 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3265 | ||
df697bce | 3266 | user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >> |
eff6f4a0 OG |
3267 | GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT; |
3268 | ||
3269 | switch (user_dir) { | |
3270 | case DMA_HOST_TO_DRAM: | |
3271 | dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n"); | |
3272 | dir = DMA_TO_DEVICE; | |
3273 | sram_addr = false; | |
df697bce TT |
3274 | addr = le64_to_cpu(user_dma_pkt->src_addr); |
3275 | device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3276 | if (user_memset) |
3277 | skip_host_mem_pin = true; | |
3278 | break; | |
3279 | ||
3280 | case DMA_DRAM_TO_HOST: | |
3281 | dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n"); | |
3282 | dir = DMA_FROM_DEVICE; | |
3283 | sram_addr = false; | |
df697bce TT |
3284 | addr = le64_to_cpu(user_dma_pkt->dst_addr); |
3285 | device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); | |
eff6f4a0 OG |
3286 | break; |
3287 | ||
3288 | case DMA_HOST_TO_SRAM: | |
3289 | dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n"); | |
3290 | dir = DMA_TO_DEVICE; | |
df697bce TT |
3291 | addr = le64_to_cpu(user_dma_pkt->src_addr); |
3292 | device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3293 | if (user_memset) |
3294 | skip_host_mem_pin = true; | |
3295 | break; | |
3296 | ||
3297 | case DMA_SRAM_TO_HOST: | |
3298 | dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n"); | |
3299 | dir = DMA_FROM_DEVICE; | |
df697bce TT |
3300 | addr = le64_to_cpu(user_dma_pkt->dst_addr); |
3301 | device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); | |
eff6f4a0 OG |
3302 | break; |
3303 | default: | |
3304 | dev_err(hdev->dev, "DMA direction is undefined\n"); | |
3305 | return -EFAULT; | |
3306 | } | |
3307 | ||
f0539fb0 DBZ |
3308 | if (sram_addr) { |
3309 | if (!hl_mem_area_inside_range(device_memory_addr, | |
3310 | le32_to_cpu(user_dma_pkt->tsize), | |
3311 | hdev->asic_prop.sram_user_base_address, | |
3312 | hdev->asic_prop.sram_end_address)) { | |
3313 | ||
3314 | dev_err(hdev->dev, | |
3315 | "SRAM address 0x%llx + 0x%x is invalid\n", | |
3316 | device_memory_addr, | |
3317 | user_dma_pkt->tsize); | |
3318 | return -EFAULT; | |
3319 | } | |
3320 | } else { | |
3321 | if (!hl_mem_area_inside_range(device_memory_addr, | |
3322 | le32_to_cpu(user_dma_pkt->tsize), | |
3323 | hdev->asic_prop.dram_user_base_address, | |
3324 | hdev->asic_prop.dram_end_address)) { | |
3325 | ||
3326 | dev_err(hdev->dev, | |
3327 | "DRAM address 0x%llx + 0x%x is invalid\n", | |
3328 | device_memory_addr, | |
3329 | user_dma_pkt->tsize); | |
3330 | return -EFAULT; | |
eff6f4a0 OG |
3331 | } |
3332 | } | |
3333 | ||
3334 | if (skip_host_mem_pin) | |
3335 | parser->patched_cb_size += sizeof(*user_dma_pkt); | |
3336 | else { | |
3337 | if ((dir == DMA_TO_DEVICE) && | |
3338 | (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) { | |
3339 | dev_err(hdev->dev, | |
3340 | "Can't DMA from host on queue other then 1\n"); | |
3341 | return -EFAULT; | |
3342 | } | |
3343 | ||
3344 | rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt, | |
3345 | addr, dir); | |
3346 | } | |
3347 | ||
3348 | return rc; | |
3349 | } | |
3350 | ||
3351 | static int goya_validate_dma_pkt_no_host(struct hl_device *hdev, | |
3352 | struct hl_cs_parser *parser, | |
3353 | struct packet_lin_dma *user_dma_pkt) | |
3354 | { | |
3355 | u64 sram_memory_addr, dram_memory_addr; | |
3356 | enum goya_dma_direction user_dir; | |
df697bce | 3357 | u32 ctl; |
eff6f4a0 | 3358 | |
df697bce TT |
3359 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3360 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3361 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3362 | ||
3363 | if (user_dir == DMA_DRAM_TO_SRAM) { | |
3364 | dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n"); | |
df697bce TT |
3365 | dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); |
3366 | sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3367 | } else { |
3368 | dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n"); | |
df697bce TT |
3369 | sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); |
3370 | dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); | |
eff6f4a0 OG |
3371 | } |
3372 | ||
df697bce TT |
3373 | if (!hl_mem_area_inside_range(sram_memory_addr, |
3374 | le32_to_cpu(user_dma_pkt->tsize), | |
eff6f4a0 OG |
3375 | hdev->asic_prop.sram_user_base_address, |
3376 | hdev->asic_prop.sram_end_address)) { | |
3377 | dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n", | |
3378 | sram_memory_addr, user_dma_pkt->tsize); | |
3379 | return -EFAULT; | |
3380 | } | |
3381 | ||
df697bce TT |
3382 | if (!hl_mem_area_inside_range(dram_memory_addr, |
3383 | le32_to_cpu(user_dma_pkt->tsize), | |
eff6f4a0 OG |
3384 | hdev->asic_prop.dram_user_base_address, |
3385 | hdev->asic_prop.dram_end_address)) { | |
3386 | dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n", | |
3387 | dram_memory_addr, user_dma_pkt->tsize); | |
3388 | return -EFAULT; | |
3389 | } | |
3390 | ||
3391 | parser->patched_cb_size += sizeof(*user_dma_pkt); | |
3392 | ||
3393 | return 0; | |
3394 | } | |
3395 | ||
3396 | static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev, | |
3397 | struct hl_cs_parser *parser, | |
3398 | struct packet_lin_dma *user_dma_pkt) | |
3399 | { | |
3400 | enum goya_dma_direction user_dir; | |
df697bce | 3401 | u32 ctl; |
eff6f4a0 OG |
3402 | int rc; |
3403 | ||
3404 | dev_dbg(hdev->dev, "DMA packet details:\n"); | |
b421d83a BS |
3405 | dev_dbg(hdev->dev, "source == 0x%llx\n", |
3406 | le64_to_cpu(user_dma_pkt->src_addr)); | |
3407 | dev_dbg(hdev->dev, "destination == 0x%llx\n", | |
3408 | le64_to_cpu(user_dma_pkt->dst_addr)); | |
3409 | dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); | |
eff6f4a0 | 3410 | |
df697bce TT |
3411 | ctl = le32_to_cpu(user_dma_pkt->ctl); |
3412 | user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> | |
eff6f4a0 OG |
3413 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; |
3414 | ||
3415 | /* | |
3416 | * Special handling for DMA with size 0. The H/W has a bug where | |
3417 | * this can cause the QMAN DMA to get stuck, so block it here. | |
3418 | */ | |
3419 | if (user_dma_pkt->tsize == 0) { | |
3420 | dev_err(hdev->dev, | |
3421 | "Got DMA with size 0, might reset the device\n"); | |
3422 | return -EINVAL; | |
3423 | } | |
3424 | ||
3425 | if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM)) | |
3426 | rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt); | |
3427 | else | |
3428 | rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt); | |
3429 | ||
3430 | return rc; | |
3431 | } | |
3432 | ||
3433 | static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, | |
3434 | struct hl_cs_parser *parser, | |
3435 | struct packet_lin_dma *user_dma_pkt) | |
3436 | { | |
3437 | dev_dbg(hdev->dev, "DMA packet details:\n"); | |
b421d83a BS |
3438 | dev_dbg(hdev->dev, "source == 0x%llx\n", |
3439 | le64_to_cpu(user_dma_pkt->src_addr)); | |
3440 | dev_dbg(hdev->dev, "destination == 0x%llx\n", | |
3441 | le64_to_cpu(user_dma_pkt->dst_addr)); | |
3442 | dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); | |
eff6f4a0 OG |
3443 | |
3444 | /* | |
3445 | * WA for HW-23. | |
3446 | * We can't allow user to read from Host using QMANs other than 1. | |
64a7e295 | 3447 | * PMMU and HPMMU addresses are equal, check only one of them. |
eff6f4a0 | 3448 | */ |
bfb57a91 | 3449 | if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && |
df697bce TT |
3450 | hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), |
3451 | le32_to_cpu(user_dma_pkt->tsize), | |
64a7e295 OS |
3452 | hdev->asic_prop.pmmu.start_addr, |
3453 | hdev->asic_prop.pmmu.end_addr)) { | |
eff6f4a0 OG |
3454 | dev_err(hdev->dev, |
3455 | "Can't DMA from host on queue other then 1\n"); | |
3456 | return -EFAULT; | |
3457 | } | |
3458 | ||
3459 | if (user_dma_pkt->tsize == 0) { | |
3460 | dev_err(hdev->dev, | |
3461 | "Got DMA with size 0, might reset the device\n"); | |
3462 | return -EINVAL; | |
3463 | } | |
3464 | ||
3465 | parser->patched_cb_size += sizeof(*user_dma_pkt); | |
3466 | ||
3467 | return 0; | |
3468 | } | |
3469 | ||
3470 | static int goya_validate_wreg32(struct hl_device *hdev, | |
3471 | struct hl_cs_parser *parser, | |
3472 | struct packet_wreg32 *wreg_pkt) | |
3473 | { | |
3474 | struct goya_device *goya = hdev->asic_specific; | |
3475 | u32 sob_start_addr, sob_end_addr; | |
3476 | u16 reg_offset; | |
3477 | ||
df697bce TT |
3478 | reg_offset = le32_to_cpu(wreg_pkt->ctl) & |
3479 | GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK; | |
eff6f4a0 OG |
3480 | |
3481 | dev_dbg(hdev->dev, "WREG32 packet details:\n"); | |
3482 | dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset); | |
b421d83a BS |
3483 | dev_dbg(hdev->dev, "value == 0x%x\n", |
3484 | le32_to_cpu(wreg_pkt->value)); | |
eff6f4a0 | 3485 | |
6765fda0 | 3486 | if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) { |
eff6f4a0 OG |
3487 | dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n", |
3488 | reg_offset); | |
3489 | return -EPERM; | |
3490 | } | |
3491 | ||
3492 | /* | |
3493 | * With MMU, DMA channels are not secured, so it doesn't matter where | |
3494 | * the WR COMP will be written to because it will go out with | |
3495 | * non-secured property | |
3496 | */ | |
3497 | if (goya->hw_cap_initialized & HW_CAP_MMU) | |
3498 | return 0; | |
3499 | ||
3500 | sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); | |
3501 | sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023); | |
3502 | ||
df697bce TT |
3503 | if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) || |
3504 | (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) { | |
eff6f4a0 OG |
3505 | |
3506 | dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n", | |
3507 | wreg_pkt->value); | |
3508 | return -EPERM; | |
3509 | } | |
3510 | ||
3511 | return 0; | |
3512 | } | |
3513 | ||
/*
 * goya_validate_cb - walk a user CB packet-by-packet and validate it
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser context; patched_cb_size is computed here
 * @is_mmu: true when validating via the MMU path (LIN_DMA rules differ)
 *
 * Computes the size the patched CB will need (including two trailing
 * MSG_PROT packets) while rejecting forbidden packet types and packets
 * that overrun the CB boundary.
 */
static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		/* Packet id lives in the common header of every packet type */
		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* Reject a packet whose fixed size spills past the CB end */
		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			/* LIN_DMA validation may grow patched_cb_size itself */
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			/* Benign packets are copied as-is; just account size */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
3620 | ||
/*
 * goya_patch_dma_packet - rewrite a host-involved user LIN_DMA packet into
 *                         one or more LIN_DMA packets over pinned host memory
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser context (holds the job's pinned userptr list)
 * @user_dma_pkt: the user's original LIN_DMA packet
 * @new_dma_pkt: destination in the patched CB for the generated packet(s)
 * @new_dma_pkt_size: out - total bytes written to the patched CB
 *
 * The host virtual range was pinned earlier; here its scatter-gather list is
 * walked and contiguous DMA segments are coalesced (up to
 * DMA_MAX_TRANSFER_SIZE) into individual LIN_DMA packets. Device-only
 * transfers and zero-size packets are copied through unchanged.
 */
static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	/* Device-internal or zero-size packets need no patching - copy as-is */
	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	/* Determine which side is the host address and the DMA direction */
	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	/* The host range must have been pinned in the earlier parse stage */
	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* memset to device reads no host memory - copy packet unchanged */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	/* Save the user's completion flags; only the LAST generated packet
	 * may signal completion (restored below)
	 */
	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Coalesce physically-contiguous entries into one transfer,
		 * bounded by the engine's maximum transfer size
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Engine barrier is only needed on the first generated packet */
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		/* memset repeats at the same device address; plain copy
		 * advances through device memory
		 */
		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
3750 | ||
/*
 * goya_patch_cb - copy/patch a user CB into the pre-allocated patched CB
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser context (patched_cb already allocated and sized)
 *
 * Walks the user CB a packet at a time: LIN_DMA packets are rewritten by
 * goya_patch_dma_packet(), WREG32 packets are copied then re-validated on
 * the kernel-side copy (avoids a TOCTOU on the user buffer), benign packets
 * are copied verbatim, and forbidden packets abort the parse.
 */
static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand one user packet into several patched ones */
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			/* Copy first, then validate the kernel-side copy so a
			 * concurrent user write cannot bypass the check
			 */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
3848 | ||
/*
 * goya_parse_cb_mmu - parse an external-queue CB when the MMU is enabled
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser context
 *
 * With MMU there is no per-packet patching; the user CB is copied into a
 * kernel CB (with room for two trailing MSG_PROT packets) and validated
 * in place. On success parser->patched_cb holds one reference for the job.
 */
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation must not change the size computed above */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}
3927 | ||
5e6e0239 OG |
3928 | static int goya_parse_cb_no_mmu(struct hl_device *hdev, |
3929 | struct hl_cs_parser *parser) | |
eff6f4a0 OG |
3930 | { |
3931 | u64 patched_cb_handle; | |
3932 | int rc; | |
3933 | ||
3934 | rc = goya_validate_cb(hdev, parser, false); | |
3935 | ||
3936 | if (rc) | |
3937 | goto free_userptr; | |
3938 | ||
fa8641a1 | 3939 | rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, |
ef6a0f6c | 3940 | parser->patched_cb_size, false, false, |
fa8641a1 | 3941 | &patched_cb_handle); |
eff6f4a0 OG |
3942 | if (rc) { |
3943 | dev_err(hdev->dev, | |
3944 | "Failed to allocate patched CB for DMA CS %d\n", rc); | |
3945 | goto free_userptr; | |
3946 | } | |
3947 | ||
3948 | patched_cb_handle >>= PAGE_SHIFT; | |
3949 | parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, | |
3950 | (u32) patched_cb_handle); | |
75d9a2a0 | 3951 | /* hl_cb_get should never fail here */ |
eff6f4a0 | 3952 | if (!parser->patched_cb) { |
75d9a2a0 AM |
3953 | dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", |
3954 | (u32) patched_cb_handle); | |
eff6f4a0 OG |
3955 | rc = -EFAULT; |
3956 | goto out; | |
3957 | } | |
3958 | ||
3959 | rc = goya_patch_cb(hdev, parser); | |
3960 | ||
3961 | if (rc) | |
3962 | hl_cb_put(parser->patched_cb); | |
3963 | ||
3964 | out: | |
3965 | /* | |
3966 | * Always call cb destroy here because we still have 1 reference | |
3967 | * to it by calling cb_get earlier. After the job will be completed, | |
3968 | * cb_put will release it, but here we want to remove it from the | |
3969 | * idr | |
3970 | */ | |
3971 | hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, | |
3972 | patched_cb_handle << PAGE_SHIFT); | |
3973 | ||
3974 | free_userptr: | |
3975 | if (rc) | |
3976 | hl_userptr_delete_list(hdev, parser->job_userptr_list); | |
3977 | return rc; | |
3978 | } | |
3979 | ||
883c2459 | 3980 | static int goya_parse_cb_no_ext_queue(struct hl_device *hdev, |
5e6e0239 | 3981 | struct hl_cs_parser *parser) |
eff6f4a0 OG |
3982 | { |
3983 | struct asic_fixed_properties *asic_prop = &hdev->asic_prop; | |
3984 | struct goya_device *goya = hdev->asic_specific; | |
3985 | ||
883c2459 OG |
3986 | if (goya->hw_cap_initialized & HW_CAP_MMU) |
3987 | return 0; | |
eff6f4a0 | 3988 | |
883c2459 OG |
3989 | /* For internal queue jobs, just check if CB address is valid */ |
3990 | if (hl_mem_area_inside_range( | |
3991 | (u64) (uintptr_t) parser->user_cb, | |
3992 | parser->user_cb_size, | |
3993 | asic_prop->sram_user_base_address, | |
3994 | asic_prop->sram_end_address)) | |
3995 | return 0; | |
eff6f4a0 | 3996 | |
883c2459 OG |
3997 | if (hl_mem_area_inside_range( |
3998 | (u64) (uintptr_t) parser->user_cb, | |
3999 | parser->user_cb_size, | |
4000 | asic_prop->dram_user_base_address, | |
4001 | asic_prop->dram_end_address)) | |
4002 | return 0; | |
eff6f4a0 | 4003 | |
883c2459 | 4004 | dev_err(hdev->dev, |
7f74d4d3 | 4005 | "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n", |
883c2459 | 4006 | parser->user_cb, parser->user_cb_size); |
eff6f4a0 | 4007 | |
883c2459 | 4008 | return -EFAULT; |
eff6f4a0 OG |
4009 | } |
4010 | ||
4011 | int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) | |
4012 | { | |
4013 | struct goya_device *goya = hdev->asic_specific; | |
4014 | ||
cb596aee | 4015 | if (parser->queue_type == QUEUE_TYPE_INT) |
883c2459 | 4016 | return goya_parse_cb_no_ext_queue(hdev, parser); |
eff6f4a0 | 4017 | |
5809e18e | 4018 | if (goya->hw_cap_initialized & HW_CAP_MMU) |
eff6f4a0 OG |
4019 | return goya_parse_cb_mmu(hdev, parser); |
4020 | else | |
4021 | return goya_parse_cb_no_mmu(hdev, parser); | |
4022 | } | |
4023 | ||
82948e6e | 4024 | void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, |
926ba4cc OG |
4025 | u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec, |
4026 | bool eb) | |
eff6f4a0 OG |
4027 | { |
4028 | struct packet_msg_prot *cq_pkt; | |
df697bce | 4029 | u32 tmp; |
eff6f4a0 | 4030 | |
82948e6e | 4031 | cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); |
eff6f4a0 | 4032 | |
df697bce | 4033 | tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | |
eff6f4a0 OG |
4034 | (1 << GOYA_PKT_CTL_EB_SHIFT) | |
4035 | (1 << GOYA_PKT_CTL_MB_SHIFT); | |
df697bce TT |
4036 | cq_pkt->ctl = cpu_to_le32(tmp); |
4037 | cq_pkt->value = cpu_to_le32(cq_val); | |
4038 | cq_pkt->addr = cpu_to_le64(cq_addr); | |
eff6f4a0 OG |
4039 | |
4040 | cq_pkt++; | |
4041 | ||
df697bce | 4042 | tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | |
eff6f4a0 | 4043 | (1 << GOYA_PKT_CTL_MB_SHIFT); |
df697bce TT |
4044 | cq_pkt->ctl = cpu_to_le32(tmp); |
4045 | cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF); | |
4046 | cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF); | |
eff6f4a0 OG |
4047 | } |
4048 | ||
b2377e03 | 4049 | void goya_update_eq_ci(struct hl_device *hdev, u32 val) |
1251f23a | 4050 | { |
4095a176 | 4051 | WREG32(mmCPU_EQ_CI, val); |
1251f23a OG |
4052 | } |
4053 | ||
b2377e03 | 4054 | void goya_restore_phase_topology(struct hl_device *hdev) |
9c46f7b1 DBZ |
4055 | { |
4056 | ||
4057 | } | |
4058 | ||
4059 | static void goya_clear_sm_regs(struct hl_device *hdev) | |
eff6f4a0 OG |
4060 | { |
4061 | int i, num_of_sob_in_longs, num_of_mon_in_longs; | |
4062 | ||
4063 | num_of_sob_in_longs = | |
4064 | ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4); | |
4065 | ||
4066 | num_of_mon_in_longs = | |
4067 | ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4); | |
4068 | ||
4069 | for (i = 0 ; i < num_of_sob_in_longs ; i += 4) | |
4070 | WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0); | |
4071 | ||
4072 | for (i = 0 ; i < num_of_mon_in_longs ; i += 4) | |
4073 | WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0); | |
4074 | ||
4075 | /* Flush all WREG to prevent race */ | |
4076 | i = RREG32(mmSYNC_MNGR_SOB_OBJ_0); | |
4077 | } | |
4078 | ||
c2164773 | 4079 | /* |
4a0ce776 TT |
4080 | * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped |
4081 | * address. | |
c2164773 OG |
4082 | * |
4083 | * @hdev: pointer to hl_device structure | |
4a0ce776 | 4084 | * @addr: device or host mapped address |
c2164773 OG |
4085 | * @val: returned value |
4086 | * | |
4087 | * In case of DDR address that is not mapped into the default aperture that | |
4088 | * the DDR bar exposes, the function will configure the iATU so that the DDR | |
4089 | * bar will be positioned at a base address that allows reading from the | |
4090 | * required address. Configuring the iATU during normal operation can | |
4091 | * lead to undefined behavior and therefore, should be done with extreme care | |
4092 | * | |
4093 | */ | |
5e6e0239 | 4094 | static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) |
c2164773 OG |
4095 | { |
4096 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
a38693d7 | 4097 | u64 ddr_bar_addr; |
c2164773 OG |
4098 | int rc = 0; |
4099 | ||
4100 | if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { | |
4101 | *val = RREG32(addr - CFG_BASE); | |
4102 | ||
4103 | } else if ((addr >= SRAM_BASE_ADDR) && | |
4104 | (addr < SRAM_BASE_ADDR + SRAM_SIZE)) { | |
4105 | ||
4106 | *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
4107 | (addr - SRAM_BASE_ADDR)); | |
4108 | ||
2557f27f | 4109 | } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { |
c2164773 OG |
4110 | |
4111 | u64 bar_base_addr = DRAM_PHYS_BASE + | |
4112 | (addr & ~(prop->dram_pci_bar_size - 0x1ull)); | |
4113 | ||
a38693d7 OG |
4114 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); |
4115 | if (ddr_bar_addr != U64_MAX) { | |
c2164773 OG |
4116 | *val = readl(hdev->pcie_bar[DDR_BAR_ID] + |
4117 | (addr - bar_base_addr)); | |
4118 | ||
a38693d7 OG |
4119 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, |
4120 | ddr_bar_addr); | |
c2164773 | 4121 | } |
a38693d7 OG |
4122 | if (ddr_bar_addr == U64_MAX) |
4123 | rc = -EIO; | |
4a0ce776 TT |
4124 | |
4125 | } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { | |
4126 | *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); | |
4127 | ||
c2164773 OG |
4128 | } else { |
4129 | rc = -EFAULT; | |
4130 | } | |
4131 | ||
4132 | return rc; | |
4133 | } | |
4134 | ||
/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
 *                        address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: value to write
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 * Return: 0 on success, -EIO if moving the DDR bar failed, -EFAULT if the
 * address falls in none of the supported ranges.
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space - write via the register interface */
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is permanently mapped through its PCI BAR */
		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		/* Align the DDR BAR window down to BAR size to cover addr */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			/* Restore the previous BAR base */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
					ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical address - only safe without an IOMMU */
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4190 | ||
5cce5146 MH |
4191 | static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) |
4192 | { | |
4193 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
4194 | u64 ddr_bar_addr; | |
4195 | int rc = 0; | |
4196 | ||
4197 | if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { | |
4198 | u32 val_l = RREG32(addr - CFG_BASE); | |
4199 | u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); | |
4200 | ||
4201 | *val = (((u64) val_h) << 32) | val_l; | |
4202 | ||
4203 | } else if ((addr >= SRAM_BASE_ADDR) && | |
4204 | (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { | |
4205 | ||
4206 | *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
4207 | (addr - SRAM_BASE_ADDR)); | |
4208 | ||
2557f27f LJ |
4209 | } else if (addr <= |
4210 | DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { | |
5cce5146 MH |
4211 | |
4212 | u64 bar_base_addr = DRAM_PHYS_BASE + | |
4213 | (addr & ~(prop->dram_pci_bar_size - 0x1ull)); | |
4214 | ||
4215 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); | |
4216 | if (ddr_bar_addr != U64_MAX) { | |
4217 | *val = readq(hdev->pcie_bar[DDR_BAR_ID] + | |
4218 | (addr - bar_base_addr)); | |
4219 | ||
4220 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, | |
4221 | ddr_bar_addr); | |
4222 | } | |
4223 | if (ddr_bar_addr == U64_MAX) | |
4224 | rc = -EIO; | |
4225 | ||
4226 | } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { | |
4227 | *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); | |
4228 | ||
4229 | } else { | |
4230 | rc = -EFAULT; | |
4231 | } | |
4232 | ||
4233 | return rc; | |
4234 | } | |
4235 | ||
4236 | static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) | |
4237 | { | |
4238 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
4239 | u64 ddr_bar_addr; | |
4240 | int rc = 0; | |
4241 | ||
4242 | if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { | |
4243 | WREG32(addr - CFG_BASE, lower_32_bits(val)); | |
4244 | WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); | |
4245 | ||
4246 | } else if ((addr >= SRAM_BASE_ADDR) && | |
4247 | (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { | |
4248 | ||
4249 | writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + | |
4250 | (addr - SRAM_BASE_ADDR)); | |
4251 | ||
2557f27f LJ |
4252 | } else if (addr <= |
4253 | DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { | |
5cce5146 MH |
4254 | |
4255 | u64 bar_base_addr = DRAM_PHYS_BASE + | |
4256 | (addr & ~(prop->dram_pci_bar_size - 0x1ull)); | |
4257 | ||
4258 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); | |
4259 | if (ddr_bar_addr != U64_MAX) { | |
4260 | writeq(val, hdev->pcie_bar[DDR_BAR_ID] + | |
4261 | (addr - bar_base_addr)); | |
4262 | ||
4263 | ddr_bar_addr = goya_set_ddr_bar_base(hdev, | |
4264 | ddr_bar_addr); | |
4265 | } | |
4266 | if (ddr_bar_addr == U64_MAX) | |
4267 | rc = -EIO; | |
4268 | ||
4269 | } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { | |
4270 | *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; | |
4271 | ||
4272 | } else { | |
4273 | rc = -EFAULT; | |
4274 | } | |
4275 | ||
4276 | return rc; | |
4277 | } | |
4278 | ||
0feaf86d OS |
4279 | static u64 goya_read_pte(struct hl_device *hdev, u64 addr) |
4280 | { | |
4281 | struct goya_device *goya = hdev->asic_specific; | |
4282 | ||
9f201aba OG |
4283 | if (hdev->hard_reset_pending) |
4284 | return U64_MAX; | |
4285 | ||
0feaf86d OS |
4286 | return readq(hdev->pcie_bar[DDR_BAR_ID] + |
4287 | (addr - goya->ddr_bar_cur_addr)); | |
4288 | } | |
4289 | ||
4290 | static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val) | |
4291 | { | |
4292 | struct goya_device *goya = hdev->asic_specific; | |
4293 | ||
9f201aba OG |
4294 | if (hdev->hard_reset_pending) |
4295 | return; | |
4296 | ||
0feaf86d OS |
4297 | writeq(val, hdev->pcie_bar[DDR_BAR_ID] + |
4298 | (addr - goya->ddr_bar_cur_addr)); | |
4299 | } | |
4300 | ||
60b7dcca | 4301 | static const char *_goya_get_event_desc(u16 event_type) |
1251f23a | 4302 | { |
60b7dcca | 4303 | switch (event_type) { |
460696ed OS |
4304 | case GOYA_ASYNC_EVENT_ID_PCIE_IF: |
4305 | return "PCIe_if"; | |
4306 | case GOYA_ASYNC_EVENT_ID_TPC0_ECC: | |
4307 | case GOYA_ASYNC_EVENT_ID_TPC1_ECC: | |
4308 | case GOYA_ASYNC_EVENT_ID_TPC2_ECC: | |
4309 | case GOYA_ASYNC_EVENT_ID_TPC3_ECC: | |
4310 | case GOYA_ASYNC_EVENT_ID_TPC4_ECC: | |
4311 | case GOYA_ASYNC_EVENT_ID_TPC5_ECC: | |
4312 | case GOYA_ASYNC_EVENT_ID_TPC6_ECC: | |
4313 | case GOYA_ASYNC_EVENT_ID_TPC7_ECC: | |
4314 | return "TPC%d_ecc"; | |
4315 | case GOYA_ASYNC_EVENT_ID_MME_ECC: | |
4316 | return "MME_ecc"; | |
4317 | case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT: | |
4318 | return "MME_ecc_ext"; | |
4319 | case GOYA_ASYNC_EVENT_ID_MMU_ECC: | |
4320 | return "MMU_ecc"; | |
4321 | case GOYA_ASYNC_EVENT_ID_DMA_MACRO: | |
4322 | return "DMA_macro"; | |
4323 | case GOYA_ASYNC_EVENT_ID_DMA_ECC: | |
4324 | return "DMA_ecc"; | |
4325 | case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC: | |
4326 | return "CPU_if_ecc"; | |
4327 | case GOYA_ASYNC_EVENT_ID_PSOC_MEM: | |
4328 | return "PSOC_mem"; | |
4329 | case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: | |
4330 | return "PSOC_coresight"; | |
4331 | case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: | |
4332 | return "SRAM%d"; | |
4333 | case GOYA_ASYNC_EVENT_ID_GIC500: | |
4334 | return "GIC500"; | |
4335 | case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: | |
4336 | return "PLL%d"; | |
4337 | case GOYA_ASYNC_EVENT_ID_AXI_ECC: | |
4338 | return "AXI_ecc"; | |
4339 | case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: | |
4340 | return "L2_ram_ecc"; | |
4341 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: | |
4342 | return "PSOC_gpio_05_sw_reset"; | |
4343 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: | |
4344 | return "PSOC_gpio_10_vrhot_icrit"; | |
60b7dcca TT |
4345 | case GOYA_ASYNC_EVENT_ID_PCIE_DEC: |
4346 | return "PCIe_dec"; | |
4347 | case GOYA_ASYNC_EVENT_ID_TPC0_DEC: | |
4348 | case GOYA_ASYNC_EVENT_ID_TPC1_DEC: | |
4349 | case GOYA_ASYNC_EVENT_ID_TPC2_DEC: | |
4350 | case GOYA_ASYNC_EVENT_ID_TPC3_DEC: | |
4351 | case GOYA_ASYNC_EVENT_ID_TPC4_DEC: | |
4352 | case GOYA_ASYNC_EVENT_ID_TPC5_DEC: | |
4353 | case GOYA_ASYNC_EVENT_ID_TPC6_DEC: | |
4354 | case GOYA_ASYNC_EVENT_ID_TPC7_DEC: | |
4355 | return "TPC%d_dec"; | |
4356 | case GOYA_ASYNC_EVENT_ID_MME_WACS: | |
4357 | return "MME_wacs"; | |
4358 | case GOYA_ASYNC_EVENT_ID_MME_WACSD: | |
4359 | return "MME_wacsd"; | |
4360 | case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER: | |
4361 | return "CPU_axi_splitter"; | |
4362 | case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC: | |
4363 | return "PSOC_axi_dec"; | |
4364 | case GOYA_ASYNC_EVENT_ID_PSOC: | |
4365 | return "PSOC"; | |
4366 | case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR: | |
4367 | case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR: | |
4368 | case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR: | |
4369 | case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR: | |
4370 | case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR: | |
4371 | case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR: | |
4372 | case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR: | |
4373 | case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR: | |
4374 | return "TPC%d_krn_err"; | |
4375 | case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ: | |
4376 | return "TPC%d_cq"; | |
4377 | case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM: | |
4378 | return "TPC%d_qm"; | |
4379 | case GOYA_ASYNC_EVENT_ID_MME_QM: | |
4380 | return "MME_qm"; | |
4381 | case GOYA_ASYNC_EVENT_ID_MME_CMDQ: | |
4382 | return "MME_cq"; | |
4383 | case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: | |
4384 | return "DMA%d_qm"; | |
4385 | case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: | |
4386 | return "DMA%d_ch"; | |
460696ed OS |
4387 | case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: |
4388 | case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: | |
4389 | case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: | |
4390 | case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: | |
4391 | case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: | |
4392 | case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: | |
4393 | case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: | |
4394 | case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: | |
4395 | return "TPC%d_bmon_spmu"; | |
4396 | case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: | |
4397 | return "DMA_bm_ch%d"; | |
4f0e6ab7 OS |
4398 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: |
4399 | return "POWER_ENV_S"; | |
4400 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: | |
4401 | return "POWER_ENV_E"; | |
4402 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: | |
4403 | return "THERMAL_ENV_S"; | |
4404 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: | |
4405 | return "THERMAL_ENV_E"; | |
60b7dcca TT |
4406 | default: |
4407 | return "N/A"; | |
4408 | } | |
1251f23a OG |
4409 | } |
4410 | ||
60b7dcca | 4411 | static void goya_get_event_desc(u16 event_type, char *desc, size_t size) |
1251f23a | 4412 | { |
60b7dcca TT |
4413 | u8 index; |
4414 | ||
4415 | switch (event_type) { | |
460696ed OS |
4416 | case GOYA_ASYNC_EVENT_ID_TPC0_ECC: |
4417 | case GOYA_ASYNC_EVENT_ID_TPC1_ECC: | |
4418 | case GOYA_ASYNC_EVENT_ID_TPC2_ECC: | |
4419 | case GOYA_ASYNC_EVENT_ID_TPC3_ECC: | |
4420 | case GOYA_ASYNC_EVENT_ID_TPC4_ECC: | |
4421 | case GOYA_ASYNC_EVENT_ID_TPC5_ECC: | |
4422 | case GOYA_ASYNC_EVENT_ID_TPC6_ECC: | |
4423 | case GOYA_ASYNC_EVENT_ID_TPC7_ECC: | |
4424 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3; | |
4425 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4426 | break; | |
4427 | case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: | |
4428 | index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0; | |
4429 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4430 | break; | |
4431 | case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: | |
4432 | index = event_type - GOYA_ASYNC_EVENT_ID_PLL0; | |
4433 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4434 | break; | |
60b7dcca TT |
4435 | case GOYA_ASYNC_EVENT_ID_TPC0_DEC: |
4436 | case GOYA_ASYNC_EVENT_ID_TPC1_DEC: | |
4437 | case GOYA_ASYNC_EVENT_ID_TPC2_DEC: | |
4438 | case GOYA_ASYNC_EVENT_ID_TPC3_DEC: | |
4439 | case GOYA_ASYNC_EVENT_ID_TPC4_DEC: | |
4440 | case GOYA_ASYNC_EVENT_ID_TPC5_DEC: | |
4441 | case GOYA_ASYNC_EVENT_ID_TPC6_DEC: | |
4442 | case GOYA_ASYNC_EVENT_ID_TPC7_DEC: | |
4443 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3; | |
4444 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4445 | break; | |
4446 | case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR: | |
4447 | case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR: | |
4448 | case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR: | |
4449 | case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR: | |
4450 | case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR: | |
4451 | case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR: | |
4452 | case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR: | |
4453 | case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR: | |
4454 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10; | |
4455 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4456 | break; | |
4457 | case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ: | |
4458 | index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ; | |
4459 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4460 | break; | |
4461 | case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM: | |
4462 | index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM; | |
4463 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4464 | break; | |
4465 | case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: | |
4466 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM; | |
4467 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4468 | break; | |
4469 | case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: | |
4470 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH; | |
4471 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4472 | break; | |
460696ed OS |
4473 | case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: |
4474 | case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: | |
4475 | case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: | |
4476 | case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: | |
4477 | case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: | |
4478 | case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: | |
4479 | case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: | |
4480 | case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: | |
4481 | index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10; | |
4482 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4483 | break; | |
4484 | case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: | |
4485 | index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0; | |
4486 | snprintf(desc, size, _goya_get_event_desc(event_type), index); | |
4487 | break; | |
60b7dcca TT |
4488 | default: |
4489 | snprintf(desc, size, _goya_get_event_desc(event_type)); | |
4490 | break; | |
1251f23a OG |
4491 | } |
4492 | } | |
4493 | ||
60b7dcca | 4494 | static void goya_print_razwi_info(struct hl_device *hdev) |
1251f23a | 4495 | { |
1251f23a | 4496 | if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) { |
e5509d52 | 4497 | dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n"); |
1251f23a | 4498 | WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0); |
1251f23a | 4499 | } |
60b7dcca | 4500 | |
1251f23a | 4501 | if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) { |
e5509d52 | 4502 | dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n"); |
1251f23a | 4503 | WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0); |
1251f23a | 4504 | } |
60b7dcca | 4505 | |
1251f23a | 4506 | if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) { |
e5509d52 | 4507 | dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n"); |
1251f23a | 4508 | WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0); |
1251f23a | 4509 | } |
60b7dcca | 4510 | |
1251f23a | 4511 | if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) { |
e5509d52 | 4512 | dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n"); |
1251f23a | 4513 | WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0); |
1251f23a | 4514 | } |
60b7dcca | 4515 | } |
1251f23a | 4516 | |
60b7dcca TT |
4517 | static void goya_print_mmu_error_info(struct hl_device *hdev) |
4518 | { | |
4519 | struct goya_device *goya = hdev->asic_specific; | |
4520 | u64 addr; | |
4521 | u32 val; | |
1251f23a | 4522 | |
60b7dcca TT |
4523 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) |
4524 | return; | |
1251f23a | 4525 | |
60b7dcca TT |
4526 | val = RREG32(mmMMU_PAGE_ERROR_CAPTURE); |
4527 | if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { | |
4528 | addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK; | |
4529 | addr <<= 32; | |
4530 | addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA); | |
1251f23a | 4531 | |
e5509d52 OG |
4532 | dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", |
4533 | addr); | |
60b7dcca TT |
4534 | |
4535 | WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0); | |
1251f23a OG |
4536 | } |
4537 | } | |
4538 | ||
460696ed OS |
4539 | static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, |
4540 | bool razwi) | |
60b7dcca TT |
4541 | { |
4542 | char desc[20] = ""; | |
4543 | ||
4544 | goya_get_event_desc(event_type, desc, sizeof(desc)); | |
e5509d52 | 4545 | dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", |
60b7dcca TT |
4546 | event_type, desc); |
4547 | ||
460696ed OS |
4548 | if (razwi) { |
4549 | goya_print_razwi_info(hdev); | |
4550 | goya_print_mmu_error_info(hdev); | |
4551 | } | |
60b7dcca TT |
4552 | } |
4553 | ||
f8c8c7d5 OG |
4554 | static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, |
4555 | size_t irq_arr_size) | |
4556 | { | |
2f55342c | 4557 | struct cpucp_unmask_irq_arr_packet *pkt; |
f8c8c7d5 | 4558 | size_t total_pkt_size; |
439bc47b | 4559 | u64 result; |
f8c8c7d5 | 4560 | int rc; |
b421d83a BS |
4561 | int irq_num_entries, irq_arr_index; |
4562 | __le32 *goya_irq_arr; | |
f8c8c7d5 | 4563 | |
2f55342c | 4564 | total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + |
f8c8c7d5 OG |
4565 | irq_arr_size; |
4566 | ||
6138bbe9 | 4567 | /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ |
f8c8c7d5 OG |
4568 | total_pkt_size = (total_pkt_size + 0x7) & ~0x7; |
4569 | ||
4570 | /* total_pkt_size is casted to u16 later on */ | |
4571 | if (total_pkt_size > USHRT_MAX) { | |
4572 | dev_err(hdev->dev, "too many elements in IRQ array\n"); | |
4573 | return -EINVAL; | |
4574 | } | |
4575 | ||
4576 | pkt = kzalloc(total_pkt_size, GFP_KERNEL); | |
4577 | if (!pkt) | |
4578 | return -ENOMEM; | |
4579 | ||
b421d83a BS |
4580 | irq_num_entries = irq_arr_size / sizeof(irq_arr[0]); |
4581 | pkt->length = cpu_to_le32(irq_num_entries); | |
4582 | ||
4583 | /* We must perform any necessary endianness conversation on the irq | |
4584 | * array being passed to the goya hardware | |
4585 | */ | |
4586 | for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs; | |
4587 | irq_arr_index < irq_num_entries ; irq_arr_index++) | |
4588 | goya_irq_arr[irq_arr_index] = | |
4589 | cpu_to_le32(irq_arr[irq_arr_index]); | |
f8c8c7d5 | 4590 | |
2f55342c OG |
4591 | pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << |
4592 | CPUCP_PKT_CTL_OPCODE_SHIFT); | |
f8c8c7d5 | 4593 | |
788cacf3 OG |
4594 | rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, |
4595 | total_pkt_size, 0, &result); | |
f8c8c7d5 OG |
4596 | |
4597 | if (rc) | |
4598 | dev_err(hdev->dev, "failed to unmask IRQ array\n"); | |
4599 | ||
4600 | kfree(pkt); | |
4601 | ||
4602 | return rc; | |
4603 | } | |
4604 | ||
4605 | static int goya_soft_reset_late_init(struct hl_device *hdev) | |
4606 | { | |
4607 | /* | |
4608 | * Unmask all IRQs since some could have been received | |
4609 | * during the soft reset | |
4610 | */ | |
b24ca458 OG |
4611 | return goya_unmask_irq_arr(hdev, goya_all_events, |
4612 | sizeof(goya_all_events)); | |
f8c8c7d5 OG |
4613 | } |
4614 | ||
1251f23a OG |
4615 | static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) |
4616 | { | |
2f55342c | 4617 | struct cpucp_packet pkt; |
439bc47b | 4618 | u64 result; |
1251f23a OG |
4619 | int rc; |
4620 | ||
4621 | memset(&pkt, 0, sizeof(pkt)); | |
4622 | ||
2f55342c OG |
4623 | pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << |
4624 | CPUCP_PKT_CTL_OPCODE_SHIFT); | |
df697bce | 4625 | pkt.value = cpu_to_le64(event_type); |
1251f23a | 4626 | |
788cacf3 OG |
4627 | rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), |
4628 | 0, &result); | |
1251f23a OG |
4629 | |
4630 | if (rc) | |
4631 | dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); | |
4632 | ||
4633 | return rc; | |
4634 | } | |
4635 | ||
4f0e6ab7 OS |
4636 | static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type) |
4637 | { | |
4638 | switch (event_type) { | |
4639 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: | |
0a068add | 4640 | hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; |
4f0e6ab7 OS |
4641 | dev_info_ratelimited(hdev->dev, |
4642 | "Clock throttling due to power consumption\n"); | |
4643 | break; | |
4644 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: | |
0a068add | 4645 | hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; |
4f0e6ab7 OS |
4646 | dev_info_ratelimited(hdev->dev, |
4647 | "Power envelop is safe, back to optimal clock\n"); | |
4648 | break; | |
4649 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: | |
0a068add | 4650 | hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; |
4f0e6ab7 OS |
4651 | dev_info_ratelimited(hdev->dev, |
4652 | "Clock throttling due to overheating\n"); | |
4653 | break; | |
4654 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: | |
0a068add | 4655 | hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; |
4f0e6ab7 OS |
4656 | dev_info_ratelimited(hdev->dev, |
4657 | "Thermal envelop is safe, back to optimal clock\n"); | |
4658 | break; | |
4659 | ||
4660 | default: | |
4661 | dev_err(hdev->dev, "Received invalid clock change event %d\n", | |
4662 | event_type); | |
4663 | break; | |
4664 | } | |
4665 | } | |
4666 | ||
1251f23a OG |
4667 | void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) |
4668 | { | |
df697bce TT |
4669 | u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); |
4670 | u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) | |
4671 | >> EQ_CTL_EVENT_TYPE_SHIFT); | |
1251f23a OG |
4672 | struct goya_device *goya = hdev->asic_specific; |
4673 | ||
4674 | goya->events_stat[event_type]++; | |
e9730763 | 4675 | goya->events_stat_aggregate[event_type]++; |
1251f23a OG |
4676 | |
4677 | switch (event_type) { | |
4678 | case GOYA_ASYNC_EVENT_ID_PCIE_IF: | |
4679 | case GOYA_ASYNC_EVENT_ID_TPC0_ECC: | |
4680 | case GOYA_ASYNC_EVENT_ID_TPC1_ECC: | |
4681 | case GOYA_ASYNC_EVENT_ID_TPC2_ECC: | |
4682 | case GOYA_ASYNC_EVENT_ID_TPC3_ECC: | |
4683 | case GOYA_ASYNC_EVENT_ID_TPC4_ECC: | |
4684 | case GOYA_ASYNC_EVENT_ID_TPC5_ECC: | |
4685 | case GOYA_ASYNC_EVENT_ID_TPC6_ECC: | |
4686 | case GOYA_ASYNC_EVENT_ID_TPC7_ECC: | |
4687 | case GOYA_ASYNC_EVENT_ID_MME_ECC: | |
4688 | case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT: | |
4689 | case GOYA_ASYNC_EVENT_ID_MMU_ECC: | |
4690 | case GOYA_ASYNC_EVENT_ID_DMA_MACRO: | |
4691 | case GOYA_ASYNC_EVENT_ID_DMA_ECC: | |
4692 | case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC: | |
4693 | case GOYA_ASYNC_EVENT_ID_PSOC_MEM: | |
4694 | case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT: | |
4695 | case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29: | |
4696 | case GOYA_ASYNC_EVENT_ID_GIC500: | |
460696ed | 4697 | case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6: |
1251f23a OG |
4698 | case GOYA_ASYNC_EVENT_ID_AXI_ECC: |
4699 | case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: | |
4700 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: | |
460696ed | 4701 | goya_print_irq_info(hdev, event_type, false); |
1fb2f374 OG |
4702 | if (hdev->hard_reset_on_fw_events) |
4703 | hl_device_reset(hdev, true, false); | |
1251f23a OG |
4704 | break; |
4705 | ||
4706 | case GOYA_ASYNC_EVENT_ID_PCIE_DEC: | |
4707 | case GOYA_ASYNC_EVENT_ID_TPC0_DEC: | |
4708 | case GOYA_ASYNC_EVENT_ID_TPC1_DEC: | |
4709 | case GOYA_ASYNC_EVENT_ID_TPC2_DEC: | |
4710 | case GOYA_ASYNC_EVENT_ID_TPC3_DEC: | |
4711 | case GOYA_ASYNC_EVENT_ID_TPC4_DEC: | |
4712 | case GOYA_ASYNC_EVENT_ID_TPC5_DEC: | |
4713 | case GOYA_ASYNC_EVENT_ID_TPC6_DEC: | |
4714 | case GOYA_ASYNC_EVENT_ID_TPC7_DEC: | |
4715 | case GOYA_ASYNC_EVENT_ID_MME_WACS: | |
4716 | case GOYA_ASYNC_EVENT_ID_MME_WACSD: | |
4717 | case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER: | |
4718 | case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC: | |
4719 | case GOYA_ASYNC_EVENT_ID_PSOC: | |
4720 | case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR: | |
4721 | case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR: | |
4722 | case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR: | |
4723 | case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR: | |
4724 | case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR: | |
4725 | case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR: | |
4726 | case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR: | |
4727 | case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR: | |
4728 | case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM: | |
4729 | case GOYA_ASYNC_EVENT_ID_MME_QM: | |
4730 | case GOYA_ASYNC_EVENT_ID_MME_CMDQ: | |
4731 | case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM: | |
4732 | case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH: | |
460696ed | 4733 | goya_print_irq_info(hdev, event_type, true); |
1251f23a OG |
4734 | goya_unmask_irq(hdev, event_type); |
4735 | break; | |
4736 | ||
717261e1 | 4737 | case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT: |
1251f23a OG |
4738 | case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU: |
4739 | case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU: | |
4740 | case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU: | |
4741 | case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU: | |
4742 | case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU: | |
4743 | case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU: | |
4744 | case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU: | |
4745 | case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU: | |
460696ed OS |
4746 | case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4: |
4747 | goya_print_irq_info(hdev, event_type, false); | |
4748 | goya_unmask_irq(hdev, event_type); | |
1251f23a OG |
4749 | break; |
4750 | ||
4f0e6ab7 OS |
4751 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: |
4752 | case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: | |
4753 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: | |
4754 | case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: | |
4755 | goya_print_clk_change_info(hdev, event_type); | |
4756 | goya_unmask_irq(hdev, event_type); | |
4757 | break; | |
4758 | ||
1251f23a OG |
4759 | default: |
4760 | dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", | |
4761 | event_type); | |
4762 | break; | |
4763 | } | |
4764 | } | |
4765 | ||
e9730763 | 4766 | void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) |
1251f23a OG |
4767 | { |
4768 | struct goya_device *goya = hdev->asic_specific; | |
4769 | ||
e9730763 OG |
4770 | if (aggregate) { |
4771 | *size = (u32) sizeof(goya->events_stat_aggregate); | |
4772 | return goya->events_stat_aggregate; | |
4773 | } | |
1251f23a | 4774 | |
e9730763 | 4775 | *size = (u32) sizeof(goya->events_stat); |
1251f23a OG |
4776 | return goya->events_stat; |
4777 | } | |
4778 | ||
ac742737 | 4779 | static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, |
27ca384c | 4780 | u64 val, bool is_dram) |
0feaf86d | 4781 | { |
27ca384c | 4782 | struct packet_lin_dma *lin_dma_pkt; |
0feaf86d | 4783 | struct hl_cs_job *job; |
df697bce | 4784 | u32 cb_size, ctl; |
0feaf86d | 4785 | struct hl_cb *cb; |
ac742737 | 4786 | int rc, lin_dma_pkts_cnt; |
0feaf86d | 4787 | |
ac742737 OG |
4788 | lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G); |
4789 | cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) + | |
4790 | sizeof(struct packet_msg_prot); | |
a04b7cd9 | 4791 | cb = hl_cb_kernel_create(hdev, cb_size, false); |
0feaf86d | 4792 | if (!cb) |
ac742737 | 4793 | return -ENOMEM; |
0feaf86d | 4794 | |
82948e6e | 4795 | lin_dma_pkt = cb->kernel_address; |
27ca384c | 4796 | |
ac742737 OG |
4797 | do { |
4798 | memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); | |
4799 | ||
4800 | ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) | | |
4801 | (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) | | |
4802 | (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) | | |
4803 | (1 << GOYA_PKT_CTL_RB_SHIFT) | | |
4804 | (1 << GOYA_PKT_CTL_MB_SHIFT)); | |
4805 | ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) << | |
4806 | GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; | |
4807 | lin_dma_pkt->ctl = cpu_to_le32(ctl); | |
4808 | ||
4809 | lin_dma_pkt->src_addr = cpu_to_le64(val); | |
4810 | lin_dma_pkt->dst_addr = cpu_to_le64(addr); | |
4811 | if (lin_dma_pkts_cnt > 1) | |
4812 | lin_dma_pkt->tsize = cpu_to_le32(SZ_2G); | |
4813 | else | |
4814 | lin_dma_pkt->tsize = cpu_to_le32(size); | |
0feaf86d | 4815 | |
ac742737 OG |
4816 | size -= SZ_2G; |
4817 | addr += SZ_2G; | |
4818 | lin_dma_pkt++; | |
4819 | } while (--lin_dma_pkts_cnt); | |
0feaf86d | 4820 | |
cb596aee | 4821 | job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); |
0feaf86d OS |
4822 | if (!job) { |
4823 | dev_err(hdev->dev, "Failed to allocate a new job\n"); | |
4824 | rc = -ENOMEM; | |
4825 | goto release_cb; | |
4826 | } | |
4827 | ||
4828 | job->id = 0; | |
4829 | job->user_cb = cb; | |
f0748674 | 4830 | atomic_inc(&job->user_cb->cs_cnt); |
0feaf86d OS |
4831 | job->user_cb_size = cb_size; |
4832 | job->hw_queue_id = GOYA_QUEUE_ID_DMA_0; | |
3706b470 | 4833 | job->patched_cb = job->user_cb; |
ac742737 | 4834 | job->job_cb_size = job->user_cb_size; |
0feaf86d | 4835 | |
c2164773 OG |
4836 | hl_debugfs_add_job(hdev, job); |
4837 | ||
0feaf86d OS |
4838 | rc = goya_send_job_on_qman0(hdev, job); |
4839 | ||
c2164773 | 4840 | hl_debugfs_remove_job(hdev, job); |
0feaf86d | 4841 | kfree(job); |
f0748674 | 4842 | atomic_dec(&cb->cs_cnt); |
0feaf86d OS |
4843 | |
4844 | release_cb: | |
4845 | hl_cb_put(cb); | |
4846 | hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); | |
4847 | ||
4848 | return rc; | |
4849 | } | |
4850 | ||
b2377e03 | 4851 | int goya_context_switch(struct hl_device *hdev, u32 asid) |
27ca384c OS |
4852 | { |
4853 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
5c823ae1 | 4854 | u64 addr = prop->sram_base_address, sob_addr; |
27ca384c OS |
4855 | u32 size = hdev->pldm ? 0x10000 : prop->sram_size; |
4856 | u64 val = 0x7777777777777777ull; | |
5c823ae1 DBZ |
4857 | int rc, dma_id; |
4858 | u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO - | |
4859 | mmDMA_CH_0_WR_COMP_ADDR_LO; | |
27ca384c OS |
4860 | |
4861 | rc = goya_memset_device_memory(hdev, addr, size, val, false); | |
4862 | if (rc) { | |
4863 | dev_err(hdev->dev, "Failed to clear SRAM in context switch\n"); | |
4864 | return rc; | |
4865 | } | |
4866 | ||
5c823ae1 DBZ |
4867 | /* we need to reset registers that the user is allowed to change */ |
4868 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; | |
4869 | WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr)); | |
4870 | ||
4871 | for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) { | |
4872 | sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 + | |
4873 | (dma_id - 1) * 4; | |
4874 | WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id, | |
4875 | lower_32_bits(sob_addr)); | |
4876 | } | |
4877 | ||
a691a1eb | 4878 | WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020); |
5c823ae1 | 4879 | |
9c46f7b1 DBZ |
4880 | goya_clear_sm_regs(hdev); |
4881 | ||
27ca384c OS |
4882 | return 0; |
4883 | } | |
4884 | ||
95b5a8b8 | 4885 | static int goya_mmu_clear_pgt_range(struct hl_device *hdev) |
27ca384c OS |
4886 | { |
4887 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
4888 | struct goya_device *goya = hdev->asic_specific; | |
4889 | u64 addr = prop->mmu_pgt_addr; | |
4890 | u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE + | |
4891 | MMU_CACHE_MNG_SIZE; | |
4892 | ||
4893 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) | |
4894 | return 0; | |
4895 | ||
4896 | return goya_memset_device_memory(hdev, addr, size, 0, true); | |
4897 | } | |
4898 | ||
95b5a8b8 | 4899 | static int goya_mmu_set_dram_default_page(struct hl_device *hdev) |
27ca384c OS |
4900 | { |
4901 | struct goya_device *goya = hdev->asic_specific; | |
4902 | u64 addr = hdev->asic_prop.mmu_dram_default_page_addr; | |
4903 | u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE; | |
4904 | u64 val = 0x9999999999999999ull; | |
4905 | ||
4906 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) | |
4907 | return 0; | |
4908 | ||
4909 | return goya_memset_device_memory(hdev, addr, size, val, true); | |
4910 | } | |
4911 | ||
/*
 * goya_mmu_add_mappings_for_device_cpu() - create kernel-context MMU
 * mappings needed by the on-device CPU.
 * @hdev: habanalabs device structure.
 *
 * Maps two regions into the kernel context:
 *  1. The CPU F/W image region at the DRAM base, identity-mapped with
 *     2MB pages.
 *  2. The host-side CPU-accessible DMA buffer at the fixed virtual
 *     address VA_CPU_ACCESSIBLE_MEM_ADDR - with a single 2MB page when
 *     the DMA address is 2MB aligned, otherwise with 4KB pages.
 * Finally it programs the CPU interface AXI user override registers so
 * the device CPU issues transactions with the kernel ASID.
 *
 * On failure every page mapped so far is unmapped, in reverse, via the
 * unmap_cpu/unmap labels (note the deliberate off/cpu_off decrements
 * before the gotos so the loops start at the last successful mapping).
 *
 * Return: 0 on success or when the MMU is not initialized, otherwise
 * the error from hl_mmu_map_page().
 */
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;	/* signed: the unwind loops count down past 0 */
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	/* Identity-map the CPU F/W image with 2MB pages; flush the MMU
	 * cache only on the last page of the region.
	 */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
			prop->dram_base_address + off,
			prop->dram_base_address + off, PAGE_SIZE_2MB,
			(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* DMA buffer is 2MB aligned - one huge-page mapping is enough */
		rc = hl_mmu_map_page(hdev->kernel_ctx,
			VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address,
			PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			/* step back so the unmap loop ends at the last
			 * successfully mapped F/W image page
			 */
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		/* Unaligned DMA buffer - cover the 2MB window with 4KB pages */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	/* Route device-CPU reads/writes through the MMU with the kernel ASID */
	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	/* Roll back the 4KB mappings created so far, last-to-first */
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	/* Roll back the 2MB F/W image mappings, last-to-first */
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
4992 | ||
/*
 * goya_mmu_remove_device_cpu_mappings() - tear down the device-CPU MMU
 * mappings created by goya_mmu_add_mappings_for_device_cpu().
 * @hdev: habanalabs device structure.
 *
 * Disables the CPU interface AXI user overrides first, then unmaps the
 * CPU-accessible DMA window (2MB page or 4KB pages, mirroring how it
 * was mapped) and the 2MB identity mappings of the CPU F/W image.
 * Unmap failures are only warned about - teardown continues.
 */
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* nothing to undo if the mappings were never established */
	if (!goya->device_cpu_mmu_mappings_done)
		return;

	/* stop routing device-CPU traffic through the MMU before unmapping */
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* aligned buffer was mapped with a single 2MB page */
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		/* unaligned buffer was covered with 4KB pages; flush the
		 * MMU cache only on the last one
		 */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	/* drop the 2MB identity mappings of the CPU F/W image */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}
5035 | ||
5036 | static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) | |
0feaf86d OS |
5037 | { |
5038 | struct goya_device *goya = hdev->asic_specific; | |
5039 | int i; | |
5040 | ||
5041 | if (!(goya->hw_cap_initialized & HW_CAP_MMU)) | |
5042 | return; | |
5043 | ||
5044 | if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) { | |
75d9a2a0 | 5045 | dev_crit(hdev->dev, "asid %u is too big\n", asid); |
0feaf86d OS |
5046 | return; |
5047 | } | |
5048 | ||
5049 | /* zero the MMBP and ASID bits and then set the ASID */ | |
bedd1442 | 5050 | for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) |
b2377e03 | 5051 | goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid); |
0feaf86d OS |
5052 | } |
5053 | ||
8ff5f4fd | 5054 | static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, |
7b6e4ea0 | 5055 | u32 flags) |
0feaf86d OS |
5056 | { |
5057 | struct goya_device *goya = hdev->asic_specific; | |
5058 | u32 status, timeout_usec; | |
5059 | int rc; | |
5060 | ||
bc75d799 OS |
5061 | if (!(goya->hw_cap_initialized & HW_CAP_MMU) || |
5062 | hdev->hard_reset_pending) | |
8ff5f4fd | 5063 | return 0; |
0feaf86d OS |
5064 | |
5065 | /* no need in L1 only invalidation in Goya */ | |
5066 | if (!is_hard) | |
8ff5f4fd | 5067 | return 0; |
0feaf86d OS |
5068 | |
5069 | if (hdev->pldm) | |
5070 | timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC; | |
5071 | else | |
5072 | timeout_usec = MMU_CONFIG_TIMEOUT_USEC; | |
5073 | ||
0feaf86d OS |
5074 | /* L0 & L1 invalidation */ |
5075 | WREG32(mmSTLB_INV_ALL_START, 1); | |
5076 | ||
5077 | rc = hl_poll_timeout( | |
5078 | hdev, | |
5079 | mmSTLB_INV_ALL_START, | |
5080 | status, | |
5081 | !status, | |
5082 | 1000, | |
5083 | timeout_usec); | |
5084 | ||
8ff5f4fd OS |
5085 | if (rc) { |
5086 | dev_err_ratelimited(hdev->dev, | |
5087 | "MMU cache invalidation timeout\n"); | |
5088 | hl_device_reset(hdev, true, false); | |
5089 | } | |
5090 | ||
5091 | return rc; | |
0feaf86d OS |
5092 | } |
5093 | ||
/*
 * goya_mmu_invalidate_cache_range() - invalidate MMU cache for a range.
 * @hdev: habanalabs device structure.
 * @is_hard: request hard invalidation; soft-only requests are a no-op.
 * @asid: ASID of the range (currently unused - see TODO below).
 * @va: start virtual address (currently unused).
 * @size: range size (currently unused).
 *
 * Currently performs a full L0 & L1 invalidation via the STLB cache
 * invalidation producer/consumer index pair: the driver bumps the 8-bit
 * producer index and polls until the consumer index catches up. On
 * timeout the device is hard-reset.
 *
 * Return: 0 on success or when skipped, polling error otherwise.
 */
static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
			hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines with
	 * mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	/* advance the producer index; keep the index-mask field intact */
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	/* wait for the H/W consumer index to reach our producer index */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
			"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
5144 | ||
f8c8c7d5 OG |
5145 | int goya_send_heartbeat(struct hl_device *hdev) |
5146 | { | |
5147 | struct goya_device *goya = hdev->asic_specific; | |
f8c8c7d5 OG |
5148 | |
5149 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
5150 | return 0; | |
5151 | ||
3110c60f | 5152 | return hl_fw_send_heartbeat(hdev); |
f8c8c7d5 OG |
5153 | } |
5154 | ||
2f55342c | 5155 | int goya_cpucp_info_get(struct hl_device *hdev) |
d91389bc OG |
5156 | { |
5157 | struct goya_device *goya = hdev->asic_specific; | |
5158 | struct asic_fixed_properties *prop = &hdev->asic_prop; | |
d91389bc | 5159 | u64 dram_size; |
d91389bc OG |
5160 | int rc; |
5161 | ||
5162 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
5163 | return 0; | |
5164 | ||
4147864e | 5165 | rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0); |
3110c60f TT |
5166 | if (rc) |
5167 | return rc; | |
d91389bc | 5168 | |
2f55342c | 5169 | dram_size = le64_to_cpu(prop->cpucp_info.dram_size); |
d91389bc OG |
5170 | if (dram_size) { |
5171 | if ((!is_power_of_2(dram_size)) || | |
5172 | (dram_size < DRAM_PHYS_DEFAULT_SIZE)) { | |
5173 | dev_err(hdev->dev, | |
5174 | "F/W reported invalid DRAM size %llu. Trying to use default size\n", | |
5175 | dram_size); | |
5176 | dram_size = DRAM_PHYS_DEFAULT_SIZE; | |
5177 | } | |
5178 | ||
5179 | prop->dram_size = dram_size; | |
5180 | prop->dram_end_address = prop->dram_base_address + dram_size; | |
5181 | } | |
5182 | ||
2f55342c OG |
5183 | if (!strlen(prop->cpucp_info.card_name)) |
5184 | strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, | |
0996bd1c OG |
5185 | CARD_NAME_MAX_LEN); |
5186 | ||
3110c60f | 5187 | return 0; |
d91389bc OG |
5188 | } |
5189 | ||
/*
 * goya_set_clock_gating() - ASIC hook, intentionally empty: Goya has no
 * clock gating support. Kept so common code can call it unconditionally.
 */
static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5194 | ||
/*
 * goya_disable_clock_gating() - ASIC hook, intentionally empty: Goya has
 * no clock gating support, so there is nothing to disable.
 */
static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}
5199 | ||
/*
 * goya_is_device_idle() - check whether all compute engines are idle.
 * @hdev: habanalabs device structure.
 * @mask: optional output bitmask; bit GOYA_ENGINE_ID_x is set for each
 *        engine that is busy.
 * @s: optional seq_file; when given, a human-readable status table is
 *     printed per engine (used by debugfs).
 *
 * Polls the QMAN/CMDQ/engine status registers of every DMA channel,
 * every TPC and the MME.
 *
 * Return: true if all engines are idle, false otherwise.
 */
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
				struct seq_file *s)
{
	/* table row formats: TPC/MME rows have three status columns,
	 * DMA rows have two
	 */
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
			"--- ------- ------------ -------------\n");

	/* register stride between consecutive DMA channels */
	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
			"--- ------- ------------ -------------- ----------\n");

	/* register stride between consecutive TPC engines */
	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
			"--- ------- ------------ -------------- -----------\n");

	/* single MME engine - no stride loop needed */
	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}
5279 | ||
9494a8dd | 5280 | static void goya_hw_queues_lock(struct hl_device *hdev) |
cf87f966 | 5281 | __acquires(&goya->hw_queues_lock) |
9494a8dd OG |
5282 | { |
5283 | struct goya_device *goya = hdev->asic_specific; | |
5284 | ||
5285 | spin_lock(&goya->hw_queues_lock); | |
5286 | } | |
5287 | ||
5288 | static void goya_hw_queues_unlock(struct hl_device *hdev) | |
8a7a88c1 | 5289 | __releases(&goya->hw_queues_lock) |
9494a8dd OG |
5290 | { |
5291 | struct goya_device *goya = hdev->asic_specific; | |
5292 | ||
5293 | spin_unlock(&goya->hw_queues_lock); | |
5294 | } | |
5295 | ||
d8dd7b0a OG |
5296 | static u32 goya_get_pci_id(struct hl_device *hdev) |
5297 | { | |
5298 | return hdev->pdev->device; | |
5299 | } | |
5300 | ||
5e6e0239 OG |
5301 | static int goya_get_eeprom_data(struct hl_device *hdev, void *data, |
5302 | size_t max_size) | |
d91389bc OG |
5303 | { |
5304 | struct goya_device *goya = hdev->asic_specific; | |
d91389bc OG |
5305 | |
5306 | if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) | |
5307 | return 0; | |
5308 | ||
3110c60f | 5309 | return hl_fw_get_eeprom_data(hdev, data, max_size); |
d91389bc OG |
5310 | } |
5311 | ||
/*
 * goya_ctx_init() - per-context ASIC initialization.
 * @ctx: the context being created.
 *
 * User contexts get their ASID programmed into the engine QMANs; the
 * kernel context is skipped (its MMU setup is done separately).
 *
 * Return: always 0.
 */
static int goya_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid != HL_KERNEL_ASID_ID)
		goya_mmu_prepare(ctx->hdev, ctx->asid);

	return 0;
}
5319 | ||
/* On Goya, completion queues map 1:1 to queue IDs - return the index as-is */
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}
5324 | ||
/* Signal/wait command submissions are not supported on Goya - no CB space */
static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}
5329 | ||
/* Signal/wait command submissions are not supported on Goya - no CB space */
static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}
5334 | ||
/* Signal CB generation is unsupported on Goya - emits nothing, size 0 */
static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	return 0;
}
5340 | ||
/* Wait CB generation is unsupported on Goya - emits nothing, size 0 */
static u32 goya_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	return 0;
}
5346 | ||
/* Sync objects are not used on Goya - nothing to reset */
static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}
5351 | ||
/* Sync object groups are not used on Goya - nothing to reset */
static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{

}
5356 | ||
cb056b9f OG |
5357 | static void goya_set_dma_mask_from_fw(struct hl_device *hdev) |
5358 | { | |
5359 | if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == | |
5360 | HL_POWER9_HOST_MAGIC) { | |
5361 | dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n"); | |
5362 | hdev->power9_64bit_dma_enable = 1; | |
5363 | hdev->dma_mask = 64; | |
5364 | } else { | |
5365 | dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n"); | |
5366 | hdev->power9_64bit_dma_enable = 0; | |
5367 | hdev->dma_mask = 48; | |
5368 | } | |
5369 | } | |
5370 | ||
25e7aeba TT |
5371 | u64 goya_get_device_time(struct hl_device *hdev) |
5372 | { | |
5373 | u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; | |
5374 | ||
5375 | return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); | |
5376 | } | |
5377 | ||
/* Collective wait command submissions are not supported on Goya - no-op */
static void goya_collective_wait_init_cs(struct hl_cs *cs)
{

}
5382 | ||
/*
 * Collective wait command submissions are not supported on Goya.
 *
 * Return: always -EINVAL so callers attempting collective waits fail fast.
 */
static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	return -EINVAL;
}
5389 | ||
/* No per-context ASIC state to release on Goya - intentionally empty */
static void goya_ctx_fini(struct hl_ctx *ctx)
{

}
5394 | ||
/*
 * Dispatch table binding the ASIC-agnostic habanalabs core to the Goya
 * implementation. The common code invokes these through
 * hdev->asic_funcs; keep entries in sync with struct hl_asic_funcs.
 */
static const struct hl_asic_funcs goya_funcs = {
	/* device bring-up / teardown */
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	/* queue handling */
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	/* DMA memory services */
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.scrub_device_mem = goya_scrub_device_mem,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	/* command submission */
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	/* debugfs accessors */
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	/* sysfs / events / power */
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	/* MMU */
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	/* firmware / misc services */
	.send_heartbeat = goya_send_heartbeat,
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.ctx_fini = goya_ctx_fini,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	/* signal/wait & collective CS - stubbed, unsupported on Goya */
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.reset_sob_group = goya_reset_sob_group,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time,
	.collective_wait_init_cs = goya_collective_wait_init_cs,
	.collective_wait_create_jobs = goya_collective_wait_create_jobs
};
5473 | ||
/*
 * goya_set_asic_funcs() - install the Goya ASIC function table.
 *
 * @hdev: pointer to hl_device structure
 *
 * Called during device initialization so the ASIC-agnostic core
 * dispatches through the Goya implementation.
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}