habanalabs: replace WARN/WARN_ON with dev_crit in driver
linux-block.git: drivers/misc/habanalabs/goya/goya.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG and MSG_PROT are not allowed
 *     - MSG_LONG/SHORT are allowed
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because CP is secured the driver still needs to
 * parse the CB; it doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
 *
 * DMA RR does NOT protect host because DMA is not secured
 *
 */

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

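/*
 * Illustrative usage of the idle-check macros above (a sketch, not part of
 * the original file): each macro compares a status-register snapshot
 * against the engine's idle mask, e.g.
 *
 *	u32 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0);
 *	bool idle = IS_DMA_QM_IDLE(qm_glbl_sts0);
 */
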
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}

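/*
 * Illustrative sketch (not part of the original file): a CB parser would
 * typically combine the two tables above, e.g.
 *
 *	if (!validate_packet_id(pkt_id))
 *		return -EINVAL;
 *	parsed_len += goya_packet_sizes[pkt_id];
 *
 * where pkt_id is the packet opcode decoded from the CB word.
 */
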
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop0_shift = HOP0_SHIFT;
	prop->dmmu.hop1_shift = HOP1_SHIFT;
	prop->dmmu.hop2_shift = HOP2_SHIFT;
	prop->dmmu.hop3_shift = HOP3_SHIFT;
	prop->dmmu.hop4_shift = HOP4_SHIFT;
	prop->dmmu.hop0_mask = HOP0_MASK;
	prop->dmmu.hop1_mask = HOP1_MASK;
	prop->dmmu.hop2_mask = HOP2_MASK;
	prop->dmmu.hop3_mask = HOP3_MASK;
	prop->dmmu.hop4_mask = HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}

/*
 * goya_pci_bars_map - Map PCI BARs of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

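/*
 * Note (illustrative, not part of the original file): SRAM and the
 * configuration space share one BAR, which is why hdev->rmmio above is
 * derived by offsetting the mapped BAR with (CFG_BASE - SRAM_BASE_ADDR).
 */
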
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
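
/*
 * Note (illustrative, not part of the original file): the DDR BAR is a
 * fixed-size window into device DRAM. Callers that must reach an arbitrary
 * DRAM address first slide the window with goya_set_ddr_bar_base() and
 * later restore the returned old base.
 */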

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}
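
/*
 * Note (illustrative, not part of the original file): the iATU setup above
 * programs two inbound regions in BAR match mode (BAR 0 -> SRAM/CFG,
 * BAR 4 -> DDR) and one outbound window so the device can master
 * transactions toward host memory at HOST_PHYS_BASE..HOST_PHYS_SIZE.
 */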

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	rc = goya_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

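	/* Read back the register to flush the posted write */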
	RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}

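/*
 * Worked example for the PLL math above (illustrative, not part of the
 * original file; the register values are assumptions chosen only to
 * demonstrate the formula): with PLL_REF_CLK = 50 MHz, nf = 31, nr = 0 and
 * od = 1, pll_clk = 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800 MHz; with
 * div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 3, the reported frequency
 * is 800 / (3 + 1) = 200 MHz.
 */
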
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
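
/*
 * Note (illustrative, not part of the original file): the CPU-accessible
 * region is a single coherent DMA allocation handed to a genalloc pool, so
 * later callers can carve small sub-buffers with
 * gen_pool_alloc(hdev->cpu_accessible_dma_pool, size) and return them via
 * gen_pool_free(), instead of doing a coherent allocation each time.
 */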

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}
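
/*
 * Note (illustrative, not part of the original file): each external DMA
 * queue i is paired 1:1 with completion queue i and MSI-X vector i, which
 * is why the loop above simply assigns q->cq_id = q->msi_vec = i.
 */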

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
		u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
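
/*
 * Note (illustrative, not part of the original file): the stop sequence
 * above is two-phase - if the CP is blocked on a fence, first poll until
 * the fence clears (a timeout here means the QMAN is stuck, so it is
 * treated as stopped), then poll GLBL_STS0 until the CP acknowledges the
 * stop request.
 */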

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
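
/*
 * Note (illustrative, not part of the original file): the handshake above
 * is driver-initiated - the driver publishes the PQ/CQ/EQ addresses,
 * writes PQ_INIT_STATUS_READY_FOR_CP, kicks the embedded CPU through the
 * GIC, and then polls until the firmware flips the status register to
 * PQ_INIT_STATUS_READY_FOR_HOST.
 */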

static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	/* Workaround for H2 #2443 */

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}
1372
1373/*
1374 * goya_init_golden_registers - Initialize golden registers
1375 *
1376 * @hdev: pointer to hl_device structure
1377 *
1378 * Initialize the H/W registers of the device
1379 *
1380 */
1381static void goya_init_golden_registers(struct hl_device *hdev)
1382{
1383 struct goya_device *goya = hdev->asic_specific;
1384 u32 polynom[10], tpc_intr_mask, offset;
1385 int i;
1386
1387 if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1388 return;
1389
1390 polynom[0] = 0x00020080;
1391 polynom[1] = 0x00401000;
1392 polynom[2] = 0x00200800;
1393 polynom[3] = 0x00002000;
1394 polynom[4] = 0x00080200;
1395 polynom[5] = 0x00040100;
1396 polynom[6] = 0x00100400;
1397 polynom[7] = 0x00004000;
1398 polynom[8] = 0x00010000;
1399 polynom[9] = 0x00008000;
1400
1401 /* Mask all arithmetic interrupts from TPC */
1402 tpc_intr_mask = 0x7FFF;
1403
1404 for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1405 WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1406 WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1407 WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1408 WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1409 WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1410
1411 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1412 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1413 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1414 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1415 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1416
1417
1418 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1419 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1420 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1421 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1422 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1423
1424 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1425 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1426 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1427 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1428 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1429
1430 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1431 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1432 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1433 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1434 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1435
1436 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1437 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1438 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1439 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1440 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1441 }
1442
1443 WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1444 WREG32(mmMME_AGU, 0x0f0f0f10);
1445 WREG32(mmMME_SEI_MASK, ~0x0);
1446
1447 WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1448 WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1449 WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1450 WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1451 WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1452 WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1453 WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1454 WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1455 WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1456 WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1457 WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1458 WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1459 WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1460 WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1461 WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1462 WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1463 WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1464 WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1465 WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1466 WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1467 WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1468 WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1469 WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1470 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1471 WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1472 WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1473 WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1474 WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1475 WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1476 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1477 WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1478 WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1479 WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1480 WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1481 WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1482 WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1483 WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1484 WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1485 WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1486 WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1487 WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1488 WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1489 WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1490 WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1491 WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1492 WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1493 WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1494 WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1495 WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1496 WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1497 WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1498 WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1499 WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1500 WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1501 WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1502 WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1503 WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1504 WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1505 WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1506 WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1507 WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1508 WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1509 WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1510 WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1511 WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1512 WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1513 WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1514 WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1515 WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1516 WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1517 WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1518 WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1519 WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1520 WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1521 WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1522 WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1523 WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1524 WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1525 WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1526 WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1527 WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1528 WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1529 WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1530 WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1531
1532 WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1533 WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1534 WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1535 WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1536 WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1537 WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1538 WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1539 WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1540 WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1541 WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1542 WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1543 WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1544
1545 WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1546 WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1547 WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1548 WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1549 WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1550 WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1551 WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1552 WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1553 WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1554 WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1555 WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1556 WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1557
1558 WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1559 WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1560 WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1561 WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1562 WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1563 WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1564 WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1565 WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1566 WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1567 WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1568 WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1569 WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1570
1571 WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1572 WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1573 WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1574 WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1575 WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1576 WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1577 WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1578 WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1579 WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1580 WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1581 WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1582 WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1583
1584 WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1585 WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1586 WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1587 WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1588 WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1589 WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1590 WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1591 WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1592 WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1593 WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1594 WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1595 WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1596
1597 WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1598 WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1599 WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1600 WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1601 WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1602 WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1603 WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1604 WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1605 WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1606 WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1607 WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1608 WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1609
1610 for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1611 WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1612 WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1613 WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1614 WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1615 WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1616 WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1617
1618 WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1619 WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1620 WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1621 WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1622 WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1623 WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1624 WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1625 WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1626
1627 WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1628 WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1629 }
1630
1631 for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1632 WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1633 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1634 WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1635 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1636 }
1637
1638 for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1639 /*
1640 * Workaround for Bug H2 #2441 :
1641 * "ST.NOP set trace event illegal opcode"
1642 */
1643 WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1644
1645 WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1646 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1647 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1648 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1649
1650 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1651 ICACHE_FETCH_LINE_NUM, 2);
1652 }
1653
1654 WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1655 WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1656 1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1657
1658 WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1659 WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1660 1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1661
1662	/*
1663	 * Workaround for H2 #HW-23 bug
1664	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
1665	 * This limitation is still large enough to not affect Gen4 bandwidth.
1666	 * We only need to limit that DMA channel because the user can only read
1667	 * from Host using DMA CH 1
1668	 */
839c4803 1669 WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
8ba2876d 1670
a691a1eb 1671 WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1672
1673 goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1674}
1675
9494a8dd 1676static void goya_init_mme_qman(struct hl_device *hdev)
839c4803 1677{
1678 u32 mtr_base_lo, mtr_base_hi;
1679 u32 so_base_lo, so_base_hi;
1680 u32 gic_base_lo, gic_base_hi;
1681 u64 qman_base_addr;
839c4803 1682
1683 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1684 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1685 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1686 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
839c4803 1687
1688 gic_base_lo =
1689 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1690 gic_base_hi =
1691 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
839c4803 1692
1693 qman_base_addr = hdev->asic_prop.sram_base_address +
1694 MME_QMAN_BASE_OFFSET;
839c4803 1695
1696 WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1697 WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1698 WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1699 WREG32(mmMME_QM_PQ_PI, 0);
1700 WREG32(mmMME_QM_PQ_CI, 0);
1701 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1702 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1703 WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1704 WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
839c4803 1705
1706 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1707 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1708 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1709 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
839c4803 1710
1711 /* QMAN CQ has 8 cache lines */
1712 WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
839c4803 1713
1714 WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1715 WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
839c4803 1716
9494a8dd 1717 WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
839c4803 1718
9494a8dd 1719 WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
839c4803 1720
1721 WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1722
1723 WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1724}
1725
9494a8dd 1726static void goya_init_mme_cmdq(struct hl_device *hdev)
839c4803 1727{
1728 u32 mtr_base_lo, mtr_base_hi;
1729 u32 so_base_lo, so_base_hi;
1730 u32 gic_base_lo, gic_base_hi;
839c4803 1731
1732 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1733 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1734 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1735 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
839c4803 1736
1737 gic_base_lo =
1738 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1739 gic_base_hi =
1740 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
839c4803 1741
1742 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1743 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1744 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1745 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
839c4803 1746
1747 /* CMDQ CQ has 20 cache lines */
1748 WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
839c4803 1749
1750 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1751 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
839c4803 1752
9494a8dd 1753 WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
839c4803 1754
9494a8dd 1755 WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
839c4803 1756
1757 WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1758
1759 WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1760}
1761
b2377e03 1762void goya_init_mme_qmans(struct hl_device *hdev)
839c4803 1763{
1764 struct goya_device *goya = hdev->asic_specific;
1765 u32 so_base_lo, so_base_hi;
839c4803 1766
9494a8dd 1767 if (goya->hw_cap_initialized & HW_CAP_MME)
839c4803 1768 return;
839c4803 1769
1770 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1771 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
839c4803 1772
1773 WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1774 WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1775
1776 goya_init_mme_qman(hdev);
1777 goya_init_mme_cmdq(hdev);
1778
1779 goya->hw_cap_initialized |= HW_CAP_MME;
1780}
1781
1782static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1783{
1784 u32 mtr_base_lo, mtr_base_hi;
1785 u32 so_base_lo, so_base_hi;
1786 u32 gic_base_lo, gic_base_hi;
1787 u64 qman_base_addr;
1788 u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1789
1790 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1791 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1792 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1793 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1794
1795 gic_base_lo =
1796 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1797 gic_base_hi =
1798 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1799
1800 qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1801
1802 WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1803 WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1804 WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1805 WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1806 WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1807 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1808 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1809 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1810 WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1811
1812 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1813 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1814 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1815 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1816
1817 WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1818
1819 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1820 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1821
1822 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1823 GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1824
1825 WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1826
1827 WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1828
1829 WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1830}
1831
1832static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1833{
1834 u32 mtr_base_lo, mtr_base_hi;
1835 u32 so_base_lo, so_base_hi;
1836 u32 gic_base_lo, gic_base_hi;
1837 u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1838
1839 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1840 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1841 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1842 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1843
1844 gic_base_lo =
1845 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1846 gic_base_hi =
1847 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1848
1849 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1850 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1851 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1852 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1853
1854 WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1855
1856 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1857 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1858
1859 WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1860 GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1861
1862 WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1863
1864 WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1865
1866 WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1867}
1868
b2377e03 1869void goya_init_tpc_qmans(struct hl_device *hdev)
1870{
1871 struct goya_device *goya = hdev->asic_specific;
1872 u32 so_base_lo, so_base_hi;
1873 u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1874 mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1875 int i;
1876
1877 if (goya->hw_cap_initialized & HW_CAP_TPC)
1878 return;
1879
1880 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1881 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1882
1883 for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1884 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1885 so_base_lo);
1886 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1887 so_base_hi);
1888 }
1889
1890 goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1891 goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1892 goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1893 goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1894 goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1895 goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1896 goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1897 goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1898
1899 for (i = 0 ; i < TPC_MAX_NUM ; i++)
1900 goya_init_tpc_cmdq(hdev, i);
1901
1902 goya->hw_cap_initialized |= HW_CAP_TPC;
1903}
1904
1905/*
1906 * goya_disable_internal_queues - Disable internal queues
1907 *
1908 * @hdev: pointer to hl_device structure
1909 *
1910 */
1911static void goya_disable_internal_queues(struct hl_device *hdev)
1912{
1913 struct goya_device *goya = hdev->asic_specific;
1914
1915 if (!(goya->hw_cap_initialized & HW_CAP_MME))
1916 goto disable_tpc;
1917
1918 WREG32(mmMME_QM_GLBL_CFG0, 0);
1919 WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1920
1921disable_tpc:
1922 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1923 return;
1924
1925 WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1926 WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1927
1928 WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1929 WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1930
1931 WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1932 WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1933
1934 WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1935 WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1936
1937 WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1938 WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1939
1940 WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1941 WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1942
1943 WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1944 WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1945
1946 WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1947 WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1948}
1949
1950/*
1951 * goya_stop_internal_queues - Stop internal queues
1952 *
1953 * @hdev: pointer to hl_device structure
1954 *
1955 * Returns 0 on success
1956 *
1957 */
1958static int goya_stop_internal_queues(struct hl_device *hdev)
1959{
908087ff 1960 struct goya_device *goya = hdev->asic_specific;
1961 int rc, retval = 0;
1962
1963 if (!(goya->hw_cap_initialized & HW_CAP_MME))
1964 goto stop_tpc;
1965
1966 /*
1967 * Each queue (QMAN) is a separate H/W logic. That means that each
1968 * QMAN can be stopped independently and failure to stop one does NOT
1969	 * mean we should not try to stop the other QMANs
1970 */
1971
1972 rc = goya_stop_queue(hdev,
1973 mmMME_QM_GLBL_CFG1,
1974 mmMME_QM_CP_STS,
1975 mmMME_QM_GLBL_STS0);
1976
1977 if (rc) {
1978 dev_err(hdev->dev, "failed to stop MME QMAN\n");
1979 retval = -EIO;
1980 }
1981
1982 rc = goya_stop_queue(hdev,
1983 mmMME_CMDQ_GLBL_CFG1,
1984 mmMME_CMDQ_CP_STS,
1985 mmMME_CMDQ_GLBL_STS0);
1986
1987 if (rc) {
1988 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1989 retval = -EIO;
1990 }
1991
1992stop_tpc:
1993 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1994 return retval;
1995
1996 rc = goya_stop_queue(hdev,
1997 mmTPC0_QM_GLBL_CFG1,
1998 mmTPC0_QM_CP_STS,
1999 mmTPC0_QM_GLBL_STS0);
2000
2001 if (rc) {
2002 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2003 retval = -EIO;
2004 }
2005
2006 rc = goya_stop_queue(hdev,
2007 mmTPC0_CMDQ_GLBL_CFG1,
2008 mmTPC0_CMDQ_CP_STS,
2009 mmTPC0_CMDQ_GLBL_STS0);
2010
2011 if (rc) {
2012 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2013 retval = -EIO;
2014 }
2015
2016 rc = goya_stop_queue(hdev,
2017 mmTPC1_QM_GLBL_CFG1,
2018 mmTPC1_QM_CP_STS,
2019 mmTPC1_QM_GLBL_STS0);
2020
2021 if (rc) {
2022 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2023 retval = -EIO;
2024 }
2025
2026 rc = goya_stop_queue(hdev,
2027 mmTPC1_CMDQ_GLBL_CFG1,
2028 mmTPC1_CMDQ_CP_STS,
2029 mmTPC1_CMDQ_GLBL_STS0);
2030
2031 if (rc) {
2032 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2033 retval = -EIO;
2034 }
2035
2036 rc = goya_stop_queue(hdev,
2037 mmTPC2_QM_GLBL_CFG1,
2038 mmTPC2_QM_CP_STS,
2039 mmTPC2_QM_GLBL_STS0);
2040
2041 if (rc) {
2042 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2043 retval = -EIO;
2044 }
2045
2046 rc = goya_stop_queue(hdev,
2047 mmTPC2_CMDQ_GLBL_CFG1,
2048 mmTPC2_CMDQ_CP_STS,
2049 mmTPC2_CMDQ_GLBL_STS0);
2050
2051 if (rc) {
2052 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2053 retval = -EIO;
2054 }
2055
2056 rc = goya_stop_queue(hdev,
2057 mmTPC3_QM_GLBL_CFG1,
2058 mmTPC3_QM_CP_STS,
2059 mmTPC3_QM_GLBL_STS0);
2060
2061 if (rc) {
2062 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2063 retval = -EIO;
2064 }
2065
2066 rc = goya_stop_queue(hdev,
2067 mmTPC3_CMDQ_GLBL_CFG1,
2068 mmTPC3_CMDQ_CP_STS,
2069 mmTPC3_CMDQ_GLBL_STS0);
2070
2071 if (rc) {
2072 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2073 retval = -EIO;
2074 }
2075
2076 rc = goya_stop_queue(hdev,
2077 mmTPC4_QM_GLBL_CFG1,
2078 mmTPC4_QM_CP_STS,
2079 mmTPC4_QM_GLBL_STS0);
2080
2081 if (rc) {
2082 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2083 retval = -EIO;
2084 }
2085
2086 rc = goya_stop_queue(hdev,
2087 mmTPC4_CMDQ_GLBL_CFG1,
2088 mmTPC4_CMDQ_CP_STS,
2089 mmTPC4_CMDQ_GLBL_STS0);
2090
2091 if (rc) {
2092 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2093 retval = -EIO;
2094 }
2095
2096 rc = goya_stop_queue(hdev,
2097 mmTPC5_QM_GLBL_CFG1,
2098 mmTPC5_QM_CP_STS,
2099 mmTPC5_QM_GLBL_STS0);
2100
2101 if (rc) {
2102 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2103 retval = -EIO;
2104 }
2105
2106 rc = goya_stop_queue(hdev,
2107 mmTPC5_CMDQ_GLBL_CFG1,
2108 mmTPC5_CMDQ_CP_STS,
2109 mmTPC5_CMDQ_GLBL_STS0);
2110
2111 if (rc) {
2112 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2113 retval = -EIO;
2114 }
2115
2116 rc = goya_stop_queue(hdev,
2117 mmTPC6_QM_GLBL_CFG1,
2118 mmTPC6_QM_CP_STS,
2119 mmTPC6_QM_GLBL_STS0);
2120
2121 if (rc) {
2122 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2123 retval = -EIO;
2124 }
2125
2126 rc = goya_stop_queue(hdev,
2127 mmTPC6_CMDQ_GLBL_CFG1,
2128 mmTPC6_CMDQ_CP_STS,
2129 mmTPC6_CMDQ_GLBL_STS0);
2130
2131 if (rc) {
2132 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2133 retval = -EIO;
2134 }
2135
2136 rc = goya_stop_queue(hdev,
2137 mmTPC7_QM_GLBL_CFG1,
2138 mmTPC7_QM_CP_STS,
2139 mmTPC7_QM_GLBL_STS0);
2140
2141 if (rc) {
2142 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2143 retval = -EIO;
2144 }
2145
2146 rc = goya_stop_queue(hdev,
2147 mmTPC7_CMDQ_GLBL_CFG1,
2148 mmTPC7_CMDQ_CP_STS,
2149 mmTPC7_CMDQ_GLBL_STS0);
2150
2151 if (rc) {
2152 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2153 retval = -EIO;
2154 }
2155
2156 return retval;
2157}
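
/*
 * A minimal, hypothetical sketch (not part of the driver): the open-coded
 * stop sequence above could be table driven, reusing the goya_stop_queue()
 * helper and the register triplets from the calls above.
 */
struct goya_qman_stop_regs {
	u32 cfg1;
	u32 cp_sts;
	u32 glbl_sts0;
	const char *name;
};

static int goya_stop_queue_table(struct hl_device *hdev,
		const struct goya_qman_stop_regs *regs, int num)
{
	int i, rc, retval = 0;

	for (i = 0 ; i < num ; i++) {
		/* Each QMAN is stopped independently; keep going on error */
		rc = goya_stop_queue(hdev, regs[i].cfg1, regs[i].cp_sts,
					regs[i].glbl_sts0);
		if (rc) {
			dev_err(hdev->dev, "failed to stop %s\n",
				regs[i].name);
			retval = -EIO;
		}
	}

	return retval;
}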
2158
2159static void goya_dma_stall(struct hl_device *hdev)
2160{
2161 struct goya_device *goya = hdev->asic_specific;
2162
2163 if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2164 return;
2165
2166 WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2167 WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2168 WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2169 WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2170 WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2171}
2172
2173static void goya_tpc_stall(struct hl_device *hdev)
2174{
2175 struct goya_device *goya = hdev->asic_specific;
2176
2177 if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2178 return;
2179
2180 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2181 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2182 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2183 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2184 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2185 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2186 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2187 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2188}
2189
2190static void goya_mme_stall(struct hl_device *hdev)
2191{
2192 struct goya_device *goya = hdev->asic_specific;
2193
2194 if (!(goya->hw_cap_initialized & HW_CAP_MME))
2195 return;
2196
2197 WREG32(mmMME_STALL, 0xFFFFFFFF);
2198}
2199
2200static int goya_enable_msix(struct hl_device *hdev)
2201{
2202 struct goya_device *goya = hdev->asic_specific;
2203 int cq_cnt = hdev->asic_prop.completion_queues_count;
2204 int rc, i, irq_cnt_init, irq;
2205
2206 if (goya->hw_cap_initialized & HW_CAP_MSIX)
2207 return 0;
2208
2209 rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2210 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2211 if (rc < 0) {
2212 dev_err(hdev->dev,
2213 "MSI-X: Failed to enable support -- %d/%d\n",
2214 GOYA_MSIX_ENTRIES, rc);
2215 return rc;
2216 }
2217
2218 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2219 irq = pci_irq_vector(hdev->pdev, i);
2220 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2221 &hdev->completion_queue[i]);
2222 if (rc) {
2223 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2224 goto free_irqs;
2225 }
2226 }
2227
c535bfdd 2228 irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2229
2230 rc = request_irq(irq, hl_irq_handler_eq, 0,
c535bfdd 2231 goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2232 &hdev->event_queue);
2233 if (rc) {
2234 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2235 goto free_irqs;
2236 }
2237
2238 goya->hw_cap_initialized |= HW_CAP_MSIX;
2239 return 0;
2240
2241free_irqs:
2242 for (i = 0 ; i < irq_cnt_init ; i++)
2243 free_irq(pci_irq_vector(hdev->pdev, i),
2244 &hdev->completion_queue[i]);
2245
2246 pci_free_irq_vectors(hdev->pdev);
2247 return rc;
2248}
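
/*
 * Note on the error path above: free_irqs unwinds only the irq_cnt_init
 * completion-queue IRQs that were actually requested before releasing the
 * vectors, so a failure halfway through the request loop never frees an
 * IRQ that was not requested.
 */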
2249
2250static void goya_sync_irqs(struct hl_device *hdev)
2251{
2252 struct goya_device *goya = hdev->asic_specific;
2253 int i;
2254
2255 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2256 return;
2257
2258 /* Wait for all pending IRQs to be finished */
2259 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2260 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2261
c535bfdd 2262 synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2263}
2264
2265static void goya_disable_msix(struct hl_device *hdev)
2266{
2267 struct goya_device *goya = hdev->asic_specific;
2268 int i, irq;
2269
2270 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2271 return;
2272
2273 goya_sync_irqs(hdev);
2274
c535bfdd 2275 irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2276 free_irq(irq, &hdev->event_queue);
2277
2278 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2279 irq = pci_irq_vector(hdev->pdev, i);
2280 free_irq(irq, &hdev->completion_queue[i]);
2281 }
2282
2283 pci_free_irq_vectors(hdev->pdev);
2284
2285 goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2286}
2287
2288static void goya_enable_timestamp(struct hl_device *hdev)
2289{
2290 /* Disable the timestamp counter */
2291 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2292
2293 /* Zero the lower/upper parts of the 64-bit counter */
2294 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2295 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2296
2297 /* Enable the counter */
2298 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2299}
2300
2301static void goya_disable_timestamp(struct hl_device *hdev)
2302{
2303 /* Disable the timestamp counter */
2304 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2305}
2306
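/*
 * Engine halt order used below: stop the external and internal queues so
 * no new jobs are dispatched, wait, stall the DMA/TPC/MME engines
 * themselves, wait again, and only then disable the queues, the timestamp
 * counter and (on hard reset) MSI-X and the device CPU mappings.
 */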
2307static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2308{
c83c4171 2309 u32 wait_timeout_ms;
2310
2311 dev_info(hdev->dev,
2312 "Halting compute engines and disabling interrupts\n");
2313
c83c4171 2314 if (hdev->pldm)
1251f23a 2315 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
c83c4171 2316 else
1251f23a 2317 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2318
2319 goya_stop_external_queues(hdev);
2320 goya_stop_internal_queues(hdev);
2321
2322 msleep(wait_timeout_ms);
2323
2324 goya_dma_stall(hdev);
2325 goya_tpc_stall(hdev);
2326 goya_mme_stall(hdev);
2327
2328 msleep(wait_timeout_ms);
2329
2330 goya_disable_external_queues(hdev);
2331 goya_disable_internal_queues(hdev);
2332
2333 goya_disable_timestamp(hdev);
2334
95b5a8b8 2335 if (hard_reset) {
1251f23a 2336 goya_disable_msix(hdev);
2337 goya_mmu_remove_device_cpu_mappings(hdev);
2338 } else {
1251f23a 2339 goya_sync_irqs(hdev);
95b5a8b8 2340 }
1251f23a 2341}
2342
2343/*
47f6b41c 2344 * goya_load_firmware_to_device() - Load LINUX FW code to device.
3110c60f 2345 * @hdev: Pointer to hl_device structure.
9494a8dd 2346 *
2347	 * Copy LINUX fw code from firmware file to DDR BAR.
9494a8dd 2348 *
3110c60f 2349 * Return: 0 on success, non-zero for failure.
9494a8dd 2350 */
47f6b41c 2351static int goya_load_firmware_to_device(struct hl_device *hdev)
9494a8dd 2352{
3110c60f 2353 void __iomem *dst;
9494a8dd 2354
47f6b41c 2355 dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
9494a8dd 2356
9bb86b63 2357 return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
3110c60f 2358}
9494a8dd 2359
3110c60f 2360/*
47f6b41c 2361 * goya_load_boot_fit_to_device() - Load boot fit to device.
2362 * @hdev: Pointer to hl_device structure.
2363 *
47f6b41c 2364 * Copy boot fit file to SRAM BAR.
2365 *
2366 * Return: 0 on success, non-zero for failure.
2367 */
47f6b41c 2368static int goya_load_boot_fit_to_device(struct hl_device *hdev)
3110c60f 2369{
3110c60f 2370 void __iomem *dst;
9494a8dd 2371
47f6b41c 2372 dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
9494a8dd 2373
9bb86b63 2374 return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
2375}
2376
2377/*
2378 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2379 * The version string should be located by that offset.
2380 */
eb10b897 2381static int goya_read_device_fw_version(struct hl_device *hdev,
7e1c07dd 2382 enum hl_fw_component fwc)
2383{
2384 const char *name;
2385 u32 ver_off;
2386 char *dest;
2387
2388 switch (fwc) {
2389 case FW_COMP_UBOOT:
4095a176 2390 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2391 dest = hdev->asic_prop.uboot_ver;
2392 name = "U-Boot";
2393 break;
2394 case FW_COMP_PREBOOT:
4095a176 2395 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2396 dest = hdev->asic_prop.preboot_ver;
2397 name = "Preboot";
2398 break;
2399 default:
2400 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
eb10b897 2401 return -EIO;
2402 }
2403
2404 ver_off &= ~((u32)SRAM_BASE_ADDR);
2405
2406 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2407 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2408 VERSION_MAX_LEN);
2409 } else {
2410 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2411 name, ver_off);
2412 strcpy(dest, "unavailable");
eb10b897 2413
2414 return -EIO;
839c4803 2415 }
eb10b897 2416
2417 return 0;
2418}
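
/*
 * Example of the offset arithmetic above (hypothetical numbers): if
 * RREG32(mmUBOOT_VER_OFFSET) returns SRAM_BASE_ADDR + 0x7000, masking off
 * SRAM_BASE_ADDR leaves ver_off = 0x7000, and the U-Boot version string is
 * copied from pcie_bar[SRAM_CFG_BAR_ID] + 0x7000.
 */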
2419
47f6b41c 2420static int goya_init_cpu(struct hl_device *hdev)
2421{
2422 struct goya_device *goya = hdev->asic_specific;
2423 int rc;
2424
2425 if (!hdev->cpu_enable)
2426 return 0;
2427
2428 if (goya->hw_cap_initialized & HW_CAP_CPU)
2429 return 0;
2430
2431 /*
2432	 * Before pushing u-boot/linux to the device, we need to set the DDR
2433	 * BAR to the base address of DRAM
2434 */
a38693d7 2435 if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2436 dev_err(hdev->dev,
2437 "failed to map DDR bar to DRAM base address\n");
a38693d7 2438 return -EIO;
2439 }
2440
7e1c07dd 2441 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
47f6b41c 2442 mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2443 mmCPU_CMD_STATUS_TO_HOST,
2444 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
2445 false, GOYA_CPU_TIMEOUT_USEC,
2446 GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
0c169b8a 2447
2448 if (rc)
2449 return rc;
2450
2451 goya->hw_cap_initialized |= HW_CAP_CPU;
2452
2453 return 0;
2454}
2455
2456static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2457 u64 phys_addr)
2458{
2459 u32 status, timeout_usec;
2460 int rc;
2461
2462 if (hdev->pldm)
2463 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2464 else
2465 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2466
2467 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2468 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
2469 WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2470
2471 rc = hl_poll_timeout(
2472 hdev,
2473 MMU_ASID_BUSY,
2474 status,
2475 !(status & 0x80000000),
2476 1000,
2477 timeout_usec);
2478
2479 if (rc) {
2480 dev_err(hdev->dev,
2481 "Timeout during MMU hop0 config of asid %d\n", asid);
2482 return rc;
2483 }
2484
2485 return 0;
2486}
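
/*
 * The MMU_ASID_BUSY handshake above: writing 0x80000000 | asid latches the
 * hop0 address for that ASID and sets the busy bit; the poll then waits
 * for the MMU to clear bit 31, which signals that the update completed.
 */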
2487
b2377e03 2488int goya_mmu_init(struct hl_device *hdev)
2489{
2490 struct asic_fixed_properties *prop = &hdev->asic_prop;
2491 struct goya_device *goya = hdev->asic_specific;
2492 u64 hop0_addr;
2493 int rc, i;
2494
2495 if (!hdev->mmu_enable)
2496 return 0;
2497
2498 if (goya->hw_cap_initialized & HW_CAP_MMU)
2499 return 0;
2500
27ca384c 2501 hdev->dram_default_page_mapping = true;
2502
2503 for (i = 0 ; i < prop->max_asid ; i++) {
2504 hop0_addr = prop->mmu_pgt_addr +
2505 (i * prop->mmu_hop_table_size);
2506
2507 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2508 if (rc) {
2509 dev_err(hdev->dev,
2510 "failed to set hop0 addr for asid %d\n", i);
2511 goto err;
2512 }
2513 }
2514
2515 goya->hw_cap_initialized |= HW_CAP_MMU;
2516
2517 /* init MMU cache manage page */
2518 WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2519 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2520 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2521
2522 /* Remove follower feature due to performance bug */
2523 WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2524 (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2525
2526 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2527 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2528
2529 WREG32(mmMMU_MMU_ENABLE, 1);
2530 WREG32(mmMMU_SPI_MASK, 0xF);
2531
2532 return 0;
2533
2534err:
2535 return rc;
2536}
2537
2538/*
2539 * goya_hw_init - Goya hardware initialization code
2540 *
2541 * @hdev: pointer to hl_device structure
2542 *
2543 * Returns 0 on success
2544 *
2545 */
2546static int goya_hw_init(struct hl_device *hdev)
2547{
2548 struct asic_fixed_properties *prop = &hdev->asic_prop;
2549 int rc;
2550
839c4803 2551 /* Perform read from the device to make sure device is up */
68a1fdf2 2552 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
839c4803 2553
2554 /*
2555 * Let's mark in the H/W that we have reached this point. We check
2556 * this value in the reset_before_init function to understand whether
2557 * we need to reset the chip before doing H/W init. This register is
2558 * cleared by the H/W upon H/W reset
2559 */
10d7de2c 2560 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
f8c8c7d5 2561
47f6b41c 2562 rc = goya_init_cpu(hdev);
2563 if (rc) {
2564 dev_err(hdev->dev, "failed to initialize CPU\n");
2565 return rc;
2566 }
2567
2568 goya_tpc_mbist_workaround(hdev);
2569
2570 goya_init_golden_registers(hdev);
2571
2572 /*
2573 * After CPU initialization is finished, change DDR bar mapping inside
2574 * iATU to point to the start address of the MMU page tables
2575 */
0c002ceb 2576 if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
a38693d7 2577 ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2578 dev_err(hdev->dev,
2579 "failed to map DDR bar to MMU page tables\n");
a38693d7 2580 return -EIO;
2581 }
2582
2583 rc = goya_mmu_init(hdev);
2584 if (rc)
2585 return rc;
2586
2587 goya_init_security(hdev);
2588
2589 goya_init_dma_qmans(hdev);
2590
2591 goya_init_mme_qmans(hdev);
2592
2593 goya_init_tpc_qmans(hdev);
2594
2595 goya_enable_timestamp(hdev);
2596
2597 /* MSI-X must be enabled before CPU queues are initialized */
2598 rc = goya_enable_msix(hdev);
2599 if (rc)
2600 goto disable_queues;
2601
839c4803 2602 /* Perform read from the device to flush all MSI-X configuration */
68a1fdf2 2603 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2604
2605 return 0;
9494a8dd 2606
2607disable_queues:
2608 goya_disable_internal_queues(hdev);
2609 goya_disable_external_queues(hdev);
2610
2611 return rc;
2612}
2613
2614/*
2615 * goya_hw_fini - Goya hardware tear-down code
2616 *
2617 * @hdev: pointer to hl_device structure
2618 * @hard_reset: should we do hard reset to all engines or just reset the
2619 * compute/dma engines
2620 */
2621static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2622{
2623 struct goya_device *goya = hdev->asic_specific;
c83c4171 2624 u32 reset_timeout_ms, cpu_timeout_ms, status;
839c4803 2625
c83c4171 2626 if (hdev->pldm) {
839c4803 2627 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2628 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2629 } else {
839c4803 2630 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2631 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2632 }
2633
2634 if (hard_reset) {
2635		/* The state of the CPU is unknown, so make sure it is
2636		 * stopped by any means necessary
2637 */
2638 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2639 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2640 GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2641
2642 msleep(cpu_timeout_ms);
2643
2644 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2645 goya_disable_clk_rlx(hdev);
2646 goya_set_pll_refclk(hdev);
2647
2648 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2649 dev_info(hdev->dev,
2650 "Issued HARD reset command, going to wait %dms\n",
2651 reset_timeout_ms);
2652 } else {
2653 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2654 dev_info(hdev->dev,
2655 "Issued SOFT reset command, going to wait %dms\n",
2656 reset_timeout_ms);
2657 }
2658
2659 /*
2660 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2661	 * itself is in reset. In either case we need to wait until the reset
2662	 * is deasserted
2663 */
2664 msleep(reset_timeout_ms);
2665
2666 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2667 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2668 dev_err(hdev->dev,
2669 "Timeout while waiting for device to reset 0x%x\n",
2670 status);
2671
d1ddd905 2672 if (!hard_reset && goya) {
2673 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2674 HW_CAP_GOLDEN | HW_CAP_TPC);
2675 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2676 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2677 return;
2678 }
2679
2680 /* Chicken bit to re-initiate boot sequencer flow */
2681 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2682 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2683 /* Move boot manager FSM to pre boot sequencer init state */
2684 WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2685 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2686
eb10b897 2687 if (goya) {
2688 goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2689 HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2690 HW_CAP_DMA | HW_CAP_MME |
2691 HW_CAP_MMU | HW_CAP_TPC_MBIST |
2692 HW_CAP_GOLDEN | HW_CAP_TPC);
d1ddd905 2693
eb10b897 2694 memset(goya->events_stat, 0, sizeof(goya->events_stat));
2695 }
2696}
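
/*
 * Reset flow summary for goya_hw_fini() above: a hard reset first parks
 * the embedded CPU (WFE message plus the halt-machine event), restores the
 * DDR BAR, clocks and PLL refclk, then asserts RESET_ALL; a soft reset
 * only resets the DMA/MME/TPC engines. In both cases the driver sleeps for
 * the reset window and then checks the boot manager FSM to verify the
 * device actually came out of reset.
 */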
2697
2698int goya_suspend(struct hl_device *hdev)
2699{
2700 int rc;
2701
2f55342c 2702 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
2703 if (rc)
2704 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2705
2706 return rc;
2707}
2708
2709int goya_resume(struct hl_device *hdev)
2710{
7cb5101e 2711 return goya_init_iatu(hdev);
2712}
2713
5e6e0239 2714static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
0db57535 2715 void *cpu_addr, dma_addr_t dma_addr, size_t size)
2716{
2717 int rc;
2718
2719 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2720 VM_DONTCOPY | VM_NORESERVE;
2721
2722 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2723 (dma_addr - HOST_PHYS_BASE), size);
be5d926b 2724 if (rc)
0db57535 2725 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
2726
2727 return rc;
2728}
2729
b2377e03 2730void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2731{
2732 u32 db_reg_offset, db_value;
2733
2734 switch (hw_queue_id) {
2735 case GOYA_QUEUE_ID_DMA_0:
2736 db_reg_offset = mmDMA_QM_0_PQ_PI;
2737 break;
2738
2739 case GOYA_QUEUE_ID_DMA_1:
2740 db_reg_offset = mmDMA_QM_1_PQ_PI;
2741 break;
2742
2743 case GOYA_QUEUE_ID_DMA_2:
2744 db_reg_offset = mmDMA_QM_2_PQ_PI;
2745 break;
2746
2747 case GOYA_QUEUE_ID_DMA_3:
2748 db_reg_offset = mmDMA_QM_3_PQ_PI;
2749 break;
2750
2751 case GOYA_QUEUE_ID_DMA_4:
2752 db_reg_offset = mmDMA_QM_4_PQ_PI;
2753 break;
2754
2755 case GOYA_QUEUE_ID_CPU_PQ:
34a5fab7 2756 db_reg_offset = mmCPU_IF_PF_PQ_PI;
2757 break;
2758
2759 case GOYA_QUEUE_ID_MME:
2760 db_reg_offset = mmMME_QM_PQ_PI;
2761 break;
2762
2763 case GOYA_QUEUE_ID_TPC0:
2764 db_reg_offset = mmTPC0_QM_PQ_PI;
2765 break;
2766
2767 case GOYA_QUEUE_ID_TPC1:
2768 db_reg_offset = mmTPC1_QM_PQ_PI;
2769 break;
2770
2771 case GOYA_QUEUE_ID_TPC2:
2772 db_reg_offset = mmTPC2_QM_PQ_PI;
2773 break;
2774
2775 case GOYA_QUEUE_ID_TPC3:
2776 db_reg_offset = mmTPC3_QM_PQ_PI;
2777 break;
2778
2779 case GOYA_QUEUE_ID_TPC4:
2780 db_reg_offset = mmTPC4_QM_PQ_PI;
2781 break;
2782
2783 case GOYA_QUEUE_ID_TPC5:
2784 db_reg_offset = mmTPC5_QM_PQ_PI;
2785 break;
2786
2787 case GOYA_QUEUE_ID_TPC6:
2788 db_reg_offset = mmTPC6_QM_PQ_PI;
2789 break;
2790
2791 case GOYA_QUEUE_ID_TPC7:
2792 db_reg_offset = mmTPC7_QM_PQ_PI;
2793 break;
2794
2795 default:
9494a8dd 2796 /* Should never get here */
34a5fab7 2797 dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2798 hw_queue_id);
2799 return;
2800 }
2801
2802 db_value = pi;
2803
2804 /* ring the doorbell */
2805 WREG32(db_reg_offset, db_value);
2806
2807 if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2808 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2809 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2810}
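
/*
 * For the CPU PQ a doorbell write alone is not enough; the driver also
 * raises the PI_UPDATE event through the GIC (as above), presumably so the
 * embedded CPU notices the new producer index without having to poll.
 */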
2811
b9040c99 2812void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
9494a8dd 2813{
2814	/* The QMANs are on the SRAM so we need to copy to IO space */
2815 memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2816}
2817
5e6e0239 2818static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2819 dma_addr_t *dma_handle, gfp_t flags)
2820{
2821 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2822 dma_handle, flags);
2823
2824 /* Shift to the device's base physical address of host memory */
2825 if (kernel_addr)
2826 *dma_handle += HOST_PHYS_BASE;
2827
2828 return kernel_addr;
2829}
2830
2831static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2832 void *cpu_addr, dma_addr_t dma_handle)
99b9d7b4 2833{
2834 /* Cancel the device's base physical address of host memory */
2835 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2836
2837 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2838}
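
/*
 * The HOST_PHYS_BASE shift above means a buffer the host allocates at bus
 * address B is presented to the device as B + HOST_PHYS_BASE, and every
 * free/unmap path must subtract the same constant before calling back into
 * the DMA API. The alloc/free pair above and the sg map/unmap pair further
 * down both preserve that invariant.
 */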
2839
03df136b 2840int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
2841{
2842 return 0;
2843}
2844
2845void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2846 dma_addr_t *dma_handle, u16 *queue_len)
2847{
2848 void *base;
2849 u32 offset;
2850
2851 *dma_handle = hdev->asic_prop.sram_base_address;
2852
7c22278e 2853 base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2854
2855 switch (queue_id) {
2856 case GOYA_QUEUE_ID_MME:
2857 offset = MME_QMAN_BASE_OFFSET;
2858 *queue_len = MME_QMAN_LENGTH;
2859 break;
2860 case GOYA_QUEUE_ID_TPC0:
2861 offset = TPC0_QMAN_BASE_OFFSET;
2862 *queue_len = TPC_QMAN_LENGTH;
2863 break;
2864 case GOYA_QUEUE_ID_TPC1:
2865 offset = TPC1_QMAN_BASE_OFFSET;
2866 *queue_len = TPC_QMAN_LENGTH;
2867 break;
2868 case GOYA_QUEUE_ID_TPC2:
2869 offset = TPC2_QMAN_BASE_OFFSET;
2870 *queue_len = TPC_QMAN_LENGTH;
2871 break;
2872 case GOYA_QUEUE_ID_TPC3:
2873 offset = TPC3_QMAN_BASE_OFFSET;
2874 *queue_len = TPC_QMAN_LENGTH;
2875 break;
2876 case GOYA_QUEUE_ID_TPC4:
2877 offset = TPC4_QMAN_BASE_OFFSET;
2878 *queue_len = TPC_QMAN_LENGTH;
2879 break;
2880 case GOYA_QUEUE_ID_TPC5:
2881 offset = TPC5_QMAN_BASE_OFFSET;
2882 *queue_len = TPC_QMAN_LENGTH;
2883 break;
2884 case GOYA_QUEUE_ID_TPC6:
2885 offset = TPC6_QMAN_BASE_OFFSET;
2886 *queue_len = TPC_QMAN_LENGTH;
2887 break;
2888 case GOYA_QUEUE_ID_TPC7:
2889 offset = TPC7_QMAN_BASE_OFFSET;
2890 *queue_len = TPC_QMAN_LENGTH;
2891 break;
2892 default:
2893 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2894 return NULL;
2895 }
2896
2897 base += offset;
2898 *dma_handle += offset;
2899
2900 return base;
2901}
2902
5e6e0239 2903static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
eff6f4a0 2904{
2905 struct packet_msg_prot *fence_pkt;
2906 u32 *fence_ptr;
2907 dma_addr_t fence_dma_addr;
2908 struct hl_cb *cb;
3dccd187 2909 u32 tmp, timeout;
2910 int rc;
2911
2912 if (hdev->pldm)
2913 timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2914 else
2915 timeout = HL_DEVICE_TIMEOUT_USEC;
2916
e8960ca0 2917 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
eff6f4a0 2918 dev_err_ratelimited(hdev->dev,
4c172bbf 2919 "Can't send driver job on QMAN0 because the device is not idle\n");
af5f7eea 2920 return -EBUSY;
2921 }
2922
d9c3aa80 2923 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2924 &fence_dma_addr);
2925 if (!fence_ptr) {
2926 dev_err(hdev->dev,
2927 "Failed to allocate fence memory for QMAN0\n");
2928 return -ENOMEM;
2929 }
2930
b2377e03 2931 goya_qman0_set_security(hdev, true);
eff6f4a0 2932
2933 cb = job->patched_cb;
2934
2935 fence_pkt = cb->kernel_address +
2936 job->job_cb_size - sizeof(struct packet_msg_prot);
eff6f4a0 2937
df697bce 2938 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2939 (1 << GOYA_PKT_CTL_EB_SHIFT) |
2940 (1 << GOYA_PKT_CTL_MB_SHIFT);
2941 fence_pkt->ctl = cpu_to_le32(tmp);
2942 fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
94cb669c 2943 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2944
2945 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2946 job->job_cb_size, cb->bus_address);
2947 if (rc) {
2948 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2949 goto free_fence_ptr;
2950 }
2951
a08b51a9 2952 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2953 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2954 timeout, true);
2955
2956 hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2957
2958 if (rc == -ETIMEDOUT) {
2959 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2960 goto free_fence_ptr;
2961 }
2962
2963free_fence_ptr:
d9c3aa80 2964 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2965 fence_dma_addr);
2966
b2377e03 2967 goya_qman0_set_security(hdev, false);
2968
2969 return rc;
2970}
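
/*
 * The fence pattern used above (and in goya_test_queue() below): the last
 * packet of the CB is a MSG_PROT that writes GOYA_QMAN0_FENCE_VAL to a
 * host-visible fence buffer, and the driver polls that buffer; seeing the
 * value proves the QMAN consumed the whole CB.
 */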
2971
9494a8dd 2972int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
439bc47b 2973 u32 timeout, u64 *result)
2974{
2975 struct goya_device *goya = hdev->asic_specific;
2976
2977 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2978 if (result)
2979 *result = 0;
2980 return 0;
2981 }
2982
2983 if (!timeout)
2984 timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
2985
2986 return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2987 timeout, result);
2988}
2989
2990int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2991{
2992 struct packet_msg_prot *fence_pkt;
2993 dma_addr_t pkt_dma_addr;
2994 u32 fence_val, tmp;
2995 dma_addr_t fence_dma_addr;
2996 u32 *fence_ptr;
2997 int rc;
2998
2999 fence_val = GOYA_QMAN0_FENCE_VAL;
3000
d9c3aa80 3001 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3002 &fence_dma_addr);
3003 if (!fence_ptr) {
3004 dev_err(hdev->dev,
3005 "Failed to allocate memory for H/W queue %d testing\n",
3006 hw_queue_id);
3007 return -ENOMEM;
3008 }
3009
3010 *fence_ptr = 0;
3011
d9c3aa80 3012 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
9494a8dd
OG
3013 sizeof(struct packet_msg_prot),
3014 GFP_KERNEL, &pkt_dma_addr);
3015 if (!fence_pkt) {
3016 dev_err(hdev->dev,
3017 "Failed to allocate packet for H/W queue %d testing\n",
3018 hw_queue_id);
3019 rc = -ENOMEM;
3020 goto free_fence_ptr;
3021 }
3022
df697bce 3023 tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3024 (1 << GOYA_PKT_CTL_EB_SHIFT) |
3025 (1 << GOYA_PKT_CTL_MB_SHIFT);
3026 fence_pkt->ctl = cpu_to_le32(tmp);
3027 fence_pkt->value = cpu_to_le32(fence_val);
94cb669c 3028 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3029
3030 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3031 sizeof(struct packet_msg_prot),
3032 pkt_dma_addr);
3033 if (rc) {
3034 dev_err(hdev->dev,
3035 "Failed to send fence packet to H/W queue %d\n",
3036 hw_queue_id);
3037 goto free_pkt;
3038 }
3039
a08b51a9 3040 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
2aa4e410 3041 1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3042
3043 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3044
a08b51a9 3045 if (rc == -ETIMEDOUT) {
9494a8dd
OG
3046 dev_err(hdev->dev,
3047 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3048 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
a08b51a9 3049 rc = -EIO;
3050 }
3051
3052free_pkt:
d9c3aa80 3053 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3054 pkt_dma_addr);
3055free_fence_ptr:
d9c3aa80 3056 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3057 fence_dma_addr);
3058 return rc;
3059}
3060
3061int goya_test_cpu_queue(struct hl_device *hdev)
3062{
3110c60f 3063 struct goya_device *goya = hdev->asic_specific;
9494a8dd 3064
3065 /*
3066	 * Check the capability here because send_cpu_message() won't update
3067	 * the result value if the capability is not initialized
3068 */
3069 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3070 return 0;
9494a8dd 3071
3110c60f 3072 return hl_fw_test_cpu_queue(hdev);
3073}
3074
bedd1442 3075int goya_test_queues(struct hl_device *hdev)
9494a8dd 3076{
3077 int i, rc, ret_val = 0;
3078
3079 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3080 rc = goya_test_queue(hdev, i);
3081 if (rc)
3082 ret_val = -EINVAL;
3083 }
3084
3085 return ret_val;
3086}
3087
3088static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3089 gfp_t mem_flags, dma_addr_t *dma_handle)
9494a8dd 3090{
3091 void *kernel_addr;
3092
3093 if (size > GOYA_DMA_POOL_BLK_SIZE)
3094 return NULL;
3095
3096 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3097
3098 /* Shift to the device's base physical address of host memory */
3099 if (kernel_addr)
3100 *dma_handle += HOST_PHYS_BASE;
3101
3102 return kernel_addr;
3103}
3104
3105static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3106 dma_addr_t dma_addr)
9494a8dd 3107{
3108 /* Cancel the device's base physical address of host memory */
3109 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3110
3111 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3112}
3113
3114void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3115 dma_addr_t *dma_handle)
9494a8dd 3116{
3117 void *vaddr;
3118
3119 vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3120 *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3121 VA_CPU_ACCESSIBLE_MEM_ADDR;
3122
3123 return vaddr;
3124}
3125
3126void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3127 void *vaddr)
9494a8dd 3128{
3110c60f 3129 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3130}
3131
94cb669c 3132static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5e6e0239 3133 int nents, enum dma_data_direction dir)
eff6f4a0 3134{
3135 struct scatterlist *sg;
3136 int i;
3137
3138 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3139 return -ENOMEM;
3140
3141 /* Shift to the device's base physical address of host memory */
3142 for_each_sg(sgl, sg, nents, i)
3143 sg->dma_address += HOST_PHYS_BASE;
3144
3145 return 0;
3146}
3147
94cb669c 3148static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5e6e0239 3149 int nents, enum dma_data_direction dir)
eff6f4a0 3150{
3151 struct scatterlist *sg;
3152 int i;
3153
3154 /* Cancel the device's base physical address of host memory */
3155 for_each_sg(sgl, sg, nents, i)
3156 sg->dma_address -= HOST_PHYS_BASE;
3157
3158 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3159}
3160
5e6e0239 3161u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3162{
3163 struct scatterlist *sg, *sg_next_iter;
3164 u32 count, dma_desc_cnt;
3165 u64 len, len_next;
3166 dma_addr_t addr, addr_next;
3167
3168 dma_desc_cnt = 0;
3169
3170 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3171
3172 len = sg_dma_len(sg);
3173 addr = sg_dma_address(sg);
3174
3175 if (len == 0)
3176 break;
3177
3178 while ((count + 1) < sgt->nents) {
3179 sg_next_iter = sg_next(sg);
3180 len_next = sg_dma_len(sg_next_iter);
3181 addr_next = sg_dma_address(sg_next_iter);
3182
3183 if (len_next == 0)
3184 break;
3185
3186 if ((addr + len == addr_next) &&
3187 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3188 len += len_next;
3189 count++;
3190 sg = sg_next_iter;
3191 } else {
3192 break;
3193 }
3194 }
3195
3196 dma_desc_cnt++;
3197 }
3198
3199 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3200}
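
/*
 * Worked example for the coalescing loop above (hypothetical values): for
 * sg segments {0x1000 + 0x1000 bytes, 0x2000 + 0x1000 bytes, 0x8000 +
 * 0x1000 bytes}, the first two are merged into one descriptor because they
 * are contiguous and their combined length is below DMA_MAX_TRANSFER_SIZE,
 * the third starts a new one, so the function returns
 * 2 * sizeof(struct packet_lin_dma).
 */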
3201
3202static int goya_pin_memory_before_cs(struct hl_device *hdev,
3203 struct hl_cs_parser *parser,
3204 struct packet_lin_dma *user_dma_pkt,
3205 u64 addr, enum dma_data_direction dir)
3206{
3207 struct hl_userptr *userptr;
3208 int rc;
3209
df697bce 3210 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3211 parser->job_userptr_list, &userptr))
3212 goto already_pinned;
3213
3214 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3215 if (!userptr)
3216 return -ENOMEM;
3217
3218 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3219 userptr);
3220 if (rc)
3221 goto free_userptr;
3222
3223 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3224
3225 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3226 userptr->sgt->nents, dir);
3227 if (rc) {
3228 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3229 goto unpin_memory;
3230 }
3231
3232 userptr->dma_mapped = true;
3233 userptr->dir = dir;
3234
3235already_pinned:
3236 parser->patched_cb_size +=
3237 goya_get_dma_desc_list_size(hdev, userptr->sgt);
3238
3239 return 0;
3240
3241unpin_memory:
3242 hl_unpin_host_memory(hdev, userptr);
3243free_userptr:
3244 kfree(userptr);
3245 return rc;
3246}
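
/*
 * Pinning flow above: reuse an existing entry when the same address/size is
 * already on the job's userptr list, otherwise pin the host memory and DMA
 * map it; in both cases patched_cb_size grows by the descriptor-list size
 * the transfer will need.
 */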
3247
3248static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3249 struct hl_cs_parser *parser,
3250 struct packet_lin_dma *user_dma_pkt)
3251{
3252 u64 device_memory_addr, addr;
3253 enum dma_data_direction dir;
3254 enum goya_dma_direction user_dir;
3255 bool sram_addr = true;
3256 bool skip_host_mem_pin = false;
3257 bool user_memset;
df697bce 3258 u32 ctl;
3259 int rc = 0;
3260
3261 ctl = le32_to_cpu(user_dma_pkt->ctl);
3262
3263 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3264 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3265
df697bce 3266 user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3267 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3268
3269 switch (user_dir) {
3270 case DMA_HOST_TO_DRAM:
3271 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3272 dir = DMA_TO_DEVICE;
3273 sram_addr = false;
3274 addr = le64_to_cpu(user_dma_pkt->src_addr);
3275 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3276 if (user_memset)
3277 skip_host_mem_pin = true;
3278 break;
3279
3280 case DMA_DRAM_TO_HOST:
3281 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3282 dir = DMA_FROM_DEVICE;
3283 sram_addr = false;
3284 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3285 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3286 break;
3287
3288 case DMA_HOST_TO_SRAM:
3289 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3290 dir = DMA_TO_DEVICE;
3291 addr = le64_to_cpu(user_dma_pkt->src_addr);
3292 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3293 if (user_memset)
3294 skip_host_mem_pin = true;
3295 break;
3296
3297 case DMA_SRAM_TO_HOST:
3298 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3299 dir = DMA_FROM_DEVICE;
3300 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3301 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3302 break;
3303 default:
3304 dev_err(hdev->dev, "DMA direction is undefined\n");
3305 return -EFAULT;
3306 }
3307
3308 if (sram_addr) {
3309 if (!hl_mem_area_inside_range(device_memory_addr,
3310 le32_to_cpu(user_dma_pkt->tsize),
3311 hdev->asic_prop.sram_user_base_address,
3312 hdev->asic_prop.sram_end_address)) {
3313
3314 dev_err(hdev->dev,
3315 "SRAM address 0x%llx + 0x%x is invalid\n",
3316 device_memory_addr,
3317 user_dma_pkt->tsize);
3318 return -EFAULT;
3319 }
3320 } else {
3321 if (!hl_mem_area_inside_range(device_memory_addr,
3322 le32_to_cpu(user_dma_pkt->tsize),
3323 hdev->asic_prop.dram_user_base_address,
3324 hdev->asic_prop.dram_end_address)) {
3325
3326 dev_err(hdev->dev,
3327 "DRAM address 0x%llx + 0x%x is invalid\n",
3328 device_memory_addr,
3329 user_dma_pkt->tsize);
3330 return -EFAULT;
3331 }
3332 }
3333
3334 if (skip_host_mem_pin)
3335 parser->patched_cb_size += sizeof(*user_dma_pkt);
3336 else {
3337 if ((dir == DMA_TO_DEVICE) &&
3338 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3339 dev_err(hdev->dev,
3340				"Can't DMA from host on queue other than 1\n");
3341 return -EFAULT;
3342 }
3343
3344 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3345 addr, dir);
3346 }
3347
3348 return rc;
3349}
3350
3351static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3352 struct hl_cs_parser *parser,
3353 struct packet_lin_dma *user_dma_pkt)
3354{
3355 u64 sram_memory_addr, dram_memory_addr;
3356 enum goya_dma_direction user_dir;
df697bce 3357 u32 ctl;
eff6f4a0 3358
df697bce
TT
3359 ctl = le32_to_cpu(user_dma_pkt->ctl);
3360 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
eff6f4a0
OG
3361 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3362
3363 if (user_dir == DMA_DRAM_TO_SRAM) {
3364 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3365 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3366 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3367 } else {
3368 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3369 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3370 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3371 }
3372
3373 if (!hl_mem_area_inside_range(sram_memory_addr,
3374 le32_to_cpu(user_dma_pkt->tsize),
3375 hdev->asic_prop.sram_user_base_address,
3376 hdev->asic_prop.sram_end_address)) {
3377 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3378 sram_memory_addr, user_dma_pkt->tsize);
3379 return -EFAULT;
3380 }
3381
3382 if (!hl_mem_area_inside_range(dram_memory_addr,
3383 le32_to_cpu(user_dma_pkt->tsize),
3384 hdev->asic_prop.dram_user_base_address,
3385 hdev->asic_prop.dram_end_address)) {
3386 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3387 dram_memory_addr, user_dma_pkt->tsize);
3388 return -EFAULT;
3389 }
3390
3391 parser->patched_cb_size += sizeof(*user_dma_pkt);
3392
3393 return 0;
3394}
3395
3396static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3397 struct hl_cs_parser *parser,
3398 struct packet_lin_dma *user_dma_pkt)
3399{
3400 enum goya_dma_direction user_dir;
df697bce 3401 u32 ctl;
3402 int rc;
3403
3404 dev_dbg(hdev->dev, "DMA packet details:\n");
3405 dev_dbg(hdev->dev, "source == 0x%llx\n",
3406 le64_to_cpu(user_dma_pkt->src_addr));
3407 dev_dbg(hdev->dev, "destination == 0x%llx\n",
3408 le64_to_cpu(user_dma_pkt->dst_addr));
3409 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
eff6f4a0 3410
3411 ctl = le32_to_cpu(user_dma_pkt->ctl);
3412 user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3413 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3414
3415 /*
3416 * Special handling for DMA with size 0. The H/W has a bug where
3417 * this can cause the QMAN DMA to get stuck, so block it here.
3418 */
3419 if (user_dma_pkt->tsize == 0) {
3420 dev_err(hdev->dev,
3421 "Got DMA with size 0, might reset the device\n");
3422 return -EINVAL;
3423 }
3424
3425 if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3426 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3427 else
3428 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3429
3430 return rc;
3431}
3432
3433static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3434 struct hl_cs_parser *parser,
3435 struct packet_lin_dma *user_dma_pkt)
3436{
3437 dev_dbg(hdev->dev, "DMA packet details:\n");
3438 dev_dbg(hdev->dev, "source == 0x%llx\n",
3439 le64_to_cpu(user_dma_pkt->src_addr));
3440 dev_dbg(hdev->dev, "destination == 0x%llx\n",
3441 le64_to_cpu(user_dma_pkt->dst_addr));
3442 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3443
3444 /*
3445 * WA for HW-23.
3446 * We can't allow user to read from Host using QMANs other than 1.
64a7e295 3447 * PMMU and HPMMU addresses are equal, check only one of them.
eff6f4a0 3448 */
bfb57a91 3449 if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3450 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3451 le32_to_cpu(user_dma_pkt->tsize),
3452 hdev->asic_prop.pmmu.start_addr,
3453 hdev->asic_prop.pmmu.end_addr)) {
3454 dev_err(hdev->dev,
3455			"Can't DMA from host on queue other than 1\n");
3456 return -EFAULT;
3457 }
3458
3459 if (user_dma_pkt->tsize == 0) {
3460 dev_err(hdev->dev,
3461 "Got DMA with size 0, might reset the device\n");
3462 return -EINVAL;
3463 }
3464
3465 parser->patched_cb_size += sizeof(*user_dma_pkt);
3466
3467 return 0;
3468}
3469
3470static int goya_validate_wreg32(struct hl_device *hdev,
3471 struct hl_cs_parser *parser,
3472 struct packet_wreg32 *wreg_pkt)
3473{
3474 struct goya_device *goya = hdev->asic_specific;
3475 u32 sob_start_addr, sob_end_addr;
3476 u16 reg_offset;
3477
df697bce
TT
3478 reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3479 GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
eff6f4a0
OG
3480
3481 dev_dbg(hdev->dev, "WREG32 packet details:\n");
3482 dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
b421d83a
BS
3483 dev_dbg(hdev->dev, "value == 0x%x\n",
3484 le32_to_cpu(wreg_pkt->value));
eff6f4a0 3485
6765fda0 3486 if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
eff6f4a0
OG
3487 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3488 reg_offset);
3489 return -EPERM;
3490 }
3491
3492 /*
3493 * With MMU, DMA channels are not secured, so it doesn't matter where
3494 * the WR COMP will be written to because it will go out with
3495 * non-secured property
3496 */
3497 if (goya->hw_cap_initialized & HW_CAP_MMU)
3498 return 0;
3499
3500 sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3501 sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3502
df697bce
TT
3503 if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3504 (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
eff6f4a0
OG
3505
3506 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3507 wreg_pkt->value);
3508 return -EPERM;
3509 }
3510
3511 return 0;
3512}
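
/*
 * Illustrative sketch (not driver code): the only WREG32 a user CB may carry
 * targets the DMA write-completion address register, and (without MMU) its
 * value must land inside the SOB range checked above. Assuming the ctl layout
 * used by goya_validate_wreg32() and the opcode/MB shifts used elsewhere in
 * this file, a legal packet could be composed like this:
 */
static inline void goya_example_fill_legal_wreg32(struct packet_wreg32 *pkt)
{
	u32 ctl = (PACKET_WREG_32 << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT) |
			((mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF) &
				GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK);

	pkt->ctl = cpu_to_le32(ctl);
	/* value must fall inside [SOB_OBJ_0, SOB_OBJ_1023] when MMU is off */
	pkt->value = cpu_to_le32(lower_32_bits(CFG_BASE +
						mmSYNC_MNGR_SOB_OBJ_0));
}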

static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
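
/*
 * Illustrative sketch (not driver code): the patched-CB size accumulated
 * above is the sum of the sizes of the packets that survive validation,
 * plus room for the two trailing MSG_PROT packets. For a user CB holding
 * only n_pkts packets of a single fixed size, that reduces to:
 */
static inline u32 goya_example_patched_cb_size(u32 n_pkts, u16 pkt_size)
{
	return n_pkts * pkt_size + sizeof(struct packet_msg_prot) * 2;
}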

static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
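
/*
 * Illustrative sketch (not driver code): the loop above greedily merges
 * physically contiguous scatter-gather entries as long as the combined
 * length stays within DMA_MAX_TRANSFER_SIZE. This standalone helper
 * (hypothetical, for illustration only) counts how many LIN_DMA
 * descriptors a mapped sg table would be split into under the same rule:
 */
static u32 goya_example_count_dma_descs(struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	dma_addr_t dma_addr, dma_addr_next;
	u64 len, len_next;
	u32 count, n_descs = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* fold adjacent entries into one descriptor while allowed */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0 ||
				dma_addr + len != dma_addr_next ||
				len + len_next > DMA_MAX_TRANSFER_SIZE)
				break;

			len += len_next;
			count++;
			sg = sg_next_iter;
		}

		n_descs++;
	}

	return n_descs;
}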

static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}

static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}

static int goya_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
				patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}

static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* For internal queue jobs, just check if CB address is valid */
	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->sram_user_base_address,
			asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->dram_user_base_address,
			asic_prop->dram_end_address))
		return 0;

	dev_err(hdev->dev,
		"Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}

int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct goya_device *goya = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return goya_parse_cb_no_ext_queue(hdev, parser);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return goya_parse_cb_mmu(hdev, parser);
	else
		return goya_parse_cb_no_mmu(hdev, parser);
}

void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
				bool eb)
{
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}
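
/*
 * Illustrative sketch (not driver code): the two MSG_PROT packets sit
 * back-to-back at the very end of the patched CB, which is why the parsing
 * path reserves exactly 2 * sizeof(struct packet_msg_prot) beyond the user
 * packets. A hypothetical helper locating that trailer for a CB of total
 * length 'len':
 */
static inline struct packet_msg_prot *
goya_example_cb_trailer(void *kernel_address, u32 len)
{
	/* first of the two trailing MSG_PROT packets */
	return kernel_address + len - sizeof(struct packet_msg_prot) * 2;
}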

void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Flush all WREG to prevent race */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}

/*
 * goya_debugfs_read32 - read a 32bit value from a given device or a host
 *                       mapped address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: returned value
 *
 * In case of a DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows reading from the
 * required address. Configuring the iATU during normal operation can lead to
 * undefined behavior and therefore should be done with extreme care.
 *
 */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
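
/*
 * Illustrative sketch (not driver code): the DDR bar exposes only a
 * dram_pci_bar_size-sized window, so the code above aligns the requested
 * address down to a window boundary and accesses it at the remaining
 * offset. A hypothetical helper showing the same arithmetic, assuming
 * bar_size is a power of two:
 */
static inline void goya_example_ddr_window(u64 addr, u64 bar_size,
					u64 *window_base, u64 *offset)
{
	/* mask off the low bits to get the window-aligned base */
	*window_base = DRAM_PHYS_BASE + (addr & ~(bar_size - 0x1ull));
	*offset = addr - *window_base;
}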

/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host
 *                        mapped address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: value to write
 *
 * In case of a DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can lead to
 * undefined behavior and therefore should be done with extreme care.
 *
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		u32 val_l = RREG32(addr - CFG_BASE);
		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) val_h) << 32) | val_l;

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr <=
		   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		WREG32(addr - CFG_BASE, lower_32_bits(val));
		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr <=
		   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
}

static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	u8 index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}
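
/*
 * Illustrative sketch (not driver code): _goya_get_event_desc() returns a
 * printf-style template ("TPC%d_ecc", "DMA%d_qm", ...) and
 * goya_get_event_desc() fills in the engine index derived from the event id.
 * A hypothetical caller formatting a description:
 */
static void goya_example_format_event(struct hl_device *hdev, u16 event_type)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_info(hdev->dev, "event %u resolves to \"%s\"\n", event_type, desc);
}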

static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}

static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
				bool razwi)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		goya_print_razwi_info(hdev);
		goya_print_mmu_error_info(hdev);
	}
}

static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
		size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	u64 result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes so that the CPU-CP can copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is cast to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}
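
/*
 * Illustrative sketch (not driver code): the `(x + 0x7) & ~0x7` idiom above
 * rounds a size up to the next multiple of 8, e.g. 13 -> 16 and 16 -> 16.
 * The same rounding, spelled out in isolation:
 */
static inline size_t goya_example_round_up_to_8(size_t size)
{
	return (size + 0x7) & ~(size_t)0x7;
}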

static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}

static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
			0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);

	return rc;
}

static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(goya->events_stat_aggregate);
		return goya->events_stat_aggregate;
	}

	*size = (u32) sizeof(goya->events_stat);
	return goya->events_stat;
}

static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
				u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
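
/*
 * Illustrative sketch (not driver code): the memset above is split into
 * 2GB LIN_DMA chunks, so e.g. a 5GB region needs three packets (2G + 2G +
 * 1G) plus the trailing MSG_PROT. The CB sizing, spelled out:
 */
static inline u32 goya_example_memset_cb_size(u64 size)
{
	u32 n_pkts = DIV_ROUND_UP_ULL(size, SZ_2G);

	return n_pkts * sizeof(struct packet_lin_dma) +
			sizeof(struct packet_msg_prot);
}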

int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
						lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_clear_sm_regs(hdev);

	return 0;
}

static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}

static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}

static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
			VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address,
			PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}

void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}

static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}

static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines with
	 * mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
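
/*
 * Illustrative sketch (not driver code): the producer index above lives in
 * an 8-bit field, so incrementing it must wrap 255 back to 0. The same
 * arithmetic in isolation:
 */
static inline u32 goya_example_next_producer_index(u32 inv_data)
{
	return ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
}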

int goya_send_heartbeat(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

int goya_cpucp_info_get(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 dram_size;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		if ((!is_power_of_2(dram_size)) ||
				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
				dram_size);
			dram_size = DRAM_PHYS_DEFAULT_SIZE;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	return 0;
}
5189
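The firmware-reported DRAM size is only trusted when it is a power of two and at least the default physical size. An illustration of the check with hypothetical sizes:

/*
 * Example (hypothetical values):
 *   dram_size = 0x100000000 (4 GB, power of 2)      -> accepted as-is
 *   dram_size = 0xC0000000  (3 GB, not a power of 2) -> rejected; a
 *   dev_err() is logged and DRAM_PHYS_DEFAULT_SIZE is used instead.
 */
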
static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}

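Callers that pass a non-NULL mask get one bit per busy engine, indexed by the GOYA_ENGINE_ID_* values. A hedged sketch of a hypothetical caller going through the ASIC ops:

	/* Illustrative caller; not a call site from this file */
	u64 busy_mask = 0;

	if (!hdev->asic_funcs->is_device_idle(hdev, &busy_mask, NULL))
		dev_info(hdev->dev,
			"device busy, engine mask 0x%llx (set bit = busy)\n",
			busy_mask);
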
static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}

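The __acquires()/__releases() markers are sparse annotations: they document which function takes and which releases the lock for static analysis and compile away otherwise. A minimal sketch of balanced usage through the ops table (hypothetical caller):

	hdev->asic_funcs->hw_queues_lock(hdev);
	/* ... manipulate the HW queues under the lock ... */
	hdev->asic_funcs->hw_queues_unlock(hdev);
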
static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static int goya_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid != HL_KERNEL_ASID_ID)
		goya_mmu_prepare(ctx->hdev, ctx->asid);

	return 0;
}

u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}

static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	return 0;
}

static u32 goya_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	return 0;
}

static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}

static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{

}

static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
			HL_POWER9_HOST_MAGIC) {
		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

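hdev->dma_mask is later applied through the standard DMA API. A hedged sketch of how common code could consume it (illustrative only; the actual call site lives in the common habanalabs PCI code):

	/* Illustrative only: apply the chosen mask to the PCI device */
	rc = dma_set_mask_and_coherent(&hdev->pdev->dev,
					DMA_BIT_MASK(hdev->dma_mask));
	if (rc)
		dev_err(hdev->dev, "Failed to set %u-bit DMA mask\n",
			hdev->dma_mask);
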
u64 goya_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

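The 64-bit timestamp is read as two 32-bit halves, so the low word can wrap between the two register reads. A tear-free variant would re-read the high half until it is stable; this is a sketch of that standard technique, not what the driver does:

	/* Illustrative tear-free read; assumes the counter keeps running */
	u32 hi, lo;

	do {
		hi = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
		lo = RREG32(mmPSOC_TIMESTAMP_CNTCVL);
	} while (RREG32(mmPSOC_TIMESTAMP_CNTCVU) != hi);

	return ((u64) hi << 32) | lo;
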
static void goya_collective_wait_init_cs(struct hl_cs *cs)
{

}

static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	return -EINVAL;
}

static void goya_ctx_fini(struct hl_ctx *ctx)
{

}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.scrub_device_mem = goya_scrub_device_mem,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.send_heartbeat = goya_send_heartbeat,
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.ctx_fini = goya_ctx_fini,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.reset_sob_group = goya_reset_sob_group,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time,
	.collective_wait_init_cs = goya_collective_wait_init_cs,
	.collective_wait_create_jobs = goya_collective_wait_create_jobs
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}
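
With the table installed, common code never calls Goya functions directly; everything dispatches through hdev->asic_funcs. A minimal hedged sketch of such a call site (hypothetical; actual call sites live in the common habanalabs code):

	/* Illustrative dispatch through the ASIC-specific ops */
	goya_set_asic_funcs(hdev);

	if (hdev->asic_funcs->send_heartbeat(hdev))
		dev_err(hdev->dev, "Heartbeat failed\n");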