drivers/misc/habanalabs/goya/goya.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
#include "include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - The PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured but because CP is secured, the driver still needs to parse
 * the CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
 * the driver doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
 *
 * DMA RR does NOT protect host because DMA is not secured
 *
 */
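
/*
 * Illustrative sketch (not driver code) of what the scheme above implies
 * for user CBs that go through a secured QMAN DMA: privileged packets are
 * rejected by the driver's CB parser, plain messages pass, e.g.:
 *
 *	switch (pkt_id) {
 *	case PACKET_WREG_32:
 *	case PACKET_MSG_PROT:
 *		rc = -EPERM;	(privileged - only the driver may emit these)
 *		break;
 *	case PACKET_MSG_LONG:
 *	case PACKET_MSG_SHORT:
 *		rc = 0;		(allowed in user DMA CBs)
 *		break;
 *	}
 */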

#define GOYA_UBOOT_FW_FILE      "habanalabs/goya/goya-u-boot.bin"
#define GOYA_LINUX_FW_FILE      "habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM               63

#define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
#define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)

#define GOYA_QMAN0_FENCE_VAL            0xD169B243

#define GOYA_MAX_STRING_LEN             20

#define GOYA_CB_POOL_CB_CNT             512
#define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
        (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
        (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
                        engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
        !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
        (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
        (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
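
/*
 * Usage sketch for the idle checks above (illustrative): an engine counts
 * as idle only when both its QMAN and its core status agree, e.g. for a
 * DMA channel:
 *
 *	qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + offset);
 *	dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + offset);
 *	is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
 *			IS_DMA_IDLE(dma_core_sts0);
 */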


static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
                "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
                "goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop)
};
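
/*
 * Sketch of how the size table above is typically consumed when walking a
 * CB (illustrative; the mask/shift names are assumed to come from
 * goya_packets.h):
 *
 *	pkt_id = (le64_to_cpu(pkt->header) &
 *			PACKET_HEADER_PACKET_ID_MASK) >>
 *				PACKET_HEADER_PACKET_ID_SHIFT;
 *	pkt_size = goya_packet_sizes[pkt_id];
 *	cb_parsed_length += pkt_size;
 */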

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
        mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
        mmTPC0_QM_GLBL_SECURE_PROPS,
        mmTPC0_QM_GLBL_NON_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC0_CFG_ARUSER,
        mmTPC0_CFG_AWUSER,
        mmTPC1_QM_GLBL_SECURE_PROPS,
        mmTPC1_QM_GLBL_NON_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC1_CFG_ARUSER,
        mmTPC1_CFG_AWUSER,
        mmTPC2_QM_GLBL_SECURE_PROPS,
        mmTPC2_QM_GLBL_NON_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC2_CFG_ARUSER,
        mmTPC2_CFG_AWUSER,
        mmTPC3_QM_GLBL_SECURE_PROPS,
        mmTPC3_QM_GLBL_NON_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC3_CFG_ARUSER,
        mmTPC3_CFG_AWUSER,
        mmTPC4_QM_GLBL_SECURE_PROPS,
        mmTPC4_QM_GLBL_NON_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC4_CFG_ARUSER,
        mmTPC4_CFG_AWUSER,
        mmTPC5_QM_GLBL_SECURE_PROPS,
        mmTPC5_QM_GLBL_NON_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC5_CFG_ARUSER,
        mmTPC5_CFG_AWUSER,
        mmTPC6_QM_GLBL_SECURE_PROPS,
        mmTPC6_QM_GLBL_NON_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC6_CFG_ARUSER,
        mmTPC6_CFG_AWUSER,
        mmTPC7_QM_GLBL_SECURE_PROPS,
        mmTPC7_QM_GLBL_NON_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC7_CFG_ARUSER,
        mmTPC7_CFG_AWUSER,
        mmMME_QM_GLBL_SECURE_PROPS,
        mmMME_QM_GLBL_NON_SECURE_PROPS,
        mmMME_CMDQ_GLBL_SECURE_PROPS,
        mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
        mmMME_SBA_CONTROL_DATA,
        mmMME_SBB_CONTROL_DATA,
        mmMME_SBC_CONTROL_DATA,
        mmMME_WBC_CONTROL_DATA,
        mmPCIE_WRAP_PSOC_ARUSER,
        mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
        GOYA_ASYNC_EVENT_ID_PCIE_IF,
        GOYA_ASYNC_EVENT_ID_TPC0_ECC,
        GOYA_ASYNC_EVENT_ID_TPC1_ECC,
        GOYA_ASYNC_EVENT_ID_TPC2_ECC,
        GOYA_ASYNC_EVENT_ID_TPC3_ECC,
        GOYA_ASYNC_EVENT_ID_TPC4_ECC,
        GOYA_ASYNC_EVENT_ID_TPC5_ECC,
        GOYA_ASYNC_EVENT_ID_TPC6_ECC,
        GOYA_ASYNC_EVENT_ID_TPC7_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
        GOYA_ASYNC_EVENT_ID_MMU_ECC,
        GOYA_ASYNC_EVENT_ID_DMA_MACRO,
        GOYA_ASYNC_EVENT_ID_DMA_ECC,
        GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_MEM,
        GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
        GOYA_ASYNC_EVENT_ID_SRAM0,
        GOYA_ASYNC_EVENT_ID_SRAM1,
        GOYA_ASYNC_EVENT_ID_SRAM2,
        GOYA_ASYNC_EVENT_ID_SRAM3,
        GOYA_ASYNC_EVENT_ID_SRAM4,
        GOYA_ASYNC_EVENT_ID_SRAM5,
        GOYA_ASYNC_EVENT_ID_SRAM6,
        GOYA_ASYNC_EVENT_ID_SRAM7,
        GOYA_ASYNC_EVENT_ID_SRAM8,
        GOYA_ASYNC_EVENT_ID_SRAM9,
        GOYA_ASYNC_EVENT_ID_SRAM10,
        GOYA_ASYNC_EVENT_ID_SRAM11,
        GOYA_ASYNC_EVENT_ID_SRAM12,
        GOYA_ASYNC_EVENT_ID_SRAM13,
        GOYA_ASYNC_EVENT_ID_SRAM14,
        GOYA_ASYNC_EVENT_ID_SRAM15,
        GOYA_ASYNC_EVENT_ID_SRAM16,
        GOYA_ASYNC_EVENT_ID_SRAM17,
        GOYA_ASYNC_EVENT_ID_SRAM18,
        GOYA_ASYNC_EVENT_ID_SRAM19,
        GOYA_ASYNC_EVENT_ID_SRAM20,
        GOYA_ASYNC_EVENT_ID_SRAM21,
        GOYA_ASYNC_EVENT_ID_SRAM22,
        GOYA_ASYNC_EVENT_ID_SRAM23,
        GOYA_ASYNC_EVENT_ID_SRAM24,
        GOYA_ASYNC_EVENT_ID_SRAM25,
        GOYA_ASYNC_EVENT_ID_SRAM26,
        GOYA_ASYNC_EVENT_ID_SRAM27,
        GOYA_ASYNC_EVENT_ID_SRAM28,
        GOYA_ASYNC_EVENT_ID_SRAM29,
        GOYA_ASYNC_EVENT_ID_GIC500,
        GOYA_ASYNC_EVENT_ID_PLL0,
        GOYA_ASYNC_EVENT_ID_PLL1,
        GOYA_ASYNC_EVENT_ID_PLL3,
        GOYA_ASYNC_EVENT_ID_PLL4,
        GOYA_ASYNC_EVENT_ID_PLL5,
        GOYA_ASYNC_EVENT_ID_PLL6,
        GOYA_ASYNC_EVENT_ID_AXI_ECC,
        GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
        GOYA_ASYNC_EVENT_ID_PCIE_DEC,
        GOYA_ASYNC_EVENT_ID_TPC0_DEC,
        GOYA_ASYNC_EVENT_ID_TPC1_DEC,
        GOYA_ASYNC_EVENT_ID_TPC2_DEC,
        GOYA_ASYNC_EVENT_ID_TPC3_DEC,
        GOYA_ASYNC_EVENT_ID_TPC4_DEC,
        GOYA_ASYNC_EVENT_ID_TPC5_DEC,
        GOYA_ASYNC_EVENT_ID_TPC6_DEC,
        GOYA_ASYNC_EVENT_ID_TPC7_DEC,
        GOYA_ASYNC_EVENT_ID_MME_WACS,
        GOYA_ASYNC_EVENT_ID_MME_WACSD,
        GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
        GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
        GOYA_ASYNC_EVENT_ID_PSOC,
        GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC0_QM,
        GOYA_ASYNC_EVENT_ID_TPC1_QM,
        GOYA_ASYNC_EVENT_ID_TPC2_QM,
        GOYA_ASYNC_EVENT_ID_TPC3_QM,
        GOYA_ASYNC_EVENT_ID_TPC4_QM,
        GOYA_ASYNC_EVENT_ID_TPC5_QM,
        GOYA_ASYNC_EVENT_ID_TPC6_QM,
        GOYA_ASYNC_EVENT_ID_TPC7_QM,
        GOYA_ASYNC_EVENT_ID_MME_QM,
        GOYA_ASYNC_EVENT_ID_MME_CMDQ,
        GOYA_ASYNC_EVENT_ID_DMA0_QM,
        GOYA_ASYNC_EVENT_ID_DMA1_QM,
        GOYA_ASYNC_EVENT_ID_DMA2_QM,
        GOYA_ASYNC_EVENT_ID_DMA3_QM,
        GOYA_ASYNC_EVENT_ID_DMA4_QM,
        GOYA_ASYNC_EVENT_ID_DMA0_CH,
        GOYA_ASYNC_EVENT_ID_DMA1_CH,
        GOYA_ASYNC_EVENT_ID_DMA2_CH,
        GOYA_ASYNC_EVENT_ID_DMA3_CH,
        GOYA_ASYNC_EVENT_ID_DMA4_CH,
        GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
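
/*
 * Note (hedged assumption, not visible in this excerpt): this table is
 * typically handed to the device CPU in one bulk "unmask" request after a
 * reset, so that all of these events are reported again.
 */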

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

void goya_get_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int i;

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 1;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                prop->hw_queues_props[i].driver_only = 1;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
                        NUMBER_OF_INT_HW_QUEUES; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        for (; i < HL_MAX_QUEUES; i++)
                prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
        prop->sram_user_base_address = prop->sram_base_address +
                                                SRAM_USER_BASE_OFFSET;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;

        prop->dmmu.hop0_shift = HOP0_SHIFT;
        prop->dmmu.hop1_shift = HOP1_SHIFT;
        prop->dmmu.hop2_shift = HOP2_SHIFT;
        prop->dmmu.hop3_shift = HOP3_SHIFT;
        prop->dmmu.hop4_shift = HOP4_SHIFT;
        prop->dmmu.hop0_mask = HOP0_MASK;
        prop->dmmu.hop1_mask = HOP1_MASK;
        prop->dmmu.hop2_mask = HOP2_MASK;
        prop->dmmu.hop3_mask = HOP3_MASK;
        prop->dmmu.hop4_mask = HOP4_MASK;
        prop->dmmu.huge_page_size = PAGE_SIZE_2MB;

        /* No difference between PMMU and DMMU except for page size */
        memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
        prop->dmmu.page_size = PAGE_SIZE_2MB;
        prop->pmmu.page_size = PAGE_SIZE_4KB;

        prop->va_space_host_start_address = VA_HOST_SPACE_START;
        prop->va_space_host_end_address = VA_HOST_SPACE_END;
        prop->va_space_dram_start_address = VA_DDR_SPACE_START;
        prop->va_space_dram_end_address = VA_DDR_SPACE_END;
        prop->dram_size_for_default_page_mapping =
                        prop->va_space_dram_end_address;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
        prop->high_pll = PLL_HIGH_DEFAULT;
        prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
        prop->max_power_default = MAX_POWER_DEFAULT;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

        strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
                CARD_NAME_MAX_LEN);
}

/*
 * goya_pci_bars_map - Map PCI BARs of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
        static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
        bool is_wc[3] = {false, false, true};
        int rc;

        rc = hl_pci_bars_map(hdev, name, is_wc);
        if (rc)
                return rc;

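        /*
         * The configuration space shares a BAR with the SRAM ("SRAM_CFG"),
         * so its kernel VA is derived from that BAR's mapping, at the
         * offset of CFG_BASE relative to SRAM_BASE_ADDR.
         */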
        hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
                        (CFG_BASE - SRAM_BASE_ADDR);

        return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
        struct goya_device *goya = hdev->asic_specific;
        u64 old_addr = addr;
        int rc;

        if ((goya) && (goya->ddr_bar_cur_addr == addr))
                return old_addr;

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
        if (rc)
                return U64_MAX;

        if (goya) {
                old_addr = goya->ddr_bar_cur_addr;
                goya->ddr_bar_cur_addr = addr;
        }

        return old_addr;
}
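
/*
 * Typical call pattern (sketch; aligned_addr is just a placeholder for a
 * window-aligned address): move the DDR BAR window over the target, access
 * DRAM through the BAR, then restore the previous window from the returned
 * value:
 *
 *	old_base = goya_set_ddr_bar_base(hdev, aligned_addr);
 *	(read/write through hdev->pcie_bar[DDR_BAR_ID])
 *	goya_set_ddr_bar_base(hdev, old_base);
 */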

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
        return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
                                HOST_PHYS_BASE, HOST_PHYS_SIZE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        u32 val;
        int rc;

        goya_get_fixed_properties(hdev);

        /* Check BAR sizes */
        if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        SRAM_CFG_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                        SRAM_CFG_BAR_ID),
                        CFG_BAR_SIZE);
                return -ENODEV;
        }

        if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        MSIX_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                                MSIX_BAR_ID),
                        MSIX_BAR_SIZE);
                return -ENODEV;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

        rc = hl_pci_init(hdev, 48);
        if (rc)
                return rc;

        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
                        dev_warn(hdev->dev,
                                "PCI strap is not configured correctly, PCI bus errors may occur\n");
        }

        return 0;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
        hl_pci_fini(hdev);

        return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
        /* mask to zero the MMBP and ASID bits */
        WREG32_AND(reg, ~0x7FF);
        WREG32_OR(reg, asid);
}
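
/*
 * Sketch of how the helper above is applied: goya_mmu_prepare() (declared
 * earlier) is expected to run it over every entry of goya_mmu_regs[], e.g.:
 *
 *	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
 *		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
 */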

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
                return;

        if (secure)
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

        RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
        prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
        prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
        prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}

int goya_late_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        goya_fetch_psoc_frequency(hdev);

        rc = goya_mmu_clear_pgt_range(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to clear MMU page tables range %d\n", rc);
                return rc;
        }

        rc = goya_mmu_set_dram_default_page(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
                return rc;
        }

        rc = goya_mmu_add_mappings_for_device_cpu(hdev);
        if (rc)
                return rc;

        rc = goya_init_cpu_queues(hdev);
        if (rc)
                return rc;

        rc = goya_test_cpu_queue(hdev);
        if (rc)
                return rc;

        rc = goya_armcp_info_get(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
                return rc;
        }

        /* Now that we have the DRAM size in ASIC prop, we need to check
         * its size and configure the DMA_IF DDR wrap protection (which is in
         * the MMU block) accordingly. The value is the log2 of the DRAM size
         */
        WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

        rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI access from CPU %d\n", rc);
                return rc;
        }

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

        return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
        const struct hwmon_channel_info **channel_info_arr;
        int i = 0;

        if (!hdev->hl_chip_info->info)
                return;

        channel_info_arr = hdev->hl_chip_info->info;

        while (channel_info_arr[i]) {
                kfree(channel_info_arr[i]->config);
                kfree(channel_info_arr[i]);
                i++;
        }

        kfree(channel_info_arr);

        hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
        struct goya_device *goya;
        int rc;

        /* Allocate device structure */
        goya = kzalloc(sizeof(*goya), GFP_KERNEL);
        if (!goya)
                return -ENOMEM;

        /* according to goya_init_iatu */
        goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

        goya->mme_clk = GOYA_PLL_FREQ_LOW;
        goya->tpc_clk = GOYA_PLL_FREQ_LOW;
        goya->ic_clk = GOYA_PLL_FREQ_LOW;

        hdev->asic_specific = goya;

        /* Create DMA pool for small allocations */
        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
                        &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
        if (!hdev->dma_pool) {
                dev_err(hdev->dev, "failed to create DMA pool\n");
                rc = -ENOMEM;
                goto free_goya_device;
        }

        hdev->cpu_accessible_dma_mem =
                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
                                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                                        &hdev->cpu_accessible_dma_address,
                                        GFP_KERNEL | __GFP_ZERO);

        if (!hdev->cpu_accessible_dma_mem) {
                rc = -ENOMEM;
                goto free_dma_pool;
        }

        dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
                &hdev->cpu_accessible_dma_address);

        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
        if (!hdev->cpu_accessible_dma_pool) {
                dev_err(hdev->dev,
                        "Failed to create CPU accessible DMA pool\n");
                rc = -ENOMEM;
                goto free_cpu_dma_mem;
        }

        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
                                (uintptr_t) hdev->cpu_accessible_dma_mem,
                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to add memory to CPU accessible DMA pool\n");
                rc = -EFAULT;
                goto free_cpu_accessible_dma_pool;
        }

        spin_lock_init(&goya->hw_queues_lock);

        return 0;

free_cpu_accessible_dma_pool:
        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);
free_dma_pool:
        dma_pool_destroy(hdev->dma_pool);
free_goya_device:
        kfree(goya);

        return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        gen_pool_destroy(hdev->cpu_accessible_dma_pool);

        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);

        dma_pool_destroy(hdev->dma_pool);

        kfree(goya);

        return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
                dma_addr_t bus_address)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
        WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

        WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
        WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
        WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

        /* PQ has a buffer of 2 cache lines, while CQ has 8 lines */
        WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
        WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

        if (goya->hw_cap_initialized & HW_CAP_MMU)
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

        WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
        WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
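
/*
 * Note on the addressing idiom above (descriptive): all five DMA QMANs have
 * identical register layouts, so a per-QMAN register is reached by adding
 * dma_id times the fixed block stride (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI)
 * to the QMAN 0 register address.
 */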

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
        u32 gic_base_lo, gic_base_hi;
        u64 sob_addr;
        u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

        if (dma_id)
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
                                (dma_id - 1) * 4;
        else
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

        WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
        WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_hw_queue *q;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_DMA)
                return;

        q = &hdev->kernel_queues[0];

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
                goya_init_dma_qman(hdev, i, q->bus_address);
                goya_init_dma_ch(hdev, i);
        }

        goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
        int rc;
        u32 status;

        /* use the values of TPC0 as they are all the same */

        WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

        status = RREG32(cp_sts_reg);
        if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
                rc = hl_poll_timeout(
                        hdev,
                        cp_sts_reg,
                        status,
                        !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
                        1000,
                        QMAN_FENCE_TIMEOUT_USEC);

                /* if the QMAN is stuck in fence, no need to check for stop */
                if (rc)
                        return 0;
        }

        rc = hl_poll_timeout(
                hdev,
                glbl_sts0_reg,
                status,
                (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
                1000,
                QMAN_STOP_TIMEOUT_USEC);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout while waiting for QMAN to stop\n");
                return -EINVAL;
        }

        return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
        int rc, retval = 0;

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_0_GLBL_CFG1,
                        mmDMA_QM_0_CP_STS,
                        mmDMA_QM_0_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_1_GLBL_CFG1,
                        mmDMA_QM_1_CP_STS,
                        mmDMA_QM_1_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_2_GLBL_CFG1,
                        mmDMA_QM_2_CP_STS,
                        mmDMA_QM_2_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_3_GLBL_CFG1,
                        mmDMA_QM_3_CP_STS,
                        mmDMA_QM_3_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_4_GLBL_CFG1,
                        mmDMA_QM_4_CP_STS,
                        mmDMA_QM_4_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
                retval = -EIO;
        }

        return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
        int err;

        if (!hdev->cpu_queues_enable)
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
                return 0;

        eq = &hdev->event_queue;

        WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
        WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

        WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
        WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

        WREG32(mmCPU_CQ_BASE_ADDR_LOW,
                        lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
        WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
                        upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

        WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
        WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
        WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

        /* Used for EQ CI */
        WREG32(mmCPU_EQ_CI, 0);

        WREG32(mmCPU_IF_PF_PQ_PI, 0);

        WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_PI_UPDATE);

        err = hl_poll_timeout(
                hdev,
                mmCPU_PQ_INIT_STATUS,
                status,
                (status == PQ_INIT_STATUS_READY_FOR_HOST),
                1000,
                GOYA_CPU_TIMEOUT_USEC);

        if (err) {
                dev_err(hdev->dev,
                        "Failed to setup communication with device CPU\n");
                return -EIO;
        }

        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
}
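
/*
 * The sequence above is a two-way handshake (descriptive summary): the
 * driver publishes the queue addresses and lengths, marks
 * PQ_INIT_STATUS_READY_FOR_CP, kicks the device CPU with the PI_UPDATE
 * event, and then polls until the device CPU flips the status to
 * PQ_INIT_STATUS_READY_FOR_HOST.
 */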

static void goya_set_pll_refclk(struct hl_device *hdev)
{
        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
        u64 tpc_eml_address;
        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
        int err, slm_index;

        tpc_offset = tpc_id * 0x40000;
        tpc_eml_offset = tpc_id * 0x200000;
        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
        tpc_slm_offset = tpc_eml_address + 0x100000;

        /*
         * Workaround for Bug H2 #2443:
         * "TPC SB is not initialized on chip reset"
         */

        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
                        tpc_id);

        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

        err = hl_poll_timeout(
                hdev,
                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                val,
                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
                1000,
                HL_DEVICE_TIMEOUT_USEC);

        if (err)
                dev_err(hdev->dev,
                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

        msleep(GOYA_RESET_WAIT_MSEC);

        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

        msleep(GOYA_RESET_WAIT_MSEC);

        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
                WREG32(tpc_slm_offset + (slm_index << 2), 0);

        val = RREG32(tpc_slm_offset);
}
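
/*
 * Summary of the workaround above (descriptive): force an MBIST run on the
 * TPC memories, wait for MBIST DONE, pulse the TPC core reset, and then
 * zero the first 256 SLM words so the SB comes up initialized.
 */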
1187
1188 static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1189 {
1190         struct goya_device *goya = hdev->asic_specific;
1191         int i;
1192
1193         if (hdev->pldm)
1194                 return;
1195
1196         if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1197                 return;
1198
1199         /* Workaround for H2 #2443 */
1200
1201         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1202                 _goya_tpc_mbist_workaround(hdev, i);
1203
1204         goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1205 }
1206
1207 /*
1208  * goya_init_golden_registers - Initialize golden registers
1209  *
1210  * @hdev: pointer to hl_device structure
1211  *
1212  * Initialize the H/W registers of the device
1213  *
1214  */
1215 static void goya_init_golden_registers(struct hl_device *hdev)
1216 {
1217         struct goya_device *goya = hdev->asic_specific;
1218         u32 polynom[10], tpc_intr_mask, offset;
1219         int i;
1220
1221         if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1222                 return;
1223
1224         polynom[0] = 0x00020080;
1225         polynom[1] = 0x00401000;
1226         polynom[2] = 0x00200800;
1227         polynom[3] = 0x00002000;
1228         polynom[4] = 0x00080200;
1229         polynom[5] = 0x00040100;
1230         polynom[6] = 0x00100400;
1231         polynom[7] = 0x00004000;
1232         polynom[8] = 0x00010000;
1233         polynom[9] = 0x00008000;
1234
1235         /* Mask all arithmetic interrupts from TPC */
1236         tpc_intr_mask = 0x7FFF;
1237
1238         for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1239                 WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1240                 WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1241                 WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1242                 WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1243                 WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1244
1245                 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1246                 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1247                 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1248                 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1249                 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1250
1251
1252                 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1253                 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1254                 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1255                 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1256                 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1257
1258                 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1259                 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1260                 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1261                 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1262                 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1263
1264                 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1265                 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1266                 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1267                 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1268                 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1269
1270                 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1271                 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1272                 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1273                 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1274                 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1275         }
1276
1277         WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1278         WREG32(mmMME_AGU, 0x0f0f0f10);
1279         WREG32(mmMME_SEI_MASK, ~0x0);
1280
1281         WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1282         WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1283         WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1284         WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1285         WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1286         WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1287         WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1288         WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1289         WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1290         WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1291         WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1292         WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1293         WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1294         WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1295         WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1296         WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1297         WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1298         WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1299         WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1300         WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1301         WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1302         WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1303         WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1304         WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1305         WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1306         WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1307         WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1308         WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1309         WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1310         WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1311         WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1312         WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1313         WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1314         WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1315         WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1316         WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1317         WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1318         WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1319         WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1320         WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1321         WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1322         WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1323         WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1324         WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1325         WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1326         WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1327         WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1328         WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1329         WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1330         WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1331         WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1332         WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1333         WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1334         WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1335         WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1336         WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1337         WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1338         WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1339         WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1340         WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1341         WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1342         WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1343         WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1344         WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1345         WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1346         WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1347         WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1348         WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1349         WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1350         WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1351         WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1352         WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1353         WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1354         WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1355         WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1356         WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1357         WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1358         WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1359         WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1360         WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1361         WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1362         WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1363         WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1364         WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1365
1366         WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1367         WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1368         WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1369         WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1370         WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1371         WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1372         WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1373         WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1374         WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1375         WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1376         WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1377         WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1378
1379         WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1380         WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1381         WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1382         WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1383         WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1384         WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1385         WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1386         WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1387         WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1388         WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1389         WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1390         WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1391
1392         WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1393         WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1394         WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1395         WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1396         WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1397         WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1398         WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1399         WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1400         WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1401         WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1402         WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1403         WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1404
1405         WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1406         WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1407         WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1408         WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1409         WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1410         WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1411         WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1412         WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1413         WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1414         WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1415         WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1416         WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1417
1418         WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1419         WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1420         WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1421         WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1422         WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1423         WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1424         WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1425         WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1426         WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1427         WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1428         WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1429         WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1430
1431         WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1432         WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1433         WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1434         WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1435         WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1436         WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1437         WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1438         WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1439         WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1440         WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1441         WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1442         WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1443
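        /* Set the split coefficients of all routers from the polynom array */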
1444         for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1445                 WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1446                 WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1447                 WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1448                 WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1449                 WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1450                 WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1451
1452                 WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1453                 WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1454                 WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1455                 WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1456                 WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1457                 WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1458                 WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1459                 WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1460
1461                 WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1462                 WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1463         }
1464
1465         for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1466                 WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1467                                 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1468                 WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1469                                 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1470         }
1471
1472         for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1473                 /*
1474                  * Workaround for Bug H2 #2441 :
1475                  * "ST.NOP set trace event illegal opcode"
1476                  */
1477                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1478
1479                 WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1480                                 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1481                 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1482                                 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1483
1484                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1485                                 ICACHE_FETCH_LINE_NUM, 2);
1486         }
1487
1488         WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1489         WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1490                         1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1491
1492         WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1493         WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1494                         1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1495
        /*
         * Workaround for H2 #HW-23 bug
         * Set the DMA max outstanding read requests to 240 on DMA CH 1.
         * This limit is still large enough not to affect Gen4 bandwidth.
         * Only this DMA channel needs to be limited because the user can
         * read from the host only through DMA CH 1
         */
1503         WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1504
1505         WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1506
1507         goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1508 }
1509
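/*
 * goya_init_mme_qman - Initialize the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Set the QMAN PQ to its base address on the SRAM, configure the CP message
 * base addresses (sync manager monitors and sync objects) and the GIC-based
 * error reporting, then enable the QMAN
 */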
1510 static void goya_init_mme_qman(struct hl_device *hdev)
1511 {
1512         u32 mtr_base_lo, mtr_base_hi;
1513         u32 so_base_lo, so_base_hi;
1514         u32 gic_base_lo, gic_base_hi;
1515         u64 qman_base_addr;
1516
1517         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1518         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1519         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1520         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1521
1522         gic_base_lo =
1523                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1524         gic_base_hi =
1525                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1526
1527         qman_base_addr = hdev->asic_prop.sram_base_address +
1528                                 MME_QMAN_BASE_OFFSET;
1529
1530         WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1531         WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1532         WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1533         WREG32(mmMME_QM_PQ_PI, 0);
1534         WREG32(mmMME_QM_PQ_CI, 0);
1535         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1536         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1537         WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1538         WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1539
1540         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1541         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1542         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1543         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1544
1545         /* QMAN CQ has 8 cache lines */
1546         WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1547
1548         WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1549         WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1550
1551         WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1552
1553         WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1554
1555         WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1556
1557         WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1558 }
1559
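/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 */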
1560 static void goya_init_mme_cmdq(struct hl_device *hdev)
1561 {
1562         u32 mtr_base_lo, mtr_base_hi;
1563         u32 so_base_lo, so_base_hi;
1564         u32 gic_base_lo, gic_base_hi;
1565
1566         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1567         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1568         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1569         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1570
1571         gic_base_lo =
1572                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1573         gic_base_hi =
1574                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1575
1576         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1577         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1578         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1579         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1580
1581         /* CMDQ CQ has 20 cache lines */
1582         WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1583
1584         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1585         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1586
1587         WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1588
1589         WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1590
1591         WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1592
1593         WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1594 }
1595
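/*
 * goya_init_mme_qmans - Initialize the MME QMAN and CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 */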
1596 void goya_init_mme_qmans(struct hl_device *hdev)
1597 {
1598         struct goya_device *goya = hdev->asic_specific;
1599         u32 so_base_lo, so_base_hi;
1600
1601         if (goya->hw_cap_initialized & HW_CAP_MME)
1602                 return;
1603
1604         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1605         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1606
1607         WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1608         WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1609
1610         goya_init_mme_qman(hdev);
1611         goya_init_mme_cmdq(hdev);
1612
1613         goya->hw_cap_initialized |= HW_CAP_MME;
1614 }
1615
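/*
 * goya_init_tpc_qman - Initialize a single TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: offset of the QMAN PQ from the SRAM base address
 * @tpc_id: index of the TPC engine (0-7)
 *
 */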
1616 static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1617 {
1618         u32 mtr_base_lo, mtr_base_hi;
1619         u32 so_base_lo, so_base_hi;
1620         u32 gic_base_lo, gic_base_hi;
1621         u64 qman_base_addr;
1622         u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1623
1624         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1625         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1626         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1627         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1628
1629         gic_base_lo =
1630                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1631         gic_base_hi =
1632                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1633
1634         qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1635
1636         WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1637         WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1638         WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1639         WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1640         WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1641         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1642         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1643         WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1644         WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1645
1646         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1647         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1648         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1649         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1650
1651         WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1652
1653         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1654         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1655
1656         WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1657                         GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1658
1659         WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1660
1661         WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1662
1663         WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1664 }
1665
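/*
 * goya_init_tpc_cmdq - Initialize a single TPC CMDQ
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine (0-7)
 *
 */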
1666 static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1667 {
1668         u32 mtr_base_lo, mtr_base_hi;
1669         u32 so_base_lo, so_base_hi;
1670         u32 gic_base_lo, gic_base_hi;
1671         u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1672
1673         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1674         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1675         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1676         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1677
1678         gic_base_lo =
1679                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1680         gic_base_hi =
1681                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1682
1683         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1684         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1685         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1686         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1687
1688         WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1689
1690         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1691         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1692
1693         WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1694                         GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1695
1696         WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1697
1698         WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1699
1700         WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1701 }
1702
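/*
 * goya_init_tpc_qmans - Initialize the QMANs and CMDQs of all TPC engines
 *
 * @hdev: pointer to hl_device structure
 *
 */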
1703 void goya_init_tpc_qmans(struct hl_device *hdev)
1704 {
1705         struct goya_device *goya = hdev->asic_specific;
1706         u32 so_base_lo, so_base_hi;
1707         u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1708                         mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1709         int i;
1710
1711         if (goya->hw_cap_initialized & HW_CAP_TPC)
1712                 return;
1713
1714         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1715         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1716
1717         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1718                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1719                                 so_base_lo);
1720                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1721                                 so_base_hi);
1722         }
1723
1724         goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1725         goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1726         goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1727         goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1728         goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1729         goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1730         goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1731         goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1732
1733         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1734                 goya_init_tpc_cmdq(hdev, i);
1735
1736         goya->hw_cap_initialized |= HW_CAP_TPC;
1737 }
1738
1739 /*
1740  * goya_disable_internal_queues - Disable internal queues
1741  *
1742  * @hdev: pointer to hl_device structure
1743  *
1744  */
1745 static void goya_disable_internal_queues(struct hl_device *hdev)
1746 {
1747         WREG32(mmMME_QM_GLBL_CFG0, 0);
1748         WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1749
1750         WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1751         WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1752
1753         WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1754         WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1755
1756         WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1757         WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1758
1759         WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1760         WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1761
1762         WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1763         WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1764
1765         WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1766         WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1767
1768         WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1769         WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1770
1771         WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1772         WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1773 }
1774
1775 /*
1776  * goya_stop_internal_queues - Stop internal queues
1777  *
1778  * @hdev: pointer to hl_device structure
1779  *
1780  * Returns 0 on success
1781  *
1782  */
static int goya_stop_internal_queues(struct hl_device *hdev)
{
        static const struct {
                u32 cfg1_reg;
                u32 cp_sts_reg;
                u32 glbl_sts0_reg;
                const char *name;
        } internal_queues[] = {
                { mmMME_QM_GLBL_CFG1, mmMME_QM_CP_STS,
                        mmMME_QM_GLBL_STS0, "MME QMAN" },
                { mmMME_CMDQ_GLBL_CFG1, mmMME_CMDQ_CP_STS,
                        mmMME_CMDQ_GLBL_STS0, "MME CMDQ" },
                { mmTPC0_QM_GLBL_CFG1, mmTPC0_QM_CP_STS,
                        mmTPC0_QM_GLBL_STS0, "TPC 0 QMAN" },
                { mmTPC0_CMDQ_GLBL_CFG1, mmTPC0_CMDQ_CP_STS,
                        mmTPC0_CMDQ_GLBL_STS0, "TPC 0 CMDQ" },
                { mmTPC1_QM_GLBL_CFG1, mmTPC1_QM_CP_STS,
                        mmTPC1_QM_GLBL_STS0, "TPC 1 QMAN" },
                { mmTPC1_CMDQ_GLBL_CFG1, mmTPC1_CMDQ_CP_STS,
                        mmTPC1_CMDQ_GLBL_STS0, "TPC 1 CMDQ" },
                { mmTPC2_QM_GLBL_CFG1, mmTPC2_QM_CP_STS,
                        mmTPC2_QM_GLBL_STS0, "TPC 2 QMAN" },
                { mmTPC2_CMDQ_GLBL_CFG1, mmTPC2_CMDQ_CP_STS,
                        mmTPC2_CMDQ_GLBL_STS0, "TPC 2 CMDQ" },
                { mmTPC3_QM_GLBL_CFG1, mmTPC3_QM_CP_STS,
                        mmTPC3_QM_GLBL_STS0, "TPC 3 QMAN" },
                { mmTPC3_CMDQ_GLBL_CFG1, mmTPC3_CMDQ_CP_STS,
                        mmTPC3_CMDQ_GLBL_STS0, "TPC 3 CMDQ" },
                { mmTPC4_QM_GLBL_CFG1, mmTPC4_QM_CP_STS,
                        mmTPC4_QM_GLBL_STS0, "TPC 4 QMAN" },
                { mmTPC4_CMDQ_GLBL_CFG1, mmTPC4_CMDQ_CP_STS,
                        mmTPC4_CMDQ_GLBL_STS0, "TPC 4 CMDQ" },
                { mmTPC5_QM_GLBL_CFG1, mmTPC5_QM_CP_STS,
                        mmTPC5_QM_GLBL_STS0, "TPC 5 QMAN" },
                { mmTPC5_CMDQ_GLBL_CFG1, mmTPC5_CMDQ_CP_STS,
                        mmTPC5_CMDQ_GLBL_STS0, "TPC 5 CMDQ" },
                { mmTPC6_QM_GLBL_CFG1, mmTPC6_QM_CP_STS,
                        mmTPC6_QM_GLBL_STS0, "TPC 6 QMAN" },
                { mmTPC6_CMDQ_GLBL_CFG1, mmTPC6_CMDQ_CP_STS,
                        mmTPC6_CMDQ_GLBL_STS0, "TPC 6 CMDQ" },
                { mmTPC7_QM_GLBL_CFG1, mmTPC7_QM_CP_STS,
                        mmTPC7_QM_GLBL_STS0, "TPC 7 QMAN" },
                { mmTPC7_CMDQ_GLBL_CFG1, mmTPC7_CMDQ_CP_STS,
                        mmTPC7_CMDQ_GLBL_STS0, "TPC 7 CMDQ" },
        };
        int i, rc, retval = 0;

        /*
         * Each queue (QMAN) is a separate H/W logic block, so each QMAN can
         * be stopped independently. Failure to stop one does not prevent us
         * from trying to stop the rest of the QMANs
         */
        for (i = 0 ; i < ARRAY_SIZE(internal_queues) ; i++) {
                rc = goya_stop_queue(hdev,
                                internal_queues[i].cfg1_reg,
                                internal_queues[i].cp_sts_reg,
                                internal_queues[i].glbl_sts0_reg);

                if (rc) {
                        dev_err(hdev->dev, "failed to stop %s\n",
                                        internal_queues[i].name);
                        retval = -EIO;
                }
        }

        return retval;
}
1975
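/* Stop all five DMA channels by setting the DMA_STOP bit in each QMAN */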
1976 static void goya_dma_stall(struct hl_device *hdev)
1977 {
1978         WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
1979         WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
1980         WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
1981         WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
1982         WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
1983 }
1984
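/* Stall all eight TPC engines */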
1985 static void goya_tpc_stall(struct hl_device *hdev)
1986 {
1987         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
1988         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
1989         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
1990         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
1991         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
1992         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
1993         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
1994         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
1995 }
1996
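/* Stall the MME engine */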
1997 static void goya_mme_stall(struct hl_device *hdev)
1998 {
1999         WREG32(mmMME_STALL, 0xFFFFFFFF);
2000 }
2001
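/*
 * goya_enable_msix - Enable MSI-X and request the IRQs
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocate the MSI-X vectors and request an IRQ for each completion queue
 * and for the event queue
 *
 * Returns 0 on success
 */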
2002 static int goya_enable_msix(struct hl_device *hdev)
2003 {
2004         struct goya_device *goya = hdev->asic_specific;
2005         int cq_cnt = hdev->asic_prop.completion_queues_count;
2006         int rc, i, irq_cnt_init, irq;
2007
2008         if (goya->hw_cap_initialized & HW_CAP_MSIX)
2009                 return 0;
2010
2011         rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2012                                 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2013         if (rc < 0) {
2014                 dev_err(hdev->dev,
2015                         "MSI-X: Failed to enable support -- %d/%d\n",
2016                         GOYA_MSIX_ENTRIES, rc);
2017                 return rc;
2018         }
2019
2020         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2021                 irq = pci_irq_vector(hdev->pdev, i);
2022                 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2023                                 &hdev->completion_queue[i]);
2024                 if (rc) {
                        dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2026                         goto free_irqs;
2027                 }
2028         }
2029
2030         irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2031
2032         rc = request_irq(irq, hl_irq_handler_eq, 0,
2033                         goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2034                         &hdev->event_queue);
2035         if (rc) {
                dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2037                 goto free_irqs;
2038         }
2039
2040         goya->hw_cap_initialized |= HW_CAP_MSIX;
2041         return 0;
2042
2043 free_irqs:
2044         for (i = 0 ; i < irq_cnt_init ; i++)
2045                 free_irq(pci_irq_vector(hdev->pdev, i),
2046                         &hdev->completion_queue[i]);
2047
2048         pci_free_irq_vectors(hdev->pdev);
2049         return rc;
2050 }
2051
2052 static void goya_sync_irqs(struct hl_device *hdev)
2053 {
2054         struct goya_device *goya = hdev->asic_specific;
2055         int i;
2056
2057         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2058                 return;
2059
2060         /* Wait for all pending IRQs to be finished */
2061         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2062                 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2063
2064         synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2065 }
2066
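/*
 * goya_disable_msix - Free the IRQs and disable MSI-X
 *
 * @hdev: pointer to hl_device structure
 *
 */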
2067 static void goya_disable_msix(struct hl_device *hdev)
2068 {
2069         struct goya_device *goya = hdev->asic_specific;
2070         int i, irq;
2071
2072         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2073                 return;
2074
2075         goya_sync_irqs(hdev);
2076
2077         irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2078         free_irq(irq, &hdev->event_queue);
2079
2080         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2081                 irq = pci_irq_vector(hdev->pdev, i);
2082                 free_irq(irq, &hdev->completion_queue[i]);
2083         }
2084
2085         pci_free_irq_vectors(hdev->pdev);
2086
2087         goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2088 }
2089
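/*
 * goya_enable_timestamp - Reset and enable the device timestamp counter
 *
 * @hdev: pointer to hl_device structure
 *
 */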
2090 static void goya_enable_timestamp(struct hl_device *hdev)
2091 {
2092         /* Disable the timestamp counter */
2093         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2094
2095         /* Zero the lower/upper parts of the 64-bit counter */
2096         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2097         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2098
2099         /* Enable the counter */
2100         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2101 }
2102
2103 static void goya_disable_timestamp(struct hl_device *hdev)
2104 {
2105         /* Disable the timestamp counter */
2106         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2107 }
2108
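/*
 * goya_halt_engines - Halt the compute engines
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if this function is called as part of a hard reset
 *
 * Stop and then disable all the QMANs, stall the DMA/TPC/MME engines and,
 * on hard reset, also halt the embedded CPU and disable MSI-X
 */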
2109 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2110 {
2111         u32 wait_timeout_ms, cpu_timeout_ms;
2112
2113         dev_info(hdev->dev,
2114                 "Halting compute engines and disabling interrupts\n");
2115
2116         if (hdev->pldm) {
2117                 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2118                 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2119         } else {
2120                 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2121                 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2122         }
2123
2124         if (hard_reset) {
                /*
                 * The CPU's state is unknown at this point, so make sure it
                 * is stopped by any means necessary
                 */
2129                 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2130                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2131                         GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2132                 msleep(cpu_timeout_ms);
2133         }
2134
2135         goya_stop_external_queues(hdev);
2136         goya_stop_internal_queues(hdev);
2137
2138         msleep(wait_timeout_ms);
2139
2140         goya_dma_stall(hdev);
2141         goya_tpc_stall(hdev);
2142         goya_mme_stall(hdev);
2143
2144         msleep(wait_timeout_ms);
2145
2146         goya_disable_external_queues(hdev);
2147         goya_disable_internal_queues(hdev);
2148
2149         goya_disable_timestamp(hdev);
2150
2151         if (hard_reset) {
2152                 goya_disable_msix(hdev);
2153                 goya_mmu_remove_device_cpu_mappings(hdev);
2154         } else {
2155                 goya_sync_irqs(hdev);
2156         }
2157 }
2158
2159 /*
2160  * goya_push_uboot_to_device() - Push u-boot FW code to device.
2161  * @hdev: Pointer to hl_device structure.
2162  *
2163  * Copy u-boot fw code from firmware file to SRAM BAR.
2164  *
2165  * Return: 0 on success, non-zero for failure.
2166  */
2167 static int goya_push_uboot_to_device(struct hl_device *hdev)
2168 {
2169         void __iomem *dst;
2170
2171         dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
2172
2173         return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst);
2174 }
2175
2176 /*
2177  * goya_push_linux_to_device() - Push LINUX FW code to device.
2178  * @hdev: Pointer to hl_device structure.
2179  *
2180  * Copy LINUX fw code from firmware file to HBM BAR.
2181  *
2182  * Return: 0 on success, non-zero for failure.
2183  */
2184 static int goya_push_linux_to_device(struct hl_device *hdev)
2185 {
2186         void __iomem *dst;
2187
2188         dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2189
2190         return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
2191 }
2192
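/*
 * goya_pldm_init_cpu - Initialize the device CPU on the Palladium platform
 *
 * @hdev: pointer to hl_device structure
 *
 * Manually reset the CA53 cores, push the u-boot and Linux FW images to the
 * device and release ARM core 0 from reset
 *
 * Returns 0 on success
 */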
2193 static int goya_pldm_init_cpu(struct hl_device *hdev)
2194 {
2195         u32 unit_rst_val;
2196         int rc;
2197
2198         /* Must initialize SRAM scrambler before pushing u-boot to SRAM */
2199         goya_init_golden_registers(hdev);
2200
2201         /* Put ARM cores into reset */
2202         WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
2203         RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2204
2205         /* Reset the CA53 MACRO */
2206         unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2207         WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
2208         RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2209         WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
2210         RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2211
2212         rc = goya_push_uboot_to_device(hdev);
2213         if (rc)
2214                 return rc;
2215
2216         rc = goya_push_linux_to_device(hdev);
2217         if (rc)
2218                 return rc;
2219
2220         WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2221         WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
2222
2223         WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
2224                 lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2225         WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
2226                 upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2227
2228         /* Release ARM core 0 from reset */
2229         WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
2230                                         CPU_RESET_CORE0_DEASSERT);
2231         RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2232
2233         return 0;
2234 }
2235
/*
 * The FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string is located at that offset.
 */
2240 static void goya_read_device_fw_version(struct hl_device *hdev,
2241                                         enum goya_fw_component fwc)
2242 {
2243         const char *name;
2244         u32 ver_off;
2245         char *dest;
2246
2247         switch (fwc) {
2248         case FW_COMP_UBOOT:
2249                 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2250                 dest = hdev->asic_prop.uboot_ver;
2251                 name = "U-Boot";
2252                 break;
2253         case FW_COMP_PREBOOT:
2254                 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2255                 dest = hdev->asic_prop.preboot_ver;
2256                 name = "Preboot";
2257                 break;
2258         default:
2259                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2260                 return;
2261         }
2262
2263         ver_off &= ~((u32)SRAM_BASE_ADDR);
2264
2265         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2266                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2267                                                         VERSION_MAX_LEN);
2268         } else {
2269                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2270                                                                 name, ver_off);
2271                 strcpy(dest, "unavailable");
2272         }
2273 }
2274
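/*
 * goya_init_cpu - Initialize the embedded CPU and load its FW
 *
 * @hdev: pointer to hl_device structure
 * @cpu_timeout: timeout (in usec) for polling the CPU boot-loader status
 *
 * Returns 0 on success
 */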
2275 static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
2276 {
2277         struct goya_device *goya = hdev->asic_specific;
2278         u32 status;
2279         int rc;
2280
2281         if (!hdev->cpu_enable)
2282                 return 0;
2283
2284         if (goya->hw_cap_initialized & HW_CAP_CPU)
2285                 return 0;
2286
        /*
         * Before pushing u-boot/Linux to the device, we need to set the DDR
         * BAR to the base address of the DRAM
         */
2291         if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2292                 dev_err(hdev->dev,
2293                         "failed to map DDR bar to DRAM base address\n");
2294                 return -EIO;
2295         }
2296
2297         if (hdev->pldm) {
2298                 rc = goya_pldm_init_cpu(hdev);
2299                 if (rc)
2300                         return rc;
2301
2302                 goto out;
2303         }
2304
2305         /* Make sure CPU boot-loader is running */
2306         rc = hl_poll_timeout(
2307                 hdev,
2308                 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2309                 status,
2310                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
2311                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2312                 10000,
2313                 cpu_timeout);
2314
        /* Read the U-Boot version now, in case we fail later */
2316         goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
2317         goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
2318
2319         if (rc) {
                dev_err(hdev->dev, "Error in ARM u-boot!\n");
2321                 switch (status) {
2322                 case CPU_BOOT_STATUS_NA:
2323                         dev_err(hdev->dev,
2324                                 "ARM status %d - BTL did NOT run\n", status);
2325                         break;
2326                 case CPU_BOOT_STATUS_IN_WFE:
2327                         dev_err(hdev->dev,
2328                                 "ARM status %d - Inside WFE loop\n", status);
2329                         break;
2330                 case CPU_BOOT_STATUS_IN_BTL:
2331                         dev_err(hdev->dev,
2332                                 "ARM status %d - Stuck in BTL\n", status);
2333                         break;
2334                 case CPU_BOOT_STATUS_IN_PREBOOT:
2335                         dev_err(hdev->dev,
2336                                 "ARM status %d - Stuck in Preboot\n", status);
2337                         break;
2338                 case CPU_BOOT_STATUS_IN_SPL:
2339                         dev_err(hdev->dev,
2340                                 "ARM status %d - Stuck in SPL\n", status);
2341                         break;
2342                 case CPU_BOOT_STATUS_IN_UBOOT:
2343                         dev_err(hdev->dev,
2344                                 "ARM status %d - Stuck in u-boot\n", status);
2345                         break;
2346                 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
2347                         dev_err(hdev->dev,
2348                                 "ARM status %d - DDR initialization failed\n",
2349                                 status);
2350                         break;
2351                 case CPU_BOOT_STATUS_UBOOT_NOT_READY:
2352                         dev_err(hdev->dev,
2353                                 "ARM status %d - u-boot stopped by user\n",
2354                                 status);
2355                         break;
2356                 case CPU_BOOT_STATUS_TS_INIT_FAIL:
2357                         dev_err(hdev->dev,
2358                                 "ARM status %d - Thermal Sensor initialization failed\n",
2359                                 status);
2360                         break;
2361                 default:
2362                         dev_err(hdev->dev,
2363                                 "ARM status %d - Invalid status code\n",
2364                                 status);
2365                         break;
2366                 }
2367                 return -EIO;
2368         }
2369
2370         if (!hdev->fw_loading) {
2371                 dev_info(hdev->dev, "Skip loading FW\n");
2372                 goto out;
2373         }
2374
2375         if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
2376                 goto out;
2377
2378         rc = goya_push_linux_to_device(hdev);
2379         if (rc)
2380                 return rc;
2381
2382         WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2383
2384         rc = hl_poll_timeout(
2385                 hdev,
2386                 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2387                 status,
2388                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2389                 10000,
2390                 cpu_timeout);
2391
2392         if (rc) {
2393                 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
2394                         dev_err(hdev->dev,
2395                                 "ARM u-boot reports FIT image is corrupted\n");
2396                 else
2397                         dev_err(hdev->dev,
2398                                 "ARM Linux failed to load, %d\n", status);
2399                 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_NA);
2400                 return -EIO;
2401         }
2402
2403         dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2404
2405 out:
2406         goya->hw_cap_initialized |= HW_CAP_CPU;
2407
2408         return 0;
2409 }
2410
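/*
 * goya_mmu_update_asid_hop0_addr - Set the hop0 page-table address of an ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID to configure
 * @phys_addr: physical address of the ASID's hop0 page table
 *
 * Returns 0 on success
 */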
2411 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2412                                                 u64 phys_addr)
2413 {
2414         u32 status, timeout_usec;
2415         int rc;
2416
2417         if (hdev->pldm)
2418                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2419         else
2420                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2421
2422         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2423         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
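        /* Bit 31 kicks off the H/W update; it is polled below until cleared */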
2424         WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2425
2426         rc = hl_poll_timeout(
2427                 hdev,
2428                 MMU_ASID_BUSY,
2429                 status,
2430                 !(status & 0x80000000),
2431                 1000,
2432                 timeout_usec);
2433
2434         if (rc) {
2435                 dev_err(hdev->dev,
2436                         "Timeout during MMU hop0 config of asid %d\n", asid);
2437                 return rc;
2438         }
2439
2440         return 0;
2441 }
2442
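/*
 * goya_mmu_init - Initialize the device MMU
 *
 * @hdev: pointer to hl_device structure
 *
 * Set the hop0 address of every ASID, invalidate the MMU cache and enable
 * the MMU
 *
 * Returns 0 on success
 */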
2443 int goya_mmu_init(struct hl_device *hdev)
2444 {
2445         struct asic_fixed_properties *prop = &hdev->asic_prop;
2446         struct goya_device *goya = hdev->asic_specific;
2447         u64 hop0_addr;
2448         int rc, i;
2449
2450         if (!hdev->mmu_enable)
2451                 return 0;
2452
2453         if (goya->hw_cap_initialized & HW_CAP_MMU)
2454                 return 0;
2455
2456         hdev->dram_supports_virtual_memory = true;
2457         hdev->dram_default_page_mapping = true;
2458
2459         for (i = 0 ; i < prop->max_asid ; i++) {
2460                 hop0_addr = prop->mmu_pgt_addr +
2461                                 (i * prop->mmu_hop_table_size);
2462
2463                 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2464                 if (rc) {
2465                         dev_err(hdev->dev,
2466                                 "failed to set hop0 addr for asid %d\n", i);
2467                         goto err;
2468                 }
2469         }
2470
2471         goya->hw_cap_initialized |= HW_CAP_MMU;
2472
2473         /* init MMU cache manage page */
2474         WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2475                                 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2476         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2477
2478         /* Remove follower feature due to performance bug */
2479         WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2480                         (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2481
2482         hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2483                                         VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2484
2485         WREG32(mmMMU_MMU_ENABLE, 1);
2486         WREG32(mmMMU_SPI_MASK, 0xF);
2487
2488         return 0;
2489
2490 err:
2491         return rc;
2492 }
2493
2494 /*
2495  * goya_hw_init - Goya hardware initialization code
2496  *
2497  * @hdev: pointer to hl_device structure
2498  *
2499  * Returns 0 on success
2500  *
2501  */
2502 static int goya_hw_init(struct hl_device *hdev)
2503 {
2504         struct asic_fixed_properties *prop = &hdev->asic_prop;
2505         int rc;
2506
2507         dev_info(hdev->dev, "Starting initialization of H/W\n");
2508
2509         /* Perform read from the device to make sure device is up */
2510         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2511
2512         /*
2513          * Let's mark in the H/W that we have reached this point. We check
2514          * this value in the reset_before_init function to understand whether
2515          * we need to reset the chip before doing H/W init. This register is
2516          * cleared by the H/W upon H/W reset
2517          */
2518         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2519
2520         rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
2521         if (rc) {
2522                 dev_err(hdev->dev, "failed to initialize CPU\n");
2523                 return rc;
2524         }
2525
2526         goya_tpc_mbist_workaround(hdev);
2527
2528         goya_init_golden_registers(hdev);
2529
2530         /*
2531          * After CPU initialization is finished, change DDR bar mapping inside
2532          * iATU to point to the start address of the MMU page tables
2533          */
2534         if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
2535                         (MMU_PAGE_TABLES_ADDR &
2536                         ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2537                 dev_err(hdev->dev,
2538                         "failed to map DDR bar to MMU page tables\n");
2539                 return -EIO;
2540         }
2541
2542         rc = goya_mmu_init(hdev);
2543         if (rc)
2544                 return rc;
2545
2546         goya_init_security(hdev);
2547
2548         goya_init_dma_qmans(hdev);
2549
2550         goya_init_mme_qmans(hdev);
2551
2552         goya_init_tpc_qmans(hdev);
2553
2554         goya_enable_timestamp(hdev);
2555
2556         /* MSI-X must be enabled before CPU queues are initialized */
2557         rc = goya_enable_msix(hdev);
2558         if (rc)
2559                 goto disable_queues;
2560
2561         /* Perform read from the device to flush all MSI-X configuration */
2562         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2563
2564         return 0;
2565
2566 disable_queues:
2567         goya_disable_internal_queues(hdev);
2568         goya_disable_external_queues(hdev);
2569
2570         return rc;
2571 }
2572
2573 /*
2574  * goya_hw_fini - Goya hardware tear-down code
2575  *
2576  * @hdev: pointer to hl_device structure
2577  * @hard_reset: should we do hard reset to all engines or just reset the
2578  *              compute/dma engines
2579  */
2580 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2581 {
2582         struct goya_device *goya = hdev->asic_specific;
2583         u32 reset_timeout_ms, status;
2584
2585         if (hdev->pldm)
2586                 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2587         else
2588                 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2589
2590         if (hard_reset) {
2591                 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2592                 goya_disable_clk_rlx(hdev);
2593                 goya_set_pll_refclk(hdev);
2594
2595                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2596                 dev_info(hdev->dev,
2597                         "Issued HARD reset command, going to wait %dms\n",
2598                         reset_timeout_ms);
2599         } else {
2600                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2601                 dev_info(hdev->dev,
2602                         "Issued SOFT reset command, going to wait %dms\n",
2603                         reset_timeout_ms);
2604         }
2605
        /*
         * After hard reset, we can't poll the BTM_FSM register because the
         * PSOC itself is in reset. In either reset case, we need to wait
         * until the reset is deasserted
         */
2611         msleep(reset_timeout_ms);
2612
2613         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2614         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2615                 dev_err(hdev->dev,
2616                         "Timeout while waiting for device to reset 0x%x\n",
2617                         status);
2618
2619         if (!hard_reset) {
2620                 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2621                                                 HW_CAP_GOLDEN | HW_CAP_TPC);
2622                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2623                                 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2624                 return;
2625         }
2626
2627         /* Chicken bit to re-initiate boot sequencer flow */
2628         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2629                 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2630         /* Move boot manager FSM to pre boot sequencer init state */
2631         WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2632                         0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2633
2634         goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2635                                         HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2636                                         HW_CAP_DMA | HW_CAP_MME |
2637                                         HW_CAP_MMU | HW_CAP_TPC_MBIST |
2638                                         HW_CAP_GOLDEN | HW_CAP_TPC);
2639         memset(goya->events_stat, 0, sizeof(goya->events_stat));
2640
2641         if (!hdev->pldm) {
2642                 int rc;
                /* In case we are running inside a VM and the VM is
                 * shutting down, we need to make sure the CPU boot-loader
                 * is running before we can continue the VM shutdown.
                 * That is because the VM will send an FLR signal that
                 * we must answer
                 */
2649                 dev_info(hdev->dev,
2650                         "Going to wait up to %ds for CPU boot loader\n",
2651                         GOYA_CPU_TIMEOUT_USEC / 1000 / 1000);
2652
2653                 rc = hl_poll_timeout(
2654                         hdev,
2655                         mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2656                         status,
2657                         (status == CPU_BOOT_STATUS_DRAM_RDY),
2658                         10000,
2659                         GOYA_CPU_TIMEOUT_USEC);
2660                 if (rc)
2661                         dev_err(hdev->dev,
2662                                 "failed to wait for CPU boot loader\n");
2663         }
2664 }
2665
2666 int goya_suspend(struct hl_device *hdev)
2667 {
2668         int rc;
2669
2670         rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2671         if (rc)
2672                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2673
2674         return rc;
2675 }
2676
2677 int goya_resume(struct hl_device *hdev)
2678 {
2679         return goya_init_iatu(hdev);
2680 }
2681
2682 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2683                 u64 kaddress, phys_addr_t paddress, u32 size)
2684 {
2685         int rc;
2686
2687         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2688                         VM_DONTCOPY | VM_NORESERVE;
2689
2690         rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2691                                 size, vma->vm_page_prot);
2692         if (rc)
                dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
2694
2695         return rc;
2696 }
2697
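/*
 * goya_ring_doorbell - Update the PI of a H/W queue in the device
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue whose doorbell is rung
 * @pi: new value of the queue's producer index
 *
 */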
2698 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2699 {
2700         u32 db_reg_offset, db_value;
2701
2702         switch (hw_queue_id) {
2703         case GOYA_QUEUE_ID_DMA_0:
2704                 db_reg_offset = mmDMA_QM_0_PQ_PI;
2705                 break;
2706
2707         case GOYA_QUEUE_ID_DMA_1:
2708                 db_reg_offset = mmDMA_QM_1_PQ_PI;
2709                 break;
2710
2711         case GOYA_QUEUE_ID_DMA_2:
2712                 db_reg_offset = mmDMA_QM_2_PQ_PI;
2713                 break;
2714
2715         case GOYA_QUEUE_ID_DMA_3:
2716                 db_reg_offset = mmDMA_QM_3_PQ_PI;
2717                 break;
2718
2719         case GOYA_QUEUE_ID_DMA_4:
2720                 db_reg_offset = mmDMA_QM_4_PQ_PI;
2721                 break;
2722
2723         case GOYA_QUEUE_ID_CPU_PQ:
2724                 db_reg_offset = mmCPU_IF_PF_PQ_PI;
2725                 break;
2726
2727         case GOYA_QUEUE_ID_MME:
2728                 db_reg_offset = mmMME_QM_PQ_PI;
2729                 break;
2730
2731         case GOYA_QUEUE_ID_TPC0:
2732                 db_reg_offset = mmTPC0_QM_PQ_PI;
2733                 break;
2734
2735         case GOYA_QUEUE_ID_TPC1:
2736                 db_reg_offset = mmTPC1_QM_PQ_PI;
2737                 break;
2738
2739         case GOYA_QUEUE_ID_TPC2:
2740                 db_reg_offset = mmTPC2_QM_PQ_PI;
2741                 break;
2742
2743         case GOYA_QUEUE_ID_TPC3:
2744                 db_reg_offset = mmTPC3_QM_PQ_PI;
2745                 break;
2746
2747         case GOYA_QUEUE_ID_TPC4:
2748                 db_reg_offset = mmTPC4_QM_PQ_PI;
2749                 break;
2750
2751         case GOYA_QUEUE_ID_TPC5:
2752                 db_reg_offset = mmTPC5_QM_PQ_PI;
2753                 break;
2754
2755         case GOYA_QUEUE_ID_TPC6:
2756                 db_reg_offset = mmTPC6_QM_PQ_PI;
2757                 break;
2758
2759         case GOYA_QUEUE_ID_TPC7:
2760                 db_reg_offset = mmTPC7_QM_PQ_PI;
2761                 break;
2762
2763         default:
2764                 /* Should never get here */
2765                 dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2766                         hw_queue_id);
2767                 return;
2768         }
2769
2770         db_value = pi;
2771
2772         /* ring the doorbell */
2773         WREG32(db_reg_offset, db_value);
2774
2775         if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2776                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2777                                 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2778 }
2779
2780 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2781 {
2782         /* The QMANs are on the SRAM so need to copy to IO space */
2783         memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2784 }
2785
2786 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2787                                         dma_addr_t *dma_handle, gfp_t flags)
2788 {
2789         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2790                                                 dma_handle, flags);
2791
2792         /* Shift to the device's base physical address of host memory */
2793         if (kernel_addr)
2794                 *dma_handle += HOST_PHYS_BASE;
2795
2796         return kernel_addr;
2797 }
2798
2799 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2800                                         void *cpu_addr, dma_addr_t dma_handle)
2801 {
2802         /* Cancel the device's base physical address of host memory */
2803         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2804
2805         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2806 }
2807
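/*
 * goya_get_int_queue_base - Get the base address of an internal queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: ID of the internal (MME/TPC) queue
 * @dma_handle: pointer to store the queue's device address
 * @queue_len: pointer to store the queue's length
 *
 * Returns the host-mapped (BAR) address of the queue, or NULL if the queue
 * ID is invalid
 */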
2808 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2809                                 dma_addr_t *dma_handle, u16 *queue_len)
2810 {
2811         void *base;
2812         u32 offset;
2813
2814         *dma_handle = hdev->asic_prop.sram_base_address;
2815
2816         base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2817
2818         switch (queue_id) {
2819         case GOYA_QUEUE_ID_MME:
2820                 offset = MME_QMAN_BASE_OFFSET;
2821                 *queue_len = MME_QMAN_LENGTH;
2822                 break;
2823         case GOYA_QUEUE_ID_TPC0:
2824                 offset = TPC0_QMAN_BASE_OFFSET;
2825                 *queue_len = TPC_QMAN_LENGTH;
2826                 break;
2827         case GOYA_QUEUE_ID_TPC1:
2828                 offset = TPC1_QMAN_BASE_OFFSET;
2829                 *queue_len = TPC_QMAN_LENGTH;
2830                 break;
2831         case GOYA_QUEUE_ID_TPC2:
2832                 offset = TPC2_QMAN_BASE_OFFSET;
2833                 *queue_len = TPC_QMAN_LENGTH;
2834                 break;
2835         case GOYA_QUEUE_ID_TPC3:
2836                 offset = TPC3_QMAN_BASE_OFFSET;
2837                 *queue_len = TPC_QMAN_LENGTH;
2838                 break;
2839         case GOYA_QUEUE_ID_TPC4:
2840                 offset = TPC4_QMAN_BASE_OFFSET;
2841                 *queue_len = TPC_QMAN_LENGTH;
2842                 break;
2843         case GOYA_QUEUE_ID_TPC5:
2844                 offset = TPC5_QMAN_BASE_OFFSET;
2845                 *queue_len = TPC_QMAN_LENGTH;
2846                 break;
2847         case GOYA_QUEUE_ID_TPC6:
2848                 offset = TPC6_QMAN_BASE_OFFSET;
2849                 *queue_len = TPC_QMAN_LENGTH;
2850                 break;
2851         case GOYA_QUEUE_ID_TPC7:
2852                 offset = TPC7_QMAN_BASE_OFFSET;
2853                 *queue_len = TPC_QMAN_LENGTH;
2854                 break;
2855         default:
2856                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2857                 return NULL;
2858         }
2859
2860         base += offset;
2861         *dma_handle += offset;
2862
2863         return base;
2864 }
2865
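/*
 * goya_send_job_on_qman0 - Send a driver job on the secured DMA 0 QMAN
 *
 * @hdev: pointer to hl_device structure
 * @job: pointer to the job to send
 *
 * Send the job's patched CB and wait for the fence packet at its end to
 * update the fence memory
 *
 * Returns 0 on success
 */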
2866 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2867 {
2868         struct packet_msg_prot *fence_pkt;
2869         u32 *fence_ptr;
2870         dma_addr_t fence_dma_addr;
2871         struct hl_cb *cb;
2872         u32 tmp, timeout;
2873         int rc;
2874
2875         if (hdev->pldm)
2876                 timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2877         else
2878                 timeout = HL_DEVICE_TIMEOUT_USEC;
2879
2880         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
2881                 dev_err_ratelimited(hdev->dev,
2882                         "Can't send driver job on QMAN0 because the device is not idle\n");
2883                 return -EBUSY;
2884         }
2885
2886         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2887                                                         &fence_dma_addr);
2888         if (!fence_ptr) {
2889                 dev_err(hdev->dev,
2890                         "Failed to allocate fence memory for QMAN0\n");
2891                 return -ENOMEM;
2892         }
2893
2894         goya_qman0_set_security(hdev, true);
2895
2896         cb = job->patched_cb;
2897
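        /*
         * The patched CB ends with a MSG_PROT packet that the QMAN CP
         * executes last: it writes GOYA_QMAN0_FENCE_VAL to the host buffer
         * allocated above, and the driver polls that buffer below to detect
         * completion of the job.
         */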
2898         fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
2899                         job->job_cb_size - sizeof(struct packet_msg_prot));
2900
2901         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2902                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
2903                         (1 << GOYA_PKT_CTL_MB_SHIFT);
2904         fence_pkt->ctl = cpu_to_le32(tmp);
2905         fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2906         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2907
2908         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2909                                         job->job_cb_size, cb->bus_address);
2910         if (rc) {
2911                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2912                 goto free_fence_ptr;
2913         }
2914
2915         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2916                                 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2917                                 timeout, true);
2918
2919         hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2920
2921         if (rc == -ETIMEDOUT) {
2922                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2923                 goto free_fence_ptr;
2924         }
2925
2926 free_fence_ptr:
2927         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2928                                         fence_dma_addr);
2929
2930         goya_qman0_set_security(hdev, false);
2931
2932         return rc;
2933 }
2934
2935 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2936                                 u32 timeout, long *result)
2937 {
2938         struct goya_device *goya = hdev->asic_specific;
2939
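        /*
         * If the CPU queue was never initialized (presumably because the
         * device came up without its embedded CPU enabled), report success
         * with a zero result so callers need no special casing.
         */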
2940         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2941                 if (result)
2942                         *result = 0;
2943                 return 0;
2944         }
2945
2946         return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2947                                         timeout, result);
2948 }
2949
2950 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2951 {
2952         struct packet_msg_prot *fence_pkt;
2953         dma_addr_t pkt_dma_addr;
2954         u32 fence_val, tmp;
2955         dma_addr_t fence_dma_addr;
2956         u32 *fence_ptr;
2957         int rc;
2958
2959         fence_val = GOYA_QMAN0_FENCE_VAL;
2960
2961         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2962                                                         &fence_dma_addr);
2963         if (!fence_ptr) {
2964                 dev_err(hdev->dev,
2965                         "Failed to allocate memory for queue testing\n");
2966                 return -ENOMEM;
2967         }
2968
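        /*
         * Queue test: push a single MSG_PROT packet through the queue and
         * wait for the H/W to write fence_val back into fence_ptr, which
         * proves the queue fetches and executes packets.
         */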
2969         *fence_ptr = 0;
2970
2971         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
2972                                         sizeof(struct packet_msg_prot),
2973                                         GFP_KERNEL, &pkt_dma_addr);
2974         if (!fence_pkt) {
2975                 dev_err(hdev->dev,
2976                         "Failed to allocate packet for queue testing\n");
2977                 rc = -ENOMEM;
2978                 goto free_fence_ptr;
2979         }
2980
2981         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2982                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
2983                         (1 << GOYA_PKT_CTL_MB_SHIFT);
2984         fence_pkt->ctl = cpu_to_le32(tmp);
2985         fence_pkt->value = cpu_to_le32(fence_val);
2986         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2987
2988         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
2989                                         sizeof(struct packet_msg_prot),
2990                                         pkt_dma_addr);
2991         if (rc) {
2992                 dev_err(hdev->dev,
2993                         "Failed to send fence packet\n");
2994                 goto free_pkt;
2995         }
2996
2997         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
2998                                         1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
2999
3000         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3001
3002         if (rc == -ETIMEDOUT) {
3003                 dev_err(hdev->dev,
3004                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3005                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3006                 rc = -EIO;
3007         }
3008
3009 free_pkt:
3010         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3011                                         pkt_dma_addr);
3012 free_fence_ptr:
3013         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3014                                         fence_dma_addr);
3015         return rc;
3016 }
3017
3018 int goya_test_cpu_queue(struct hl_device *hdev)
3019 {
3020         struct goya_device *goya = hdev->asic_specific;
3021
3022         /*
3023          * check capability here as send_cpu_message() won't update the result
3024          * value if no capability
3025          */
3026         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3027                 return 0;
3028
3029         return hl_fw_test_cpu_queue(hdev);
3030 }
3031
3032 int goya_test_queues(struct hl_device *hdev)
3033 {
3034         int i, rc, ret_val = 0;
3035
3036         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3037                 rc = goya_test_queue(hdev, i);
3038                 if (rc)
3039                         ret_val = -EINVAL;
3040         }
3041
3042         return ret_val;
3043 }
3044
3045 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3046                                         gfp_t mem_flags, dma_addr_t *dma_handle)
3047 {
3048         void *kernel_addr;
3049
3050         if (size > GOYA_DMA_POOL_BLK_SIZE)
3051                 return NULL;
3052
3053         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3054
3055         /* Shift to the device's base physical address of host memory */
3056         if (kernel_addr)
3057                 *dma_handle += HOST_PHYS_BASE;
3058
3059         return kernel_addr;
3060 }
3061
3062 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3063                                 dma_addr_t dma_addr)
3064 {
3065         /* Cancel the device's base physical address of host memory */
3066         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3067
3068         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3069 }
3070
3071 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3072                                         dma_addr_t *dma_handle)
3073 {
3074         void *vaddr;
3075
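        /*
         * Re-base the returned DMA address from the host address of the
         * pool into the fixed virtual window through which the device CPU
         * accesses this memory.
         */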
3076         vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3077         *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3078                         VA_CPU_ACCESSIBLE_MEM_ADDR;
3079
3080         return vaddr;
3081 }
3082
3083 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3084                                         void *vaddr)
3085 {
3086         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3087 }
3088
3089 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3090                                 int nents, enum dma_data_direction dir)
3091 {
3092         struct scatterlist *sg;
3093         int i;
3094
3095         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3096                 return -ENOMEM;
3097
3098         /* Shift to the device's base physical address of host memory */
3099         for_each_sg(sgl, sg, nents, i)
3100                 sg->dma_address += HOST_PHYS_BASE;
3101
3102         return 0;
3103 }
3104
3105 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3106                                 int nents, enum dma_data_direction dir)
3107 {
3108         struct scatterlist *sg;
3109         int i;
3110
3111         /* Cancel the device's base physical address of host memory */
3112         for_each_sg(sgl, sg, nents, i)
3113                 sg->dma_address -= HOST_PHYS_BASE;
3114
3115         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3116 }
3117
3118 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3119 {
3120         struct scatterlist *sg, *sg_next_iter;
3121         u32 count, dma_desc_cnt;
3122         u64 len, len_next;
3123         dma_addr_t addr, addr_next;
3124
3125         dma_desc_cnt = 0;
3126
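        /*
         * Worst-case sizing for the patched CB: count one LIN_DMA packet
         * per SG entry, folding physically contiguous neighbors into a
         * single descriptor as long as the combined length stays within
         * DMA_MAX_TRANSFER_SIZE.
         */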
3127         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3128
3129                 len = sg_dma_len(sg);
3130                 addr = sg_dma_address(sg);
3131
3132                 if (len == 0)
3133                         break;
3134
3135                 while ((count + 1) < sgt->nents) {
3136                         sg_next_iter = sg_next(sg);
3137                         len_next = sg_dma_len(sg_next_iter);
3138                         addr_next = sg_dma_address(sg_next_iter);
3139
3140                         if (len_next == 0)
3141                                 break;
3142
3143                         if ((addr + len == addr_next) &&
3144                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3145                                 len += len_next;
3146                                 count++;
3147                                 sg = sg_next_iter;
3148                         } else {
3149                                 break;
3150                         }
3151                 }
3152
3153                 dma_desc_cnt++;
3154         }
3155
3156         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3157 }
3158
3159 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3160                                 struct hl_cs_parser *parser,
3161                                 struct packet_lin_dma *user_dma_pkt,
3162                                 u64 addr, enum dma_data_direction dir)
3163 {
3164         struct hl_userptr *userptr;
3165         int rc;
3166
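        /*
         * If this host range was already pinned by a previous packet in the
         * job, just account for its descriptor list size; otherwise pin it,
         * DMA-map it and add it to the job's userptr list for later release.
         */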
3167         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3168                         parser->job_userptr_list, &userptr))
3169                 goto already_pinned;
3170
3171         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3172         if (!userptr)
3173                 return -ENOMEM;
3174
3175         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3176                                 userptr);
3177         if (rc)
3178                 goto free_userptr;
3179
3180         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3181
3182         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3183                                         userptr->sgt->nents, dir);
3184         if (rc) {
3185                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3186                 goto unpin_memory;
3187         }
3188
3189         userptr->dma_mapped = true;
3190         userptr->dir = dir;
3191
3192 already_pinned:
3193         parser->patched_cb_size +=
3194                         goya_get_dma_desc_list_size(hdev, userptr->sgt);
3195
3196         return 0;
3197
3198 unpin_memory:
3199         hl_unpin_host_memory(hdev, userptr);
3200 free_userptr:
3201         kfree(userptr);
3202         return rc;
3203 }
3204
3205 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3206                                 struct hl_cs_parser *parser,
3207                                 struct packet_lin_dma *user_dma_pkt)
3208 {
3209         u64 device_memory_addr, addr;
3210         enum dma_data_direction dir;
3211         enum goya_dma_direction user_dir;
3212         bool sram_addr = true;
3213         bool skip_host_mem_pin = false;
3214         bool user_memset;
3215         u32 ctl;
3216         int rc = 0;
3217
3218         ctl = le32_to_cpu(user_dma_pkt->ctl);
3219
3220         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3221                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3222
3223         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3224                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3225
3226         switch (user_dir) {
3227         case DMA_HOST_TO_DRAM:
3228                 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3229                 dir = DMA_TO_DEVICE;
3230                 sram_addr = false;
3231                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3232                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3233                 if (user_memset)
3234                         skip_host_mem_pin = true;
3235                 break;
3236
3237         case DMA_DRAM_TO_HOST:
3238                 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3239                 dir = DMA_FROM_DEVICE;
3240                 sram_addr = false;
3241                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3242                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3243                 break;
3244
3245         case DMA_HOST_TO_SRAM:
3246                 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3247                 dir = DMA_TO_DEVICE;
3248                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3249                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3250                 if (user_memset)
3251                         skip_host_mem_pin = true;
3252                 break;
3253
3254         case DMA_SRAM_TO_HOST:
3255                 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3256                 dir = DMA_FROM_DEVICE;
3257                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3258                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3259                 break;
3260         default:
3261                 dev_err(hdev->dev, "DMA direction is undefined\n");
3262                 return -EFAULT;
3263         }
3264
3265         if (sram_addr) {
3266                 if (!hl_mem_area_inside_range(device_memory_addr,
3267                                 le32_to_cpu(user_dma_pkt->tsize),
3268                                 hdev->asic_prop.sram_user_base_address,
3269                                 hdev->asic_prop.sram_end_address)) {
3270
3271                         dev_err(hdev->dev,
3272                                 "SRAM address 0x%llx + 0x%x is invalid\n",
3273                                 device_memory_addr,
3274                                 le32_to_cpu(user_dma_pkt->tsize));
3275                         return -EFAULT;
3276                 }
3277         } else {
3278                 if (!hl_mem_area_inside_range(device_memory_addr,
3279                                 le32_to_cpu(user_dma_pkt->tsize),
3280                                 hdev->asic_prop.dram_user_base_address,
3281                                 hdev->asic_prop.dram_end_address)) {
3282
3283                         dev_err(hdev->dev,
3284                                 "DRAM address 0x%llx + 0x%x is invalid\n",
3285                                 device_memory_addr,
3286                                 le32_to_cpu(user_dma_pkt->tsize));
3287                         return -EFAULT;
3288                 }
3289         }
3290
3291         if (skip_host_mem_pin) {
3292                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3293         } else {
3294                 if ((dir == DMA_TO_DEVICE) &&
3295                                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3296                         dev_err(hdev->dev,
3297                                 "Can't DMA from host on queue other than 1\n");
3298                         return -EFAULT;
3299                 }
3300
3301                 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3302                                                 addr, dir);
3303         }
3304
3305         return rc;
3306 }
3307
3308 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3309                                 struct hl_cs_parser *parser,
3310                                 struct packet_lin_dma *user_dma_pkt)
3311 {
3312         u64 sram_memory_addr, dram_memory_addr;
3313         enum goya_dma_direction user_dir;
3314         u32 ctl;
3315
3316         ctl = le32_to_cpu(user_dma_pkt->ctl);
3317         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3318                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3319
3320         if (user_dir == DMA_DRAM_TO_SRAM) {
3321                 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3322                 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3323                 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3324         } else {
3325                 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3326                 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3327                 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3328         }
3329
3330         if (!hl_mem_area_inside_range(sram_memory_addr,
3331                                 le32_to_cpu(user_dma_pkt->tsize),
3332                                 hdev->asic_prop.sram_user_base_address,
3333                                 hdev->asic_prop.sram_end_address)) {
3334                 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3335                         sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3336                 return -EFAULT;
3337         }
3338
3339         if (!hl_mem_area_inside_range(dram_memory_addr,
3340                                 le32_to_cpu(user_dma_pkt->tsize),
3341                                 hdev->asic_prop.dram_user_base_address,
3342                                 hdev->asic_prop.dram_end_address)) {
3343                 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3344                         dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3345                 return -EFAULT;
3346         }
3347
3348         parser->patched_cb_size += sizeof(*user_dma_pkt);
3349
3350         return 0;
3351 }
3352
3353 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3354                                 struct hl_cs_parser *parser,
3355                                 struct packet_lin_dma *user_dma_pkt)
3356 {
3357         enum goya_dma_direction user_dir;
3358         u32 ctl;
3359         int rc;
3360
3361         dev_dbg(hdev->dev, "DMA packet details:\n");
3362         dev_dbg(hdev->dev, "source == 0x%llx\n",
3363                 le64_to_cpu(user_dma_pkt->src_addr));
3364         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3365                 le64_to_cpu(user_dma_pkt->dst_addr));
3366         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3367
3368         ctl = le32_to_cpu(user_dma_pkt->ctl);
3369         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3370                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3371
3372         /*
3373          * Special handling for DMA with size 0. The H/W has a bug where
3374          * this can cause the QMAN DMA to get stuck, so block it here.
3375          */
3376         if (user_dma_pkt->tsize == 0) {
3377                 dev_err(hdev->dev,
3378                         "Got DMA with size 0, might reset the device\n");
3379                 return -EINVAL;
3380         }
3381
3382         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3383                 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3384         else
3385                 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3386
3387         return rc;
3388 }
3389
3390 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3391                                 struct hl_cs_parser *parser,
3392                                 struct packet_lin_dma *user_dma_pkt)
3393 {
3394         dev_dbg(hdev->dev, "DMA packet details:\n");
3395         dev_dbg(hdev->dev, "source == 0x%llx\n",
3396                 le64_to_cpu(user_dma_pkt->src_addr));
3397         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3398                 le64_to_cpu(user_dma_pkt->dst_addr));
3399         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3400
3401         /*
3402          * WA for HW-23.
3403          * We can't allow user to read from Host using QMANs other than 1.
3404          */
3405         if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3406                 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3407                                 le32_to_cpu(user_dma_pkt->tsize),
3408                                 hdev->asic_prop.va_space_host_start_address,
3409                                 hdev->asic_prop.va_space_host_end_address)) {
3410                 dev_err(hdev->dev,
3411                         "Can't DMA from host on queue other than 1\n");
3412                 return -EFAULT;
3413         }
3414
3415         if (user_dma_pkt->tsize == 0) {
3416                 dev_err(hdev->dev,
3417                         "Got DMA with size 0, might reset the device\n");
3418                 return -EINVAL;
3419         }
3420
3421         parser->patched_cb_size += sizeof(*user_dma_pkt);
3422
3423         return 0;
3424 }
3425
3426 static int goya_validate_wreg32(struct hl_device *hdev,
3427                                 struct hl_cs_parser *parser,
3428                                 struct packet_wreg32 *wreg_pkt)
3429 {
3430         struct goya_device *goya = hdev->asic_specific;
3431         u32 sob_start_addr, sob_end_addr;
3432         u16 reg_offset;
3433
3434         reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3435                         GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3436
3437         dev_dbg(hdev->dev, "WREG32 packet details:\n");
3438         dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3439         dev_dbg(hdev->dev, "value      == 0x%x\n",
3440                 le32_to_cpu(wreg_pkt->value));
3441
3442         if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3443                 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3444                         reg_offset);
3445                 return -EPERM;
3446         }
3447
3448         /*
3449          * With MMU, DMA channels are not secured, so it doesn't matter where
3450          * the WR COMP will be written to because it will go out with
3451          * non-secured property
3452          */
3453         if (goya->hw_cap_initialized & HW_CAP_MMU)
3454                 return 0;
3455
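        /*
         * Without the MMU, the DMA channels are secured, so the WR_COMP
         * value must be confined to the sync manager SOB objects; otherwise
         * a user could direct a secured write to an arbitrary location.
         */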
3456         sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3457         sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3458
3459         if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3460                         (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3461
3462                 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3463                         le32_to_cpu(wreg_pkt->value));
3464                 return -EPERM;
3465         }
3466
3467         return 0;
3468 }
3469
3470 static int goya_validate_cb(struct hl_device *hdev,
3471                         struct hl_cs_parser *parser, bool is_mmu)
3472 {
3473         u32 cb_parsed_length = 0;
3474         int rc = 0;
3475
3476         parser->patched_cb_size = 0;
3477
3478         /* user_cb_size is more than 0 so the loop will always execute */
3479         while (cb_parsed_length < parser->user_cb_size) {
3480                 enum packet_id pkt_id;
3481                 u16 pkt_size;
3482                 struct goya_packet *user_pkt;
3483
3484                 user_pkt = (struct goya_packet *) (uintptr_t)
3485                         (parser->user_cb->kernel_address + cb_parsed_length);
3486
3487                 pkt_id = (enum packet_id) (
3488                                 (le64_to_cpu(user_pkt->header) &
3489                                 PACKET_HEADER_PACKET_ID_MASK) >>
3490                                         PACKET_HEADER_PACKET_ID_SHIFT);
3491
3492                 pkt_size = goya_packet_sizes[pkt_id];
3493                 cb_parsed_length += pkt_size;
3494                 if (cb_parsed_length > parser->user_cb_size) {
3495                         dev_err(hdev->dev,
3496                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3497                         rc = -EINVAL;
3498                         break;
3499                 }
3500
3501                 switch (pkt_id) {
3502                 case PACKET_WREG_32:
3503                         /*
3504                          * Although it is validated after copy in patch_cb(),
3505                          * need to validate here as well because patch_cb() is
3506                          * not called in MMU path while this function is called
3507                          */
3508                         rc = goya_validate_wreg32(hdev,
3509                                 parser, (struct packet_wreg32 *) user_pkt);
3510                         break;
3511
3512                 case PACKET_WREG_BULK:
3513                         dev_err(hdev->dev,
3514                                 "User not allowed to use WREG_BULK\n");
3515                         rc = -EPERM;
3516                         break;
3517
3518                 case PACKET_MSG_PROT:
3519                         dev_err(hdev->dev,
3520                                 "User not allowed to use MSG_PROT\n");
3521                         rc = -EPERM;
3522                         break;
3523
3524                 case PACKET_CP_DMA:
3525                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3526                         rc = -EPERM;
3527                         break;
3528
3529                 case PACKET_STOP:
3530                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3531                         rc = -EPERM;
3532                         break;
3533
3534                 case PACKET_LIN_DMA:
3535                         if (is_mmu)
3536                                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3537                                         (struct packet_lin_dma *) user_pkt);
3538                         else
3539                                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3540                                         (struct packet_lin_dma *) user_pkt);
3541                         break;
3542
3543                 case PACKET_MSG_LONG:
3544                 case PACKET_MSG_SHORT:
3545                 case PACKET_FENCE:
3546                 case PACKET_NOP:
3547                         parser->patched_cb_size += pkt_size;
3548                         break;
3549
3550                 default:
3551                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3552                                 pkt_id);
3553                         rc = -EINVAL;
3554                         break;
3555                 }
3556
3557                 if (rc)
3558                         break;
3559         }
3560
3561         /*
3562          * The new CB should have space at the end for two MSG_PROT packets:
3563          * 1. A packet that will act as a completion packet
3564          * 2. A packet that will generate MSI-X interrupt
3565          */
3566         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3567
3568         return rc;
3569 }
3570
3571 static int goya_patch_dma_packet(struct hl_device *hdev,
3572                                 struct hl_cs_parser *parser,
3573                                 struct packet_lin_dma *user_dma_pkt,
3574                                 struct packet_lin_dma *new_dma_pkt,
3575                                 u32 *new_dma_pkt_size)
3576 {
3577         struct hl_userptr *userptr;
3578         struct scatterlist *sg, *sg_next_iter;
3579         u32 count, dma_desc_cnt;
3580         u64 len, len_next;
3581         dma_addr_t dma_addr, dma_addr_next;
3582         enum goya_dma_direction user_dir;
3583         u64 device_memory_addr, addr;
3584         enum dma_data_direction dir;
3585         struct sg_table *sgt;
3586         bool skip_host_mem_pin = false;
3587         bool user_memset;
3588         u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3589
3590         ctl = le32_to_cpu(user_dma_pkt->ctl);
3591
3592         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3593                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3594
3595         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3596                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3597
3598         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3599                         (user_dma_pkt->tsize == 0)) {
3600                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3601                 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3602                 return 0;
3603         }
3604
3605         if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3606                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3607                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3608                 dir = DMA_TO_DEVICE;
3609                 if (user_memset)
3610                         skip_host_mem_pin = true;
3611         } else {
3612                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3613                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3614                 dir = DMA_FROM_DEVICE;
3615         }
3616
3617         if ((!skip_host_mem_pin) &&
3618                 (!hl_userptr_is_pinned(hdev, addr,
3619                         le32_to_cpu(user_dma_pkt->tsize),
3620                         parser->job_userptr_list, &userptr))) {
3621                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3622                                 addr, le32_to_cpu(user_dma_pkt->tsize));
3623                 return -EFAULT;
3624         }
3625
3626         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3627                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3628                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3629                 return 0;
3630         }
3631
3632         user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3633
3634         user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3635
3636         sgt = userptr->sgt;
3637         dma_desc_cnt = 0;
3638
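        /*
         * Expand the single user LIN_DMA packet into one packet per
         * (coalesced) SG entry. Only the first descriptor keeps the EB bit,
         * and RDCOMP/WRCOMP are restored on the last descriptor only, so
         * completion behaves as the user requested.
         */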
3639         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640                 len = sg_dma_len(sg);
3641                 dma_addr = sg_dma_address(sg);
3642
3643                 if (len == 0)
3644                         break;
3645
3646                 while ((count + 1) < sgt->nents) {
3647                         sg_next_iter = sg_next(sg);
3648                         len_next = sg_dma_len(sg_next_iter);
3649                         dma_addr_next = sg_dma_address(sg_next_iter);
3650
3651                         if (len_next == 0)
3652                                 break;
3653
3654                         if ((dma_addr + len == dma_addr_next) &&
3655                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3656                                 len += len_next;
3657                                 count++;
3658                                 sg = sg_next_iter;
3659                         } else {
3660                                 break;
3661                         }
3662                 }
3663
3664                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3665                 if (likely(dma_desc_cnt))
3666                         ctl &= ~GOYA_PKT_CTL_EB_MASK;
3667                 ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3668                                 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3669                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3670                 new_dma_pkt->tsize = cpu_to_le32((u32) len);
3671
3672                 if (dir == DMA_TO_DEVICE) {
3673                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3674                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3675                 } else {
3676                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3677                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3678                 }
3679
3680                 if (!user_memset)
3681                         device_memory_addr += len;
3682                 dma_desc_cnt++;
3683                 new_dma_pkt++;
3684         }
3685
3686         if (!dma_desc_cnt) {
3687                 dev_err(hdev->dev,
3688                         "Got 0 SG entries when patching DMA packet\n");
3689                 return -EFAULT;
3690         }
3691
3692         /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3693         new_dma_pkt--;
3694         new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3695
3696         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3697
3698         return 0;
3699 }
3700
3701 static int goya_patch_cb(struct hl_device *hdev,
3702                                 struct hl_cs_parser *parser)
3703 {
3704         u32 cb_parsed_length = 0;
3705         u32 cb_patched_cur_length = 0;
3706         int rc = 0;
3707
3708         /* user_cb_size is more than 0 so the loop will always execute */
3709         while (cb_parsed_length < parser->user_cb_size) {
3710                 enum packet_id pkt_id;
3711                 u16 pkt_size;
3712                 u32 new_pkt_size = 0;
3713                 struct goya_packet *user_pkt, *kernel_pkt;
3714
3715                 user_pkt = (struct goya_packet *) (uintptr_t)
3716                         (parser->user_cb->kernel_address + cb_parsed_length);
3717                 kernel_pkt = (struct goya_packet *) (uintptr_t)
3718                         (parser->patched_cb->kernel_address +
3719                                         cb_patched_cur_length);
3720
3721                 pkt_id = (enum packet_id) (
3722                                 (le64_to_cpu(user_pkt->header) &
3723                                 PACKET_HEADER_PACKET_ID_MASK) >>
3724                                         PACKET_HEADER_PACKET_ID_SHIFT);
3725
3726                 pkt_size = goya_packet_sizes[pkt_id];
3727                 cb_parsed_length += pkt_size;
3728                 if (cb_parsed_length > parser->user_cb_size) {
3729                         dev_err(hdev->dev,
3730                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3731                         rc = -EINVAL;
3732                         break;
3733                 }
3734
3735                 switch (pkt_id) {
3736                 case PACKET_LIN_DMA:
3737                         rc = goya_patch_dma_packet(hdev, parser,
3738                                         (struct packet_lin_dma *) user_pkt,
3739                                         (struct packet_lin_dma *) kernel_pkt,
3740                                         &new_pkt_size);
3741                         cb_patched_cur_length += new_pkt_size;
3742                         break;
3743
3744                 case PACKET_WREG_32:
3745                         memcpy(kernel_pkt, user_pkt, pkt_size);
3746                         cb_patched_cur_length += pkt_size;
3747                         rc = goya_validate_wreg32(hdev, parser,
3748                                         (struct packet_wreg32 *) kernel_pkt);
3749                         break;
3750
3751                 case PACKET_WREG_BULK:
3752                         dev_err(hdev->dev,
3753                                 "User not allowed to use WREG_BULK\n");
3754                         rc = -EPERM;
3755                         break;
3756
3757                 case PACKET_MSG_PROT:
3758                         dev_err(hdev->dev,
3759                                 "User not allowed to use MSG_PROT\n");
3760                         rc = -EPERM;
3761                         break;
3762
3763                 case PACKET_CP_DMA:
3764                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3765                         rc = -EPERM;
3766                         break;
3767
3768                 case PACKET_STOP:
3769                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3770                         rc = -EPERM;
3771                         break;
3772
3773                 case PACKET_MSG_LONG:
3774                 case PACKET_MSG_SHORT:
3775                 case PACKET_FENCE:
3776                 case PACKET_NOP:
3777                         memcpy(kernel_pkt, user_pkt, pkt_size);
3778                         cb_patched_cur_length += pkt_size;
3779                         break;
3780
3781                 default:
3782                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3783                                 pkt_id);
3784                         rc = -EINVAL;
3785                         break;
3786                 }
3787
3788                 if (rc)
3789                         break;
3790         }
3791
3792         return rc;
3793 }
3794
3795 static int goya_parse_cb_mmu(struct hl_device *hdev,
3796                 struct hl_cs_parser *parser)
3797 {
3798         u64 patched_cb_handle;
3799         u32 patched_cb_size;
3800         struct hl_cb *user_cb;
3801         int rc;
3802
3803         /*
3804          * The new CB should have space at the end for two MSG_PROT pkt:
3805          * 1. A packet that will act as a completion packet
3806          * 2. A packet that will generate MSI-X interrupt
3807          */
3808         parser->patched_cb_size = parser->user_cb_size +
3809                         sizeof(struct packet_msg_prot) * 2;
3810
3811         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3812                                 parser->patched_cb_size,
3813                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
3814
3815         if (rc) {
3816                 dev_err(hdev->dev,
3817                         "Failed to allocate patched CB for DMA CS %d\n",
3818                         rc);
3819                 return rc;
3820         }
3821
3822         patched_cb_handle >>= PAGE_SHIFT;
3823         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3824                                 (u32) patched_cb_handle);
3825         /* hl_cb_get should never fail here so use kernel WARN */
3826         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3827                         (u32) patched_cb_handle);
3828         if (!parser->patched_cb) {
3829                 rc = -EFAULT;
3830                 goto out;
3831         }
3832
3833         /*
3834          * The check that parser->user_cb_size <= parser->user_cb->size was done
3835          * in validate_queue_index().
3836          */
3837         memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
3838                 (void *) (uintptr_t) parser->user_cb->kernel_address,
3839                 parser->user_cb_size);
3840
3841         patched_cb_size = parser->patched_cb_size;
3842
3843         /* validate patched CB instead of user CB */
3844         user_cb = parser->user_cb;
3845         parser->user_cb = parser->patched_cb;
3846         rc = goya_validate_cb(hdev, parser, true);
3847         parser->user_cb = user_cb;
3848
3849         if (rc) {
3850                 hl_cb_put(parser->patched_cb);
3851                 goto out;
3852         }
3853
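        /*
         * In the MMU path packets are not patched, so validation must leave
         * the CB size unchanged; a mismatch indicates a malformed user CB.
         */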
3854         if (patched_cb_size != parser->patched_cb_size) {
3855                 dev_err(hdev->dev, "user CB size mismatch\n");
3856                 hl_cb_put(parser->patched_cb);
3857                 rc = -EINVAL;
3858                 goto out;
3859         }
3860
3861 out:
3862         /*
3863          * Always call cb destroy here because we still have 1 reference
3864          * to it from the earlier cb_get call. After the job completes,
3865          * cb_put will release it, but here we want to remove it from the
3866          * idr
3867          */
3868         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3869                                         patched_cb_handle << PAGE_SHIFT);
3870
3871         return rc;
3872 }
3873
3874 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3875                                 struct hl_cs_parser *parser)
3876 {
3877         u64 patched_cb_handle;
3878         int rc;
3879
3880         rc = goya_validate_cb(hdev, parser, false);
3881
3882         if (rc)
3883                 goto free_userptr;
3884
3885         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3886                                 parser->patched_cb_size,
3887                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
3888         if (rc) {
3889                 dev_err(hdev->dev,
3890                         "Failed to allocate patched CB for DMA CS %d\n", rc);
3891                 goto free_userptr;
3892         }
3893
3894         patched_cb_handle >>= PAGE_SHIFT;
3895         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3896                                 (u32) patched_cb_handle);
3897         /* hl_cb_get should never fail here so use kernel WARN */
3898         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3899                         (u32) patched_cb_handle);
3900         if (!parser->patched_cb) {
3901                 rc = -EFAULT;
3902                 goto out;
3903         }
3904
3905         rc = goya_patch_cb(hdev, parser);
3906
3907         if (rc)
3908                 hl_cb_put(parser->patched_cb);
3909
3910 out:
3911         /*
3912          * Always call cb destroy here because we still have 1 reference
3913          * to it from the earlier cb_get call. After the job completes,
3914          * cb_put will release it, but here we want to remove it from the
3915          * idr
3916          */
3917         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3918                                 patched_cb_handle << PAGE_SHIFT);
3919
3920 free_userptr:
3921         if (rc)
3922                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
3923         return rc;
3924 }
3925
3926 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3927                                         struct hl_cs_parser *parser)
3928 {
3929         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3930         struct goya_device *goya = hdev->asic_specific;
3931
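        /*
         * With the MMU enabled, the CB address is a device virtual address
         * that the MMU itself validates, so no range check is needed here.
         */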
3932         if (goya->hw_cap_initialized & HW_CAP_MMU)
3933                 return 0;
3934
3935         /* For internal queue jobs, just check if CB address is valid */
3936         if (hl_mem_area_inside_range(
3937                         (u64) (uintptr_t) parser->user_cb,
3938                         parser->user_cb_size,
3939                         asic_prop->sram_user_base_address,
3940                         asic_prop->sram_end_address))
3941                 return 0;
3942
3943         if (hl_mem_area_inside_range(
3944                         (u64) (uintptr_t) parser->user_cb,
3945                         parser->user_cb_size,
3946                         asic_prop->dram_user_base_address,
3947                         asic_prop->dram_end_address))
3948                 return 0;
3949
3950         dev_err(hdev->dev,
3951                 "Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
3952                 parser->user_cb, parser->user_cb_size);
3953
3954         return -EFAULT;
3955 }
3956
3957 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3958 {
3959         struct goya_device *goya = hdev->asic_specific;
3960
3961         if (parser->queue_type == QUEUE_TYPE_INT)
3962                 return goya_parse_cb_no_ext_queue(hdev, parser);
3963
3964         if (goya->hw_cap_initialized & HW_CAP_MMU)
3965                 return goya_parse_cb_mmu(hdev, parser);
3966
3967         return goya_parse_cb_no_mmu(hdev, parser);
3968 }
3969
3970 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
3971                                 u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
3972 {
3973         struct packet_msg_prot *cq_pkt;
3974         u32 tmp;
3975
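        /*
         * Fill the two MSG_PROT packets reserved at the end of the CB: the
         * first writes cq_val to the completion queue address, the second
         * rings the MSI-X doorbell to raise the interrupt.
         */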
3976         cq_pkt = (struct packet_msg_prot *) (uintptr_t)
3977                 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
3978
3979         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3980                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3981                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3982         cq_pkt->ctl = cpu_to_le32(tmp);
3983         cq_pkt->value = cpu_to_le32(cq_val);
3984         cq_pkt->addr = cpu_to_le64(cq_addr);
3985
3986         cq_pkt++;
3987
3988         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3989                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3990         cq_pkt->ctl = cpu_to_le32(tmp);
3991         cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3992         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3993 }
3994
3995 void goya_update_eq_ci(struct hl_device *hdev, u32 val)
3996 {
3997         WREG32(mmCPU_EQ_CI, val);
3998 }
3999
4000 void goya_restore_phase_topology(struct hl_device *hdev)
4001 {
4002
4003 }
4004
4005 static void goya_clear_sm_regs(struct hl_device *hdev)
4006 {
4007         int i, num_of_sob_in_longs, num_of_mon_in_longs;
4008
4009         num_of_sob_in_longs =
4010                 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4011
4012         num_of_mon_in_longs =
4013                 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4014
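        /*
         * Despite their names, the two values above are the byte spans of
         * the SOB and monitor register blocks; each loop iteration clears
         * one 32-bit register, stepping 4 bytes at a time.
         */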
4015         for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4016                 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4017
4018         for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4019                 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4020
4021         /* Flush all WREG to prevent race */
4022         i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4023 }
4024
4025 /*
4026  * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
4027  *                       address.
4028  *
4029  * @hdev:       pointer to hl_device structure
4030  * @addr:       device or host mapped address
4031  * @val:        returned value
4032  *
4033  * In case of DDR address that is not mapped into the default aperture that
4034  * the DDR bar exposes, the function will configure the iATU so that the DDR
4035  * bar will be positioned at a base address that allows reading from the
4036  * required address. Configuring the iATU during normal operation can
4037  * lead to undefined behavior and therefore should be done with extreme care.
4038  *
4039  */
4040 static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4041 {
4042         struct asic_fixed_properties *prop = &hdev->asic_prop;
4043         u64 ddr_bar_addr;
4044         int rc = 0;
4045
4046         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4047                 *val = RREG32(addr - CFG_BASE);
4048
4049         } else if ((addr >= SRAM_BASE_ADDR) &&
4050                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4051
4052                 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4053                                 (addr - SRAM_BASE_ADDR));
4054
4055         } else if ((addr >= DRAM_PHYS_BASE) &&
4056                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4057
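                /*
                 * Align the address down to the DDR BAR size, temporarily
                 * move the BAR there via the iATU, read through it, then
                 * restore the previous BAR base.
                 */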
4058                 u64 bar_base_addr = DRAM_PHYS_BASE +
4059                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4060
4061                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4062                 if (ddr_bar_addr != U64_MAX) {
4063                         *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4064                                                 (addr - bar_base_addr));
4065
4066                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4067                                                         ddr_bar_addr);
4068                 }
4069                 if (ddr_bar_addr == U64_MAX)
4070                         rc = -EIO;
4071
4072         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4073                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4074
4075         } else {
4076                 rc = -EFAULT;
4077         }
4078
4079         return rc;
4080 }
4081
4082 /*
4083  * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4084  *                        address.
4085  *
4086  * @hdev:       pointer to hl_device structure
4087  * @addr:       device or host mapped address
4088  * @val:        value to write
4089  *
4090  * In case of DDR address that is not mapped into the default aperture that
4091  * the DDR bar exposes, the function will configure the iATU so that the DDR
4092  * bar will be positioned at a base address that allows writing to the
4093  * required address. Configuring the iATU during normal operation can
4094  * lead to undefined behavior and therefore should be done with extreme care.
4095  *
4096  */
4097 static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4098 {
4099         struct asic_fixed_properties *prop = &hdev->asic_prop;
4100         u64 ddr_bar_addr;
4101         int rc = 0;
4102
4103         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4104                 WREG32(addr - CFG_BASE, val);
4105
4106         } else if ((addr >= SRAM_BASE_ADDR) &&
4107                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4108
4109                 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4110                                         (addr - SRAM_BASE_ADDR));
4111
4112         } else if ((addr >= DRAM_PHYS_BASE) &&
4113                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4114
4115                 u64 bar_base_addr = DRAM_PHYS_BASE +
4116                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4117
4118                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4119                 if (ddr_bar_addr != U64_MAX) {
4120                         writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4121                                                 (addr - bar_base_addr));
4122
4123                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4124                                                         ddr_bar_addr);
4125                 }
4126                 if (ddr_bar_addr == U64_MAX)
4127                         rc = -EIO;
4128
4129         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4130                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4131
4132         } else {
4133                 rc = -EFAULT;
4134         }
4135
4136         return rc;
4137 }
4138
4139 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4140 {
4141         struct goya_device *goya = hdev->asic_specific;
4142
4143         if (hdev->hard_reset_pending)
4144                 return U64_MAX;
4145
4146         return readq(hdev->pcie_bar[DDR_BAR_ID] +
4147                         (addr - goya->ddr_bar_cur_addr));
4148 }
4149
4150 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4151 {
4152         struct goya_device *goya = hdev->asic_specific;
4153
4154         if (hdev->hard_reset_pending)
4155                 return;
4156
4157         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4158                         (addr - goya->ddr_bar_cur_addr));
4159 }
4160
4161 static const char *_goya_get_event_desc(u16 event_type)
4162 {
4163         switch (event_type) {
4164         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4165                 return "PCIe_if";
4166         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4167         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4168         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4169         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4170         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4171         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4172         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4173         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4174                 return "TPC%d_ecc";
4175         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4176                 return "MME_ecc";
4177         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4178                 return "MME_ecc_ext";
4179         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4180                 return "MMU_ecc";
4181         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4182                 return "DMA_macro";
4183         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4184                 return "DMA_ecc";
4185         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4186                 return "CPU_if_ecc";
4187         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4188                 return "PSOC_mem";
4189         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4190                 return "PSOC_coresight";
4191         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4192                 return "SRAM%d";
4193         case GOYA_ASYNC_EVENT_ID_GIC500:
4194                 return "GIC500";
4195         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4196                 return "PLL%d";
4197         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4198                 return "AXI_ecc";
4199         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4200                 return "L2_ram_ecc";
4201         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4202                 return "PSOC_gpio_05_sw_reset";
4203         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4204                 return "PSOC_gpio_10_vrhot_icrit";
4205         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4206                 return "PCIe_dec";
4207         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4208         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4209         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4210         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4211         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4212         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4213         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4214         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4215                 return "TPC%d_dec";
4216         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4217                 return "MME_wacs";
4218         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4219                 return "MME_wacsd";
4220         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4221                 return "CPU_axi_splitter";
4222         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4223                 return "PSOC_axi_dec";
4224         case GOYA_ASYNC_EVENT_ID_PSOC:
4225                 return "PSOC";
4226         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4227         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4228         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4229         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4230         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4231         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4232         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4233         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4234                 return "TPC%d_krn_err";
4235         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4236                 return "TPC%d_cq";
4237         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4238                 return "TPC%d_qm";
4239         case GOYA_ASYNC_EVENT_ID_MME_QM:
4240                 return "MME_qm";
4241         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4242                 return "MME_cq";
4243         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4244                 return "DMA%d_qm";
4245         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4246                 return "DMA%d_ch";
4247         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4248         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4249         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4250         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4251         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4252         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4253         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4254         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4255                 return "TPC%d_bmon_spmu";
4256         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4257                 return "DMA_bm_ch%d";
4258         default:
4259                 return "N/A";
4260         }
4261 }
4262
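/*
 * goya_get_event_desc - compose a printable event description
 *
 * For ranged events, _goya_get_event_desc() returns a format string with a
 * %d placeholder and the engine index is recovered from the distance of the
 * event ID from the first ID in its group. The divisor matches the stride
 * between consecutive IDs in that group, e.g. TPC ECC IDs are 3 apart, so
 * GOYA_ASYNC_EVENT_ID_TPC3_ECC gives index (TPC3_ECC - TPC0_ECC) / 3 = 3
 * and the description "TPC3_ecc".
 */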
4263 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4264 {
4265         u8 index;
4266
4267         switch (event_type) {
4268         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4269         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4270         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4271         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4272         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4273         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4274         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4275         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4276                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4277                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4278                 break;
4279         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4280                 index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4281                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4282                 break;
4283         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4284                 index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4285                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4286                 break;
4287         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4288         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4289         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4290         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4291         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4292         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4293         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4294         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4295                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4296                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4297                 break;
4298         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4299         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4300         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4301         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4302         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4303         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4304         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4305         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4306                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4307                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4308                 break;
4309         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4310                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4311                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4312                 break;
4313         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4314                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4315                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4316                 break;
4317         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4318                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4319                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4320                 break;
4321         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4322                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4323                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4324                 break;
4325         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4326         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4327         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4328         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4329         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4330         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4331         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4332         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4333                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4334                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4335                 break;
4336         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4337                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4338                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4339                 break;
4340         default:
4341                 snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4342                 break;
4343         }
4344 }
4345
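/*
 * goya_print_razwi_info - report captured RAZWI events
 *
 * RAZWI is an illegal bus transaction (the read returns zero, the write is
 * ignored). The DMA macro latches such accesses per bus and direction; each
 * valid bit is cleared after reporting so the next violation can be
 * captured. LBW/HBW are the low/high bandwidth buses.
 */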
4346 static void goya_print_razwi_info(struct hl_device *hdev)
4347 {
4348         if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4349                 dev_err(hdev->dev, "Illegal write to LBW\n");
4350                 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4351         }
4352
4353         if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4354                 dev_err(hdev->dev, "Illegal read from LBW\n");
4355                 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4356         }
4357
4358         if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4359                 dev_err(hdev->dev, "Illegal write to HBW\n");
4360                 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4361         }
4362
4363         if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4364                 dev_err(hdev->dev, "Illegal read from HBW\n");
4365                 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4366         }
4367 }
4368
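/*
 * goya_print_mmu_error_info - report a captured MMU page fault
 *
 * The capture register holds a valid bit plus bits 49:32 of the faulting
 * virtual address, while the lower 32 bits come from a separate register.
 * The capture register is cleared afterwards to re-arm it for the next
 * fault.
 */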
4369 static void goya_print_mmu_error_info(struct hl_device *hdev)
4370 {
4371         struct goya_device *goya = hdev->asic_specific;
4372         u64 addr;
4373         u32 val;
4374
4375         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4376                 return;
4377
4378         val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4379         if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4380                 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4381                 addr <<= 32;
4382                 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4383
4384                 dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
4385
4386                 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4387         }
4388 }
4389
4390 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4391                                 bool razwi)
4392 {
4393         char desc[GOYA_MAX_STRING_LEN] = "";
4394
4395         goya_get_event_desc(event_type, desc, sizeof(desc));
4396         dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4397                 event_type, desc);
4398
4399         if (razwi) {
4400                 goya_print_razwi_info(hdev);
4401                 goya_print_mmu_error_info(hdev);
4402         }
4403 }
4404
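/*
 * goya_unmask_irq_arr - ask ArmCP to unmask an array of RAZWI IRQs
 *
 * The packet is a fixed header followed by a variable-length array of
 * __le32 event IDs, allocated as a single buffer and padded to an 8-byte
 * boundary.
 */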
4405 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4406                 size_t irq_arr_size)
4407 {
4408         struct armcp_unmask_irq_arr_packet *pkt;
4409         size_t total_pkt_size;
4410         long result;
4411         int rc;
4412         int irq_num_entries, irq_arr_index;
4413         __le32 *goya_irq_arr;
4414
4415         total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
4416                         irq_arr_size;
4417
4418         /* data should be aligned to 8 bytes so that ArmCP can copy it */
4419         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
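        /* e.g. a 58-byte packet is padded to 64 bytes */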
4420
4421         /* total_pkt_size is cast to u16 later on */
4422         if (total_pkt_size > USHRT_MAX) {
4423                 dev_err(hdev->dev, "too many elements in IRQ array\n");
4424                 return -EINVAL;
4425         }
4426
4427         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4428         if (!pkt)
4429                 return -ENOMEM;
4430
4431         irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4432         pkt->length = cpu_to_le32(irq_num_entries);
4433
4434         /* We must perform any necessary endianness conversion on the irq
4435          * array being passed to the goya hardware
4436          */
4437         for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4438                         irq_arr_index < irq_num_entries ; irq_arr_index++)
4439                 goya_irq_arr[irq_arr_index] =
4440                                 cpu_to_le32(irq_arr[irq_arr_index]);
4441
4442         pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4443                                                 ARMCP_PKT_CTL_OPCODE_SHIFT);
4444
4445         rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
4446                         HL_DEVICE_TIMEOUT_USEC, &result);
4447
4448         if (rc)
4449                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4450
4451         kfree(pkt);
4452
4453         return rc;
4454 }
4455
4456 static int goya_soft_reset_late_init(struct hl_device *hdev)
4457 {
4458         /*
4459          * Unmask all IRQs since some could have been received
4460          * during the soft reset
4461          */
4462         return goya_unmask_irq_arr(hdev, goya_all_events,
4463                                         sizeof(goya_all_events));
4464 }
4465
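/*
 * goya_unmask_irq - single-event variant of goya_unmask_irq_arr()
 *
 * Sends one ARMCP_PACKET_UNMASK_RAZWI_IRQ packet with the event type in the
 * packet's value field.
 */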
4466 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4467 {
4468         struct armcp_packet pkt;
4469         long result;
4470         int rc;
4471
4472         memset(&pkt, 0, sizeof(pkt));
4473
4474         pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
4475                                 ARMCP_PKT_CTL_OPCODE_SHIFT);
4476         pkt.value = cpu_to_le64(event_type);
4477
4478         rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4479                         HL_DEVICE_TIMEOUT_USEC, &result);
4480
4481         if (rc)
4482                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4483
4484         return rc;
4485 }
4486
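/*
 * goya_handle_eqe - handle a single event queue entry
 *
 * The event type is taken from the entry's ctl field and both the current
 * and the aggregate statistics are updated. Fatal errors (ECC and friends)
 * trigger a device reset, engine errors that may carry RAZWI/MMU
 * information are printed together with that info and then unmasked, and
 * informational events are only printed and unmasked.
 */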
4487 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4488 {
4489         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4490         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4491                                 >> EQ_CTL_EVENT_TYPE_SHIFT);
4492         struct goya_device *goya = hdev->asic_specific;
4493
4494         goya->events_stat[event_type]++;
4495         goya->events_stat_aggregate[event_type]++;
4496
4497         switch (event_type) {
4498         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4499         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4500         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4501         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4502         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4503         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4504         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4505         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4506         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4507         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4508         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4509         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4510         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4511         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4512         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4513         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4514         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4515         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4516         case GOYA_ASYNC_EVENT_ID_GIC500:
4517         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4518         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4519         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4520         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4521                 goya_print_irq_info(hdev, event_type, false);
4522                 hl_device_reset(hdev, true, false);
4523                 break;
4524
4525         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4526         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4527         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4528         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4529         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4530         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4531         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4532         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4533         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4534         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4535         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4536         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4537         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4538         case GOYA_ASYNC_EVENT_ID_PSOC:
4539         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4540         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4541         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4542         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4543         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4544         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4545         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4546         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4547         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4548         case GOYA_ASYNC_EVENT_ID_MME_QM:
4549         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4550         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4551         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4552                 goya_print_irq_info(hdev, event_type, true);
4553                 goya_unmask_irq(hdev, event_type);
4554                 break;
4555
4556         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4557         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4558         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4559         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4560         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4561         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4562         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4563         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4564         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4565         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4566                 goya_print_irq_info(hdev, event_type, false);
4567                 goya_unmask_irq(hdev, event_type);
4568                 break;
4569
4570         default:
4571                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4572                                 event_type);
4573                 break;
4574         }
4575 }
4576
4577 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4578 {
4579         struct goya_device *goya = hdev->asic_specific;
4580
4581         if (aggregate) {
4582                 *size = (u32) sizeof(goya->events_stat_aggregate);
4583                 return goya->events_stat_aggregate;
4584         }
4585
4586         *size = (u32) sizeof(goya->events_stat);
4587         return goya->events_stat;
4588 }
4589
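/*
 * goya_memset_device_memory - fill a device memory range with a value
 *
 * Builds a kernel CB of LIN_DMA packets with the MEMSET bit set, so the
 * source address field carries the 64-bit fill value. A single packet can
 * cover at most 2GB, hence the range is split into SZ_2G chunks. The CB is
 * then executed synchronously on QMAN0 (DMA queue 0).
 */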
4590 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4591                                 u64 val, bool is_dram)
4592 {
4593         struct packet_lin_dma *lin_dma_pkt;
4594         struct hl_cs_job *job;
4595         u32 cb_size, ctl;
4596         struct hl_cb *cb;
4597         int rc, lin_dma_pkts_cnt;
4598
4599         lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4600         cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4601                                                 sizeof(struct packet_msg_prot);
4602         cb = hl_cb_kernel_create(hdev, cb_size);
4603         if (!cb)
4604                 return -ENOMEM;
4605
4606         lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4607
4608         do {
4609                 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4610
4611                 ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4612                                 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4613                                 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4614                                 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4615                                 (1 << GOYA_PKT_CTL_MB_SHIFT));
4616                 ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4617                                 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4618                 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4619
4620                 lin_dma_pkt->src_addr = cpu_to_le64(val);
4621                 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4622                 if (lin_dma_pkts_cnt > 1)
4623                         lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4624                 else
4625                         lin_dma_pkt->tsize = cpu_to_le32(size);
4626
4627                 size -= SZ_2G;
4628                 addr += SZ_2G;
4629                 lin_dma_pkt++;
4630         } while (--lin_dma_pkts_cnt);
4631
4632         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4633         if (!job) {
4634                 dev_err(hdev->dev, "Failed to allocate a new job\n");
4635                 rc = -ENOMEM;
4636                 goto release_cb;
4637         }
4638
4639         job->id = 0;
4640         job->user_cb = cb;
4641         job->user_cb->cs_cnt++;
4642         job->user_cb_size = cb_size;
4643         job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4644         job->patched_cb = job->user_cb;
4645         job->job_cb_size = job->user_cb_size;
4646
4647         hl_debugfs_add_job(hdev, job);
4648
4649         rc = goya_send_job_on_qman0(hdev, job);
4650
4651         hl_cb_put(job->patched_cb);
4652
4653         hl_debugfs_remove_job(hdev, job);
4654         kfree(job);
4655         cb->cs_cnt--;
4656
4657 release_cb:
4658         hl_cb_put(cb);
4659         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4660
4661         return rc;
4662 }
4663
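/*
 * goya_context_switch - prepare the device for a new user context
 *
 * Scrubs the SRAM (only a small window on PLDM), restores the per-channel
 * DMA write-completion addresses, which the user is allowed to modify, to
 * their default sync objects, re-programs the MMU registers with the new
 * ASID and clears the sync manager registers.
 */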
4664 int goya_context_switch(struct hl_device *hdev, u32 asid)
4665 {
4666         struct asic_fixed_properties *prop = &hdev->asic_prop;
4667         u64 addr = prop->sram_base_address, sob_addr;
4668         u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4669         u64 val = 0x7777777777777777ull;
4670         int rc, dma_id;
4671         u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4672                                         mmDMA_CH_0_WR_COMP_ADDR_LO;
4673
4674         rc = goya_memset_device_memory(hdev, addr, size, val, false);
4675         if (rc) {
4676                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4677                 return rc;
4678         }
4679
4680         /* we need to reset registers that the user is allowed to change */
4681         sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4682         WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4683
4684         for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4685                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4686                                                         (dma_id - 1) * 4;
4687                 WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4688                                                 lower_32_bits(sob_addr));
4689         }
4690
4691         WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4692
4693         goya_mmu_prepare(hdev, asid);
4694
4695         goya_clear_sm_regs(hdev);
4696
4697         return 0;
4698 }
4699
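/*
 * goya_mmu_clear_pgt_range - zero the MMU page tables region in DRAM,
 * including the DRAM default page and the cache management area.
 */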
4700 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4701 {
4702         struct asic_fixed_properties *prop = &hdev->asic_prop;
4703         struct goya_device *goya = hdev->asic_specific;
4704         u64 addr = prop->mmu_pgt_addr;
4705         u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4706                         MMU_CACHE_MNG_SIZE;
4707
4708         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4709                 return 0;
4710
4711         return goya_memset_device_memory(hdev, addr, size, 0, true);
4712 }
4713
4714 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4715 {
4716         struct goya_device *goya = hdev->asic_specific;
4717         u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4718         u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4719         u64 val = 0x9999999999999999ull;
4720
4721         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4722                 return 0;
4723
4724         return goya_memset_device_memory(hdev, addr, size, val, true);
4725 }
4726
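/*
 * goya_mmu_add_mappings_for_device_cpu - create MMU mappings for the
 * device CPU
 *
 * Identity-maps the F/W image region in DRAM with 2MB pages and maps the
 * CPU-accessible DMA region at VA_CPU_ACCESSIBLE_MEM_ADDR, using a single
 * 2MB page when its bus address is 2MB aligned and 4KB pages otherwise.
 * Finally, the CPU interface AXI user bits are overridden with the kernel
 * ASID so the device CPU goes through these mappings. On failure, whatever
 * was mapped so far is unmapped in reverse order.
 */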
4727 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4728 {
4729         struct asic_fixed_properties *prop = &hdev->asic_prop;
4730         struct goya_device *goya = hdev->asic_specific;
4731         s64 off, cpu_off;
4732         int rc;
4733
4734         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4735                 return 0;
4736
4737         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4738                 rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
4739                                 prop->dram_base_address + off, PAGE_SIZE_2MB);
4740                 if (rc) {
4741                         dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4742                                 prop->dram_base_address + off);
4743                         goto unmap;
4744                 }
4745         }
4746
4747         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4748                 rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4749                         hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
4750
4751                 if (rc) {
4752                         dev_err(hdev->dev,
4753                                 "Map failed for CPU accessible memory\n");
4754                         off -= PAGE_SIZE_2MB;
4755                         goto unmap;
4756                 }
4757         } else {
4758                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4759                         rc = hl_mmu_map(hdev->kernel_ctx,
4760                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4761                                 hdev->cpu_accessible_dma_address + cpu_off,
4762                                 PAGE_SIZE_4KB);
4763                         if (rc) {
4764                                 dev_err(hdev->dev,
4765                                         "Map failed for CPU accessible memory\n");
4766                                 cpu_off -= PAGE_SIZE_4KB;
4767                                 goto unmap_cpu;
4768                         }
4769                 }
4770         }
4771
4772         goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4773         goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4774         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4775         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4776
4777         /* Make sure configuration is flushed to device */
4778         RREG32(mmCPU_IF_AWUSER_OVR_EN);
4779
4780         goya->device_cpu_mmu_mappings_done = true;
4781
4782         return 0;
4783
4784 unmap_cpu:
4785         for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4786                 if (hl_mmu_unmap(hdev->kernel_ctx,
4787                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4788                                 PAGE_SIZE_4KB))
4789                         dev_warn_ratelimited(hdev->dev,
4790                                 "failed to unmap address 0x%llx\n",
4791                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4792 unmap:
4793         for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4794                 if (hl_mmu_unmap(hdev->kernel_ctx,
4795                                 prop->dram_base_address + off, PAGE_SIZE_2MB))
4796                         dev_warn_ratelimited(hdev->dev,
4797                                 "failed to unmap address 0x%llx\n",
4798                                 prop->dram_base_address + off);
4799
4800         return rc;
4801 }
4802
4803 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4804 {
4805         struct asic_fixed_properties *prop = &hdev->asic_prop;
4806         struct goya_device *goya = hdev->asic_specific;
4807         u32 off, cpu_off;
4808
4809         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4810                 return;
4811
4812         if (!goya->device_cpu_mmu_mappings_done)
4813                 return;
4814
4815         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4816         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4817
4818         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4819                 if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4820                                 PAGE_SIZE_2MB))
4821                         dev_warn(hdev->dev,
4822                                 "Failed to unmap CPU accessible memory\n");
4823         } else {
4824                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
4825                         if (hl_mmu_unmap(hdev->kernel_ctx,
4826                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4827                                         PAGE_SIZE_4KB))
4828                                 dev_warn_ratelimited(hdev->dev,
4829                                         "failed to unmap address 0x%llx\n",
4830                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4831         }
4832
4833         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
4834                 if (hl_mmu_unmap(hdev->kernel_ctx,
4835                                 prop->dram_base_address + off, PAGE_SIZE_2MB))
4836                         dev_warn_ratelimited(hdev->dev,
4837                                         "Failed to unmap address 0x%llx\n",
4838                                         prop->dram_base_address + off);
4839
4840         goya->device_cpu_mmu_mappings_done = false;
4841 }
4842
4843 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4844 {
4845         struct goya_device *goya = hdev->asic_specific;
4846         int i;
4847
4848         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4849                 return;
4850
4851         if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4852                 WARN(1, "asid %u is too big\n", asid);
4853                 return;
4854         }
4855
4856         /* zero the MMBP and ASID bits and then set the ASID */
4857         for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4858                 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
4859 }
4860
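/*
 * goya_mmu_invalidate_cache - invalidate the entire MMU cache
 *
 * Kicks a full L0 & L1 invalidation by writing mmSTLB_INV_ALL_START and
 * polls that register until the H/W clears it. Goya has no cheaper L1-only
 * flavor, so non-hard requests are a no-op.
 */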
4861 static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
4862                                         u32 flags)
4863 {
4864         struct goya_device *goya = hdev->asic_specific;
4865         u32 status, timeout_usec;
4866         int rc;
4867
4868         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
4869                 hdev->hard_reset_pending)
4870                 return;
4871
4872         /* no need for L1-only invalidation in Goya */
4873         if (!is_hard)
4874                 return;
4875
4876         if (hdev->pldm)
4877                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4878         else
4879                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4880
4881         mutex_lock(&hdev->mmu_cache_lock);
4882
4883         /* L0 & L1 invalidation */
4884         WREG32(mmSTLB_INV_ALL_START, 1);
4885
4886         rc = hl_poll_timeout(
4887                 hdev,
4888                 mmSTLB_INV_ALL_START,
4889                 status,
4890                 !status,
4891                 1000,
4892                 timeout_usec);
4893
4894         mutex_unlock(&hdev->mmu_cache_lock);
4895
4896         if (rc)
4897                 dev_notice_ratelimited(hdev->dev,
4898                         "Timeout when waiting for MMU cache invalidation\n");
4899 }
4900
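/*
 * goya_mmu_invalidate_cache_range - invalidate the MMU cache for a VA range
 *
 * Uses the STLB cache-invalidation producer/consumer pair: the 8-bit
 * producer index is advanced (wrapping at 256) and the consumer index is
 * polled until it catches up. As the TODO below notes, this currently still
 * invalidates the entire cache rather than only the requested range.
 */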
4901 static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
4902                 bool is_hard, u32 asid, u64 va, u64 size)
4903 {
4904         struct goya_device *goya = hdev->asic_specific;
4905         u32 status, timeout_usec, inv_data, pi;
4906         int rc;
4907
4908         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
4909                 hdev->hard_reset_pending)
4910                 return;
4911
4912         /* no need for L1-only invalidation in Goya */
4913         if (!is_hard)
4914                 return;
4915
4916         if (hdev->pldm)
4917                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4918         else
4919                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4920
4921         mutex_lock(&hdev->mmu_cache_lock);
4922
4923         /*
4924          * TODO: currently invalidate entire L0 & L1 as in regular hard
4925          * invalidation. Need to apply invalidation of specific cache lines with
4926          * mask of ASID & VA & size.
4927          * Note that L1 will be flushed entirely in any case.
4928          */
4929
4930         /* L0 & L1 invalidation */
4931         inv_data = RREG32(mmSTLB_CACHE_INV);
4932         /* PI (producer index) is 8 bits wide, hence the wrap at 256 */
4933         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
4934         WREG32(mmSTLB_CACHE_INV,
4935                         (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
4936
4937         rc = hl_poll_timeout(
4938                 hdev,
4939                 mmSTLB_INV_CONSUMER_INDEX,
4940                 status,
4941                 status == pi,
4942                 1000,
4943                 timeout_usec);
4944
4945         mutex_unlock(&hdev->mmu_cache_lock);
4946
4947         if (rc)
4948                 dev_notice_ratelimited(hdev->dev,
4949                         "Timeout when waiting for MMU cache invalidation\n");
4950 }
4951
4952 int goya_send_heartbeat(struct hl_device *hdev)
4953 {
4954         struct goya_device *goya = hdev->asic_specific;
4955
4956         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4957                 return 0;
4958
4959         return hl_fw_send_heartbeat(hdev);
4960 }
4961
4962 int goya_armcp_info_get(struct hl_device *hdev)
4963 {
4964         struct goya_device *goya = hdev->asic_specific;
4965         struct asic_fixed_properties *prop = &hdev->asic_prop;
4966         u64 dram_size;
4967         int rc;
4968
4969         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4970                 return 0;
4971
4972         rc = hl_fw_armcp_info_get(hdev);
4973         if (rc)
4974                 return rc;
4975
4976         dram_size = le64_to_cpu(prop->armcp_info.dram_size);
4977         if (dram_size) {
4978                 if ((!is_power_of_2(dram_size)) ||
4979                                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
4980                         dev_err(hdev->dev,
4981                                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
4982                                 dram_size);
4983                         dram_size = DRAM_PHYS_DEFAULT_SIZE;
4984                 }
4985
4986                 prop->dram_size = dram_size;
4987                 prop->dram_end_address = prop->dram_base_address + dram_size;
4988         }
4989
4990         if (!strlen(prop->armcp_info.card_name))
4991                 strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
4992                                 CARD_NAME_MAX_LEN);
4993
4994         return 0;
4995 }
4996
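/*
 * goya_is_device_idle - check whether all engines are idle
 *
 * Reads the QM/CMDQ/core status registers of every DMA channel, every TPC
 * and the MME. A busy engine sets its bit in the optional *mask, and when
 * a seq_file is supplied (debugfs) a per-engine status table is printed.
 */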
4997 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
4998                                 struct seq_file *s)
4999 {
5000         const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5001         const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5002         u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5003                 mme_arch_sts;
5004         bool is_idle = true, is_eng_idle;
5005         u64 offset;
5006         int i;
5007
5008         if (s)
5009                 seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5010                                 "---  -------  ------------  -------------\n");
5011
5012         offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5013
5014         for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5015                 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5016                 dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5017                 is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5018                                 IS_DMA_IDLE(dma_core_sts0);
5019                 is_idle &= is_eng_idle;
5020
5021                 if (mask)
5022                         *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
5023                 if (s)
5024                         seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5025                                         qm_glbl_sts0, dma_core_sts0);
5026         }
5027
5028         if (s)
5029                 seq_puts(s,
5030                         "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5031                         "---  -------  ------------  --------------  ----------\n");
5032
5033         offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5034
5035         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5036                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5037                 cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5038                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5039                 is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5040                                 IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5041                                 IS_TPC_IDLE(tpc_cfg_sts);
5042                 is_idle &= is_eng_idle;
5043
5044                 if (mask)
5045                         *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
5046                 if (s)
5047                         seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5048                                 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5049         }
5050
5051         if (s)
5052                 seq_puts(s,
5053                         "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5054                         "---  -------  ------------  --------------  -----------\n");
5055
5056         qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5057         cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5058         mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5059         is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5060                         IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5061                         IS_MME_IDLE(mme_arch_sts);
5062         is_idle &= is_eng_idle;
5063
5064         if (mask)
5065                 *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
5066         if (s) {
5067                 seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5068                                 cmdq_glbl_sts0, mme_arch_sts);
5069                 seq_puts(s, "\n");
5070         }
5071
5072         return is_idle;
5073 }
5074
5075 static void goya_hw_queues_lock(struct hl_device *hdev)
5076 {
5077         struct goya_device *goya = hdev->asic_specific;
5078
5079         spin_lock(&goya->hw_queues_lock);
5080 }
5081
5082 static void goya_hw_queues_unlock(struct hl_device *hdev)
5083 {
5084         struct goya_device *goya = hdev->asic_specific;
5085
5086         spin_unlock(&goya->hw_queues_lock);
5087 }
5088
5089 static u32 goya_get_pci_id(struct hl_device *hdev)
5090 {
5091         return hdev->pdev->device;
5092 }
5093
5094 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5095                                 size_t max_size)
5096 {
5097         struct goya_device *goya = hdev->asic_specific;
5098
5099         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5100                 return 0;
5101
5102         return hl_fw_get_eeprom_data(hdev, data, max_size);
5103 }
5104
5105 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5106 {
5107         return RREG32(mmHW_STATE);
5108 }
5109
5110 static const struct hl_asic_funcs goya_funcs = {
5111         .early_init = goya_early_init,
5112         .early_fini = goya_early_fini,
5113         .late_init = goya_late_init,
5114         .late_fini = goya_late_fini,
5115         .sw_init = goya_sw_init,
5116         .sw_fini = goya_sw_fini,
5117         .hw_init = goya_hw_init,
5118         .hw_fini = goya_hw_fini,
5119         .halt_engines = goya_halt_engines,
5120         .suspend = goya_suspend,
5121         .resume = goya_resume,
5122         .cb_mmap = goya_cb_mmap,
5123         .ring_doorbell = goya_ring_doorbell,
5124         .pqe_write = goya_pqe_write,
5125         .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5126         .asic_dma_free_coherent = goya_dma_free_coherent,
5127         .get_int_queue_base = goya_get_int_queue_base,
5128         .test_queues = goya_test_queues,
5129         .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5130         .asic_dma_pool_free = goya_dma_pool_free,
5131         .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5132         .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5133         .hl_dma_unmap_sg = goya_dma_unmap_sg,
5134         .cs_parser = goya_cs_parser,
5135         .asic_dma_map_sg = goya_dma_map_sg,
5136         .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5137         .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5138         .update_eq_ci = goya_update_eq_ci,
5139         .context_switch = goya_context_switch,
5140         .restore_phase_topology = goya_restore_phase_topology,
5141         .debugfs_read32 = goya_debugfs_read32,
5142         .debugfs_write32 = goya_debugfs_write32,
5143         .add_device_attr = goya_add_device_attr,
5144         .handle_eqe = goya_handle_eqe,
5145         .set_pll_profile = goya_set_pll_profile,
5146         .get_events_stat = goya_get_events_stat,
5147         .read_pte = goya_read_pte,
5148         .write_pte = goya_write_pte,
5149         .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5150         .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5151         .send_heartbeat = goya_send_heartbeat,
5152         .debug_coresight = goya_debug_coresight,
5153         .is_device_idle = goya_is_device_idle,
5154         .soft_reset_late_init = goya_soft_reset_late_init,
5155         .hw_queues_lock = goya_hw_queues_lock,
5156         .hw_queues_unlock = goya_hw_queues_unlock,
5157         .get_pci_id = goya_get_pci_id,
5158         .get_eeprom_data = goya_get_eeprom_data,
5159         .send_cpu_message = goya_send_cpu_message,
5160         .get_hw_state = goya_get_hw_state,
5161         .pci_bars_map = goya_pci_bars_map,
5162         .set_dram_bar_base = goya_set_ddr_bar_base,
5163         .init_iatu = goya_init_iatu,
5164         .rreg = hl_rreg,
5165         .wreg = hl_wreg,
5166         .halt_coresight = goya_halt_coresight,
5167         .get_clk_rate = goya_get_clk_rate
5168 };
5169
5170 /*
5171  * goya_set_asic_funcs - set Goya function pointers
5172  *
5173  * @hdev: pointer to hl_device structure
5174  *
5175  */
5176 void goya_set_asic_funcs(struct hl_device *hdev)
5177 {
5178         hdev->asic_funcs = &goya_funcs;
5179 }