// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
#include "include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP and DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are in SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointers
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to the MSI-X register and to kernel address space
 *       (completion queue).
 *
 * DMA is not secured but because CP is secured, the driver still needs to
 * parse the CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU-disabled
 * mode).
 *
 * DMA RR does NOT protect the host because DMA is not secured.
 *
 */
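
/*
 * A minimal sketch of how the CB-parsing rules above could be enforced
 * (hypothetical helper, not the driver's actual parser; the real checks
 * live in the command-submission parsing code):
 *
 *      switch (pkt_id) {
 *      case PACKET_WREG_32:
 *      case PACKET_MSG_PROT:
 *              return -EPERM;          // forbidden in a user DMA CB
 *      case PACKET_MSG_LONG:
 *      case PACKET_MSG_SHORT:
 *              return 0;               // allowed as-is
 *      case PACKET_LIN_DMA:
 *              return validate_dma_ptr(pkt);   // hypothetical address check
 *      default:
 *              return 0;
 *      }
 */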

#define GOYA_BOOT_FIT_FILE      "habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE      "habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM               63

#define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
#define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC  1000000         /* 1s */

#define GOYA_QMAN0_FENCE_VAL            0xD169B243

#define GOYA_MAX_STRING_LEN             20

#define GOYA_CB_POOL_CB_CNT             512
#define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
        (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
        (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
                        engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
        IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
        !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
        (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
        (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
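
/*
 * A minimal usage sketch (an assumption about how these macros compose,
 * not code taken from this file): an engine is considered idle only when
 * both its QMAN and its core report idle, e.g. for DMA channel 0:
 *
 *      qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0);
 *      dma_core_sts0 = RREG32(mmDMA_CH_0_STS0);
 *      is_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) && IS_DMA_IDLE(dma_core_sts0);
 */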

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
                "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
                "goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop)
};
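
/*
 * A hedged sketch of how a parser can walk a CB with the table above
 * (hypothetical loop, not the actual code-buffer parser): read the opcode
 * from each packet header, reject unknown opcodes, then advance by the
 * packet's fixed size.
 *
 *      while (cb_parsed_length < cb_size) {
 *              pkt_id = extract opcode from the packet header;
 *              if (pkt_id >= MAX_PACKET_ID || !goya_packet_sizes[pkt_id])
 *                      return -EINVAL;
 *              cb_parsed_length += goya_packet_sizes[pkt_id];
 *      }
 */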

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
        mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
        mmTPC0_QM_GLBL_SECURE_PROPS,
        mmTPC0_QM_GLBL_NON_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC0_CFG_ARUSER,
        mmTPC0_CFG_AWUSER,
        mmTPC1_QM_GLBL_SECURE_PROPS,
        mmTPC1_QM_GLBL_NON_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC1_CFG_ARUSER,
        mmTPC1_CFG_AWUSER,
        mmTPC2_QM_GLBL_SECURE_PROPS,
        mmTPC2_QM_GLBL_NON_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC2_CFG_ARUSER,
        mmTPC2_CFG_AWUSER,
        mmTPC3_QM_GLBL_SECURE_PROPS,
        mmTPC3_QM_GLBL_NON_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC3_CFG_ARUSER,
        mmTPC3_CFG_AWUSER,
        mmTPC4_QM_GLBL_SECURE_PROPS,
        mmTPC4_QM_GLBL_NON_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC4_CFG_ARUSER,
        mmTPC4_CFG_AWUSER,
        mmTPC5_QM_GLBL_SECURE_PROPS,
        mmTPC5_QM_GLBL_NON_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC5_CFG_ARUSER,
        mmTPC5_CFG_AWUSER,
        mmTPC6_QM_GLBL_SECURE_PROPS,
        mmTPC6_QM_GLBL_NON_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC6_CFG_ARUSER,
        mmTPC6_CFG_AWUSER,
        mmTPC7_QM_GLBL_SECURE_PROPS,
        mmTPC7_QM_GLBL_NON_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC7_CFG_ARUSER,
        mmTPC7_CFG_AWUSER,
        mmMME_QM_GLBL_SECURE_PROPS,
        mmMME_QM_GLBL_NON_SECURE_PROPS,
        mmMME_CMDQ_GLBL_SECURE_PROPS,
        mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
        mmMME_SBA_CONTROL_DATA,
        mmMME_SBB_CONTROL_DATA,
        mmMME_SBC_CONTROL_DATA,
        mmMME_WBC_CONTROL_DATA,
        mmPCIE_WRAP_PSOC_ARUSER,
        mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
        GOYA_ASYNC_EVENT_ID_PCIE_IF,
        GOYA_ASYNC_EVENT_ID_TPC0_ECC,
        GOYA_ASYNC_EVENT_ID_TPC1_ECC,
        GOYA_ASYNC_EVENT_ID_TPC2_ECC,
        GOYA_ASYNC_EVENT_ID_TPC3_ECC,
        GOYA_ASYNC_EVENT_ID_TPC4_ECC,
        GOYA_ASYNC_EVENT_ID_TPC5_ECC,
        GOYA_ASYNC_EVENT_ID_TPC6_ECC,
        GOYA_ASYNC_EVENT_ID_TPC7_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
        GOYA_ASYNC_EVENT_ID_MMU_ECC,
        GOYA_ASYNC_EVENT_ID_DMA_MACRO,
        GOYA_ASYNC_EVENT_ID_DMA_ECC,
        GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_MEM,
        GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
        GOYA_ASYNC_EVENT_ID_SRAM0,
        GOYA_ASYNC_EVENT_ID_SRAM1,
        GOYA_ASYNC_EVENT_ID_SRAM2,
        GOYA_ASYNC_EVENT_ID_SRAM3,
        GOYA_ASYNC_EVENT_ID_SRAM4,
        GOYA_ASYNC_EVENT_ID_SRAM5,
        GOYA_ASYNC_EVENT_ID_SRAM6,
        GOYA_ASYNC_EVENT_ID_SRAM7,
        GOYA_ASYNC_EVENT_ID_SRAM8,
        GOYA_ASYNC_EVENT_ID_SRAM9,
        GOYA_ASYNC_EVENT_ID_SRAM10,
        GOYA_ASYNC_EVENT_ID_SRAM11,
        GOYA_ASYNC_EVENT_ID_SRAM12,
        GOYA_ASYNC_EVENT_ID_SRAM13,
        GOYA_ASYNC_EVENT_ID_SRAM14,
        GOYA_ASYNC_EVENT_ID_SRAM15,
        GOYA_ASYNC_EVENT_ID_SRAM16,
        GOYA_ASYNC_EVENT_ID_SRAM17,
        GOYA_ASYNC_EVENT_ID_SRAM18,
        GOYA_ASYNC_EVENT_ID_SRAM19,
        GOYA_ASYNC_EVENT_ID_SRAM20,
        GOYA_ASYNC_EVENT_ID_SRAM21,
        GOYA_ASYNC_EVENT_ID_SRAM22,
        GOYA_ASYNC_EVENT_ID_SRAM23,
        GOYA_ASYNC_EVENT_ID_SRAM24,
        GOYA_ASYNC_EVENT_ID_SRAM25,
        GOYA_ASYNC_EVENT_ID_SRAM26,
        GOYA_ASYNC_EVENT_ID_SRAM27,
        GOYA_ASYNC_EVENT_ID_SRAM28,
        GOYA_ASYNC_EVENT_ID_SRAM29,
        GOYA_ASYNC_EVENT_ID_GIC500,
        GOYA_ASYNC_EVENT_ID_PLL0,
        GOYA_ASYNC_EVENT_ID_PLL1,
        GOYA_ASYNC_EVENT_ID_PLL3,
        GOYA_ASYNC_EVENT_ID_PLL4,
        GOYA_ASYNC_EVENT_ID_PLL5,
        GOYA_ASYNC_EVENT_ID_PLL6,
        GOYA_ASYNC_EVENT_ID_AXI_ECC,
        GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
        GOYA_ASYNC_EVENT_ID_PCIE_DEC,
        GOYA_ASYNC_EVENT_ID_TPC0_DEC,
        GOYA_ASYNC_EVENT_ID_TPC1_DEC,
        GOYA_ASYNC_EVENT_ID_TPC2_DEC,
        GOYA_ASYNC_EVENT_ID_TPC3_DEC,
        GOYA_ASYNC_EVENT_ID_TPC4_DEC,
        GOYA_ASYNC_EVENT_ID_TPC5_DEC,
        GOYA_ASYNC_EVENT_ID_TPC6_DEC,
        GOYA_ASYNC_EVENT_ID_TPC7_DEC,
        GOYA_ASYNC_EVENT_ID_MME_WACS,
        GOYA_ASYNC_EVENT_ID_MME_WACSD,
        GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
        GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
        GOYA_ASYNC_EVENT_ID_PSOC,
        GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC0_QM,
        GOYA_ASYNC_EVENT_ID_TPC1_QM,
        GOYA_ASYNC_EVENT_ID_TPC2_QM,
        GOYA_ASYNC_EVENT_ID_TPC3_QM,
        GOYA_ASYNC_EVENT_ID_TPC4_QM,
        GOYA_ASYNC_EVENT_ID_TPC5_QM,
        GOYA_ASYNC_EVENT_ID_TPC6_QM,
        GOYA_ASYNC_EVENT_ID_TPC7_QM,
        GOYA_ASYNC_EVENT_ID_MME_QM,
        GOYA_ASYNC_EVENT_ID_MME_CMDQ,
        GOYA_ASYNC_EVENT_ID_DMA0_QM,
        GOYA_ASYNC_EVENT_ID_DMA1_QM,
        GOYA_ASYNC_EVENT_ID_DMA2_QM,
        GOYA_ASYNC_EVENT_ID_DMA3_QM,
        GOYA_ASYNC_EVENT_ID_DMA4_QM,
        GOYA_ASYNC_EVENT_ID_DMA0_CH,
        GOYA_ASYNC_EVENT_ID_DMA1_CH,
        GOYA_ASYNC_EVENT_ID_DMA2_CH,
        GOYA_ASYNC_EVENT_ID_DMA3_CH,
        GOYA_ASYNC_EVENT_ID_DMA4_CH,
        GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
        GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
        GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
        GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
        GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

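/*
 * goya_get_fixed_properties - Fill the fixed properties of the device
 *
 * @hdev: pointer to hl_device structure
 *
 * Queue layout, as derived from the loops below: the external (EXT) queues
 * come first and are fed from host memory, followed by the driver-only CPU
 * queues and then the internal (INT) queues that are fed from the device
 * itself; any remaining slots up to HL_MAX_QUEUES are marked not available.
 */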
void goya_get_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int i;

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 1;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                prop->hw_queues_props[i].driver_only = 1;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
                        NUMBER_OF_INT_HW_QUEUES; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                prop->hw_queues_props[i].driver_only = 0;
                prop->hw_queues_props[i].requires_kernel_cb = 0;
        }

        for (; i < HL_MAX_QUEUES; i++)
                prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
        prop->sram_user_base_address = prop->sram_base_address +
                                                SRAM_USER_BASE_OFFSET;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;

        prop->dmmu.hop0_shift = HOP0_SHIFT;
        prop->dmmu.hop1_shift = HOP1_SHIFT;
        prop->dmmu.hop2_shift = HOP2_SHIFT;
        prop->dmmu.hop3_shift = HOP3_SHIFT;
        prop->dmmu.hop4_shift = HOP4_SHIFT;
        prop->dmmu.hop0_mask = HOP0_MASK;
        prop->dmmu.hop1_mask = HOP1_MASK;
        prop->dmmu.hop2_mask = HOP2_MASK;
        prop->dmmu.hop3_mask = HOP3_MASK;
        prop->dmmu.hop4_mask = HOP4_MASK;
        prop->dmmu.start_addr = VA_DDR_SPACE_START;
        prop->dmmu.end_addr = VA_DDR_SPACE_END;
        prop->dmmu.page_size = PAGE_SIZE_2MB;

        /* shifts and masks are the same in PMMU and DMMU */
        memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
        prop->pmmu.start_addr = VA_HOST_SPACE_START;
        prop->pmmu.end_addr = VA_HOST_SPACE_END;
        prop->pmmu.page_size = PAGE_SIZE_4KB;

        /* PMMU and HPMMU are the same except for the page size */
        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

        prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
        prop->high_pll = PLL_HIGH_DEFAULT;
        prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
        prop->max_power_default = MAX_POWER_DEFAULT;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;
        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

        strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
                CARD_NAME_MAX_LEN);
}

/*
 * goya_pci_bars_map - Map PCI BARs of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
        static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
        bool is_wc[3] = {false, false, true};
        int rc;

        rc = hl_pci_bars_map(hdev, name, is_wc);
        if (rc)
                return rc;

        hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
                        (CFG_BASE - SRAM_BASE_ADDR);

        return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
        struct goya_device *goya = hdev->asic_specific;
        u64 old_addr = addr;
        int rc;

        if ((goya) && (goya->ddr_bar_cur_addr == addr))
                return old_addr;

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
        if (rc)
                return U64_MAX;

        if (goya) {
                old_addr = goya->ddr_bar_cur_addr;
                goya->ddr_bar_cur_addr = addr;
        }

        return old_addr;
}
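
/*
 * A minimal usage sketch for the DDR BAR window (an assumed pattern, not
 * code from this file): move the BAR to cover the target DRAM address,
 * access the data through the mapped BAR, then restore the previous
 * window. U64_MAX indicates that moving the BAR failed.
 *
 *      old_base = goya_set_ddr_bar_base(hdev, target_addr);
 *      if (old_base == U64_MAX)
 *              return -EIO;
 *      ... access hdev->pcie_bar[DDR_BAR_ID] ...
 *      goya_set_ddr_bar_base(hdev, old_base);
 */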

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
        return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
                                HOST_PHYS_BASE, HOST_PHYS_SIZE);
}
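
/*
 * A note on the resulting address map (an assumption based on the
 * arguments above, not a full description of hl_pci_init_iatu()): the
 * inbound regions expose device memory through the BARs (SRAM/CFG and,
 * per goya_set_ddr_bar_base(), DRAM), while the outbound side lets the
 * device reach HOST_PHYS_SIZE bytes of host physical address space
 * starting at HOST_PHYS_BASE.
 */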

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        u32 val;
        int rc;

        goya_get_fixed_properties(hdev);

        /* Check BAR sizes */
        if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        SRAM_CFG_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                        SRAM_CFG_BAR_ID),
                        CFG_BAR_SIZE);
                return -ENODEV;
        }

        if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        MSIX_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                                MSIX_BAR_ID),
                        MSIX_BAR_SIZE);
                return -ENODEV;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

        rc = hl_pci_init(hdev);
        if (rc)
                return rc;

        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
                        dev_warn(hdev->dev,
                                "PCI strap is not configured correctly, PCI bus errors may occur\n");
        }

        return 0;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
        hl_pci_fini(hdev);

        return 0;
}

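/*
 * goya_mmu_prepare_reg - Configure an MMU-related register with an ASID
 *
 * @hdev: pointer to hl_device structure
 * @reg: register to configure
 * @asid: ASID to assign
 *
 * The 0x7FF mask clears the low 11 bits before the ASID is OR'ed in. The
 * assumed layout (matching the comment inside the function) is that the
 * ASID occupies the low bits and the MMU-bypass (MMBP) bit sits directly
 * above them, so both are zeroed first.
 */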
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
        /* mask to zero the MMBP and ASID bits */
        WREG32_AND(reg, ~0x7FF);
        WREG32_OR(reg, asid);
}

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_MMU))
                return;

        if (secure)
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

        RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
        prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
        prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
        prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}
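
/*
 * For reference, a classical integer-N PLL derives its output clock as
 *
 *      f_out = f_ref * NF / (NR * OD)
 *
 * possibly followed by a post-divider (the DIV_FACTOR value). This is the
 * generic textbook relation, stated here as an assumption about how the
 * cached values relate; the driver itself only stores the raw register
 * values for later use.
 */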

int goya_late_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        goya_fetch_psoc_frequency(hdev);

        rc = goya_mmu_clear_pgt_range(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to clear MMU page tables range %d\n", rc);
                return rc;
        }

        rc = goya_mmu_set_dram_default_page(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
                return rc;
        }

        rc = goya_mmu_add_mappings_for_device_cpu(hdev);
        if (rc)
                return rc;

        rc = goya_init_cpu_queues(hdev);
        if (rc)
                return rc;

        rc = goya_test_cpu_queue(hdev);
        if (rc)
                return rc;

        rc = goya_armcp_info_get(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
                return rc;
        }

        /* Now that we have the DRAM size in ASIC prop, we need to check
         * its size and configure the DMA_IF DDR wrap protection (which is
         * in the MMU block) accordingly. The value is the log2 of the DRAM
         * size.
         */
        WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

        rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI access from CPU %d\n", rc);
                return rc;
        }

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

        return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
        const struct hwmon_channel_info **channel_info_arr;
        int i = 0;

        if (!hdev->hl_chip_info->info)
                return;

        channel_info_arr = hdev->hl_chip_info->info;

        while (channel_info_arr[i]) {
                kfree(channel_info_arr[i]->config);
                kfree(channel_info_arr[i]);
                i++;
        }

        kfree(channel_info_arr);

        hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
        struct goya_device *goya;
        int rc;

        /* Allocate device structure */
        goya = kzalloc(sizeof(*goya), GFP_KERNEL);
        if (!goya)
                return -ENOMEM;

        /* according to goya_init_iatu */
        goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

        goya->mme_clk = GOYA_PLL_FREQ_LOW;
        goya->tpc_clk = GOYA_PLL_FREQ_LOW;
        goya->ic_clk = GOYA_PLL_FREQ_LOW;

        hdev->asic_specific = goya;

        /* Create DMA pool for small allocations */
        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
                        &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
        if (!hdev->dma_pool) {
                dev_err(hdev->dev, "failed to create DMA pool\n");
                rc = -ENOMEM;
                goto free_goya_device;
        }

        hdev->cpu_accessible_dma_mem =
                        hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
                                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                                        &hdev->cpu_accessible_dma_address,
                                        GFP_KERNEL | __GFP_ZERO);

        if (!hdev->cpu_accessible_dma_mem) {
                rc = -ENOMEM;
                goto free_dma_pool;
        }

        dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
                &hdev->cpu_accessible_dma_address);

        hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
        if (!hdev->cpu_accessible_dma_pool) {
                dev_err(hdev->dev,
                        "Failed to create CPU accessible DMA pool\n");
                rc = -ENOMEM;
                goto free_cpu_dma_mem;
        }

        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
                                (uintptr_t) hdev->cpu_accessible_dma_mem,
                                HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to add memory to CPU accessible DMA pool\n");
                rc = -EFAULT;
                goto free_cpu_accessible_dma_pool;
        }

        spin_lock_init(&goya->hw_queues_lock);
        hdev->supports_coresight = true;
        hdev->supports_soft_reset = true;

        return 0;

free_cpu_accessible_dma_pool:
        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);
free_dma_pool:
        dma_pool_destroy(hdev->dma_pool);
free_goya_device:
        kfree(goya);

        return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        gen_pool_destroy(hdev->cpu_accessible_dma_pool);

        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HL_CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);

        dma_pool_destroy(hdev->dma_pool);

        kfree(goya);

        return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
                dma_addr_t bus_address)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
        u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
        WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

        WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
        WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
        WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

        /* PQ has a buffer of 2 cache lines, while CQ has 8 lines */
        WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
        WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

        if (goya->hw_cap_initialized & HW_CAP_MMU)
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

        if (hdev->stop_on_err)
                dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

        WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
        WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
        u32 gic_base_lo, gic_base_hi;
        u64 sob_addr;
        u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

        if (dma_id)
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
                                (dma_id - 1) * 4;
        else
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

        WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
        WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
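
/*
 * A note on the completion write above: each channel reports write
 * completion by writing 0x80000001 to a sync object. The assumed encoding
 * is that bit 31 selects increment mode and the low bits carry the value,
 * i.e. the SOB is atomically incremented by 1 rather than overwritten.
 */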

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_hw_queue *q;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_DMA)
                return;

        q = &hdev->kernel_queues[0];

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
                q->cq_id = q->msi_vec = i;
                goya_init_dma_qman(hdev, i, q->bus_address);
                goya_init_dma_ch(hdev, i);
        }

        goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return;

        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

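/*
 * goya_stop_queue - Stop a single QMAN
 *
 * @hdev: pointer to hl_device structure
 * @cfg_reg: QMAN configuration register (CP_STOP is asserted here)
 * @cp_sts_reg: QMAN CP status register
 * @glbl_sts0_reg: QMAN global status register
 *
 * Asserts CP_STOP and then, if the CP is currently blocked on a fence,
 * waits for the fence to complete before polling for the CP to report
 * that it has stopped. A QMAN that stays stuck on the fence is treated
 * as effectively stopped.
 */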
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
        int rc;
        u32 status;

        /* use the values of TPC0 as they are all the same */

        WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

        status = RREG32(cp_sts_reg);
        if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
                rc = hl_poll_timeout(
                        hdev,
                        cp_sts_reg,
                        status,
                        !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
                        1000,
                        QMAN_FENCE_TIMEOUT_USEC);

                /* if QMAN is stuck in fence no need to check for stop */
                if (rc)
                        return 0;
        }

        rc = hl_poll_timeout(
                hdev,
                glbl_sts0_reg,
                status,
                (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
                1000,
                QMAN_STOP_TIMEOUT_USEC);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout while waiting for QMAN to stop\n");
                return -EINVAL;
        }

        return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
        int rc, retval = 0;

        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return retval;

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_0_GLBL_CFG1,
                        mmDMA_QM_0_CP_STS,
                        mmDMA_QM_0_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_1_GLBL_CFG1,
                        mmDMA_QM_1_CP_STS,
                        mmDMA_QM_1_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_2_GLBL_CFG1,
                        mmDMA_QM_2_CP_STS,
                        mmDMA_QM_2_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_3_GLBL_CFG1,
                        mmDMA_QM_3_CP_STS,
                        mmDMA_QM_3_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_4_GLBL_CFG1,
                        mmDMA_QM_4_CP_STS,
                        mmDMA_QM_4_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
                retval = -EIO;
        }

        return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Publishes the base addresses and sizes of the CPU PQ, EQ and CQ, marks
 * the PQ as ready for the device CPU, signals the CPU through the GIC and
 * then waits for the CPU to report that it is ready for the host.
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
        int err;

        if (!hdev->cpu_queues_enable)
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
                return 0;

        eq = &hdev->event_queue;

        WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
        WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

        WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
        WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

        WREG32(mmCPU_CQ_BASE_ADDR_LOW,
                        lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
        WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
                        upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

        WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
        WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
        WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

        /* Used for EQ CI */
        WREG32(mmCPU_EQ_CI, 0);

        WREG32(mmCPU_IF_PF_PQ_PI, 0);

        WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_PI_UPDATE);

        err = hl_poll_timeout(
                hdev,
                mmCPU_PQ_INIT_STATUS,
                status,
                (status == PQ_INIT_STATUS_READY_FOR_HOST),
                1000,
                GOYA_CPU_TIMEOUT_USEC);

        if (err) {
                dev_err(hdev->dev,
                        "Failed to setup communication with device CPU\n");
                return -EIO;
        }

        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
        u64 tpc_eml_address;
        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
        int err, slm_index;

        tpc_offset = tpc_id * 0x40000;
        tpc_eml_offset = tpc_id * 0x200000;
        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
        tpc_slm_offset = tpc_eml_address + 0x100000;

        /*
         * Workaround for Bug H2 #2443:
         * "TPC SB is not initialized on chip reset"
         */

        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
                        tpc_id);

        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

        err = hl_poll_timeout(
                hdev,
                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                val,
                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
                1000,
                HL_DEVICE_TIMEOUT_USEC);

        if (err)
                dev_err(hdev->dev,
                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

        msleep(GOYA_RESET_WAIT_MSEC);

        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

        msleep(GOYA_RESET_WAIT_MSEC);

        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
                WREG32(tpc_slm_offset + (slm_index << 2), 0);

        /* dummy read to flush the posted SLM writes */
        val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int i;

        if (hdev->pldm)
                return;

        if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
                return;

        /* Workaround for H2 #2443 */

        for (i = 0 ; i < TPC_MAX_NUM ; i++)
                _goya_tpc_mbist_workaround(hdev, i);

        goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 polynom[10], tpc_intr_mask, offset;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
                return;

        polynom[0] = 0x00020080;
        polynom[1] = 0x00401000;
        polynom[2] = 0x00200800;
        polynom[3] = 0x00002000;
        polynom[4] = 0x00080200;
        polynom[5] = 0x00040100;
        polynom[6] = 0x00100400;
        polynom[7] = 0x00004000;
        polynom[8] = 0x00010000;
        polynom[9] = 0x00008000;

        /* Mask all arithmetic interrupts from TPC */
        tpc_intr_mask = 0x7FFF;

        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
        }

        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
        WREG32(mmMME_AGU, 0x0f0f0f10);
        WREG32(mmMME_SEI_MASK, ~0x0);

        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
        WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
        WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
        WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
        WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
        WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
        WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
        WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
        WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
        WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
        WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
        WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
        WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
        WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
        WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
        WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
        WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
        WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
        WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

        WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
        WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

        WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
        WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

        WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
        WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
        WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
        WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
        WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
        WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
        WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

        WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
        WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
        WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
        WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
        WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
        WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

        WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
        WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
        WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
        WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
        WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
        WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
        WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
        WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

        WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
        WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
        WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
        WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

        for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
                WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

                WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

                WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1486                 WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1487         }
1488
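        /*
         * Enable scrambling on every MME router. The 0x40000 stride is
         * taken to be the size of each router's register block (an
         * inference from the register map).
         */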
1489         for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1490                 WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1491                                 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1492                 WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1493                                 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1494         }
1495
1496         for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1497                 /*
1498                  * Workaround for Bug H2 #2441 :
1499                  * "ST.NOP set trace event illegal opcode"
1500                  */
1501                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1502
1503                 WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1504                                 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1505                 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1506                                 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1507
1508                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1509                                 ICACHE_FETCH_LINE_NUM, 2);
1510         }
1511
1512         WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1513         WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1514                         1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1515
1516         WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1517         WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1518                         1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1519
1520         /*
1521          * Workaround for H2 #HW-23 bug
1522          * Set DMA max outstanding read requests to 240 on DMA CH 1.
1523          * This limitation is still large enough not to affect Gen4 bandwidth.
1524          * We limit only that DMA channel because the user can read from Host
1525          * only via DMA CH 1
1526          */
1527         WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1528
1529         WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1530
1531         goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1532 }
1533
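/*
 * goya_init_mme_qman - Initialize the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Point the PQ at its area in SRAM, set the sync manager monitor/SOB
 * message base addresses, route QMAN errors to the GIC and enable the
 * QMAN.
 */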
1534 static void goya_init_mme_qman(struct hl_device *hdev)
1535 {
1536         u32 mtr_base_lo, mtr_base_hi;
1537         u32 so_base_lo, so_base_hi;
1538         u32 gic_base_lo, gic_base_hi;
1539         u64 qman_base_addr;
1540
1541         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1542         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1543         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1544         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1545
1546         gic_base_lo =
1547                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1548         gic_base_hi =
1549                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1550
1551         qman_base_addr = hdev->asic_prop.sram_base_address +
1552                                 MME_QMAN_BASE_OFFSET;
1553
1554         WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1555         WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1556         WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1557         WREG32(mmMME_QM_PQ_PI, 0);
1558         WREG32(mmMME_QM_PQ_CI, 0);
1559         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1560         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1561         WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1562         WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1563
1564         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1565         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1566         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1567         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1568
1569         /* QMAN CQ has 8 cache lines */
1570         WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1571
1572         WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1573         WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1574
1575         WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1576
1577         WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1578
1579         WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1580
1581         WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1582 }
1583
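/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 * Same flow as goya_init_mme_qman, except that the CMDQ has no PQ in
 * SRAM to program.
 */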
1584 static void goya_init_mme_cmdq(struct hl_device *hdev)
1585 {
1586         u32 mtr_base_lo, mtr_base_hi;
1587         u32 so_base_lo, so_base_hi;
1588         u32 gic_base_lo, gic_base_hi;
1589
1590         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1591         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1592         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1593         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1594
1595         gic_base_lo =
1596                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1597         gic_base_hi =
1598                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1599
1600         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1601         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1602         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1603         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1604
1605         /* CMDQ CQ has 20 cache lines */
1606         WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1607
1608         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1609         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1610
1611         WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1612
1613         WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1614
1615         WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1616
1617         WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1618 }
1619
1620 void goya_init_mme_qmans(struct hl_device *hdev)
1621 {
1622         struct goya_device *goya = hdev->asic_specific;
1623         u32 so_base_lo, so_base_hi;
1624
1625         if (goya->hw_cap_initialized & HW_CAP_MME)
1626                 return;
1627
1628         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1629         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1630
1631         WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1632         WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1633
1634         goya_init_mme_qman(hdev);
1635         goya_init_mme_cmdq(hdev);
1636
1637         goya->hw_cap_initialized |= HW_CAP_MME;
1638 }
1639
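/*
 * goya_init_tpc_qman - Initialize a single TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: SRAM offset of this QMAN's queue area
 * @tpc_id: index of the TPC engine
 *
 * All TPC QMAN register blocks share one layout, so the TPC0 register
 * names are used together with a per-engine offset derived from the
 * distance between the TPC1 and TPC0 blocks.
 */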
1640 static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1641 {
1642         u32 mtr_base_lo, mtr_base_hi;
1643         u32 so_base_lo, so_base_hi;
1644         u32 gic_base_lo, gic_base_hi;
1645         u64 qman_base_addr;
1646         u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1647
1648         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1649         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1650         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1651         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1652
1653         gic_base_lo =
1654                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1655         gic_base_hi =
1656                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1657
1658         qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1659
1660         WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1661         WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1662         WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1663         WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1664         WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1665         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1666         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1667         WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1668         WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1669
1670         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1671         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1672         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1673         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1674
1675         WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1676
1677         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1678         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1679
1680         WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1681                         GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1682
1683         WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1684
1685         WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1686
1687         WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1688 }
1689
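/*
 * goya_init_tpc_cmdq - Initialize a single TPC CMDQ
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine
 *
 * CMDQ counterpart of goya_init_tpc_qman; uses the same per-engine
 * offset scheme but has no PQ to program.
 */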
1690 static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1691 {
1692         u32 mtr_base_lo, mtr_base_hi;
1693         u32 so_base_lo, so_base_hi;
1694         u32 gic_base_lo, gic_base_hi;
1695         u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1696
1697         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1698         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1699         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1700         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1701
1702         gic_base_lo =
1703                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1704         gic_base_hi =
1705                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1706
1707         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1708         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1709         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1710         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1711
1712         WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1713
1714         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1715         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1716
1717         WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1718                         GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1719
1720         WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1721
1722         WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1723
1724         WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1725 }
1726
1727 void goya_init_tpc_qmans(struct hl_device *hdev)
1728 {
1729         struct goya_device *goya = hdev->asic_specific;
1730         u32 so_base_lo, so_base_hi;
1731         u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1732                         mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1733         int i;
1734
1735         if (goya->hw_cap_initialized & HW_CAP_TPC)
1736                 return;
1737
1738         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1739         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1740
1741         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1742                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1743                                 so_base_lo);
1744                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1745                                 so_base_hi);
1746         }
1747
1748         goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1749         goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1750         goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1751         goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1752         goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1753         goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1754         goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1755         goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1756
1757         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1758                 goya_init_tpc_cmdq(hdev, i);
1759
1760         goya->hw_cap_initialized |= HW_CAP_TPC;
1761 }
1762
1763 /*
1764  * goya_disable_internal_queues - Disable internal queues
1765  *
1766  * @hdev: pointer to hl_device structure
1767  *
1768  */
1769 static void goya_disable_internal_queues(struct hl_device *hdev)
1770 {
1771         struct goya_device *goya = hdev->asic_specific;
1772
1773         if (!(goya->hw_cap_initialized & HW_CAP_MME))
1774                 goto disable_tpc;
1775
1776         WREG32(mmMME_QM_GLBL_CFG0, 0);
1777         WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1778
1779 disable_tpc:
1780         if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1781                 return;
1782
1783         WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1784         WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1785
1786         WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1787         WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1788
1789         WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1790         WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1791
1792         WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1793         WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1794
1795         WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1796         WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1797
1798         WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1799         WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1800
1801         WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1802         WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1803
1804         WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1805         WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1806 }
1807
1808 /*
1809  * goya_stop_internal_queues - Stop internal queues
1810  *
1811  * @hdev: pointer to hl_device structure
1812  *
1813  * Returns 0 on success
1814  *
1815  */
1816 static int goya_stop_internal_queues(struct hl_device *hdev)
1817 {
1818         struct goya_device *goya = hdev->asic_specific;
1819         int rc, retval = 0;
1820
1821         if (!(goya->hw_cap_initialized & HW_CAP_MME))
1822                 goto stop_tpc;
1823
1824         /*
1825          * Each queue (QMAN) is a separate H/W logic, so each QMAN can be
1826          * stopped independently. Failure to stop one does not prevent us
1827          * from trying to stop other QMANs
1828          */
1829
1830         rc = goya_stop_queue(hdev,
1831                         mmMME_QM_GLBL_CFG1,
1832                         mmMME_QM_CP_STS,
1833                         mmMME_QM_GLBL_STS0);
1834
1835         if (rc) {
1836                 dev_err(hdev->dev, "failed to stop MME QMAN\n");
1837                 retval = -EIO;
1838         }
1839
1840         rc = goya_stop_queue(hdev,
1841                         mmMME_CMDQ_GLBL_CFG1,
1842                         mmMME_CMDQ_CP_STS,
1843                         mmMME_CMDQ_GLBL_STS0);
1844
1845         if (rc) {
1846                 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1847                 retval = -EIO;
1848         }
1849
1850 stop_tpc:
1851         if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1852                 return retval;
1853
1854         rc = goya_stop_queue(hdev,
1855                         mmTPC0_QM_GLBL_CFG1,
1856                         mmTPC0_QM_CP_STS,
1857                         mmTPC0_QM_GLBL_STS0);
1858
1859         if (rc) {
1860                 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1861                 retval = -EIO;
1862         }
1863
1864         rc = goya_stop_queue(hdev,
1865                         mmTPC0_CMDQ_GLBL_CFG1,
1866                         mmTPC0_CMDQ_CP_STS,
1867                         mmTPC0_CMDQ_GLBL_STS0);
1868
1869         if (rc) {
1870                 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1871                 retval = -EIO;
1872         }
1873
1874         rc = goya_stop_queue(hdev,
1875                         mmTPC1_QM_GLBL_CFG1,
1876                         mmTPC1_QM_CP_STS,
1877                         mmTPC1_QM_GLBL_STS0);
1878
1879         if (rc) {
1880                 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1881                 retval = -EIO;
1882         }
1883
1884         rc = goya_stop_queue(hdev,
1885                         mmTPC1_CMDQ_GLBL_CFG1,
1886                         mmTPC1_CMDQ_CP_STS,
1887                         mmTPC1_CMDQ_GLBL_STS0);
1888
1889         if (rc) {
1890                 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1891                 retval = -EIO;
1892         }
1893
1894         rc = goya_stop_queue(hdev,
1895                         mmTPC2_QM_GLBL_CFG1,
1896                         mmTPC2_QM_CP_STS,
1897                         mmTPC2_QM_GLBL_STS0);
1898
1899         if (rc) {
1900                 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
1901                 retval = -EIO;
1902         }
1903
1904         rc = goya_stop_queue(hdev,
1905                         mmTPC2_CMDQ_GLBL_CFG1,
1906                         mmTPC2_CMDQ_CP_STS,
1907                         mmTPC2_CMDQ_GLBL_STS0);
1908
1909         if (rc) {
1910                 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
1911                 retval = -EIO;
1912         }
1913
1914         rc = goya_stop_queue(hdev,
1915                         mmTPC3_QM_GLBL_CFG1,
1916                         mmTPC3_QM_CP_STS,
1917                         mmTPC3_QM_GLBL_STS0);
1918
1919         if (rc) {
1920                 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
1921                 retval = -EIO;
1922         }
1923
1924         rc = goya_stop_queue(hdev,
1925                         mmTPC3_CMDQ_GLBL_CFG1,
1926                         mmTPC3_CMDQ_CP_STS,
1927                         mmTPC3_CMDQ_GLBL_STS0);
1928
1929         if (rc) {
1930                 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
1931                 retval = -EIO;
1932         }
1933
1934         rc = goya_stop_queue(hdev,
1935                         mmTPC4_QM_GLBL_CFG1,
1936                         mmTPC4_QM_CP_STS,
1937                         mmTPC4_QM_GLBL_STS0);
1938
1939         if (rc) {
1940                 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
1941                 retval = -EIO;
1942         }
1943
1944         rc = goya_stop_queue(hdev,
1945                         mmTPC4_CMDQ_GLBL_CFG1,
1946                         mmTPC4_CMDQ_CP_STS,
1947                         mmTPC4_CMDQ_GLBL_STS0);
1948
1949         if (rc) {
1950                 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
1951                 retval = -EIO;
1952         }
1953
1954         rc = goya_stop_queue(hdev,
1955                         mmTPC5_QM_GLBL_CFG1,
1956                         mmTPC5_QM_CP_STS,
1957                         mmTPC5_QM_GLBL_STS0);
1958
1959         if (rc) {
1960                 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
1961                 retval = -EIO;
1962         }
1963
1964         rc = goya_stop_queue(hdev,
1965                         mmTPC5_CMDQ_GLBL_CFG1,
1966                         mmTPC5_CMDQ_CP_STS,
1967                         mmTPC5_CMDQ_GLBL_STS0);
1968
1969         if (rc) {
1970                 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
1971                 retval = -EIO;
1972         }
1973
1974         rc = goya_stop_queue(hdev,
1975                         mmTPC6_QM_GLBL_CFG1,
1976                         mmTPC6_QM_CP_STS,
1977                         mmTPC6_QM_GLBL_STS0);
1978
1979         if (rc) {
1980                 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
1981                 retval = -EIO;
1982         }
1983
1984         rc = goya_stop_queue(hdev,
1985                         mmTPC6_CMDQ_GLBL_CFG1,
1986                         mmTPC6_CMDQ_CP_STS,
1987                         mmTPC6_CMDQ_GLBL_STS0);
1988
1989         if (rc) {
1990                 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
1991                 retval = -EIO;
1992         }
1993
1994         rc = goya_stop_queue(hdev,
1995                         mmTPC7_QM_GLBL_CFG1,
1996                         mmTPC7_QM_CP_STS,
1997                         mmTPC7_QM_GLBL_STS0);
1998
1999         if (rc) {
2000                 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2001                 retval = -EIO;
2002         }
2003
2004         rc = goya_stop_queue(hdev,
2005                         mmTPC7_CMDQ_GLBL_CFG1,
2006                         mmTPC7_CMDQ_CP_STS,
2007                         mmTPC7_CMDQ_GLBL_STS0);
2008
2009         if (rc) {
2010                 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2011                 retval = -EIO;
2012         }
2013
2014         return retval;
2015 }
2016
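/*
 * goya_dma_stall - Set the stop bit in all DMA QMANs
 *
 * @hdev: pointer to hl_device structure
 *
 */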
2017 static void goya_dma_stall(struct hl_device *hdev)
2018 {
2019         struct goya_device *goya = hdev->asic_specific;
2020
2021         if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2022                 return;
2023
2024         WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2025         WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2026         WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2027         WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2028         WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2029 }
2030
2031 static void goya_tpc_stall(struct hl_device *hdev)
2032 {
2033         struct goya_device *goya = hdev->asic_specific;
2034
2035         if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2036                 return;
2037
2038         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2039         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2040         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2041         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2042         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2043         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2044         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2045         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2046 }
2047
2048 static void goya_mme_stall(struct hl_device *hdev)
2049 {
2050         struct goya_device *goya = hdev->asic_specific;
2051
2052         if (!(goya->hw_cap_initialized & HW_CAP_MME))
2053                 return;
2054
2055         WREG32(mmMME_STALL, 0xFFFFFFFF);
2056 }
2057
2058 static int goya_enable_msix(struct hl_device *hdev)
2059 {
2060         struct goya_device *goya = hdev->asic_specific;
2061         int cq_cnt = hdev->asic_prop.completion_queues_count;
2062         int rc, i, irq_cnt_init, irq;
2063
2064         if (goya->hw_cap_initialized & HW_CAP_MSIX)
2065                 return 0;
2066
2067         rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2068                                 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2069         if (rc < 0) {
2070                 dev_err(hdev->dev,
2071                         "MSI-X: Failed to enable support -- %d/%d\n",
2072                         GOYA_MSIX_ENTRIES, rc);
2073                 return rc;
2074         }
2075
2076         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2077                 irq = pci_irq_vector(hdev->pdev, i);
2078                 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2079                                 &hdev->completion_queue[i]);
2080                 if (rc) {
2081                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2082                         goto free_irqs;
2083                 }
2084         }
2085
2086         irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2087
2088         rc = request_irq(irq, hl_irq_handler_eq, 0,
2089                         goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2090                         &hdev->event_queue);
2091         if (rc) {
2092                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2093                 goto free_irqs;
2094         }
2095
2096         goya->hw_cap_initialized |= HW_CAP_MSIX;
2097         return 0;
2098
2099 free_irqs:
2100         for (i = 0 ; i < irq_cnt_init ; i++)
2101                 free_irq(pci_irq_vector(hdev->pdev, i),
2102                         &hdev->completion_queue[i]);
2103
2104         pci_free_irq_vectors(hdev->pdev);
2105         return rc;
2106 }
2107
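/*
 * goya_sync_irqs - Wait until all in-flight IRQ handlers have finished
 *
 * @hdev: pointer to hl_device structure
 *
 */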
2108 static void goya_sync_irqs(struct hl_device *hdev)
2109 {
2110         struct goya_device *goya = hdev->asic_specific;
2111         int i;
2112
2113         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2114                 return;
2115
2116         /* Wait for all pending IRQs to be finished */
2117         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2118                 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2119
2120         synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2121 }
2122
2123 static void goya_disable_msix(struct hl_device *hdev)
2124 {
2125         struct goya_device *goya = hdev->asic_specific;
2126         int i, irq;
2127
2128         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2129                 return;
2130
2131         goya_sync_irqs(hdev);
2132
2133         irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2134         free_irq(irq, &hdev->event_queue);
2135
2136         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2137                 irq = pci_irq_vector(hdev->pdev, i);
2138                 free_irq(irq, &hdev->completion_queue[i]);
2139         }
2140
2141         pci_free_irq_vectors(hdev->pdev);
2142
2143         goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2144 }
2145
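/*
 * goya_enable_timestamp - Reset and enable the timestamp counter
 *
 * @hdev: pointer to hl_device structure
 *
 * mmPSOC_TIMESTAMP_BASE appears to be an absolute address rather than a
 * CFG_BASE-relative offset, hence the CFG_BASE subtraction in the
 * accesses below.
 */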
2146 static void goya_enable_timestamp(struct hl_device *hdev)
2147 {
2148         /* Disable the timestamp counter */
2149         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2150
2151         /* Zero the lower/upper parts of the 64-bit counter */
2152         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2153         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2154
2155         /* Enable the counter */
2156         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2157 }
2158
2159 static void goya_disable_timestamp(struct hl_device *hdev)
2160 {
2161         /* Disable the timestamp counter */
2162         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2163 }
2164
2165 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2166 {
2167         u32 wait_timeout_ms, cpu_timeout_ms;
2168
2169         dev_info(hdev->dev,
2170                 "Halting compute engines and disabling interrupts\n");
2171
2172         if (hdev->pldm) {
2173                 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2174                 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2175         } else {
2176                 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2177                 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2178         }
2179
2180         if (hard_reset) {
2181                 /*
2182                  * We don't know the state of the CPU, so make sure it is
2183                  * stopped by any means necessary
2184                  */
2185                 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2186                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2187                         GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2188                 msleep(cpu_timeout_ms);
2189         }
2190
2191         goya_stop_external_queues(hdev);
2192         goya_stop_internal_queues(hdev);
2193
2194         msleep(wait_timeout_ms);
2195
2196         goya_dma_stall(hdev);
2197         goya_tpc_stall(hdev);
2198         goya_mme_stall(hdev);
2199
2200         msleep(wait_timeout_ms);
2201
2202         goya_disable_external_queues(hdev);
2203         goya_disable_internal_queues(hdev);
2204
2205         goya_disable_timestamp(hdev);
2206
2207         if (hard_reset) {
2208                 goya_disable_msix(hdev);
2209                 goya_mmu_remove_device_cpu_mappings(hdev);
2210         } else {
2211                 goya_sync_irqs(hdev);
2212         }
2213 }
2214
2215 /*
2216  * goya_load_firmware_to_device() - Load LINUX FW code to device.
2217  * @hdev: Pointer to hl_device structure.
2218  *
2219  * Copy LINUX fw code from firmware file to DDR BAR.
2220  *
2221  * Return: 0 on success, non-zero for failure.
2222  */
2223 static int goya_load_firmware_to_device(struct hl_device *hdev)
2224 {
2225         void __iomem *dst;
2226
2227         dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2228
2229         return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
2230 }
2231
2232 /*
2233  * goya_load_boot_fit_to_device() - Load boot fit to device.
2234  * @hdev: Pointer to hl_device structure.
2235  *
2236  * Copy boot fit file to SRAM BAR.
2237  *
2238  * Return: 0 on success, non-zero for failure.
2239  */
2240 static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2241 {
2242         void __iomem *dst;
2243
2244         dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2245
2246         return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
2247 }
2248
2249 /*
2250  * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2251  * The version string should be located at that offset.
2252  */
2253 static void goya_read_device_fw_version(struct hl_device *hdev,
2254                                         enum hl_fw_component fwc)
2255 {
2256         const char *name;
2257         u32 ver_off;
2258         char *dest;
2259
2260         switch (fwc) {
2261         case FW_COMP_UBOOT:
2262                 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2263                 dest = hdev->asic_prop.uboot_ver;
2264                 name = "U-Boot";
2265                 break;
2266         case FW_COMP_PREBOOT:
2267                 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2268                 dest = hdev->asic_prop.preboot_ver;
2269                 name = "Preboot";
2270                 break;
2271         default:
2272                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2273                 return;
2274         }
2275
2276         ver_off &= ~((u32)SRAM_BASE_ADDR);
2277
2278         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2279                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2280                                                         VERSION_MAX_LEN);
2281         } else {
2282                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2283                                                                 name, ver_off);
2284                 strcpy(dest, "unavailable");
2285         }
2286 }
2287
2288 static int goya_init_cpu(struct hl_device *hdev)
2289 {
2290         struct goya_device *goya = hdev->asic_specific;
2291         int rc;
2292
2293         if (!hdev->cpu_enable)
2294                 return 0;
2295
2296         if (goya->hw_cap_initialized & HW_CAP_CPU)
2297                 return 0;
2298
2299         /*
2300          * Before pushing u-boot/linux to the device, we need to set the DDR
2301          * bar to the base address of DRAM
2302          */
2303         if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2304                 dev_err(hdev->dev,
2305                         "failed to map DDR bar to DRAM base address\n");
2306                 return -EIO;
2307         }
2308
2309         rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2310                         mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2311                         mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
2312                         false, GOYA_CPU_TIMEOUT_USEC,
2313                         GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
2314
2315         if (rc)
2316                 return rc;
2317
2318         goya->hw_cap_initialized |= HW_CAP_CPU;
2319
2320         return 0;
2321 }
2322
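/*
 * goya_mmu_update_asid_hop0_addr - Set the hop-0 page table address of an ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID to configure
 * @phys_addr: physical address of the ASID's hop-0 page table
 *
 * Writes the split physical address to the MMU registers, sets the busy
 * bit together with the ASID and polls until the H/W clears the busy bit.
 *
 * Returns 0 on success
 */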
2323 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2324                                                 u64 phys_addr)
2325 {
2326         u32 status, timeout_usec;
2327         int rc;
2328
2329         if (hdev->pldm)
2330                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2331         else
2332                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2333
2334         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2335         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
2336         WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2337
2338         rc = hl_poll_timeout(
2339                 hdev,
2340                 MMU_ASID_BUSY,
2341                 status,
2342                 !(status & 0x80000000),
2343                 1000,
2344                 timeout_usec);
2345
2346         if (rc) {
2347                 dev_err(hdev->dev,
2348                         "Timeout during MMU hop0 config of asid %d\n", asid);
2349                 return rc;
2350         }
2351
2352         return 0;
2353 }
2354
2355 int goya_mmu_init(struct hl_device *hdev)
2356 {
2357         struct asic_fixed_properties *prop = &hdev->asic_prop;
2358         struct goya_device *goya = hdev->asic_specific;
2359         u64 hop0_addr;
2360         int rc, i;
2361
2362         if (!hdev->mmu_enable)
2363                 return 0;
2364
2365         if (goya->hw_cap_initialized & HW_CAP_MMU)
2366                 return 0;
2367
2368         hdev->dram_supports_virtual_memory = true;
2369         hdev->dram_default_page_mapping = true;
2370
2371         for (i = 0 ; i < prop->max_asid ; i++) {
2372                 hop0_addr = prop->mmu_pgt_addr +
2373                                 (i * prop->mmu_hop_table_size);
2374
2375                 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2376                 if (rc) {
2377                         dev_err(hdev->dev,
2378                                 "failed to set hop0 addr for asid %d\n", i);
2379                         goto err;
2380                 }
2381         }
2382
2383         goya->hw_cap_initialized |= HW_CAP_MMU;
2384
2385         /* init MMU cache manage page */
2386         WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2387                                 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2388         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2389
2390         /* Remove follower feature due to performance bug */
2391         WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2392                         (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2393
2394         hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2395                                         VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2396
2397         WREG32(mmMMU_MMU_ENABLE, 1);
2398         WREG32(mmMMU_SPI_MASK, 0xF);
2399
2400         return 0;
2401
2402 err:
2403         return rc;
2404 }
2405
2406 /*
2407  * goya_hw_init - Goya hardware initialization code
2408  *
2409  * @hdev: pointer to hl_device structure
2410  *
2411  * Returns 0 on success
2412  *
2413  */
2414 static int goya_hw_init(struct hl_device *hdev)
2415 {
2416         struct asic_fixed_properties *prop = &hdev->asic_prop;
2417         int rc;
2418
2419         dev_info(hdev->dev, "Starting initialization of H/W\n");
2420
2421         /* Perform read from the device to make sure device is up */
2422         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2423
2424         /*
2425          * Let's mark in the H/W that we have reached this point. We check
2426          * this value in the reset_before_init function to understand whether
2427          * we need to reset the chip before doing H/W init. This register is
2428          * cleared by the H/W upon H/W reset
2429          */
2430         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2431
2432         rc = goya_init_cpu(hdev);
2433         if (rc) {
2434                 dev_err(hdev->dev, "failed to initialize CPU\n");
2435                 return rc;
2436         }
2437
2438         goya_tpc_mbist_workaround(hdev);
2439
2440         goya_init_golden_registers(hdev);
2441
2442         /*
2443          * After CPU initialization is finished, change DDR bar mapping inside
2444          * iATU to point to the start address of the MMU page tables
2445          */
2446         if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2447                         ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2448                 dev_err(hdev->dev,
2449                         "failed to map DDR bar to MMU page tables\n");
2450                 return -EIO;
2451         }
2452
2453         rc = goya_mmu_init(hdev);
2454         if (rc)
2455                 return rc;
2456
2457         goya_init_security(hdev);
2458
2459         goya_init_dma_qmans(hdev);
2460
2461         goya_init_mme_qmans(hdev);
2462
2463         goya_init_tpc_qmans(hdev);
2464
2465         goya_enable_timestamp(hdev);
2466
2467         /* MSI-X must be enabled before CPU queues are initialized */
2468         rc = goya_enable_msix(hdev);
2469         if (rc)
2470                 goto disable_queues;
2471
2472         /* Perform read from the device to flush all MSI-X configuration */
2473         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2474
2475         return 0;
2476
2477 disable_queues:
2478         goya_disable_internal_queues(hdev);
2479         goya_disable_external_queues(hdev);
2480
2481         return rc;
2482 }
2483
2484 /*
2485  * goya_hw_fini - Goya hardware tear-down code
2486  *
2487  * @hdev: pointer to hl_device structure
2488  * @hard_reset: should we do hard reset to all engines or just reset the
2489  *              compute/dma engines
2490  */
2491 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2492 {
2493         struct goya_device *goya = hdev->asic_specific;
2494         u32 reset_timeout_ms, status;
2495
2496         if (hdev->pldm)
2497                 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2498         else
2499                 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2500
2501         if (hard_reset) {
2502                 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2503                 goya_disable_clk_rlx(hdev);
2504                 goya_set_pll_refclk(hdev);
2505
2506                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2507                 dev_info(hdev->dev,
2508                         "Issued HARD reset command, going to wait %dms\n",
2509                         reset_timeout_ms);
2510         } else {
2511                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2512                 dev_info(hdev->dev,
2513                         "Issued SOFT reset command, going to wait %dms\n",
2514                         reset_timeout_ms);
2515         }
2516
2517         /*
2518          * After hard reset, we can't poll the BTM_FSM register because the PSOC
2519          * itself is in reset. In either type of reset we need to wait until
2520          * the reset is deasserted
2521          */
2522         msleep(reset_timeout_ms);
2523
2524         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2525         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2526                 dev_err(hdev->dev,
2527                         "Timeout while waiting for device to reset 0x%x\n",
2528                         status);
2529
2530         if (!hard_reset) {
2531                 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2532                                                 HW_CAP_GOLDEN | HW_CAP_TPC);
2533                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2534                                 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2535                 return;
2536         }
2537
2538         /* Chicken bit to re-initiate boot sequencer flow */
2539         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2540                 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2541         /* Move boot manager FSM to pre boot sequencer init state */
2542         WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2543                         0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2544
2545         goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2546                                         HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2547                                         HW_CAP_DMA | HW_CAP_MME |
2548                                         HW_CAP_MMU | HW_CAP_TPC_MBIST |
2549                                         HW_CAP_GOLDEN | HW_CAP_TPC);
2550         memset(goya->events_stat, 0, sizeof(goya->events_stat));
2551 }
2552
2553 int goya_suspend(struct hl_device *hdev)
2554 {
2555         int rc;
2556
2557         rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2558         if (rc)
2559                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2560
2561         return rc;
2562 }
2563
2564 int goya_resume(struct hl_device *hdev)
2565 {
2566         return goya_init_iatu(hdev);
2567 }
2568
2569 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2570                 u64 kaddress, phys_addr_t paddress, u32 size)
2571 {
2572         int rc;
2573
2574         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2575                         VM_DONTCOPY | VM_NORESERVE;
2576
2577         rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2578                                 size, vma->vm_page_prot);
2579         if (rc)
2580                 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
2581
2582         return rc;
2583 }
2584
2585 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2586 {
2587         u32 db_reg_offset, db_value;
2588
2589         switch (hw_queue_id) {
2590         case GOYA_QUEUE_ID_DMA_0:
2591                 db_reg_offset = mmDMA_QM_0_PQ_PI;
2592                 break;
2593
2594         case GOYA_QUEUE_ID_DMA_1:
2595                 db_reg_offset = mmDMA_QM_1_PQ_PI;
2596                 break;
2597
2598         case GOYA_QUEUE_ID_DMA_2:
2599                 db_reg_offset = mmDMA_QM_2_PQ_PI;
2600                 break;
2601
2602         case GOYA_QUEUE_ID_DMA_3:
2603                 db_reg_offset = mmDMA_QM_3_PQ_PI;
2604                 break;
2605
2606         case GOYA_QUEUE_ID_DMA_4:
2607                 db_reg_offset = mmDMA_QM_4_PQ_PI;
2608                 break;
2609
2610         case GOYA_QUEUE_ID_CPU_PQ:
2611                 db_reg_offset = mmCPU_IF_PF_PQ_PI;
2612                 break;
2613
2614         case GOYA_QUEUE_ID_MME:
2615                 db_reg_offset = mmMME_QM_PQ_PI;
2616                 break;
2617
2618         case GOYA_QUEUE_ID_TPC0:
2619                 db_reg_offset = mmTPC0_QM_PQ_PI;
2620                 break;
2621
2622         case GOYA_QUEUE_ID_TPC1:
2623                 db_reg_offset = mmTPC1_QM_PQ_PI;
2624                 break;
2625
2626         case GOYA_QUEUE_ID_TPC2:
2627                 db_reg_offset = mmTPC2_QM_PQ_PI;
2628                 break;
2629
2630         case GOYA_QUEUE_ID_TPC3:
2631                 db_reg_offset = mmTPC3_QM_PQ_PI;
2632                 break;
2633
2634         case GOYA_QUEUE_ID_TPC4:
2635                 db_reg_offset = mmTPC4_QM_PQ_PI;
2636                 break;
2637
2638         case GOYA_QUEUE_ID_TPC5:
2639                 db_reg_offset = mmTPC5_QM_PQ_PI;
2640                 break;
2641
2642         case GOYA_QUEUE_ID_TPC6:
2643                 db_reg_offset = mmTPC6_QM_PQ_PI;
2644                 break;
2645
2646         case GOYA_QUEUE_ID_TPC7:
2647                 db_reg_offset = mmTPC7_QM_PQ_PI;
2648                 break;
2649
2650         default:
2651                 /* Should never get here */
2652                 dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2653                         hw_queue_id);
2654                 return;
2655         }
2656
2657         db_value = pi;
2658
2659         /* ring the doorbell */
2660         WREG32(db_reg_offset, db_value);
2661
2662         if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2663                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2664                                 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2665 }
2666
2667 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2668 {
2669         /* The QMAN PQs reside in SRAM, so the BD must be copied via IO space */
2670         memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2671 }
2672
2673 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2674                                         dma_addr_t *dma_handle, gfp_t flags)
2675 {
2676         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2677                                                 dma_handle, flags);
2678
2679         /* Shift to the device's base physical address of host memory */
2680         if (kernel_addr)
2681                 *dma_handle += HOST_PHYS_BASE;
2682
2683         return kernel_addr;
2684 }
2685
2686 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2687                                         void *cpu_addr, dma_addr_t dma_handle)
2688 {
2689         /* Cancel the device's base physical address of host memory */
2690         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2691
2692         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2693 }
2694
2695 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2696                                 dma_addr_t *dma_handle, u16 *queue_len)
2697 {
2698         void *base;
2699         u32 offset;
2700
2701         *dma_handle = hdev->asic_prop.sram_base_address;
2702
2703         base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2704
2705         switch (queue_id) {
2706         case GOYA_QUEUE_ID_MME:
2707                 offset = MME_QMAN_BASE_OFFSET;
2708                 *queue_len = MME_QMAN_LENGTH;
2709                 break;
2710         case GOYA_QUEUE_ID_TPC0:
2711                 offset = TPC0_QMAN_BASE_OFFSET;
2712                 *queue_len = TPC_QMAN_LENGTH;
2713                 break;
2714         case GOYA_QUEUE_ID_TPC1:
2715                 offset = TPC1_QMAN_BASE_OFFSET;
2716                 *queue_len = TPC_QMAN_LENGTH;
2717                 break;
2718         case GOYA_QUEUE_ID_TPC2:
2719                 offset = TPC2_QMAN_BASE_OFFSET;
2720                 *queue_len = TPC_QMAN_LENGTH;
2721                 break;
2722         case GOYA_QUEUE_ID_TPC3:
2723                 offset = TPC3_QMAN_BASE_OFFSET;
2724                 *queue_len = TPC_QMAN_LENGTH;
2725                 break;
2726         case GOYA_QUEUE_ID_TPC4:
2727                 offset = TPC4_QMAN_BASE_OFFSET;
2728                 *queue_len = TPC_QMAN_LENGTH;
2729                 break;
2730         case GOYA_QUEUE_ID_TPC5:
2731                 offset = TPC5_QMAN_BASE_OFFSET;
2732                 *queue_len = TPC_QMAN_LENGTH;
2733                 break;
2734         case GOYA_QUEUE_ID_TPC6:
2735                 offset = TPC6_QMAN_BASE_OFFSET;
2736                 *queue_len = TPC_QMAN_LENGTH;
2737                 break;
2738         case GOYA_QUEUE_ID_TPC7:
2739                 offset = TPC7_QMAN_BASE_OFFSET;
2740                 *queue_len = TPC_QMAN_LENGTH;
2741                 break;
2742         default:
2743                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2744                 return NULL;
2745         }
2746
2747         base += offset;
2748         *dma_handle += offset;
2749
2750         return base;
2751 }
2752
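/*
 * goya_send_job_on_qman0 - Send a driver job on the secured DMA 0 QMAN
 *
 * @hdev: pointer to hl_device structure
 * @job: job to send, holding an already patched CB
 *
 * A MSG_PROT fence packet is written into the tail of the patched CB;
 * after the CB is submitted, the fence memory is polled until QMAN0
 * writes the expected fence value or the timeout expires.
 *
 * Returns 0 on success
 */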
2753 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2754 {
2755         struct packet_msg_prot *fence_pkt;
2756         u32 *fence_ptr;
2757         dma_addr_t fence_dma_addr;
2758         struct hl_cb *cb;
2759         u32 tmp, timeout;
2760         int rc;
2761
2762         if (hdev->pldm)
2763                 timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2764         else
2765                 timeout = HL_DEVICE_TIMEOUT_USEC;
2766
2767         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
2768                 dev_err_ratelimited(hdev->dev,
2769                         "Can't send driver job on QMAN0 because the device is not idle\n");
2770                 return -EBUSY;
2771         }
2772
2773         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2774                                                         &fence_dma_addr);
2775         if (!fence_ptr) {
2776                 dev_err(hdev->dev,
2777                         "Failed to allocate fence memory for QMAN0\n");
2778                 return -ENOMEM;
2779         }
2780
2781         goya_qman0_set_security(hdev, true);
2782
2783         cb = job->patched_cb;
2784
2785         fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
2786                         job->job_cb_size - sizeof(struct packet_msg_prot));
2787
2788         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2789                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
2790                         (1 << GOYA_PKT_CTL_MB_SHIFT);
2791         fence_pkt->ctl = cpu_to_le32(tmp);
2792         fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2793         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2794
2795         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2796                                         job->job_cb_size, cb->bus_address);
2797         if (rc) {
2798                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2799                 goto free_fence_ptr;
2800         }
2801
2802         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2803                                 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2804                                 timeout, true);
2805
2806         hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2807
2808         if (rc == -ETIMEDOUT) {
2809                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2810                 goto free_fence_ptr;
2811         }
2812
2813 free_fence_ptr:
2814         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2815                                         fence_dma_addr);
2816
2817         goya_qman0_set_security(hdev, false);
2818
2819         return rc;
2820 }
2821
2822 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2823                                 u32 timeout, long *result)
2824 {
2825         struct goya_device *goya = hdev->asic_specific;
2826
2827         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2828                 if (result)
2829                         *result = 0;
2830                 return 0;
2831         }
2832
2833         return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2834                                         timeout, result);
2835 }
2836
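/*
 * goya_test_queue - Sanity test of a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: queue to test
 *
 * Sends a MSG_PROT packet through the queue and polls a fence in host
 * memory; the test passes only if the H/W writes back the expected value.
 *
 * Returns 0 on success
 */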
2837 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2838 {
2839         struct packet_msg_prot *fence_pkt;
2840         dma_addr_t pkt_dma_addr;
2841         u32 fence_val, tmp;
2842         dma_addr_t fence_dma_addr;
2843         u32 *fence_ptr;
2844         int rc;
2845
2846         fence_val = GOYA_QMAN0_FENCE_VAL;
2847
2848         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2849                                                         &fence_dma_addr);
2850         if (!fence_ptr) {
2851                 dev_err(hdev->dev,
2852                         "Failed to allocate memory for queue testing\n");
2853                 return -ENOMEM;
2854         }
2855
2856         *fence_ptr = 0;
2857
2858         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
2859                                         sizeof(struct packet_msg_prot),
2860                                         GFP_KERNEL, &pkt_dma_addr);
2861         if (!fence_pkt) {
2862                 dev_err(hdev->dev,
2863                         "Failed to allocate packet for queue testing\n");
2864                 rc = -ENOMEM;
2865                 goto free_fence_ptr;
2866         }
2867
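        /* Build the MSG_PROT ctl word: opcode plus engine/message barrier bits */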
2868         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2869                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
2870                         (1 << GOYA_PKT_CTL_MB_SHIFT);
2871         fence_pkt->ctl = cpu_to_le32(tmp);
2872         fence_pkt->value = cpu_to_le32(fence_val);
2873         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2874
2875         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
2876                                         sizeof(struct packet_msg_prot),
2877                                         pkt_dma_addr);
2878         if (rc) {
2879                 dev_err(hdev->dev,
2880                         "Failed to send fence packet\n");
2881                 goto free_pkt;
2882         }
2883
2884         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
2885                                         1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
2886
2887         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
2888
2889         if (rc == -ETIMEDOUT) {
2890                 dev_err(hdev->dev,
2891                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
2892                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
2893                 rc = -EIO;
2894         }
2895
2896 free_pkt:
2897         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
2898                                         pkt_dma_addr);
2899 free_fence_ptr:
2900         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2901                                         fence_dma_addr);
2902         return rc;
2903 }
2904
2905 int goya_test_cpu_queue(struct hl_device *hdev)
2906 {
2907         struct goya_device *goya = hdev->asic_specific;
2908
2909         /*
2910          * check capability here as send_cpu_message() silently returns
2911          * success (with a zeroed result) if the CPU queue isn't available
2912          */
2913         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
2914                 return 0;
2915
2916         return hl_fw_test_cpu_queue(hdev);
2917 }
2918
2919 int goya_test_queues(struct hl_device *hdev)
2920 {
2921         int i, rc, ret_val = 0;
2922
2923         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
2924                 rc = goya_test_queue(hdev, i);
2925                 if (rc)
2926                         ret_val = -EINVAL;
2927         }
2928
2929         return ret_val;
2930 }
2931
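/*
 * goya_dma_pool_zalloc - allocate a small block (up to
 * GOYA_DMA_POOL_BLK_SIZE) from the device DMA pool and convert the returned
 * bus address to the device's view of host memory by adding HOST_PHYS_BASE.
 *
 * Illustration (the numbers are made up): if dma_pool_zalloc() hands back a
 * bus address of 0x1000, the handle returned to the caller is
 * HOST_PHYS_BASE + 0x1000, which is what the ASIC must use to reach the
 * buffer. goya_dma_pool_free() below subtracts the base again before
 * returning the block to the pool.
 */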
2932 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
2933                                         gfp_t mem_flags, dma_addr_t *dma_handle)
2934 {
2935         void *kernel_addr;
2936
2937         if (size > GOYA_DMA_POOL_BLK_SIZE)
2938                 return NULL;
2939
2940         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
2941
2942         /* Shift to the device's base physical address of host memory */
2943         if (kernel_addr)
2944                 *dma_handle += HOST_PHYS_BASE;
2945
2946         return kernel_addr;
2947 }
2948
2949 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
2950                                 dma_addr_t dma_addr)
2951 {
2952         /* Cancel the device's base physical address of host memory */
2953         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
2954
2955         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
2956 }
2957
2958 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
2959                                         dma_addr_t *dma_handle)
2960 {
2961         void *vaddr;
2962
2963         vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
2964         *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
2965                         VA_CPU_ACCESSIBLE_MEM_ADDR;
2966
2967         return vaddr;
2968 }
2969
2970 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
2971                                         void *vaddr)
2972 {
2973         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
2974 }
2975
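/*
 * goya_dma_map_sg - DMA-map a scatter-gather list and apply the same
 * HOST_PHYS_BASE shift as the pool allocators above, entry by entry.
 * goya_dma_unmap_sg() cancels the shift before calling dma_unmap_sg(), so
 * the core DMA API only ever sees the original bus addresses.
 */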
2976 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
2977                                 int nents, enum dma_data_direction dir)
2978 {
2979         struct scatterlist *sg;
2980         int i;
2981
2982         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
2983                 return -ENOMEM;
2984
2985         /* Shift to the device's base physical address of host memory */
2986         for_each_sg(sgl, sg, nents, i)
2987                 sg->dma_address += HOST_PHYS_BASE;
2988
2989         return 0;
2990 }
2991
2992 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
2993                                 int nents, enum dma_data_direction dir)
2994 {
2995         struct scatterlist *sg;
2996         int i;
2997
2998         /* Cancel the device's base physical address of host memory */
2999         for_each_sg(sgl, sg, nents, i)
3000                 sg->dma_address -= HOST_PHYS_BASE;
3001
3002         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3003 }
3004
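/*
 * goya_get_dma_desc_list_size - compute how many bytes of LIN_DMA packets
 * are needed to cover a mapped SG table. Consecutive SG entries whose bus
 * addresses are contiguous are folded into a single descriptor, as long as
 * the combined length stays within DMA_MAX_TRANSFER_SIZE.
 *
 * Rough example (the sizes are made up): three 4KB entries at bus addresses
 * 0x1000, 0x2000 and 0x8000 produce two descriptors - one 8KB descriptor
 * for the two contiguous entries and one 4KB descriptor for the last - so
 * the function returns 2 * sizeof(struct packet_lin_dma).
 */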
3005 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3006 {
3007         struct scatterlist *sg, *sg_next_iter;
3008         u32 count, dma_desc_cnt;
3009         u64 len, len_next;
3010         dma_addr_t addr, addr_next;
3011
3012         dma_desc_cnt = 0;
3013
3014         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3015
3016                 len = sg_dma_len(sg);
3017                 addr = sg_dma_address(sg);
3018
3019                 if (len == 0)
3020                         break;
3021
3022                 while ((count + 1) < sgt->nents) {
3023                         sg_next_iter = sg_next(sg);
3024                         len_next = sg_dma_len(sg_next_iter);
3025                         addr_next = sg_dma_address(sg_next_iter);
3026
3027                         if (len_next == 0)
3028                                 break;
3029
3030                         if ((addr + len == addr_next) &&
3031                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3032                                 len += len_next;
3033                                 count++;
3034                                 sg = sg_next_iter;
3035                         } else {
3036                                 break;
3037                         }
3038                 }
3039
3040                 dma_desc_cnt++;
3041         }
3042
3043         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3044 }
3045
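/*
 * goya_pin_memory_before_cs - make sure the user buffer referenced by a
 * LIN_DMA packet is pinned and DMA-mapped before the CS is submitted. A
 * buffer that is already on the job's userptr list is reused; otherwise it
 * is pinned, mapped and added to the list so it can be released when the
 * job completes. Either way, the patched CB size grows by the descriptor
 * list size the mapped SG table will require.
 */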
3046 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3047                                 struct hl_cs_parser *parser,
3048                                 struct packet_lin_dma *user_dma_pkt,
3049                                 u64 addr, enum dma_data_direction dir)
3050 {
3051         struct hl_userptr *userptr;
3052         int rc;
3053
3054         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3055                         parser->job_userptr_list, &userptr))
3056                 goto already_pinned;
3057
3058         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3059         if (!userptr)
3060                 return -ENOMEM;
3061
3062         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3063                                 userptr);
3064         if (rc)
3065                 goto free_userptr;
3066
3067         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3068
3069         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3070                                         userptr->sgt->nents, dir);
3071         if (rc) {
3072                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3073                 goto unpin_memory;
3074         }
3075
3076         userptr->dma_mapped = true;
3077         userptr->dir = dir;
3078
3079 already_pinned:
3080         parser->patched_cb_size +=
3081                         goya_get_dma_desc_list_size(hdev, userptr->sgt);
3082
3083         return 0;
3084
3085 unpin_memory:
3086         hl_unpin_host_memory(hdev, userptr);
3087 free_userptr:
3088         kfree(userptr);
3089         return rc;
3090 }
3091
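/*
 * goya_validate_dma_pkt_host - validate a LIN_DMA packet that has the host
 * on one side. The ctl field is decoded to find the direction, the device
 * side is range-checked against the user SRAM/DRAM windows, and the host
 * side is pinned via goya_pin_memory_before_cs() unless the packet is a
 * memset (in which case there is no host buffer to pin).
 */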
3092 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3093                                 struct hl_cs_parser *parser,
3094                                 struct packet_lin_dma *user_dma_pkt)
3095 {
3096         u64 device_memory_addr, addr;
3097         enum dma_data_direction dir;
3098         enum goya_dma_direction user_dir;
3099         bool sram_addr = true;
3100         bool skip_host_mem_pin = false;
3101         bool user_memset;
3102         u32 ctl;
3103         int rc = 0;
3104
3105         ctl = le32_to_cpu(user_dma_pkt->ctl);
3106
3107         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3108                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3109
3110         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3111                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3112
3113         switch (user_dir) {
3114         case DMA_HOST_TO_DRAM:
3115                 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3116                 dir = DMA_TO_DEVICE;
3117                 sram_addr = false;
3118                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3119                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3120                 if (user_memset)
3121                         skip_host_mem_pin = true;
3122                 break;
3123
3124         case DMA_DRAM_TO_HOST:
3125                 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3126                 dir = DMA_FROM_DEVICE;
3127                 sram_addr = false;
3128                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3129                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3130                 break;
3131
3132         case DMA_HOST_TO_SRAM:
3133                 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3134                 dir = DMA_TO_DEVICE;
3135                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3136                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3137                 if (user_memset)
3138                         skip_host_mem_pin = true;
3139                 break;
3140
3141         case DMA_SRAM_TO_HOST:
3142                 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3143                 dir = DMA_FROM_DEVICE;
3144                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3145                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3146                 break;
3147         default:
3148                 dev_err(hdev->dev, "DMA direction is undefined\n");
3149                 return -EFAULT;
3150         }
3151
3152         if (sram_addr) {
3153                 if (!hl_mem_area_inside_range(device_memory_addr,
3154                                 le32_to_cpu(user_dma_pkt->tsize),
3155                                 hdev->asic_prop.sram_user_base_address,
3156                                 hdev->asic_prop.sram_end_address)) {
3157
3158                         dev_err(hdev->dev,
3159                                 "SRAM address 0x%llx + 0x%x is invalid\n",
3160                                 device_memory_addr,
3161                                 le32_to_cpu(user_dma_pkt->tsize));
3162                         return -EFAULT;
3163                 }
3164         } else {
3165                 if (!hl_mem_area_inside_range(device_memory_addr,
3166                                 le32_to_cpu(user_dma_pkt->tsize),
3167                                 hdev->asic_prop.dram_user_base_address,
3168                                 hdev->asic_prop.dram_end_address)) {
3169
3170                         dev_err(hdev->dev,
3171                                 "DRAM address 0x%llx + 0x%x is invalid\n",
3172                                 device_memory_addr,
3173                                 le32_to_cpu(user_dma_pkt->tsize));
3174                         return -EFAULT;
3175                 }
3176         }
3177
3178         if (skip_host_mem_pin) {
3179                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3180         } else {
3181                 if ((dir == DMA_TO_DEVICE) &&
3182                                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3183                         dev_err(hdev->dev,
3184                                 "Can't DMA from host on queue other than 1\n");
3185                         return -EFAULT;
3186                 }
3187
3188                 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3189                                                 addr, dir);
3190         }
3191
3192         return rc;
3193 }
3194
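/*
 * goya_validate_dma_pkt_no_host - validate a device-to-device LIN_DMA
 * packet (DRAM<->SRAM). Both endpoints are range-checked, and since no
 * host SG expansion is needed, the packet contributes exactly its own size
 * to the patched CB.
 */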
3195 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3196                                 struct hl_cs_parser *parser,
3197                                 struct packet_lin_dma *user_dma_pkt)
3198 {
3199         u64 sram_memory_addr, dram_memory_addr;
3200         enum goya_dma_direction user_dir;
3201         u32 ctl;
3202
3203         ctl = le32_to_cpu(user_dma_pkt->ctl);
3204         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3205                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3206
3207         if (user_dir == DMA_DRAM_TO_SRAM) {
3208                 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3209                 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3210                 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3211         } else {
3212                 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3213                 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3214                 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3215         }
3216
3217         if (!hl_mem_area_inside_range(sram_memory_addr,
3218                                 le32_to_cpu(user_dma_pkt->tsize),
3219                                 hdev->asic_prop.sram_user_base_address,
3220                                 hdev->asic_prop.sram_end_address)) {
3221                 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3222                         sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3223                 return -EFAULT;
3224         }
3225
3226         if (!hl_mem_area_inside_range(dram_memory_addr,
3227                                 le32_to_cpu(user_dma_pkt->tsize),
3228                                 hdev->asic_prop.dram_user_base_address,
3229                                 hdev->asic_prop.dram_end_address)) {
3230                 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3231                         dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3232                 return -EFAULT;
3233         }
3234
3235         parser->patched_cb_size += sizeof(*user_dma_pkt);
3236
3237         return 0;
3238 }
3239
3240 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3241                                 struct hl_cs_parser *parser,
3242                                 struct packet_lin_dma *user_dma_pkt)
3243 {
3244         enum goya_dma_direction user_dir;
3245         u32 ctl;
3246         int rc;
3247
3248         dev_dbg(hdev->dev, "DMA packet details:\n");
3249         dev_dbg(hdev->dev, "source == 0x%llx\n",
3250                 le64_to_cpu(user_dma_pkt->src_addr));
3251         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3252                 le64_to_cpu(user_dma_pkt->dst_addr));
3253         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3254
3255         ctl = le32_to_cpu(user_dma_pkt->ctl);
3256         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3257                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3258
3259         /*
3260          * Special handling for DMA with size 0. The H/W has a bug where
3261          * this can cause the QMAN DMA to get stuck, so block it here.
3262          */
3263         if (user_dma_pkt->tsize == 0) {
3264                 dev_err(hdev->dev,
3265                         "Got DMA with size 0, might reset the device\n");
3266                 return -EINVAL;
3267         }
3268
3269         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3270                 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3271         else
3272                 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3273
3274         return rc;
3275 }
3276
3277 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3278                                 struct hl_cs_parser *parser,
3279                                 struct packet_lin_dma *user_dma_pkt)
3280 {
3281         dev_dbg(hdev->dev, "DMA packet details:\n");
3282         dev_dbg(hdev->dev, "source == 0x%llx\n",
3283                 le64_to_cpu(user_dma_pkt->src_addr));
3284         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3285                 le64_to_cpu(user_dma_pkt->dst_addr));
3286         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3287
3288         /*
3289          * WA for HW-23.
3290          * We can't allow user to read from Host using QMANs other than 1.
3291          * PMMU and HPMMU addresses are equal, check only one of them.
3292          */
3293         if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3294                 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3295                                 le32_to_cpu(user_dma_pkt->tsize),
3296                                 hdev->asic_prop.pmmu.start_addr,
3297                                 hdev->asic_prop.pmmu.end_addr)) {
3298                 dev_err(hdev->dev,
3299                         "Can't DMA from host on queue other than 1\n");
3300                 return -EFAULT;
3301         }
3302
3303         if (user_dma_pkt->tsize == 0) {
3304                 dev_err(hdev->dev,
3305                         "Got DMA with size 0, might reset the device\n");
3306                 return -EINVAL;
3307         }
3308
3309         parser->patched_cb_size += sizeof(*user_dma_pkt);
3310
3311         return 0;
3312 }
3313
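/*
 * goya_validate_wreg32 - a user WREG32 may only target the DMA channel's
 * WR_COMP_ADDR_LO register, and without the MMU the written value must
 * also point into the sync-manager SOB range, so a job can signal
 * completion but can't make the device write anywhere else.
 */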
3314 static int goya_validate_wreg32(struct hl_device *hdev,
3315                                 struct hl_cs_parser *parser,
3316                                 struct packet_wreg32 *wreg_pkt)
3317 {
3318         struct goya_device *goya = hdev->asic_specific;
3319         u32 sob_start_addr, sob_end_addr;
3320         u16 reg_offset;
3321
3322         reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3323                         GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3324
3325         dev_dbg(hdev->dev, "WREG32 packet details:\n");
3326         dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3327         dev_dbg(hdev->dev, "value      == 0x%x\n",
3328                 le32_to_cpu(wreg_pkt->value));
3329
3330         if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3331                 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3332                         reg_offset);
3333                 return -EPERM;
3334         }
3335
3336         /*
3337          * With MMU, DMA channels are not secured, so it doesn't matter where
3338          * the WR COMP is written, because the write will go out with the
3339          * non-secured property anyway
3340          */
3341         if (goya->hw_cap_initialized & HW_CAP_MMU)
3342                 return 0;
3343
3344         sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3345         sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3346
3347         if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3348                         (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3349
3350                 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3351                         le32_to_cpu(wreg_pkt->value));
3352                 return -EPERM;
3353         }
3354
3355         return 0;
3356 }
3357
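/*
 * goya_validate_cb - single pass over the user CB that rejects privileged
 * packets (WREG_BULK, MSG_PROT, CP_DMA, STOP), validates WREG32 and
 * LIN_DMA packets, and accumulates parser->patched_cb_size, including room
 * for the two MSG_PROT packets that are appended to every patched CB.
 */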
3358 static int goya_validate_cb(struct hl_device *hdev,
3359                         struct hl_cs_parser *parser, bool is_mmu)
3360 {
3361         u32 cb_parsed_length = 0;
3362         int rc = 0;
3363
3364         parser->patched_cb_size = 0;
3365
3366         /* user_cb_size is greater than 0 so the loop will always be executed */
3367         while (cb_parsed_length < parser->user_cb_size) {
3368                 enum packet_id pkt_id;
3369                 u16 pkt_size;
3370                 struct goya_packet *user_pkt;
3371
3372                 user_pkt = (struct goya_packet *) (uintptr_t)
3373                         (parser->user_cb->kernel_address + cb_parsed_length);
3374
3375                 pkt_id = (enum packet_id) (
3376                                 (le64_to_cpu(user_pkt->header) &
3377                                 PACKET_HEADER_PACKET_ID_MASK) >>
3378                                         PACKET_HEADER_PACKET_ID_SHIFT);
3379
3380                 pkt_size = goya_packet_sizes[pkt_id];
3381                 cb_parsed_length += pkt_size;
3382                 if (cb_parsed_length > parser->user_cb_size) {
3383                         dev_err(hdev->dev,
3384                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3385                         rc = -EINVAL;
3386                         break;
3387                 }
3388
3389                 switch (pkt_id) {
3390                 case PACKET_WREG_32:
3391                         /*
3392                          * Although it is validated after copy in patch_cb(),
3393                          * need to validate here as well because patch_cb() is
3394                          * not called in MMU path while this function is called
3395                          */
3396                         rc = goya_validate_wreg32(hdev,
3397                                 parser, (struct packet_wreg32 *) user_pkt);
3398                         parser->patched_cb_size += pkt_size;
3399                         break;
3400
3401                 case PACKET_WREG_BULK:
3402                         dev_err(hdev->dev,
3403                                 "User not allowed to use WREG_BULK\n");
3404                         rc = -EPERM;
3405                         break;
3406
3407                 case PACKET_MSG_PROT:
3408                         dev_err(hdev->dev,
3409                                 "User not allowed to use MSG_PROT\n");
3410                         rc = -EPERM;
3411                         break;
3412
3413                 case PACKET_CP_DMA:
3414                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3415                         rc = -EPERM;
3416                         break;
3417
3418                 case PACKET_STOP:
3419                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3420                         rc = -EPERM;
3421                         break;
3422
3423                 case PACKET_LIN_DMA:
3424                         if (is_mmu)
3425                                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3426                                         (struct packet_lin_dma *) user_pkt);
3427                         else
3428                                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3429                                         (struct packet_lin_dma *) user_pkt);
3430                         break;
3431
3432                 case PACKET_MSG_LONG:
3433                 case PACKET_MSG_SHORT:
3434                 case PACKET_FENCE:
3435                 case PACKET_NOP:
3436                         parser->patched_cb_size += pkt_size;
3437                         break;
3438
3439                 default:
3440                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3441                                 pkt_id);
3442                         rc = -EINVAL;
3443                         break;
3444                 }
3445
3446                 if (rc)
3447                         break;
3448         }
3449
3450         /*
3451          * The new CB should have space at the end for two MSG_PROT packets:
3452          * 1. A packet that will act as a completion packet
3453          * 2. A packet that will generate MSI-X interrupt
3454          */
3455         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3456
3457         return rc;
3458 }
3459
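/*
 * goya_patch_dma_packet - rewrite one user LIN_DMA packet into the patched
 * CB. Device-to-device transfers, zero-size packets and memset packets
 * toward the device are copied through unchanged; a real host transfer is
 * expanded into one LIN_DMA packet per merged SG descriptor of the pinned
 * buffer. Only the last packet of the chain keeps the user's
 * RDCOMP/WRCOMP bits, so completion signaling behaves as if the original
 * packet had executed once.
 */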
3460 static int goya_patch_dma_packet(struct hl_device *hdev,
3461                                 struct hl_cs_parser *parser,
3462                                 struct packet_lin_dma *user_dma_pkt,
3463                                 struct packet_lin_dma *new_dma_pkt,
3464                                 u32 *new_dma_pkt_size)
3465 {
3466         struct hl_userptr *userptr;
3467         struct scatterlist *sg, *sg_next_iter;
3468         u32 count, dma_desc_cnt;
3469         u64 len, len_next;
3470         dma_addr_t dma_addr, dma_addr_next;
3471         enum goya_dma_direction user_dir;
3472         u64 device_memory_addr, addr;
3473         enum dma_data_direction dir;
3474         struct sg_table *sgt;
3475         bool skip_host_mem_pin = false;
3476         bool user_memset;
3477         u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3478
3479         ctl = le32_to_cpu(user_dma_pkt->ctl);
3480
3481         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3482                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3483
3484         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3485                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3486
3487         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3488                         (user_dma_pkt->tsize == 0)) {
3489                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3490                 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3491                 return 0;
3492         }
3493
3494         if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3495                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3496                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3497                 dir = DMA_TO_DEVICE;
3498                 if (user_memset)
3499                         skip_host_mem_pin = true;
3500         } else {
3501                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3502                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3503                 dir = DMA_FROM_DEVICE;
3504         }
3505
3506         if ((!skip_host_mem_pin) &&
3507                 (!hl_userptr_is_pinned(hdev, addr,
3508                         le32_to_cpu(user_dma_pkt->tsize),
3509                         parser->job_userptr_list, &userptr))) {
3510                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3511                                 addr, le32_to_cpu(user_dma_pkt->tsize));
3512                 return -EFAULT;
3513         }
3514
3515         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3516                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3517                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3518                 return 0;
3519         }
3520
3521         user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3522
3523         user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3524
3525         sgt = userptr->sgt;
3526         dma_desc_cnt = 0;
3527
3528         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3529                 len = sg_dma_len(sg);
3530                 dma_addr = sg_dma_address(sg);
3531
3532                 if (len == 0)
3533                         break;
3534
3535                 while ((count + 1) < sgt->nents) {
3536                         sg_next_iter = sg_next(sg);
3537                         len_next = sg_dma_len(sg_next_iter);
3538                         dma_addr_next = sg_dma_address(sg_next_iter);
3539
3540                         if (len_next == 0)
3541                                 break;
3542
3543                         if ((dma_addr + len == dma_addr_next) &&
3544                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3545                                 len += len_next;
3546                                 count++;
3547                                 sg = sg_next_iter;
3548                         } else {
3549                                 break;
3550                         }
3551                 }
3552
3553                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3554                 if (likely(dma_desc_cnt))
3555                         ctl &= ~GOYA_PKT_CTL_EB_MASK;
3556                 ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3557                                 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3558                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3559                 new_dma_pkt->tsize = cpu_to_le32((u32) len);
3560
3561                 if (dir == DMA_TO_DEVICE) {
3562                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3563                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3564                 } else {
3565                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3566                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3567                 }
3568
3569                 if (!user_memset)
3570                         device_memory_addr += len;
3571                 dma_desc_cnt++;
3572                 new_dma_pkt++;
3573         }
3574
3575         if (!dma_desc_cnt) {
3576                 dev_err(hdev->dev,
3577                         "Got 0 SG entries when patching DMA packet\n");
3578                 return -EFAULT;
3579         }
3580
3581         /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3582         new_dma_pkt--;
3583         new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3584
3585         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3586
3587         return 0;
3588 }
3589
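/*
 * goya_patch_cb - second pass of the no-MMU flow that actually fills the
 * kernel-owned patched CB: LIN_DMA packets go through
 * goya_patch_dma_packet(), WREG32 packets are copied and re-validated, and
 * the benign packet types are copied verbatim.
 */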
3590 static int goya_patch_cb(struct hl_device *hdev,
3591                                 struct hl_cs_parser *parser)
3592 {
3593         u32 cb_parsed_length = 0;
3594         u32 cb_patched_cur_length = 0;
3595         int rc = 0;
3596
3597         /* user_cb_size is greater than 0 so the loop will always be executed */
3598         while (cb_parsed_length < parser->user_cb_size) {
3599                 enum packet_id pkt_id;
3600                 u16 pkt_size;
3601                 u32 new_pkt_size = 0;
3602                 struct goya_packet *user_pkt, *kernel_pkt;
3603
3604                 user_pkt = (struct goya_packet *) (uintptr_t)
3605                         (parser->user_cb->kernel_address + cb_parsed_length);
3606                 kernel_pkt = (struct goya_packet *) (uintptr_t)
3607                         (parser->patched_cb->kernel_address +
3608                                         cb_patched_cur_length);
3609
3610                 pkt_id = (enum packet_id) (
3611                                 (le64_to_cpu(user_pkt->header) &
3612                                 PACKET_HEADER_PACKET_ID_MASK) >>
3613                                         PACKET_HEADER_PACKET_ID_SHIFT);
3614
3615                 pkt_size = goya_packet_sizes[pkt_id];
3616                 cb_parsed_length += pkt_size;
3617                 if (cb_parsed_length > parser->user_cb_size) {
3618                         dev_err(hdev->dev,
3619                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3620                         rc = -EINVAL;
3621                         break;
3622                 }
3623
3624                 switch (pkt_id) {
3625                 case PACKET_LIN_DMA:
3626                         rc = goya_patch_dma_packet(hdev, parser,
3627                                         (struct packet_lin_dma *) user_pkt,
3628                                         (struct packet_lin_dma *) kernel_pkt,
3629                                         &new_pkt_size);
3630                         cb_patched_cur_length += new_pkt_size;
3631                         break;
3632
3633                 case PACKET_WREG_32:
3634                         memcpy(kernel_pkt, user_pkt, pkt_size);
3635                         cb_patched_cur_length += pkt_size;
3636                         rc = goya_validate_wreg32(hdev, parser,
3637                                         (struct packet_wreg32 *) kernel_pkt);
3638                         break;
3639
3640                 case PACKET_WREG_BULK:
3641                         dev_err(hdev->dev,
3642                                 "User not allowed to use WREG_BULK\n");
3643                         rc = -EPERM;
3644                         break;
3645
3646                 case PACKET_MSG_PROT:
3647                         dev_err(hdev->dev,
3648                                 "User not allowed to use MSG_PROT\n");
3649                         rc = -EPERM;
3650                         break;
3651
3652                 case PACKET_CP_DMA:
3653                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3654                         rc = -EPERM;
3655                         break;
3656
3657                 case PACKET_STOP:
3658                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3659                         rc = -EPERM;
3660                         break;
3661
3662                 case PACKET_MSG_LONG:
3663                 case PACKET_MSG_SHORT:
3664                 case PACKET_FENCE:
3665                 case PACKET_NOP:
3666                         memcpy(kernel_pkt, user_pkt, pkt_size);
3667                         cb_patched_cur_length += pkt_size;
3668                         break;
3669
3670                 default:
3671                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3672                                 pkt_id);
3673                         rc = -EINVAL;
3674                         break;
3675                 }
3676
3677                 if (rc)
3678                         break;
3679         }
3680
3681         return rc;
3682 }
3683
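/*
 * goya_parse_cb_mmu - with the MMU enabled no address patching is needed,
 * so the user CB is copied as-is into a kernel-owned CB (plus room for the
 * two trailing MSG_PROT packets) and then validated in place. A size
 * mismatch between the precomputed and the validated patched size means
 * the user CB was malformed.
 */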
3684 static int goya_parse_cb_mmu(struct hl_device *hdev,
3685                 struct hl_cs_parser *parser)
3686 {
3687         u64 patched_cb_handle;
3688         u32 patched_cb_size;
3689         struct hl_cb *user_cb;
3690         int rc;
3691
3692         /*
3693          * The new CB should have space at the end for two MSG_PROT pkt:
3694          * 1. A packet that will act as a completion packet
3695          * 2. A packet that will generate MSI-X interrupt
3696          */
3697         parser->patched_cb_size = parser->user_cb_size +
3698                         sizeof(struct packet_msg_prot) * 2;
3699
3700         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3701                                 parser->patched_cb_size,
3702                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
3703
3704         if (rc) {
3705                 dev_err(hdev->dev,
3706                         "Failed to allocate patched CB for DMA CS %d\n",
3707                         rc);
3708                 return rc;
3709         }
3710
3711         patched_cb_handle >>= PAGE_SHIFT;
3712         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3713                                 (u32) patched_cb_handle);
3714         /* hl_cb_get should never fail here so use kernel WARN */
3715         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3716                         (u32) patched_cb_handle);
3717         if (!parser->patched_cb) {
3718                 rc = -EFAULT;
3719                 goto out;
3720         }
3721
3722         /*
3723          * The check that parser->user_cb_size <= parser->user_cb->size was done
3724          * in validate_queue_index().
3725          */
3726         memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
3727                 (void *) (uintptr_t) parser->user_cb->kernel_address,
3728                 parser->user_cb_size);
3729
3730         patched_cb_size = parser->patched_cb_size;
3731
3732         /* validate patched CB instead of user CB */
3733         user_cb = parser->user_cb;
3734         parser->user_cb = parser->patched_cb;
3735         rc = goya_validate_cb(hdev, parser, true);
3736         parser->user_cb = user_cb;
3737
3738         if (rc) {
3739                 hl_cb_put(parser->patched_cb);
3740                 goto out;
3741         }
3742
3743         if (patched_cb_size != parser->patched_cb_size) {
3744                 dev_err(hdev->dev, "user CB size mismatch\n");
3745                 hl_cb_put(parser->patched_cb);
3746                 rc = -EINVAL;
3747                 goto out;
3748         }
3749
3750 out:
3751         /*
3752          * Always call cb destroy here because we still have 1 reference
3753          * to it by calling cb_get earlier. After the job is completed,
3754          * cb_put will release it, but here we want to remove it from the
3755          * idr.
3756          */
3757         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3758                                         patched_cb_handle << PAGE_SHIFT);
3759
3760         return rc;
3761 }
3762
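/*
 * goya_parse_cb_no_mmu - two-pass flow: goya_validate_cb() first computes
 * how large the patched CB must be, a kernel CB of that size is allocated,
 * and goya_patch_cb() then fills it. On any failure the userptr list that
 * was built up during validation is torn down here.
 */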
3763 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3764                                 struct hl_cs_parser *parser)
3765 {
3766         u64 patched_cb_handle;
3767         int rc;
3768
3769         rc = goya_validate_cb(hdev, parser, false);
3770
3771         if (rc)
3772                 goto free_userptr;
3773
3774         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
3775                                 parser->patched_cb_size,
3776                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
3777         if (rc) {
3778                 dev_err(hdev->dev,
3779                         "Failed to allocate patched CB for DMA CS %d\n", rc);
3780                 goto free_userptr;
3781         }
3782
3783         patched_cb_handle >>= PAGE_SHIFT;
3784         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3785                                 (u32) patched_cb_handle);
3786         /* hl_cb_get should never fail here so use kernel WARN */
3787         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3788                         (u32) patched_cb_handle);
3789         if (!parser->patched_cb) {
3790                 rc = -EFAULT;
3791                 goto out;
3792         }
3793
3794         rc = goya_patch_cb(hdev, parser);
3795
3796         if (rc)
3797                 hl_cb_put(parser->patched_cb);
3798
3799 out:
3800         /*
3801          * Always call cb destroy here because we still have 1 reference
3802          * to it by calling cb_get earlier. After the job is completed,
3803          * cb_put will release it, but here we want to remove it from the
3804          * idr.
3805          */
3806         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3807                                 patched_cb_handle << PAGE_SHIFT);
3808
3809 free_userptr:
3810         if (rc)
3811                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
3812         return rc;
3813 }
3814
3815 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3816                                         struct hl_cs_parser *parser)
3817 {
3818         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3819         struct goya_device *goya = hdev->asic_specific;
3820
3821         if (goya->hw_cap_initialized & HW_CAP_MMU)
3822                 return 0;
3823
3824         /* For internal queue jobs, just check if CB address is valid */
3825         if (hl_mem_area_inside_range(
3826                         (u64) (uintptr_t) parser->user_cb,
3827                         parser->user_cb_size,
3828                         asic_prop->sram_user_base_address,
3829                         asic_prop->sram_end_address))
3830                 return 0;
3831
3832         if (hl_mem_area_inside_range(
3833                         (u64) (uintptr_t) parser->user_cb,
3834                         parser->user_cb_size,
3835                         asic_prop->dram_user_base_address,
3836                         asic_prop->dram_end_address))
3837                 return 0;
3838
3839         dev_err(hdev->dev,
3840                 "Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
3841                 parser->user_cb, parser->user_cb_size);
3842
3843         return -EFAULT;
3844 }
3845
3846 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3847 {
3848         struct goya_device *goya = hdev->asic_specific;
3849
3850         if (parser->queue_type == QUEUE_TYPE_INT)
3851                 return goya_parse_cb_no_ext_queue(hdev, parser);
3852
3853         if (goya->hw_cap_initialized & HW_CAP_MMU)
3854                 return goya_parse_cb_mmu(hdev, parser);
3855         else
3856                 return goya_parse_cb_no_mmu(hdev, parser);
3857 }
3858
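/*
 * goya_add_end_of_cb_packets - fill the two MSG_PROT slots that the parser
 * reserved at the end of the CB: the first writes cq_val to the completion
 * queue address, the second rings the MSI-X doorbell so the host gets an
 * interrupt once everything before it has completed.
 */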
3859 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
3860                                 u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
3861                                 bool eb)
3862 {
3863         struct packet_msg_prot *cq_pkt;
3864         u32 tmp;
3865
3866         cq_pkt = (struct packet_msg_prot *) (uintptr_t)
3867                 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
3868
3869         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3870                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3871                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3872         cq_pkt->ctl = cpu_to_le32(tmp);
3873         cq_pkt->value = cpu_to_le32(cq_val);
3874         cq_pkt->addr = cpu_to_le64(cq_addr);
3875
3876         cq_pkt++;
3877
3878         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3879                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3880         cq_pkt->ctl = cpu_to_le32(tmp);
3881         cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3882         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3883 }
3884
3885 void goya_update_eq_ci(struct hl_device *hdev, u32 val)
3886 {
3887         WREG32(mmCPU_EQ_CI, val);
3888 }
3889
3890 void goya_restore_phase_topology(struct hl_device *hdev)
3891 {
3892
3893 }
3894
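/*
 * goya_clear_sm_regs - zero all sync-manager sync objects and monitor
 * status registers, 4 bytes per iteration, then read one register back so
 * the posted writes are flushed before anyone relies on the cleared state.
 */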
3895 static void goya_clear_sm_regs(struct hl_device *hdev)
3896 {
3897         int i, num_of_sob_in_longs, num_of_mon_in_longs;
3898
3899         num_of_sob_in_longs =
3900                 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
3901
3902         num_of_mon_in_longs =
3903                 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
3904
3905         for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
3906                 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
3907
3908         for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
3909                 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
3910
3911         /* Flush all WREG to prevent race */
3912         i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
3913 }
3914
3915 /*
3916  * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
3917  *                       address.
3918  *
3919  * @hdev:       pointer to hl_device structure
3920  * @addr:       device or host mapped address
3921  * @val:        returned value
3922  *
3923  * In case of DDR address that is not mapped into the default aperture that
3924  * the DDR bar exposes, the function will configure the iATU so that the DDR
3925  * bar will be positioned at a base address that allows reading from the
3926  * required address. Configuring the iATU during normal operation can
3927  * lead to undefined behavior and should therefore be done with extreme care.
3928  *
3929  */
3930 static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
3931 {
3932         struct asic_fixed_properties *prop = &hdev->asic_prop;
3933         u64 ddr_bar_addr;
3934         int rc = 0;
3935
3936         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
3937                 *val = RREG32(addr - CFG_BASE);
3938
3939         } else if ((addr >= SRAM_BASE_ADDR) &&
3940                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
3941
3942                 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
3943                                 (addr - SRAM_BASE_ADDR));
3944
3945         } else if ((addr >= DRAM_PHYS_BASE) &&
3946                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
3947
3948                 u64 bar_base_addr = DRAM_PHYS_BASE +
3949                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
3950
3951                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
3952                 if (ddr_bar_addr != U64_MAX) {
3953                         *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
3954                                                 (addr - bar_base_addr));
3955
3956                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
3957                                                         ddr_bar_addr);
3958                 }
3959                 if (ddr_bar_addr == U64_MAX)
3960                         rc = -EIO;
3961
3962         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
3963                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
3964
3965         } else {
3966                 rc = -EFAULT;
3967         }
3968
3969         return rc;
3970 }
3971
3972 /*
3973  * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
3974  *                        address.
3975  *
3976  * @hdev:       pointer to hl_device structure
3977  * @addr:       device or host mapped address
3978  * @val:        value to write
3979  *
3980  * In case of DDR address that is not mapped into the default aperture that
3981  * the DDR bar exposes, the function will configure the iATU so that the DDR
3982  * bar will be positioned at a base address that allows writing to the
3983  * required address. Configuring the iATU during normal operation can
3984  * lead to undefined behavior and should therefore be done with extreme care.
3985  *
3986  */
3987 static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
3988 {
3989         struct asic_fixed_properties *prop = &hdev->asic_prop;
3990         u64 ddr_bar_addr;
3991         int rc = 0;
3992
3993         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
3994                 WREG32(addr - CFG_BASE, val);
3995
3996         } else if ((addr >= SRAM_BASE_ADDR) &&
3997                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
3998
3999                 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4000                                         (addr - SRAM_BASE_ADDR));
4001
4002         } else if ((addr >= DRAM_PHYS_BASE) &&
4003                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4004
4005                 u64 bar_base_addr = DRAM_PHYS_BASE +
4006                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4007
4008                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4009                 if (ddr_bar_addr != U64_MAX) {
4010                         writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4011                                                 (addr - bar_base_addr));
4012
4013                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4014                                                         ddr_bar_addr);
4015                 }
4016                 if (ddr_bar_addr == U64_MAX)
4017                         rc = -EIO;
4018
4019         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4020                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4021
4022         } else {
4023                 rc = -EFAULT;
4024         }
4025
4026         return rc;
4027 }
4028
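/*
 * goya_debugfs_read64/write64 - 64-bit variants of the accessors above,
 * with the same aperture handling. The CFG region is split into two 32-bit
 * accesses, while SRAM, DRAM and host memory are accessed natively with
 * readq()/writeq().
 */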
4029 static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4030 {
4031         struct asic_fixed_properties *prop = &hdev->asic_prop;
4032         u64 ddr_bar_addr;
4033         int rc = 0;
4034
4035         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4036                 u32 val_l = RREG32(addr - CFG_BASE);
4037                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4038
4039                 *val = (((u64) val_h) << 32) | val_l;
4040
4041         } else if ((addr >= SRAM_BASE_ADDR) &&
4042                         (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4043
4044                 *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4045                                 (addr - SRAM_BASE_ADDR));
4046
4047         } else if ((addr >= DRAM_PHYS_BASE) &&
4048                    (addr <=
4049                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
4050
4051                 u64 bar_base_addr = DRAM_PHYS_BASE +
4052                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4053
4054                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4055                 if (ddr_bar_addr != U64_MAX) {
4056                         *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
4057                                                 (addr - bar_base_addr));
4058
4059                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4060                                                         ddr_bar_addr);
4061                 }
4062                 if (ddr_bar_addr == U64_MAX)
4063                         rc = -EIO;
4064
4065         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4066                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4067
4068         } else {
4069                 rc = -EFAULT;
4070         }
4071
4072         return rc;
4073 }
4074
4075 static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4076 {
4077         struct asic_fixed_properties *prop = &hdev->asic_prop;
4078         u64 ddr_bar_addr;
4079         int rc = 0;
4080
4081         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4082                 WREG32(addr - CFG_BASE, lower_32_bits(val));
4083                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
4084
4085         } else if ((addr >= SRAM_BASE_ADDR) &&
4086                         (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4087
4088                 writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4089                                         (addr - SRAM_BASE_ADDR));
4090
4091         } else if ((addr >= DRAM_PHYS_BASE) &&
4092                    (addr <=
4093                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
4094
4095                 u64 bar_base_addr = DRAM_PHYS_BASE +
4096                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4097
4098                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4099                 if (ddr_bar_addr != U64_MAX) {
4100                         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4101                                                 (addr - bar_base_addr));
4102
4103                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4104                                                         ddr_bar_addr);
4105                 }
4106                 if (ddr_bar_addr == U64_MAX)
4107                         rc = -EIO;
4108
4109         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4110                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4111
4112         } else {
4113                 rc = -EFAULT;
4114         }
4115
4116         return rc;
4117 }
4118
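/*
 * goya_read_pte/goya_write_pte - access page-table entries in DRAM through
 * the DDR BAR. These rely on the BAR already being positioned so that it
 * covers the MMU page tables (goya->ddr_bar_cur_addr tracks the current
 * base), and they bail out during a hard reset when the BAR can't be
 * trusted.
 */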
4119 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4120 {
4121         struct goya_device *goya = hdev->asic_specific;
4122
4123         if (hdev->hard_reset_pending)
4124                 return U64_MAX;
4125
4126         return readq(hdev->pcie_bar[DDR_BAR_ID] +
4127                         (addr - goya->ddr_bar_cur_addr));
4128 }
4129
4130 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4131 {
4132         struct goya_device *goya = hdev->asic_specific;
4133
4134         if (hdev->hard_reset_pending)
4135                 return;
4136
4137         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4138                         (addr - goya->ddr_bar_cur_addr));
4139 }
4140
4141 static const char *_goya_get_event_desc(u16 event_type)
4142 {
4143         switch (event_type) {
4144         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4145                 return "PCIe_if";
4146         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4147         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4148         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4149         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4150         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4151         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4152         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4153         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4154                 return "TPC%d_ecc";
4155         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4156                 return "MME_ecc";
4157         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4158                 return "MME_ecc_ext";
4159         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4160                 return "MMU_ecc";
4161         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4162                 return "DMA_macro";
4163         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4164                 return "DMA_ecc";
4165         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4166                 return "CPU_if_ecc";
4167         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4168                 return "PSOC_mem";
4169         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4170                 return "PSOC_coresight";
4171         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4172                 return "SRAM%d";
4173         case GOYA_ASYNC_EVENT_ID_GIC500:
4174                 return "GIC500";
4175         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4176                 return "PLL%d";
4177         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4178                 return "AXI_ecc";
4179         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4180                 return "L2_ram_ecc";
4181         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4182                 return "PSOC_gpio_05_sw_reset";
4183         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4184                 return "PSOC_gpio_10_vrhot_icrit";
4185         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4186                 return "PCIe_dec";
4187         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4188         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4189         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4190         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4191         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4192         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4193         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4194         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4195                 return "TPC%d_dec";
4196         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4197                 return "MME_wacs";
4198         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4199                 return "MME_wacsd";
4200         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4201                 return "CPU_axi_splitter";
4202         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4203                 return "PSOC_axi_dec";
4204         case GOYA_ASYNC_EVENT_ID_PSOC:
4205                 return "PSOC";
4206         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4207         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4208         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4209         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4210         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4211         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4212         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4213         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4214                 return "TPC%d_krn_err";
4215         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4216                 return "TPC%d_cq";
4217         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4218                 return "TPC%d_qm";
4219         case GOYA_ASYNC_EVENT_ID_MME_QM:
4220                 return "MME_qm";
4221         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4222                 return "MME_cq";
4223         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4224                 return "DMA%d_qm";
4225         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4226                 return "DMA%d_ch";
4227         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4228         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4229         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4230         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4231         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4232         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4233         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4234         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4235                 return "TPC%d_bmon_spmu";
4236         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4237                 return "DMA_bm_ch%d";
4238         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4239                 return "POWER_ENV_S";
4240         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4241                 return "POWER_ENV_E";
4242         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4243                 return "THERMAL_ENV_S";
4244         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4245                 return "THERMAL_ENV_E";
4246         default:
4247                 return "N/A";
4248         }
4249 }
4250
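/*
 * goya_get_event_desc - expand the printf-style templates returned by
 * _goya_get_event_desc() with the engine index. The index is recovered
 * from the event ID spacing: TPC ECC and DEC IDs are 3 apart and KRN_ERR
 * IDs are 10 apart (hence the divisions), while the SRAM, PLL, CMDQ, QM
 * and DMA channel IDs are consecutive.
 */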
4251 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4252 {
4253         u8 index;
4254
4255         switch (event_type) {
4256         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4257         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4258         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4259         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4260         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4261         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4262         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4263         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4264                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4265                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4266                 break;
4267         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4268                 index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4269                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4270                 break;
4271         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4272                 index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4273                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4274                 break;
4275         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4276         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4277         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4278         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4279         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4280         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4281         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4282         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4283                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4284                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4285                 break;
4286         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4287         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4288         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4289         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4290         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4291         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4292         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4293         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4294                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4295                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4296                 break;
4297         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4298                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4299                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4300                 break;
4301         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4302                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4303                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4304                 break;
4305         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4306                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4307                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4308                 break;
4309         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4310                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4311                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4312                 break;
4313         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4314         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4315         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4316         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4317         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4318         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4319         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4320         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4321                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4322                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4323                 break;
4324         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4325                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4326                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4327                 break;
4328         default:
4329                 snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4330                 break;
4331         }
4332 }
4333
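/*
 * RAZWI (read-as-zero / write-ignored) reporting: each DMA_MACRO_RAZWI_*_VLD
 * register stays set after an illegal LBW/HBW access until the driver
 * writes 0 to it, which also re-arms the capture logic for the next
 * violation.
 */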
4334 static void goya_print_razwi_info(struct hl_device *hdev)
4335 {
4336         if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4337                 dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4338                 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4339         }
4340
4341         if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4342                 dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4343                 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4344         }
4345
4346         if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4347                 dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4348                 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4349         }
4350
4351         if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4352                 dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4353                 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4354         }
4355 }
4356
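/*
 * On a page fault the MMU latches the faulting VA across two registers:
 * MMU_PAGE_ERROR_CAPTURE holds a valid bit plus VA bits 49:32, while
 * MMU_PAGE_ERROR_CAPTURE_VA holds bits 31:0, so the 50-bit address is
 * reassembled as (high << 32) | low before being reported.
 */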
4357 static void goya_print_mmu_error_info(struct hl_device *hdev)
4358 {
4359         struct goya_device *goya = hdev->asic_specific;
4360         u64 addr;
4361         u32 val;
4362
4363         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4364                 return;
4365
4366         val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4367         if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4368                 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4369                 addr <<= 32;
4370                 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4371
4372                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4373                                         addr);
4374
4375                 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4376         }
4377 }
4378
4379 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4380                                 bool razwi)
4381 {
4382         char desc[GOYA_MAX_STRING_LEN] = "";
4383
4384         goya_get_event_desc(event_type, desc, sizeof(desc));
4385         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4386                 event_type, desc);
4387
4388         if (razwi) {
4389                 goya_print_razwi_info(hdev);
4390                 goya_print_mmu_error_info(hdev);
4391         }
4392 }
4393
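/*
 * Build a variable-length ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet: a
 * fixed header followed by the IRQ numbers as little-endian 32-bit words.
 * For example, unmasking 3 IRQs contributes 12 payload bytes, and the total
 * packet size is then rounded up to a multiple of 8 for ArmCP.
 */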
4394 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4395                 size_t irq_arr_size)
4396 {
4397         struct armcp_unmask_irq_arr_packet *pkt;
4398         size_t total_pkt_size;
4399         long result;
4400         int rc;
4401         int irq_num_entries, irq_arr_index;
4402         __le32 *goya_irq_arr;
4403
4404         total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
4405                         irq_arr_size;
4406
4407         /* data should be aligned to 8 bytes in order for ArmCP to copy it */
4408         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4409
4410         /* total_pkt_size is cast to u16 later on */
4411         if (total_pkt_size > USHRT_MAX) {
4412                 dev_err(hdev->dev, "too many elements in IRQ array\n");
4413                 return -EINVAL;
4414         }
4415
4416         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4417         if (!pkt)
4418                 return -ENOMEM;
4419
4420         irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4421         pkt->length = cpu_to_le32(irq_num_entries);
4422
4423         /* We must perform any necessary endianness conversion on the irq
4424          * array being passed to the goya hardware
4425          */
4426         for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4427                         irq_arr_index < irq_num_entries ; irq_arr_index++)
4428                 goya_irq_arr[irq_arr_index] =
4429                                 cpu_to_le32(irq_arr[irq_arr_index]);
4430
4431         pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4432                                                 ARMCP_PKT_CTL_OPCODE_SHIFT);
4433
4434         rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
4435                         HL_DEVICE_TIMEOUT_USEC, &result);
4436
4437         if (rc)
4438                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4439
4440         kfree(pkt);
4441
4442         return rc;
4443 }
4444
4445 static int goya_soft_reset_late_init(struct hl_device *hdev)
4446 {
4447         /*
4448          * Unmask all IRQs since some could have been received
4449          * during the soft reset
4450          */
4451         return goya_unmask_irq_arr(hdev, goya_all_events,
4452                                         sizeof(goya_all_events));
4453 }
4454
4455 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4456 {
4457         struct armcp_packet pkt;
4458         long result;
4459         int rc;
4460
4461         memset(&pkt, 0, sizeof(pkt));
4462
4463         pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
4464                                 ARMCP_PKT_CTL_OPCODE_SHIFT);
4465         pkt.value = cpu_to_le64(event_type);
4466
4467         rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4468                         HL_DEVICE_TIMEOUT_USEC, &result);
4469
4470         if (rc)
4471                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4472
4473         return rc;
4474 }
4475
4476 static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4477 {
4478         switch (event_type) {
4479         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4480                 dev_info_ratelimited(hdev->dev,
4481                         "Clock throttling due to power consumption\n");
4482                 break;
4483         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4484                 dev_info_ratelimited(hdev->dev,
4485                         "Power envelope is safe, back to optimal clock\n");
4486                 break;
4487         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4488                 dev_info_ratelimited(hdev->dev,
4489                         "Clock throttling due to overheating\n");
4490                 break;
4491         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4492                 dev_info_ratelimited(hdev->dev,
4493                         "Thermal envelope is safe, back to optimal clock\n");
4494                 break;
4495
4496         default:
4497                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
4498                         event_type);
4499                 break;
4500         }
4501 }
4502
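/*
 * Main event-queue dispatcher. Events fall into three classes: fatal errors
 * (ECC and friends) trigger a hard reset, engine/decoder errors are logged
 * together with the RAZWI/MMU capture info and then re-enabled through
 * ArmCP, and informational events (bmon, clock throttling) are only logged
 * and re-enabled.
 */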
4503 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4504 {
4505         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4506         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4507                                 >> EQ_CTL_EVENT_TYPE_SHIFT);
4508         struct goya_device *goya = hdev->asic_specific;
4509
4510         goya->events_stat[event_type]++;
4511         goya->events_stat_aggregate[event_type]++;
4512
4513         switch (event_type) {
4514         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4515         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4516         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4517         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4518         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4519         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4520         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4521         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4522         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4523         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4524         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4525         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4526         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4527         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4528         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4529         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4530         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4531         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4532         case GOYA_ASYNC_EVENT_ID_GIC500:
4533         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4534         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4535         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4536         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4537                 goya_print_irq_info(hdev, event_type, false);
4538                 hl_device_reset(hdev, true, false);
4539                 break;
4540
4541         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4542         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4543         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4544         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4545         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4546         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4547         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4548         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4549         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4550         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4551         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4552         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4553         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4554         case GOYA_ASYNC_EVENT_ID_PSOC:
4555         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4556         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4557         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4558         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4559         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4560         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4561         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4562         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4563         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4564         case GOYA_ASYNC_EVENT_ID_MME_QM:
4565         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4566         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4567         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4568                 goya_print_irq_info(hdev, event_type, true);
4569                 goya_unmask_irq(hdev, event_type);
4570                 break;
4571
4572         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4573         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4574         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4575         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4576         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4577         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4578         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4579         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4580         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4581         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4582                 goya_print_irq_info(hdev, event_type, false);
4583                 goya_unmask_irq(hdev, event_type);
4584                 break;
4585
4586         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4587         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4588         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4589         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4590                 goya_print_clk_change_info(hdev, event_type);
4591                 goya_unmask_irq(hdev, event_type);
4592                 break;
4593
4594         default:
4595                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4596                                 event_type);
4597                 break;
4598         }
4599 }
4600
4601 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4602 {
4603         struct goya_device *goya = hdev->asic_specific;
4604
4605         if (aggregate) {
4606                 *size = (u32) sizeof(goya->events_stat_aggregate);
4607                 return goya->events_stat_aggregate;
4608         }
4609
4610         *size = (u32) sizeof(goya->events_stat);
4611         return goya->events_stat;
4612 }
4613
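/*
 * Fill a device memory range using LIN_DMA packets submitted on QMAN0.
 * With the MEMSET bit set, src_addr carries the 64-bit fill value instead
 * of a source address. The driver caps each packet at 2GB, so e.g. a 5GB
 * range is split into packets of 2GB + 2GB + 1GB.
 */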
4614 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4615                                 u64 val, bool is_dram)
4616 {
4617         struct packet_lin_dma *lin_dma_pkt;
4618         struct hl_cs_job *job;
4619         u32 cb_size, ctl;
4620         struct hl_cb *cb;
4621         int rc, lin_dma_pkts_cnt;
4622
4623         lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4624         cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4625                                                 sizeof(struct packet_msg_prot);
4626         cb = hl_cb_kernel_create(hdev, cb_size);
4627         if (!cb)
4628                 return -ENOMEM;
4629
4630         lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4631
4632         do {
4633                 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4634
4635                 ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4636                                 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4637                                 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4638                                 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4639                                 (1 << GOYA_PKT_CTL_MB_SHIFT));
4640                 ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4641                                 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4642                 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4643
4644                 lin_dma_pkt->src_addr = cpu_to_le64(val);
4645                 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4646                 if (lin_dma_pkts_cnt > 1)
4647                         lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4648                 else
4649                         lin_dma_pkt->tsize = cpu_to_le32(size);
4650
4651                 size -= SZ_2G;
4652                 addr += SZ_2G;
4653                 lin_dma_pkt++;
4654         } while (--lin_dma_pkts_cnt);
4655
4656         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4657         if (!job) {
4658                 dev_err(hdev->dev, "Failed to allocate a new job\n");
4659                 rc = -ENOMEM;
4660                 goto release_cb;
4661         }
4662
4663         job->id = 0;
4664         job->user_cb = cb;
4665         job->user_cb->cs_cnt++;
4666         job->user_cb_size = cb_size;
4667         job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4668         job->patched_cb = job->user_cb;
4669         job->job_cb_size = job->user_cb_size;
4670
4671         hl_debugfs_add_job(hdev, job);
4672
4673         rc = goya_send_job_on_qman0(hdev, job);
4674
4675         hl_debugfs_remove_job(hdev, job);
4676         kfree(job);
4677         cb->cs_cnt--;
4678
4679 release_cb:
4680         hl_cb_put(cb);
4681         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4682
4683         return rc;
4684 }
4685
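/*
 * Context switch between user ASIDs: scrub the SRAM with a fill pattern
 * (0x77.. looks like an arbitrary poison value), restore the per-channel
 * DMA write-completion addresses to their default sync-object targets, and
 * re-program the MMU registers for the new ASID.
 */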
4686 int goya_context_switch(struct hl_device *hdev, u32 asid)
4687 {
4688         struct asic_fixed_properties *prop = &hdev->asic_prop;
4689         u64 addr = prop->sram_base_address, sob_addr;
4690         u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4691         u64 val = 0x7777777777777777ull;
4692         int rc, dma_id;
4693         u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4694                                         mmDMA_CH_0_WR_COMP_ADDR_LO;
4695
4696         rc = goya_memset_device_memory(hdev, addr, size, val, false);
4697         if (rc) {
4698                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4699                 return rc;
4700         }
4701
4702         /* we need to reset registers that the user is allowed to change */
4703         sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4704         WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4705
4706         for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4707                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4708                                                         (dma_id - 1) * 4;
4709                 WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4710                                                 lower_32_bits(sob_addr));
4711         }
4712
4713         WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4714
4715         goya_mmu_prepare(hdev, asid);
4716
4717         goya_clear_sm_regs(hdev);
4718
4719         return 0;
4720 }
4721
4722 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4723 {
4724         struct asic_fixed_properties *prop = &hdev->asic_prop;
4725         struct goya_device *goya = hdev->asic_specific;
4726         u64 addr = prop->mmu_pgt_addr;
4727         u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4728                         MMU_CACHE_MNG_SIZE;
4729
4730         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4731                 return 0;
4732
4733         return goya_memset_device_memory(hdev, addr, size, 0, true);
4734 }
4735
4736 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4737 {
4738         struct goya_device *goya = hdev->asic_specific;
4739         u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4740         u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4741         u64 val = 0x9999999999999999ull;
4742
4743         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4744                 return 0;
4745
4746         return goya_memset_device_memory(hdev, addr, size, val, true);
4747 }
4748
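/*
 * Map the regions the embedded CPU needs through the device MMU: the F/W
 * image in DRAM is mapped 1:1 with 2MB pages, and the 2MB CPU-accessible
 * DMA region is mapped with a single 2MB page when its host address happens
 * to be 2MB aligned, otherwise with 512 4KB pages.
 */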
4749 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4750 {
4751         struct asic_fixed_properties *prop = &hdev->asic_prop;
4752         struct goya_device *goya = hdev->asic_specific;
4753         s64 off, cpu_off;
4754         int rc;
4755
4756         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4757                 return 0;
4758
4759         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4760                 rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
4761                                 prop->dram_base_address + off, PAGE_SIZE_2MB,
4762                                 (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
4763                 if (rc) {
4764                         dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4765                                 prop->dram_base_address + off);
4766                         goto unmap;
4767                 }
4768         }
4769
4770         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4771                 rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4772                         hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
4773
4774                 if (rc) {
4775                         dev_err(hdev->dev,
4776                                 "Map failed for CPU accessible memory\n");
4777                         off -= PAGE_SIZE_2MB;
4778                         goto unmap;
4779                 }
4780         } else {
4781                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4782                         rc = hl_mmu_map(hdev->kernel_ctx,
4783                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4784                                 hdev->cpu_accessible_dma_address + cpu_off,
4785                                 PAGE_SIZE_4KB, true);
4786                         if (rc) {
4787                                 dev_err(hdev->dev,
4788                                         "Map failed for CPU accessible memory\n");
4789                                 cpu_off -= PAGE_SIZE_4KB;
4790                                 goto unmap_cpu;
4791                         }
4792                 }
4793         }
4794
4795         goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4796         goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4797         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4798         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4799
4800         /* Make sure configuration is flushed to device */
4801         RREG32(mmCPU_IF_AWUSER_OVR_EN);
4802
4803         goya->device_cpu_mmu_mappings_done = true;
4804
4805         return 0;
4806
4807 unmap_cpu:
4808         for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4809                 if (hl_mmu_unmap(hdev->kernel_ctx,
4810                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4811                                 PAGE_SIZE_4KB, true))
4812                         dev_warn_ratelimited(hdev->dev,
4813                                 "failed to unmap address 0x%llx\n",
4814                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4815 unmap:
4816         for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4817                 if (hl_mmu_unmap(hdev->kernel_ctx,
4818                                 prop->dram_base_address + off, PAGE_SIZE_2MB,
4819                                 true))
4820                         dev_warn_ratelimited(hdev->dev,
4821                                 "failed to unmap address 0x%llx\n",
4822                                 prop->dram_base_address + off);
4823
4824         return rc;
4825 }
4826
4827 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4828 {
4829         struct asic_fixed_properties *prop = &hdev->asic_prop;
4830         struct goya_device *goya = hdev->asic_specific;
4831         u32 off, cpu_off;
4832
4833         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4834                 return;
4835
4836         if (!goya->device_cpu_mmu_mappings_done)
4837                 return;
4838
4839         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4840         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4841
4842         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4843                 if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4844                                 PAGE_SIZE_2MB, true))
4845                         dev_warn(hdev->dev,
4846                                 "Failed to unmap CPU accessible memory\n");
4847         } else {
4848                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
4849                         if (hl_mmu_unmap(hdev->kernel_ctx,
4850                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4851                                         PAGE_SIZE_4KB,
4852                                         (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
4853                                 dev_warn_ratelimited(hdev->dev,
4854                                         "failed to unmap address 0x%llx\n",
4855                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4856         }
4857
4858         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
4859                 if (hl_mmu_unmap(hdev->kernel_ctx,
4860                                 prop->dram_base_address + off, PAGE_SIZE_2MB,
4861                                 (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
4862                         dev_warn_ratelimited(hdev->dev,
4863                                         "Failed to unmap address 0x%llx\n",
4864                                         prop->dram_base_address + off);
4865
4866         goya->device_cpu_mmu_mappings_done = false;
4867 }
4868
4869 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4870 {
4871         struct goya_device *goya = hdev->asic_specific;
4872         int i;
4873
4874         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4875                 return;
4876
4877         if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4878                 WARN(1, "asid %u is too big\n", asid);
4879                 return;
4880         }
4881
4882         /* zero the MMBP and ASID bits and then set the ASID */
4883         for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4884                 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
4885 }
4886
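/*
 * Full MMU cache invalidation: writing 1 to STLB_INV_ALL_START kicks off
 * the H/W flush, and the H/W clears the register when it is done, which is
 * what the poll below waits for.
 */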
4887 static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
4888                                         u32 flags)
4889 {
4890         struct goya_device *goya = hdev->asic_specific;
4891         u32 status, timeout_usec;
4892         int rc;
4893
4894         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
4895                 hdev->hard_reset_pending)
4896                 return 0;
4897
4898         /* no need for an L1-only invalidation in Goya */
4899         if (!is_hard)
4900                 return 0;
4901
4902         if (hdev->pldm)
4903                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4904         else
4905                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4906
4907         mutex_lock(&hdev->mmu_cache_lock);
4908
4909         /* L0 & L1 invalidation */
4910         WREG32(mmSTLB_INV_ALL_START, 1);
4911
4912         rc = hl_poll_timeout(
4913                 hdev,
4914                 mmSTLB_INV_ALL_START,
4915                 status,
4916                 !status,
4917                 1000,
4918                 timeout_usec);
4919
4920         mutex_unlock(&hdev->mmu_cache_lock);
4921
4922         if (rc) {
4923                 dev_err_ratelimited(hdev->dev,
4924                                         "MMU cache invalidation timeout\n");
4925                 hl_device_reset(hdev, true, false);
4926         }
4927
4928         return rc;
4929 }
4930
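/*
 * Ranged invalidation uses the STLB producer/consumer pair instead: the
 * driver bumps the 8-bit producer index in STLB_CACHE_INV and then waits
 * until STLB_INV_CONSUMER_INDEX catches up with it.
 */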
4931 static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
4932                                 bool is_hard, u32 asid, u64 va, u64 size)
4933 {
4934         struct goya_device *goya = hdev->asic_specific;
4935         u32 status, timeout_usec, inv_data, pi;
4936         int rc;
4937
4938         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
4939                 hdev->hard_reset_pending)
4940                 return 0;
4941
4942         /* no need for an L1-only invalidation in Goya */
4943         if (!is_hard)
4944                 return 0;
4945
4946         if (hdev->pldm)
4947                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4948         else
4949                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4950
4951         mutex_lock(&hdev->mmu_cache_lock);
4952
4953         /*
4954          * TODO: currently we invalidate the entire L0 & L1 as in a regular
4955          * hard invalidation. Invalidation of specific cache lines, masked by
4956          * ASID & VA & size, still needs to be implemented.
4957          * Note that L1 will be flushed entirely in any case.
4958          */
4959
4960         /* L0 & L1 invalidation */
4961         inv_data = RREG32(mmSTLB_CACHE_INV);
4962         /* PI is 8 bits wide */
4963         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
4964         WREG32(mmSTLB_CACHE_INV,
4965                         (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
4966
4967         rc = hl_poll_timeout(
4968                 hdev,
4969                 mmSTLB_INV_CONSUMER_INDEX,
4970                 status,
4971                 status == pi,
4972                 1000,
4973                 timeout_usec);
4974
4975         mutex_unlock(&hdev->mmu_cache_lock);
4976
4977         if (rc) {
4978                 dev_err_ratelimited(hdev->dev,
4979                                         "MMU cache invalidation timeout\n");
4980                 hl_device_reset(hdev, true, false);
4981         }
4982
4983         return rc;
4984 }
4985
4986 int goya_send_heartbeat(struct hl_device *hdev)
4987 {
4988         struct goya_device *goya = hdev->asic_specific;
4989
4990         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4991                 return 0;
4992
4993         return hl_fw_send_heartbeat(hdev);
4994 }
4995
4996 int goya_armcp_info_get(struct hl_device *hdev)
4997 {
4998         struct goya_device *goya = hdev->asic_specific;
4999         struct asic_fixed_properties *prop = &hdev->asic_prop;
5000         u64 dram_size;
5001         int rc;
5002
5003         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5004                 return 0;
5005
5006         rc = hl_fw_armcp_info_get(hdev);
5007         if (rc)
5008                 return rc;
5009
5010         dram_size = le64_to_cpu(prop->armcp_info.dram_size);
5011         if (dram_size) {
5012                 if ((!is_power_of_2(dram_size)) ||
5013                                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5014                         dev_err(hdev->dev,
5015                                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5016                                 dram_size);
5017                         dram_size = DRAM_PHYS_DEFAULT_SIZE;
5018                 }
5019
5020                 prop->dram_size = dram_size;
5021                 prop->dram_end_address = prop->dram_base_address + dram_size;
5022         }
5023
5024         if (!strlen(prop->armcp_info.card_name))
5025                 strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5026                                 CARD_NAME_MAX_LEN);
5027
5028         return 0;
5029 }
5030
5031 static void goya_set_clock_gating(struct hl_device *hdev)
5032 {
5033         /* clock gating not supported in Goya */
5034 }
5035
5036 static void goya_disable_clock_gating(struct hl_device *hdev)
5037 {
5038         /* clock gating not supported in Goya */
5039 }
5040
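/*
 * Per-engine idle check, also backing the debugfs engines status dump: a
 * bit is set in *mask for every engine that is busy, and when a seq_file is
 * supplied the raw status registers are printed per DMA/TPC/MME engine.
 */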
5041 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
5042                                 struct seq_file *s)
5043 {
5044         const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5045         const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5046         u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5047                 mme_arch_sts;
5048         bool is_idle = true, is_eng_idle;
5049         u64 offset;
5050         int i;
5051
5052         if (s)
5053                 seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5054                                 "---  -------  ------------  -------------\n");
5055
5056         offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5057
5058         for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5059                 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5060                 dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5061                 is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5062                                 IS_DMA_IDLE(dma_core_sts0);
5063                 is_idle &= is_eng_idle;
5064
5065                 if (mask)
5066                         *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
5067                 if (s)
5068                         seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5069                                         qm_glbl_sts0, dma_core_sts0);
5070         }
5071
5072         if (s)
5073                 seq_puts(s,
5074                         "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5075                         "---  -------  ------------  --------------  ----------\n");
5076
5077         offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5078
5079         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5080                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5081                 cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5082                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5083                 is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5084                                 IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5085                                 IS_TPC_IDLE(tpc_cfg_sts);
5086                 is_idle &= is_eng_idle;
5087
5088                 if (mask)
5089                         *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
5090                 if (s)
5091                         seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5092                                 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5093         }
5094
5095         if (s)
5096                 seq_puts(s,
5097                         "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5098                         "---  -------  ------------  --------------  -----------\n");
5099
5100         qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5101         cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5102         mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5103         is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5104                         IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5105                         IS_MME_IDLE(mme_arch_sts);
5106         is_idle &= is_eng_idle;
5107
5108         if (mask)
5109                 *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
5110         if (s) {
5111                 seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5112                                 cmdq_glbl_sts0, mme_arch_sts);
5113                 seq_puts(s, "\n");
5114         }
5115
5116         return is_idle;
5117 }
5118
5119 static void goya_hw_queues_lock(struct hl_device *hdev)
5120         __acquires(&goya->hw_queues_lock)
5121 {
5122         struct goya_device *goya = hdev->asic_specific;
5123
5124         spin_lock(&goya->hw_queues_lock);
5125 }
5126
5127 static void goya_hw_queues_unlock(struct hl_device *hdev)
5128         __releases(&goya->hw_queues_lock)
5129 {
5130         struct goya_device *goya = hdev->asic_specific;
5131
5132         spin_unlock(&goya->hw_queues_lock);
5133 }
5134
5135 static u32 goya_get_pci_id(struct hl_device *hdev)
5136 {
5137         return hdev->pdev->device;
5138 }
5139
5140 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5141                                 size_t max_size)
5142 {
5143         struct goya_device *goya = hdev->asic_specific;
5144
5145         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5146                 return 0;
5147
5148         return hl_fw_get_eeprom_data(hdev, data, max_size);
5149 }
5150
5151 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5152 {
5153         return RREG32(mmHW_STATE);
5154 }
5155
5156 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5157 {
5158         return cq_idx;
5159 }
5160
5161 static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
5162 {
5163
5164 }
5165
5166 static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
5167 {
5168
5169 }
5170
5171 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5172 {
5173         return 0;
5174 }
5175
5176 static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5177 {
5178         return 0;
5179 }
5180
5181 static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
5182 {
5183
5184 }
5185
5186 static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
5187                         u16 sob_val, u16 mon_id, u32 q_idx)
5188 {
5189
5190 }
5191
5192 static void goya_reset_sob(struct hl_device *hdev, void *data)
5193 {
5194
5195 }
5196
5197 static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
5198 {
5199         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
5200                                                         HL_POWER9_HOST_MAGIC) {
5201                 dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
5202                 hdev->power9_64bit_dma_enable = 1;
5203                 hdev->dma_mask = 64;
5204         } else {
5205                 dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
5206                 hdev->power9_64bit_dma_enable = 0;
5207                 hdev->dma_mask = 48;
5208         }
5209 }
5210
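/*
 * The device timestamp is a 64-bit counter exposed through two 32-bit
 * registers (CNTCVU holds the upper word, CNTCVL the lower); the two reads
 * below are simply concatenated, without a retry loop around a possible
 * carry between them.
 */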
5211 u64 goya_get_device_time(struct hl_device *hdev)
5212 {
5213         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5214
5215         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5216 }
5217
5218 static const struct hl_asic_funcs goya_funcs = {
5219         .early_init = goya_early_init,
5220         .early_fini = goya_early_fini,
5221         .late_init = goya_late_init,
5222         .late_fini = goya_late_fini,
5223         .sw_init = goya_sw_init,
5224         .sw_fini = goya_sw_fini,
5225         .hw_init = goya_hw_init,
5226         .hw_fini = goya_hw_fini,
5227         .halt_engines = goya_halt_engines,
5228         .suspend = goya_suspend,
5229         .resume = goya_resume,
5230         .cb_mmap = goya_cb_mmap,
5231         .ring_doorbell = goya_ring_doorbell,
5232         .pqe_write = goya_pqe_write,
5233         .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5234         .asic_dma_free_coherent = goya_dma_free_coherent,
5235         .get_int_queue_base = goya_get_int_queue_base,
5236         .test_queues = goya_test_queues,
5237         .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5238         .asic_dma_pool_free = goya_dma_pool_free,
5239         .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5240         .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5241         .hl_dma_unmap_sg = goya_dma_unmap_sg,
5242         .cs_parser = goya_cs_parser,
5243         .asic_dma_map_sg = goya_dma_map_sg,
5244         .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5245         .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5246         .update_eq_ci = goya_update_eq_ci,
5247         .context_switch = goya_context_switch,
5248         .restore_phase_topology = goya_restore_phase_topology,
5249         .debugfs_read32 = goya_debugfs_read32,
5250         .debugfs_write32 = goya_debugfs_write32,
5251         .debugfs_read64 = goya_debugfs_read64,
5252         .debugfs_write64 = goya_debugfs_write64,
5253         .add_device_attr = goya_add_device_attr,
5254         .handle_eqe = goya_handle_eqe,
5255         .set_pll_profile = goya_set_pll_profile,
5256         .get_events_stat = goya_get_events_stat,
5257         .read_pte = goya_read_pte,
5258         .write_pte = goya_write_pte,
5259         .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5260         .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5261         .send_heartbeat = goya_send_heartbeat,
5262         .set_clock_gating = goya_set_clock_gating,
5263         .disable_clock_gating = goya_disable_clock_gating,
5264         .debug_coresight = goya_debug_coresight,
5265         .is_device_idle = goya_is_device_idle,
5266         .soft_reset_late_init = goya_soft_reset_late_init,
5267         .hw_queues_lock = goya_hw_queues_lock,
5268         .hw_queues_unlock = goya_hw_queues_unlock,
5269         .get_pci_id = goya_get_pci_id,
5270         .get_eeprom_data = goya_get_eeprom_data,
5271         .send_cpu_message = goya_send_cpu_message,
5272         .get_hw_state = goya_get_hw_state,
5273         .pci_bars_map = goya_pci_bars_map,
5274         .set_dram_bar_base = goya_set_ddr_bar_base,
5275         .init_iatu = goya_init_iatu,
5276         .rreg = hl_rreg,
5277         .wreg = hl_wreg,
5278         .halt_coresight = goya_halt_coresight,
5279         .get_clk_rate = goya_get_clk_rate,
5280         .get_queue_id_for_cq = goya_get_queue_id_for_cq,
5281         .read_device_fw_version = goya_read_device_fw_version,
5282         .load_firmware_to_device = goya_load_firmware_to_device,
5283         .load_boot_fit_to_device = goya_load_boot_fit_to_device,
5284         .ext_queue_init = goya_ext_queue_init,
5285         .ext_queue_reset = goya_ext_queue_reset,
5286         .get_signal_cb_size = goya_get_signal_cb_size,
5287         .get_wait_cb_size = goya_get_wait_cb_size,
5288         .gen_signal_cb = goya_gen_signal_cb,
5289         .gen_wait_cb = goya_gen_wait_cb,
5290         .reset_sob = goya_reset_sob,
5291         .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
5292         .get_device_time = goya_get_device_time
5293 };
5294
5295 /*
5296  * goya_set_asic_funcs - set Goya function pointers
5297  *
5298  * @hdev: pointer to hl_device structure
5299  *
5300  */
5301 void goya_set_asic_funcs(struct hl_device *hdev)
5302 {
5303         hdev->asic_funcs = &goya_funcs;
5304 }