// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and the code relies on that value (for array sizes etc.), we define another value
 * for the maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS	1
#define MAX_FAULTY_XBARS		1
#define MAX_FAULTY_EDMAS		1
#define MAX_FAULTY_DECODERS		1

#define GAUDI2_TPC_FULL_MASK		0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK	0xFFFF
#define GAUDI2_DECODER_FULL_MASK	0x3FF
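/*
 * Presumably one bit per engine instance: the 25 TPCs (TPC0-TPC24), the 16
 * HIF/HMMU pairs and the 10 decoders (DEC0-DEC9) match the mask widths above.
 */
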
#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE	25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK	0x300
#define DEC_WORK_STATE_IDLE	0
#define DEC_WORK_STATE_PEND	3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
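/*
 * Illustrative sketch only (not a helper defined by this driver): how the
 * idle macros above are typically combined when checking an EDMA engine,
 * given raw register values already read by the caller.
 */
static inline bool gaudi2_edma_is_idle_example(u32 qm_glbl_sts0, u32 qm_glbl_sts1,
						u32 qm_cgm_sts, u32 dma_core_sts0)
{
	/* The engine is idle only when both its queue manager and DMA core are idle */
	return IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
		IS_DMA_IDLE(dma_core_sts0);
}
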
/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2
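/*
 * Illustrative sketch only, built from the shift definitions above: a
 * hypothetical helper composing the control word for an MMU range
 * invalidation of a single ASID (the register writes themselves, and the
 * VA_LSB/VA_MSB address fields, are omitted here).
 */
static inline u32 gaudi2_mmu_range_inv_ctrl_example(u32 asid)
{
	/* Enable range invalidation, enable ASID matching and select the ASID */
	return (1 << MMU_RANGE_INV_EN_SHIFT) |
		(1 << MMU_RANGE_INV_ASID_EN_SHIFT) |
		(asid << MMU_RANGE_INV_ASID_SHIFT);
}
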
/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
 * because it has a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
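/*
 * With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE = 19, the HMMU mask enables all 19
 * cause bits (GENMASK(18, 0) = 0x7FFFF), while the PMMU mask drops the last
 * one, "burst_fifo_full" (GENMASK(17, 0) = 0x3FFFF).
 */
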
#define GAUDI2_MAX_STRING_LEN		64

#define GAUDI2_VDEC_MSIX_ENTRIES	(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
					GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
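/*
 * Illustrative sketch only: engine IDs repeat per dcore with the fixed
 * stride above, so a dcore-local engine ID can presumably be derived from
 * its dcore0 counterpart as follows.
 */
static inline u32 gaudi2_dcore_engine_id_example(u32 dcore, u32 dcore0_eng_id)
{
	/* e.g. dcore = 2, dcore0_eng_id = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 */
	return dcore0_eng_id + dcore * ENGINE_ID_DCORE_OFFSET;
}
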
/* RAZWI initiator coordinates */
#define RAZWI_GET_AXUSER_XY(x) \
	((x & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
	((x & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
	(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

#define RAZWI_INITIATOR_ID_X_HIGH(x) \
	(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
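/*
 * Worked example: for DEC0 at (x_low = 2, y_low = 4, x_high = 0),
 * RAZWI_INITIATOR_ID_X_Y(2, 4, 0) = ((4 & 0xF) << 5) | ((2 & 0x1F) << 0) |
 * ((0 & 0x1F) << 23) = 0x82, which is presumably the value matched against
 * RAZWI_GET_AXUSER_XY() of a captured AXUSER register.
 */
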
#define PSOC_RAZWI_ENG_STR_SIZE		128
#define PSOC_RAZWI_MAX_ENG_PER_RTR	5

/* HW scrambles only bits 0-25 */
#define HW_UNSCRAMBLED_BITS_MASK	GENMASK_ULL(63, 26)
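/*
 * Consistent with GAUDI2_HBM_MMU_SCRM_DIV_SHIFT above: only bits 0-25 take
 * part in the scrambling, so bits 26-63 pass through unchanged.
 */
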
#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM	17

struct gaudi2_razwi_info {
	u32 axuser_xy;
	u32 rtr_ctrl;
	u16 eng_id;
	char *eng_name;
};

static struct gaudi2_razwi_info common_razwi_info[] = {
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_6, "TPC24"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PMMU"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PCIE"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_KDMA, "KDMA"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "CPU"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "PSOC"}
};

static struct gaudi2_razwi_info mme_razwi_info[] = {
	/* MME X high coordinate is N/A, hence using only low coordinates */
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[XBAR_EDGE_ID_DCORE0] = HBM_ID0,
	[XBAR_EDGE_ID_DCORE1] = HBM_ID1,
	[XBAR_EDGE_ID_DCORE2] = HBM_ID2,
	[XBAR_EDGE_ID_DCORE3] = HBM_ID3
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_abnrm_hbw_sei",
	"msix_abnrm_lbw_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
};

static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_trace_fence_events",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"wap sei (wbc axi err)",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

static const int gaudi2_queue_id_to_engine_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
	[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
			GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
			GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
			GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
			GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
			GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
			GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
			GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
			GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
			GAUDI2_DCORE0_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
			GAUDI2_DCORE1_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
			GAUDI2_DCORE2_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
			GAUDI2_DCORE3_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
			GAUDI2_DCORE0_ENGINE_ID_TPC_6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
			GAUDI2_DCORE1_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
			GAUDI2_DCORE2_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
			GAUDI2_DCORE3_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
	[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
	[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
	[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
	[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
	[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
	[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
	[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
	[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
	[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
	[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
	[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
	[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
	[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
	[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
	[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
	[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
	[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
	[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
	[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
	[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
	[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
	[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
	[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
	[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
	[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
};

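/*
 * Illustrative sketch only: a hypothetical bounds-checked lookup showing how
 * the queue-id to engine-id table above is meant to be consumed.
 */
static inline int gaudi2_queue_id_to_engine_id_example(u32 queue_id)
{
	if (queue_id >= ARRAY_SIZE(gaudi2_queue_id_to_engine_id))
		return -EINVAL;

	return gaudi2_queue_id_to_engine_id[queue_id];
}
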
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1295 [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1296 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1297 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1298 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1299 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1300 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1301 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1302 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1303 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1304 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1305 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1306 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1307 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1308 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1309 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1310 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1311 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1312 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1313 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1314 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1315 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1316 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1317 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1318 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1319 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1320 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1321 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1322 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1323 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1324 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1325 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1326 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1327 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1328 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1329 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1330 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1331 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1332 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1333 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1334 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1335 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1336 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1337 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1338 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1339 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1340 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1341 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1342 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1343 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1344 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1345 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1346 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1347 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1348 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1349 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1350 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1351 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1352 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1353 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1354 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};
static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};
const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};
static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
};
const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
};
const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
};
static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
};
static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
};
const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = mmROT0_BASE,
	[ROTATOR_ID_1] = mmROT1_BASE
};
static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
};
static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};
static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
};
static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
};
static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
};
const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
};
static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
};

enum rtr_id {
	DCORE0_RTR0,
	DCORE0_RTR1,
	DCORE0_RTR2,
	DCORE0_RTR3,
	DCORE0_RTR4,
	DCORE0_RTR5,
	DCORE0_RTR6,
	DCORE0_RTR7,
	DCORE1_RTR0,
	DCORE1_RTR1,
	DCORE1_RTR2,
	DCORE1_RTR3,
	DCORE1_RTR4,
	DCORE1_RTR5,
	DCORE1_RTR6,
	DCORE1_RTR7,
	DCORE2_RTR0,
	DCORE2_RTR1,
	DCORE2_RTR2,
	DCORE2_RTR3,
	DCORE2_RTR4,
	DCORE2_RTR5,
	DCORE2_RTR6,
	DCORE2_RTR7,
	DCORE3_RTR0,
	DCORE3_RTR1,
	DCORE3_RTR2,
	DCORE3_RTR3,
	DCORE3_RTR4,
	DCORE3_RTR5,
	DCORE3_RTR6,
	DCORE3_RTR7,
};
static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
	DCORE0_RTR0
};
static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
	DCORE0_RTR0
};
static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};
static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
};
static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
};
static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR0, DCORE0_RTR0
};
static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR2, DCORE0_RTR2
};
static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR0, DCORE3_RTR7
};
static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR2, DCORE3_RTR5
};
struct mme_initiators_rtr_id {
	u32 wap0;
	u32 wap1;
	u32 write;
	u32 read;
	u32 sbte0;
	u32 sbte1;
	u32 sbte2;
	u32 sbte3;
	u32 sbte4;
};
enum mme_initiators {
	MME_WAP0 = 0,
	MME_WAP1,
	MME_WRITE,
	MME_READ,
	MME_SBTE0,
	MME_SBTE1,
	MME_SBTE2,
	MME_SBTE3,
	MME_SBTE4,
	MME_INITIATORS_MAX
};
static const struct mme_initiators_rtr_id
gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
};
enum razwi_event_sources {
	RAZWI_TPC,
	RAZWI_MME,
	RAZWI_EDMA,
	RAZWI_PDMA,
	RAZWI_NIC,
	RAZWI_DEC,
	RAZWI_ROT,
	RAZWI_ARC_FARM
};
struct hbm_mc_error_causes {
	u32 mask;
	char cause[50];
};
static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
/* Special blocks iterator is currently used to configure security protection bits,
 * and read global errors. Most HW blocks are addressable, and those that aren't (N/A)
 * must be skipped. The following configurations are commonly used for both PB config
 * and global error reading, since currently they both share the same settings.
 * Once that changes, we must remember to use separate configurations for either one.
 */
static int gaudi2_iterator_skip_block_types[] = {
	GAUDI2_BLOCK_TYPE_PLL,
	GAUDI2_BLOCK_TYPE_EU_BIST,
	GAUDI2_BLOCK_TYPE_HBM,
	GAUDI2_BLOCK_TYPE_XFT
};
2127 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2128 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2129 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2130 /* Skip all CPU blocks except for CPU_IF */
2131 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2132 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
};
static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
	[HBM_SEI_READ_ERR] = "SEI read data error",
	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
	[HBM_SEI_DFI] = "SEI DFI error",
	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
};
struct mmu_spi_sei_cause {
	char cause[50];
	int clear_bit;
};
static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
	{"page fault", 1},		/* INTERRUPT_CLR[1] */
	{"page access", 1},		/* INTERRUPT_CLR[1] */
	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
	{"mmu rei0", -1},		/* no clear register bit */
	{"mmu rei1", -1},		/* no clear register bit */
	{"stlb rei0", -1},		/* no clear register bit */
	{"stlb rei1", -1},		/* no clear register bit */
	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"slave error", 16},		/* INTERRUPT_CLR[16] */
	{"dec error", 17},		/* INTERRUPT_CLR[17] */
	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
};
struct gaudi2_cache_invld_params {
	u64 start_va;
	u64 end_va;
	u32 inv_start_val;
	u32 flags;
	bool range_invalidation;
};
struct gaudi2_tpc_idle_data {
	struct engines_data *e;
	unsigned long *mask;
	bool *is_idle;
	const char *tpc_fmt;
};
struct gaudi2_tpc_mmu_data {
	u32 rw_asid;
};
static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
					bool is_memset);
static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e);
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{

}
static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short);
}
static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
}
void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int dcore, inst, tpc_seq;
	u32 offset;

	/* init the return code */
	ctx->rc = 0;

	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;

			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
				continue;

			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);

			ctx->fn(hdev, dcore, inst, offset, ctx);
			if (ctx->rc) {
				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
							dcore, inst);
				return;
			}
		}
	}

	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
		return;

	/* special check for PCI TPC (DCORE0_TPC6) */
	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
	if (ctx->rc)
		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
}
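
/*
 * Usage sketch for the iterator above (illustrative only, not part of the
 * driver flow): a caller fills an iterate_module_ctx with a callback and
 * invokes gaudi2_iterate_tpcs() to visit every enabled TPC, including the
 * PCI TPC. The .data member and the counting callback are hypothetical;
 * only .fn and .rc are referenced by the iterator itself.
 *
 *	static void count_tpc_fn(struct hl_device *hdev, int dcore, int inst,
 *					u32 offset, struct iterate_module_ctx *ctx)
 *	{
 *		// offset selects this TPC's register block relative to DCORE0 TPC0
 *		(*(u32 *)ctx->data)++;
 *	}
 *
 *	u32 tpc_cnt = 0;
 *	struct iterate_module_ctx ctx = { .fn = count_tpc_fn, .data = &tpc_cnt };
 *
 *	gaudi2_iterate_tpcs(hdev, &ctx);
 */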
static bool gaudi2_host_phys_addr_valid(u64 addr)
{
	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
		return true;

	return false;
}
static int set_number_of_functional_hbms(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 faulty_hbms = hweight64(hdev->dram_binning);

	/* check if all HBMs should be used */
	if (!faulty_hbms) {
		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
		prop->num_functional_hbms = GAUDI2_HBM_NUM;
		return 0;
	}

	/*
	 * check for error condition in which number of binning
	 * candidates is higher than the maximum supported by the
	 * driver (in which case binning mask shall be ignored and driver will
	 * set the default)
	 */
	if (faulty_hbms > MAX_FAULTY_HBMS) {
		dev_err(hdev->dev,
			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
			MAX_FAULTY_HBMS, hdev->dram_binning);
		return -EINVAL;
	}

	/*
	 * by default, number of functional HBMs in Gaudi2 is always
	 * GAUDI2_HBM_NUM - 1.
	 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;

	return 0;
}
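
/*
 * Worked example (sketch, assuming MAX_FAULTY_HBMS is 1, in line with the
 * other MAX_FAULTY_* limits): a dram_binning mask of 0x4 marks a single
 * faulty HBM, so faulty_hbms = hweight64(0x4) = 1 and num_functional_hbms
 * becomes GAUDI2_HBM_NUM - 1. A mask such as 0x5 has two set bits, exceeds
 * the limit, and makes the function return -EINVAL.
 */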
static bool gaudi2_is_edma_queue_id(u32 queue_id)
{
	switch (queue_id) {
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		return true;
	default:
		return false;
	}
}
static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
	u32 basic_hbm_page_size, edma_idx = 0;
	int rc, i;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return -EINVAL;

	/*
	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
	 * in which we are using x16 bigger page size to be able to populate the entire
	 * HBM mappings in the TLB
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
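
	/*
	 * Worked numbers (sketch, assuming GAUDI2_HBM_NUM is 6 and
	 * GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR is 16, per the x16 note
	 * above): with all HBMs functional, basic_hbm_page_size is
	 * 6 * 8MB = 48MB and dram_page_size is 16 * 48MB = 768MB; with one
	 * HBM binned out they are 40MB and 640MB respectively.
	 */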
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;

	/* since DRAM page size differs from DMMU page size we need to allocate
	 * DRAM memory in units of dram_page size and mapping this memory in
	 * units of DMMU page size. we overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page.
	 * We therefore:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly:
	 *    end_addr = start_addr + m * 48M
	 *
	 * the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
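
	/*
	 * Worked equation (sketch, same 6-HBM/768MB assumption as above):
	 * dram_size = 6 * 16GB is exactly 128 whole 768MB DRAM pages, so
	 * dmmu.start_addr lands at dram_base_address + 128 * 768MB, the first
	 * DRAM-page boundary above the physical range. end_addr then follows
	 * step 3 of the comment: with
	 * m = (VA_HBM_SPACE_END - start_addr) / dmmu.page_size,
	 * end_addr = start_addr + m * dram_page_size.
	 */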
	/*
	 * Driver can't share an (48MB) HBM page with the F/W in order to prevent the F/W from
	 * blocking the driver part by range register, so it must start at the next (48MB) page
	 */
	hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);

	/*
	 * The NIC driver section size and the HMMU page tables section in the HBM need
	 * to be the remaining size in the first dram page after taking into
	 * account the F/W image size
	 */

	/* Reserve region in HBM for HMMU page tables */
	prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
				((prop->dram_page_size - hbm_drv_base_offset) -
				(HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
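
	/*
	 * Resulting first-DRAM-page layout (sketch derived from the formula
	 * above; only the named constants are assumed, not verified sizes):
	 *
	 *   DRAM_PHYS_BASE
	 *   | F/W image, rounded up to a basic HBM page (hbm_drv_base_offset)
	 *   | NIC driver section (whatever space remains)
	 *   | HMMU page tables (HMMU_PAGE_TABLES_SIZE)  <-- mmu_pgt_addr
	 *   | EDMA PQs (EDMA_PQS_SIZE)
	 *   | EDMA scratchpad (EDMA_SCRATCHPAD_SIZE)
	 *   DRAM_PHYS_BASE + dram_page_size
	 */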
	/* Set EDMA PQs HBM addresses */
	edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		if (gaudi2_is_edma_queue_id(i)) {
			prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
							(edma_idx * HL_QUEUE_SIZE_IN_BYTES);
			edma_idx++;
		}
	}

	return 0;
}
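
/*
 * Sizing note (sketch): per the queue tables above there are 4 DCOREs with 2
 * EDMA engines of 4 queues each, so the loop assigns 32 PQ slots of
 * HL_QUEUE_SIZE_IN_BYTES laid out back-to-back from edma_pq_base_addr; this
 * is the budget EDMA_PQS_SIZE is expected to cover.
 */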
2409 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2411 struct asic_fixed_properties *prop = &hdev->asic_prop;
2412 struct hw_queue_properties *q_props;
2413 u32 num_sync_stream_queues = 0;
2416 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2417 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2420 if (!prop->hw_queues_props)
2423 q_props = prop->hw_queues_props;
2425 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2426 q_props[i].type = QUEUE_TYPE_HW;
2427 q_props[i].driver_only = 0;
2429 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2430 q_props[i].supports_sync_stream = 0;
2432 q_props[i].supports_sync_stream = 1;
2433 num_sync_stream_queues++;
2436 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2438 if (gaudi2_is_edma_queue_id(i))
2439 q_props[i].dram_bd = 1;
2442 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2443 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2444 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2446 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2447 prop->cfg_base_address = CFG_BASE;
2448 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2449 prop->host_base_address = HOST_PHYS_BASE_0;
2450 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2451 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2452 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2453 prop->user_dec_intr_count = NUMBER_OF_DEC;
2454 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2455 prop->completion_mode = HL_COMPLETION_MODE_CS;
2456 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2457 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2459 prop->sram_base_address = SRAM_BASE_ADDR;
2460 prop->sram_size = SRAM_SIZE;
2461 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2462 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2464 prop->hints_range_reservation = true;
2466 prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2470 prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
2471 prop->mmu_pte_size = HL_PTE_SIZE;
2473 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2474 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2475 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2476 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2477 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2478 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2479 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2480 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2481 prop->dmmu.page_size = PAGE_SIZE_1GB;
2482 prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
2483 prop->dmmu.last_mask = LAST_MASK;
2484 prop->dmmu.host_resident = 0;
2485 prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2486 prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2488 /* As we need to set the pgt address in dram for HMMU init so we cannot
2489 * wait to the fw cpucp info to set the dram props as mmu init comes before
2492 rc = hdev->asic_funcs->set_dram_properties(hdev);
2496 prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
2498 prop->pmmu.pgt_size = prop->mmu_pgt_size;
2499 hdev->pmmu_huge_range = true;
2500 prop->pmmu.host_resident = 1;
2501 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2502 prop->pmmu.last_mask = LAST_MASK;
2503 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2504 prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2506 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2507 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2508 prop->hints_host_hpage_reserved_va_range.start_addr =
2509 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2510 prop->hints_host_hpage_reserved_va_range.end_addr =
2511 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
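/* The PMMU is host-resident (pmmu.host_resident above), so its hop
 * geometry is chosen to match the host kernel's base page size; hence
 * the two alternative layouts below
 */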
2513 if (PAGE_SIZE == SZ_64K) {
2514 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2515 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2516 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2517 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2518 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2519 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2520 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2521 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2522 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2523 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2524 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2525 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2526 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2527 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2528 prop->pmmu.page_size = PAGE_SIZE_64KB;
2530 /* shifts and masks are the same in PMMU and HPMMU */
2531 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2532 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2533 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2534 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2536 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2537 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2538 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2539 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2540 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2541 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2542 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2543 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2544 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2545 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2546 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2547 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2548 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2549 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2550 prop->pmmu.page_size = PAGE_SIZE_4KB;
2552 /* shifts and masks are the same in PMMU and HPMMU */
2553 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2554 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2555 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2556 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2559 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2560 prop->num_engine_cores = CPU_ID_MAX;
2561 prop->cfg_size = CFG_SIZE;
2562 prop->num_of_events = GAUDI2_EVENT_SIZE;
2564 prop->supports_engine_modes = true;
2566 prop->dc_power_default = DC_POWER_DEFAULT;
2568 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2569 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2570 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2571 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2573 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2575 prop->mme_master_slave_mode = 1;
2577 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2578 (num_sync_stream_queues * HL_RSVD_SOBS);
2580 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2581 (num_sync_stream_queues * HL_RSVD_MONS);
2583 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2584 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2585 prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2587 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2589 prop->fw_cpu_boot_dev_sts0_valid = false;
2590 prop->fw_cpu_boot_dev_sts1_valid = false;
2591 prop->hard_reset_done_by_fw = false;
2592 prop->gic_interrupts_enable = true;
2594 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2596 prop->max_dec = NUMBER_OF_DEC;
2598 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2600 prop->dma_mask = 64;
2602 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2607 kfree(prop->hw_queues_props);
2611 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2613 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2614 bool is_wc[3] = {false, false, true};
2617 rc = hl_pci_bars_map(hdev, name, is_wc);
2621 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2626 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2628 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2629 struct hl_inbound_pci_region pci_region;
2630 u64 old_addr = addr;
2633 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2636 if (hdev->asic_prop.iatu_done_by_fw)
2639 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2640 pci_region.mode = PCI_BAR_MATCH_MODE;
2641 pci_region.bar = DRAM_BAR_ID;
2642 pci_region.addr = addr;
2643 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2648 old_addr = gaudi2->dram_bar_cur_addr;
2649 gaudi2->dram_bar_cur_addr = addr;
2655 static int gaudi2_init_iatu(struct hl_device *hdev)
2657 struct hl_inbound_pci_region inbound_region;
2658 struct hl_outbound_pci_region outbound_region;
2659 u32 bar_addr_low, bar_addr_high;
2662 if (hdev->asic_prop.iatu_done_by_fw)
2665 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2666 * We must map this region in BAR match mode in order to
2667 * fetch BAR physical base address
2669 inbound_region.mode = PCI_BAR_MATCH_MODE;
2670 inbound_region.bar = SRAM_CFG_BAR_ID;
2671 /* Base address must be aligned to Bar size which is 256 MB */
2672 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2673 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2677 /* Fetch physical BAR address */
2678 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2679 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
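/* bits 3..0 of the low BAR dword are the PCI BAR type/prefetch flags, hence the ~0xF */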
2681 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2683 /* Inbound Region 0 - Bar 0 - Point to CFG */
2684 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2685 inbound_region.bar = SRAM_CFG_BAR_ID;
2686 inbound_region.offset_in_bar = 0;
2687 inbound_region.addr = STM_FLASH_BASE_ADDR;
2688 inbound_region.size = CFG_REGION_SIZE;
2689 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2693 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2694 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2695 inbound_region.bar = SRAM_CFG_BAR_ID;
2696 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2697 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2698 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2699 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2703 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2704 inbound_region.mode = PCI_BAR_MATCH_MODE;
2705 inbound_region.bar = DRAM_BAR_ID;
2706 inbound_region.addr = DRAM_PHYS_BASE;
2707 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2711 /* Outbound Region 0 - Point to Host */
2712 outbound_region.addr = HOST_PHYS_BASE_0;
2713 outbound_region.size = HOST_PHYS_SIZE_0;
2714 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2719 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2721 return RREG32(mmHW_STATE);
2724 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2726 struct asic_fixed_properties *prop = &hdev->asic_prop;
2729 * check for an error condition in which the number of binning
2730 * candidates is higher than the maximum supported by the driver
2732 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2733 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2734 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2739 prop->tpc_binning_mask = hdev->tpc_binning;
2740 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
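/*
 * The binning helpers below consume their masks with a __ffs()/clear_bit()
 * bit-walk. A minimal, self-contained sketch of that pattern (illustrative
 * only, not driver code):
 */
static void example_walk_binning_mask(u64 mask)
{
	while (mask) {
		u8 faulty = __ffs(mask);

		/* substitute/disable faulty unit 'faulty' here */
		clear_bit(faulty, (unsigned long *)&mask);
	}
}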
2745 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2747 struct asic_fixed_properties *prop = &hdev->asic_prop;
2748 struct hw_queue_properties *q_props = prop->hw_queues_props;
2749 u64 tpc_binning_mask;
2753 rc = gaudi2_tpc_binning_init_prop(hdev);
2757 tpc_binning_mask = prop->tpc_binning_mask;
2759 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2760 u8 subst_seq, binned, qid_base;
2762 if (tpc_binning_mask == 0)
2765 if (subst_idx == 0) {
2766 subst_seq = TPC_ID_DCORE0_TPC6;
2767 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2769 subst_seq = TPC_ID_DCORE3_TPC5;
2770 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2774 /* clear bit from mask */
2775 binned = __ffs(tpc_binning_mask);
2777 * Coverity complains about possible out-of-bound access in
2778 * clear_bit
2779 */
2780 if (binned >= TPC_ID_SIZE) {
2782 "Invalid binned TPC (binning mask: %llx)\n",
2786 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2788 /* also clear the substitute (replacing) TPC's bit from the enabled mask */
2789 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2791 /* bin the substitute TPC's Qs */
2792 q_props[qid_base].binned = 1;
2793 q_props[qid_base + 1].binned = 1;
2794 q_props[qid_base + 2].binned = 1;
2795 q_props[qid_base + 3].binned = 1;
2803 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2805 struct asic_fixed_properties *prop = &hdev->asic_prop;
2808 num_faulty = hweight32(hdev->decoder_binning);
2811 * check for an error condition in which the number of binning
2812 * candidates is higher than the maximum supported by the driver
2814 if (num_faulty > MAX_FAULTY_DECODERS) {
2815 dev_err(hdev->dev, "decoder binning is supported for a max of a single faulty decoder, provided mask 0x%x\n",
2816 hdev->decoder_binning);
2820 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2822 if (prop->decoder_binning_mask)
2823 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2825 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2830 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2832 struct asic_fixed_properties *prop = &hdev->asic_prop;
2834 /* check if we should override default binning */
2835 if (!hdev->dram_binning) {
2836 prop->dram_binning_mask = 0;
2837 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2841 /* set DRAM binning constraints */
2842 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2843 prop->dram_binning_mask = hdev->dram_binning;
2844 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2847 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2849 struct asic_fixed_properties *prop = &hdev->asic_prop;
2850 struct hw_queue_properties *q_props;
2853 num_faulty = hweight32(hdev->edma_binning);
2856 * check for an error condition in which the number of binning
2857 * candidates is higher than the maximum supported by the driver
2859 if (num_faulty > MAX_FAULTY_EDMAS) {
2861 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2862 hdev->edma_binning);
2866 if (!hdev->edma_binning) {
2867 prop->edma_binning_mask = 0;
2868 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2872 seq = __ffs((unsigned long)hdev->edma_binning);
2874 /* set binning constraints */
2875 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2876 prop->edma_binning_mask = hdev->edma_binning;
2877 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2879 /* bin substitute EDMA's queue */
2880 q_props = prop->hw_queues_props;
2881 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2882 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2883 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2884 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2889 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2891 struct asic_fixed_properties *prop = &hdev->asic_prop;
2894 /* check if we should override default binning */
2895 if (!xbar_edge_iso_mask) {
2896 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2901 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2902 * only the FW can set a redundancy value). for the user it will always be 0.
2904 num_faulty = hweight32(xbar_edge_iso_mask);
2907 * check for an error condition in which the number of binning
2908 * candidates is higher than the maximum supported by the driver
2910 if (num_faulty > MAX_FAULTY_XBARS) {
2911 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2916 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2918 /* set binning constraints */
2919 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2920 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2925 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2930 * mark all clusters as good, each component will "fail" a cluster
2931 * based on eFuse/user values.
2932 * If more than a single cluster is faulty - the chip is unusable
2934 hdev->asic_prop.faulty_dram_cluster_map = 0;
2936 gaudi2_set_dram_binning_masks(hdev);
2938 rc = gaudi2_set_edma_binning_masks(hdev);
2942 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2947 /* always initially set to full mask */
2948 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2953 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2955 struct asic_fixed_properties *prop = &hdev->asic_prop;
2958 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2962 /* if we have DRAM binning reported by FW we should perform cluster config */
2963 if (prop->faulty_dram_cluster_map) {
2964 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2966 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2972 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2976 rc = gaudi2_set_cluster_binning_masks(hdev);
2980 rc = gaudi2_set_tpc_binning_masks(hdev);
2984 rc = gaudi2_set_dec_binning_masks(hdev);
2991 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2993 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2994 struct asic_fixed_properties *prop = &hdev->asic_prop;
2999 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3002 /* No point in asking for this information again when not doing hard reset, as the device
3003 * CPU hasn't been reset
3005 if (hdev->reset_info.in_compute_reset)
3008 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
3013 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
3015 /* we can have either 5 or 6 HBMs. other values are invalid */
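/* e.g. assuming GAUDI2_HBM_NUM == 6 with 16 GB per HBM: valid sizes are 80 GB (binned) or 96 GB (full) */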
3017 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
3018 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
3020 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
3021 dram_size, prop->dram_size);
3022 dram_size = prop->dram_size;
3025 prop->dram_size = dram_size;
3026 prop->dram_end_address = prop->dram_base_address + dram_size;
3029 if (!strlen(prop->cpucp_info.card_name))
3030 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
3033 /* Overwrite binning masks with the actual binning values from F/W */
3034 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
3035 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
3036 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
3037 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
3039 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
3040 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
3041 hdev->decoder_binning);
3044 * at this point the DRAM parameters need to be updated according to data obtained
3045 * from the firmware
3046 */
3047 rc = hdev->asic_funcs->set_dram_properties(hdev);
3051 rc = hdev->asic_funcs->set_binning_masks(hdev);
3055 max_power = hl_fw_get_max_power(hdev);
3059 prop->max_power_default = (u64) max_power;
3064 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
3066 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3067 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
3070 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3073 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
3077 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
3082 static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
3084 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3085 struct asic_fixed_properties *prop = &hdev->asic_prop;
3088 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
3091 if (prop->dmmu.host_resident)
3094 rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
3096 dev_err(hdev->dev, "Failed to clear mmu pgt");
3101 static int gaudi2_early_init(struct hl_device *hdev)
3103 struct asic_fixed_properties *prop = &hdev->asic_prop;
3104 struct pci_dev *pdev = hdev->pdev;
3105 resource_size_t pci_bar_size;
3108 rc = gaudi2_set_fixed_properties(hdev);
3112 /* Check BAR sizes */
3113 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
3115 if (pci_bar_size != CFG_BAR_SIZE) {
3116 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3117 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
3119 goto free_queue_props;
3122 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
3123 if (pci_bar_size != MSIX_BAR_SIZE) {
3124 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3125 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
3127 goto free_queue_props;
3130 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
3131 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
3134 * Only in pldm does the driver configure the iATU
3137 hdev->asic_prop.iatu_done_by_fw = false;
3139 hdev->asic_prop.iatu_done_by_fw = true;
3141 rc = hl_pci_init(hdev);
3143 goto free_queue_props;
3145 /* Before continuing in the initialization, we need to read the preboot
3146 * version to determine whether we run with a security-enabled firmware
3148 rc = hl_fw_read_preboot_status(hdev);
3150 if (hdev->reset_on_preboot_fail)
3151 /* we are already in a failure flow, so don't check if hw_fini fails. */
3152 hdev->asic_funcs->hw_fini(hdev, true, false);
3156 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
3157 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
3158 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
3160 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
3170 kfree(hdev->asic_prop.hw_queues_props);
3174 static int gaudi2_early_fini(struct hl_device *hdev)
3176 kfree(hdev->asic_prop.hw_queues_props);
3182 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3185 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3192 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3195 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3202 static void gaudi2_init_arcs(struct hl_device *hdev)
3204 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3205 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3209 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3210 if (gaudi2_is_arc_enabled(hdev, i))
3213 gaudi2_set_arc_id_cap(hdev, i);
3216 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3217 if (!gaudi2_is_queue_enabled(hdev, i))
3220 arc_id = gaudi2_queue_id_to_arc_id[i];
3221 if (gaudi2_is_arc_enabled(hdev, arc_id))
3224 if (gaudi2_is_arc_nic_owned(arc_id) &&
3225 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3228 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3229 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3232 gaudi2_set_arc_id_cap(hdev, arc_id);
3235 /* Fetch the engine-cores interrupt control register address */
3236 hdev->asic_prop.engine_core_interrupt_reg_addr =
3237 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3240 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3242 u32 reg_base, reg_val;
3246 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3247 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3248 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3249 ARC_DCCM_BLOCK_SIZE * 2, true);
3253 case CPU_ID_SCHED_ARC4:
3254 case CPU_ID_SCHED_ARC5:
3255 case CPU_ID_MME_QMAN_ARC0:
3256 case CPU_ID_MME_QMAN_ARC1:
3257 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3259 /* Scrub lower DCCM block */
3260 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3261 ARC_DCCM_BLOCK_SIZE, true);
3265 /* Switch to upper DCCM block */
3266 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3267 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3269 /* Scrub upper DCCM block */
3270 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3271 ARC_DCCM_BLOCK_SIZE, true);
3275 /* Switch to lower DCCM block */
3276 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3277 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3280 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3281 ARC_DCCM_BLOCK_SIZE, true);
3289 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3294 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3295 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3298 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3306 static int gaudi2_late_init(struct hl_device *hdev)
3308 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3311 hdev->asic_prop.supports_advanced_cpucp_rc = true;
3313 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3314 gaudi2->virt_msix_db_dma_addr);
3316 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3320 rc = gaudi2_fetch_psoc_frequency(hdev);
3322 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3323 goto disable_pci_access;
3326 rc = gaudi2_mmu_clear_pgt_range(hdev);
3328 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
3329 goto disable_pci_access;
3332 gaudi2_init_arcs(hdev);
3334 rc = gaudi2_scrub_arcs_dccm(hdev);
3336 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3337 goto disable_pci_access;
3340 gaudi2_init_security(hdev);
3345 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3350 static void gaudi2_late_fini(struct hl_device *hdev)
3352 hl_hwmon_release_resources(hdev);
3355 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3357 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3359 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3360 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3361 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3362 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3363 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3364 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3365 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3366 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3367 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3368 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3371 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3373 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3374 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3375 u32 block_size, umr_start_idx, num_umr_blocks;
3378 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3379 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3380 block_size = ARC_DCCM_BLOCK_SIZE * 2;
3382 block_size = ARC_DCCM_BLOCK_SIZE;
3384 blocks[i].address = gaudi2_arc_dccm_bases[i];
3385 blocks[i].size = block_size;
3388 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3389 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3391 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3392 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3394 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3395 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3397 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3398 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3400 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3401 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3403 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3404 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3406 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3407 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3409 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3410 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3412 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3413 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3414 for (i = 0 ; i < num_umr_blocks ; i++) {
3415 u8 nic_id, umr_block_id;
3417 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3418 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3420 blocks[umr_start_idx + i].address =
3421 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3422 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3423 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3424 umr_block_id * NIC_UMR_OFFSET;
3425 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
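/* worked example, assuming NUM_OF_USER_NIC_UMR_BLOCKS == 3 and
 * NIC_NUMBER_OF_QM_PER_MACRO == 2: i == 7 yields nic_id == 2 and
 * umr_block_id == 1, i.e. NIC macro 1, QMAN 0 within that macro,
 * second UMR block
 */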
3428 /* Expose decoder HW configuration block to user */
3429 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3431 for (i = 1; i < NUM_OF_DCORES; ++i) {
3432 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3433 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3435 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3436 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3438 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3439 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3443 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3445 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3446 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3449 /* The device ARC works with 32-bit addresses, and because there is a single HW register
3450 * that holds the extension bits (49..28), these bits must be identical in all the allocated
3451 * range.
3452 */
3454 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3455 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3456 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3457 if (!virt_addr_arr[i]) {
3459 goto free_dma_mem_arr;
3462 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3463 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3467 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3469 "MSB of ARC accessible DMA memory are not identical in all range\n");
3471 goto free_dma_mem_arr;
3474 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3475 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3478 for (j = 0 ; j < i ; j++)
3479 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
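/*
 * Illustrative sketch of the extension-bits check used above. The real
 * GAUDI2_ARC_PCI_MSB_ADDR() macro is defined in the driver headers; the
 * 28-bit shift below is an assumption derived from the bits (49..28)
 * comment.
 */
#define EXAMPLE_ARC_PCI_MSB_ADDR(addr)	((u64)(addr) >> 28)

static bool example_range_shares_msb(dma_addr_t base, size_t size)
{
	/* every address in [base, base + size) must share bits 49..28 */
	return EXAMPLE_ARC_PCI_MSB_ADDR(base) ==
	       EXAMPLE_ARC_PCI_MSB_ADDR(base + size - 1);
}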
3485 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3487 struct asic_fixed_properties *prop = &hdev->asic_prop;
3488 struct pci_mem_region *region;
3491 region = &hdev->pci_mem_region[PCI_REGION_CFG];
3492 region->region_base = CFG_BASE;
3493 region->region_size = CFG_SIZE;
3494 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3495 region->bar_size = CFG_BAR_SIZE;
3496 region->bar_id = SRAM_CFG_BAR_ID;
3500 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3501 region->region_base = SRAM_BASE_ADDR;
3502 region->region_size = SRAM_SIZE;
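/* offset of SRAM inside the BAR mirrors inbound region 1 programmed in
 * gaudi2_init_iatu(): the CFG region comes first, then the BAR0 reserved
 * area, then SRAM
 */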
3503 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3504 region->bar_size = CFG_BAR_SIZE;
3505 region->bar_id = SRAM_CFG_BAR_ID;
3509 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3510 region->region_base = DRAM_PHYS_BASE;
3511 region->region_size = hdev->asic_prop.dram_size;
3512 region->offset_in_bar = 0;
3513 region->bar_size = prop->dram_pci_bar_size;
3514 region->bar_id = DRAM_BAR_ID;
3518 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3520 struct asic_fixed_properties *prop = &hdev->asic_prop;
3523 /* Initialize TPC interrupt */
3524 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3526 /* Initialize unexpected error interrupt */
3527 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3528 HL_USR_INTERRUPT_UNEXPECTED);
3530 /* Initialize common user CQ interrupt */
3531 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3532 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3534 /* Initialize common decoder interrupt */
3535 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3536 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3538 /* User interrupts structure holds both decoder and user interrupts from various engines.
3539 * We first initialize the decoder interrupts and then we add the user interrupts.
3540 * The only limitation is that the last decoder interrupt id must be smaller
3541 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3544 /* Initialize decoder interrupts, expose only normal interrupts;
3545 * error interrupts are handled by the driver
3547 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3549 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3550 HL_USR_INTERRUPT_DECODER);
3552 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3553 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
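/*
 * The compile-time check referred to above could be expressed like this
 * (illustrative; the driver performs the actual check elsewhere, and the
 * exact identifiers used here are assumptions):
 */
static_assert(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM < GAUDI2_IRQ_NUM_USER_FIRST);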
3556 static inline int gaudi2_get_non_zero_random_int(void)
3558 int rand = get_random_u32();
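	/* an XOR-based LFSR seeded with 0 would stay in the all-zero state forever */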
3560 return rand ? rand : 1;
3563 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3565 struct asic_fixed_properties *prop = &hdev->asic_prop;
3566 struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3567 &prop->skip_special_blocks_cfg;
3569 kfree(prop->special_blocks);
3570 kfree(skip_special_blocks_cfg->block_types);
3571 kfree(skip_special_blocks_cfg->block_ranges);
3574 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3576 gaudi2_special_blocks_free(hdev);
3579 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3580 struct hl_special_blocks_cfg *special_blocks_cfg,
3581 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3586 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3588 struct asic_fixed_properties *prop = &hdev->asic_prop;
3591 /* Configure Special blocks */
3592 prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
3593 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3594 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3595 sizeof(*prop->special_blocks), GFP_KERNEL);
3596 if (!prop->special_blocks)
3599 for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3600 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3601 sizeof(*prop->special_blocks));
3603 /* Configure when to skip Special blocks */
3604 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3605 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3607 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3608 prop->skip_special_blocks_cfg.block_types =
3609 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3610 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3611 if (!prop->skip_special_blocks_cfg.block_types) {
3613 goto free_special_blocks;
3616 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3617 sizeof(gaudi2_iterator_skip_block_types));
3619 prop->skip_special_blocks_cfg.block_types_len =
3620 ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3623 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3624 prop->skip_special_blocks_cfg.block_ranges =
3625 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3626 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3627 if (!prop->skip_special_blocks_cfg.block_ranges) {
3629 goto free_skip_special_blocks_types;
3632 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3633 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3634 &gaudi2_iterator_skip_block_ranges[i],
3635 sizeof(struct range));
3637 prop->skip_special_blocks_cfg.block_ranges_len =
3638 ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3643 free_skip_special_blocks_types:
3644 kfree(prop->skip_special_blocks_cfg.block_types);
3645 free_special_blocks:
3646 kfree(prop->special_blocks);
3651 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3653 return gaudi2_special_blocks_config(hdev);
3656 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3658 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3659 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3662 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3663 /* bail-out if this is an allocation failure point */
3664 if (!msg_info[i].kern_addr)
3667 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3668 msg_info[i].kern_addr = NULL;
3672 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3674 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3675 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3678 /* allocate a packet_msg_short buffer for each Q we intend to test */
3679 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3680 msg_info[i].kern_addr =
3681 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3682 GFP_KERNEL, &msg_info[i].dma_addr);
3683 if (!msg_info[i].kern_addr) {
3685 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3694 gaudi2_test_queues_msgs_free(hdev);
3698 static int gaudi2_sw_init(struct hl_device *hdev)
3700 struct asic_fixed_properties *prop = &hdev->asic_prop;
3701 struct gaudi2_device *gaudi2;
3704 /* Allocate device structure */
3705 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3709 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3710 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3713 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3714 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3717 goto free_gaudi2_device;
3720 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3723 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3724 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3726 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3728 hdev->asic_specific = gaudi2;
3730 /* Create DMA pool for small allocations.
3731 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3732 * PI/CI registers allocated from this pool have this restriction
3734 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3735 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3736 if (!hdev->dma_pool) {
3737 dev_err(hdev->dev, "failed to create DMA pool\n");
3739 goto free_gaudi2_device;
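/* allocations from this pool later go through the hl_asic_dma_pool_*
 * wrappers, e.g. (sketch):
 *	vaddr = hl_asic_dma_pool_zalloc(hdev, size, GFP_KERNEL, &dma_addr);
 *	...
 *	hl_asic_dma_pool_free(hdev, vaddr, dma_addr);
 */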
3742 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3746 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3747 if (!hdev->cpu_accessible_dma_pool) {
3748 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3750 goto free_cpu_dma_mem;
3753 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3754 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3756 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3758 goto free_cpu_accessible_dma_pool;
3761 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3762 &gaudi2->virt_msix_db_dma_addr);
3763 if (!gaudi2->virt_msix_db_cpu_addr) {
3764 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3766 goto free_cpu_accessible_dma_pool;
3769 spin_lock_init(&gaudi2->hw_queues_lock);
3771 gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
3773 gaudi2_user_mapped_blocks_init(hdev);
3775 /* Initialize user interrupts */
3776 gaudi2_user_interrupt_setup(hdev);
3778 hdev->supports_coresight = true;
3779 hdev->supports_sync_stream = true;
3780 hdev->supports_cb_mapping = true;
3781 hdev->supports_wait_for_multi_cs = false;
3783 prop->supports_compute_reset = true;
3785 /* Event queue sanity check added in FW version 1.11 */
3786 if (hl_is_fw_sw_ver_below(hdev, 1, 11))
3787 hdev->event_queue.check_eqe_index = false;
3789 hdev->event_queue.check_eqe_index = true;
3791 hdev->asic_funcs->set_pci_memory_regions(hdev);
3793 rc = gaudi2_special_blocks_iterator_config(hdev);
3795 goto free_virt_msix_db_mem;
3797 rc = gaudi2_test_queues_msgs_alloc(hdev);
3799 goto special_blocks_free;
3803 special_blocks_free:
3804 gaudi2_special_blocks_iterator_free(hdev);
3805 free_virt_msix_db_mem:
3806 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3807 free_cpu_accessible_dma_pool:
3808 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3810 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3811 hdev->cpu_accessible_dma_address);
3813 dma_pool_destroy(hdev->dma_pool);
3819 static int gaudi2_sw_fini(struct hl_device *hdev)
3821 struct asic_fixed_properties *prop = &hdev->asic_prop;
3822 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3824 gaudi2_test_queues_msgs_free(hdev);
3826 gaudi2_special_blocks_iterator_free(hdev);
3828 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3830 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3832 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3833 hdev->cpu_accessible_dma_address);
3835 dma_pool_destroy(hdev->dma_pool);
3842 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3844 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3845 QM_GLBL_CFG1_CQF_STOP |
3846 QM_GLBL_CFG1_CP_STOP);
3848 /* stop also the ARC */
3849 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3852 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3854 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3855 QM_GLBL_CFG1_CQF_FLUSH |
3856 QM_GLBL_CFG1_CP_FLUSH);
3859 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3861 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3865 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3867 * @hdev: pointer to the habanalabs device structure
3868 * @queue_id: queue whose fence counters should be cleared
3869 * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3870 * getting stuck on any fence value. otherwise set all fence
3871 * counters to 0 (standard clear of fence counters)
3873 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3879 reg_base = gaudi2_qm_blocks_bases[queue_id];
3881 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3882 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
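/* the fence counters of all the QMAN's CPs are laid out contiguously in the
 * register map between FENCE0_CNT_0 and BARRIER_CFG, so their distance gives
 * the span to clear
 */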
3885 * in case we want to make sure that a QM that is stuck on a fence will
3886 * be released, we should set the fence counter to a value higher than
3887 * the value the QM is waiting for. to comply with a fence counter of
3888 * any value, we set the maximum fence value to all counters
3890 val = skip_fence ? U32_MAX : 0;
3891 gaudi2_memset_device_lbw(hdev, addr, size, val);
3894 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3896 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3898 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3899 gaudi2_flush_qman_common(hdev, reg_base);
3900 gaudi2_flush_qman_arc_common(hdev, reg_base);
3903 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3905 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3908 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3909 goto stop_edma_qmans;
3911 /* Stop CPs of PDMA QMANs */
3912 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3913 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3916 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3919 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3920 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3921 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3924 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3927 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3928 inst * DCORE_EDMA_OFFSET;
3930 /* Stop CPs of EDMA QMANs */
3931 gaudi2_stop_qman_common(hdev, qm_base);
3936 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3938 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3941 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3943 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3944 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3947 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3951 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3953 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3957 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3960 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3961 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3964 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3965 gaudi2_stop_qman_common(hdev, reg_base);
3969 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3971 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3975 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3978 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3979 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3982 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3983 gaudi2_stop_qman_common(hdev, reg_base);
3987 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3989 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3990 u32 reg_base, queue_id;
3993 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3996 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3998 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3999 if (!(hdev->nic_ports_mask & BIT(i)))
4002 reg_base = gaudi2_qm_blocks_bases[queue_id];
4003 gaudi2_stop_qman_common(hdev, reg_base);
4007 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
4011 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
4012 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
4015 static void gaudi2_dma_stall(struct hl_device *hdev)
4017 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4020 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4023 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
4024 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
4027 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4030 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4031 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4032 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4035 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4038 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
4039 inst * DCORE_EDMA_OFFSET;
4041 /* Stall the EDMA cores */
4042 gaudi2_stall_dma_common(hdev, core_base);
4047 static void gaudi2_mme_stall(struct hl_device *hdev)
4049 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4052 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
4054 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4055 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4056 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
4059 static void gaudi2_tpc_stall(struct hl_device *hdev)
4061 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4065 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4068 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4069 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4072 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
4073 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
4077 static void gaudi2_rotator_stall(struct hl_device *hdev)
4079 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4083 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4086 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
4087 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
4088 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
4090 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4091 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4094 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
4098 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
4100 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
4103 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
4105 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4108 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4109 goto stop_edma_qmans;
4111 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
4112 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
4115 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4118 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4119 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4120 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4123 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4126 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
4127 inst * DCORE_EDMA_OFFSET;
4129 /* Disable CPs of EDMA QMANs */
4130 gaudi2_disable_qman_common(hdev, qm_base);
4135 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
4137 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4140 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
4142 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4143 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4144 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
4147 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
4149 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4153 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4156 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4157 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4160 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4161 gaudi2_disable_qman_common(hdev, reg_base);
4165 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4167 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4171 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4174 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4175 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4178 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4179 gaudi2_disable_qman_common(hdev, reg_base);
4183 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4185 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4186 u32 reg_base, queue_id;
4189 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4192 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4194 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4195 if (!(hdev->nic_ports_mask & BIT(i)))
4198 reg_base = gaudi2_qm_blocks_bases[queue_id];
4199 gaudi2_disable_qman_common(hdev, reg_base);
4203 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4205 /* Disable the timestamp counter */
4206 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4208 /* Zero the lower/upper parts of the 64-bit counter */
4209 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4210 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4212 /* Enable the counter */
4213 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
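/*
 * Illustrative sketch of reading this free-running counter through the 32-bit
 * register interface. The register offsets are assumptions (low word assumed
 * at +0x8, high word at +0xC, per the usual ARM generic-timer layout); the
 * high word is re-read to guard against a low-word wraparound between reads.
 */
static u64 example_read_timestamp(struct hl_device *hdev)
{
	u32 hi, lo, tmp;

	do {
		hi = RREG32(mmPSOC_TIMESTAMP_BASE + 0xC);
		lo = RREG32(mmPSOC_TIMESTAMP_BASE + 0x8);
		tmp = RREG32(mmPSOC_TIMESTAMP_BASE + 0xC);
	} while (hi != tmp);

	return ((u64)hi << 32) | lo;
}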
4216 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4218 /* Disable the timestamp counter */
4219 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4222 static const char *gaudi2_irq_name(u16 irq_number)
4224 switch (irq_number) {
4225 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4226 return "gaudi2 cpu eq";
4227 case GAUDI2_IRQ_NUM_COMPLETION:
4228 return "gaudi2 completion";
4229 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4230 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4231 case GAUDI2_IRQ_NUM_TPC_ASSERT:
4232 return "gaudi2 tpc assert";
4233 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4234 return "gaudi2 unexpected error";
4235 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4236 return "gaudi2 user completion";
4237 case GAUDI2_IRQ_NUM_EQ_ERROR:
4238 return "gaudi2 eq error";
4244 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4246 int i, irq, relative_idx;
4249 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4250 irq = pci_irq_vector(hdev->pdev, i);
4251 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4253 dec = hdev->dec + relative_idx / 2;
4255 /* We pass different structures depending on the irq handler. For the abnormal
4256 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4257 * user_interrupt entry
4259 free_irq(irq, ((relative_idx % 2) ?
4260 (void *) dec :
4261 (void *) &hdev->user_interrupt[dec->core_id]));
4265 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4267 int rc, i, irq_init_cnt, irq, relative_idx;
4270 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4271 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4272 i++, irq_init_cnt++) {
4274 irq = pci_irq_vector(hdev->pdev, i);
4275 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4277 /* We pass different structures depending on the irq handler. For the abnormal
4278 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4279 * user_interrupt entry
4281 * TODO: change the dec abnrm to threaded irq
4284 dec = hdev->dec + relative_idx / 2;
4285 if (relative_idx % 2) {
4286 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4287 gaudi2_irq_name(i), (void *) dec);
4289 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4290 (void *) &hdev->user_interrupt[dec->core_id]);
4294 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4302 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4306 static int gaudi2_enable_msix(struct hl_device *hdev)
4308 struct asic_fixed_properties *prop = &hdev->asic_prop;
4309 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4310 int rc, irq, i, j, user_irq_init_cnt;
4313 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4316 hl_init_cpu_for_irq(hdev);
4318 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4321 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4322 GAUDI2_MSIX_ENTRIES, rc);
4326 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4327 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4328 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4330 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4331 goto free_irq_vectors;
4334 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4335 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4336 &hdev->event_queue);
4338 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4339 goto free_completion_irq;
4342 rc = gaudi2_dec_enable_msix(hdev);
4344 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4345 goto free_event_irq;
4348 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4349 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4350 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
4351 &hdev->tpc_interrupt);
4353 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4357 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4358 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4359 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4360 &hdev->unexpected_error_interrupt);
4362 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4366 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4367 user_irq_init_cnt < prop->user_interrupt_count;
4368 i++, j++, user_irq_init_cnt++) {
4370 irq = pci_irq_vector(hdev->pdev, i);
4371 hl_set_irq_affinity(hdev, irq);
4372 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4373 &hdev->user_interrupt[j]);
4375 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4380 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4381 rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
4382 IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
4385 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4389 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4394 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4395 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4397 irq = pci_irq_vector(hdev->pdev, i);
4398 irq_set_affinity_and_hint(irq, NULL);
4399 free_irq(irq, &hdev->user_interrupt[j]);
4401 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4402 free_irq(irq, &hdev->unexpected_error_interrupt);
4404 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4405 free_irq(irq, &hdev->tpc_interrupt);
4407 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4409 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4412 free_completion_irq:
4413 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4417 pci_free_irq_vectors(hdev->pdev);
4422 static void gaudi2_sync_irqs(struct hl_device *hdev)
4424 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4428 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4431 /* Wait for all pending IRQs to be finished */
4432 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4434 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4435 irq = pci_irq_vector(hdev->pdev, i);
4436 synchronize_irq(irq);
4439 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4440 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4442 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4444 irq = pci_irq_vector(hdev->pdev, i);
4445 synchronize_irq(irq);
4448 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4449 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
4452 static void gaudi2_disable_msix(struct hl_device *hdev)
4454 struct asic_fixed_properties *prop = &hdev->asic_prop;
4455 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4459 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4462 gaudi2_sync_irqs(hdev);
4464 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4465 free_irq(irq, &hdev->event_queue);
4467 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4469 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4470 free_irq(irq, &hdev->tpc_interrupt);
4472 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4473 free_irq(irq, &hdev->unexpected_error_interrupt);
4475 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4476 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4478 irq = pci_irq_vector(hdev->pdev, i);
4479 irq_set_affinity_and_hint(irq, NULL);
4480 free_irq(irq, &hdev->user_interrupt[j]);
4483 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4484 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4487 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4488 free_irq(irq, hdev);
4490 pci_free_irq_vectors(hdev->pdev);
4492 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4495 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4497 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4498 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4499 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4503 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4505 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4507 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4508 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4509 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4512 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4514 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4516 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4518 /* Wait till all traffic from the decoder stops
4519 * before applying core reset.
4521 rc = hl_poll_timeout(
4523 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4525 (graceful & graceful_pend_mask),
4530 "Failed to stop traffic from DCORE%d Decoder %d\n",
static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dec_id * PCIE_VDEC_OFFSET;

		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);

		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from the decoder stops
		 * before applying core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from PCIe Decoder %d\n",
				dec_id);
	}
}

static void gaudi2_stop_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore_id;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		gaudi2_stop_dcore_dec(hdev, dcore_id);

	gaudi2_stop_pcie_dec(hdev);
}

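/*
 * ARC run/halt protocol: a RUN or HALT request is written to the ARC AUX
 * RUN_HALT_REQ register of the target core, and the core acknowledges it in
 * the matching RUN_HALT_ACK register, which
 * gaudi2_verify_arc_running_mode() polls below.
 */
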
static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	u32 reg_base, reg_val;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
	else
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);

	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
}

static void gaudi2_halt_arcs(struct hl_device *hdev)
{
	u16 arc_id;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
	}
}

static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	int rc;
	u32 reg_base, val, ack_mask, timeout_usec = 100000;

	if (hdev->pldm)
		timeout_usec *= 100;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	if (run_mode == HL_ENGINE_CORE_RUN)
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
	else
		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;

	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
				val, ((val & ack_mask) == ack_mask),
				1000, timeout_usec);

	if (!rc) {
		/* Clear the request once it was acked */
		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
	}

	return rc;
}

static void gaudi2_reset_arcs(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u16 arc_id;

	if (!gaudi2)
		return;

	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			gaudi2_clr_arc_id_cap(hdev, arc_id);
}

static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 queue_id;
	int i;

	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
		return;

	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		gaudi2_qman_manual_flush_common(hdev, queue_id);
	}
}

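/*
 * The cores are handled in two passes: first the requested run mode is sent
 * to every enabled core, and only then is each core polled for its ack, so
 * the cores can transition in parallel instead of serializing a full
 * request/ack round-trip per core.
 */
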
static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
					u32 num_cores, u32 core_command)
{
	int i, rc;

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
	}

	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);

			if (rc) {
				dev_err(hdev->dev, "failed to %s arc: %d\n",
					(core_command == HL_ENGINE_CORE_HALT) ?
					"HALT" : "RUN", core_ids[i]);
				return -1;
			}
		}
	}

	return 0;
}

static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, reg_addr, reg_val, tpc_id;

	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
		return 0;

	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
		return 0;

	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	WREG32(reg_addr, reg_val);

	if (engine_command == HL_ENGINE_RESUME) {
		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
	}

	return 0;
}

static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, reg_addr, reg_val, mme_id;

	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
		return 0;

	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	WREG32(reg_addr, reg_val);

	return 0;
}

static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, reg_addr, reg_val, edma_id;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
		return 0;

	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
		return 0;

	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	WREG32(reg_addr, reg_val);

	if (engine_command == HL_ENGINE_STALL) {
		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
		WREG32(reg_addr, reg_val);
	}

	return 0;
}

static int gaudi2_set_engine_modes(struct hl_device *hdev,
		u32 *engine_ids, u32 num_engines, u32 engine_command)
{
	int i, rc;

	for (i = 0 ; i < num_engines ; ++i) {
		switch (engine_ids[i]) {
		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		case GAUDI2_DCORE0_ENGINE_ID_MME:
		case GAUDI2_DCORE1_ENGINE_ID_MME:
		case GAUDI2_DCORE2_ENGINE_ID_MME:
		case GAUDI2_DCORE3_ENGINE_ID_MME:
			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		default:
			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
			return -EINVAL;
		}
	}

	return 0;
}

static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
					u32 num_engines, u32 engine_command)
{
	switch (engine_command) {
	case HL_ENGINE_CORE_HALT:
	case HL_ENGINE_CORE_RUN:
		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);

	case HL_ENGINE_STALL:
	case HL_ENGINE_RESUME:
		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);

	default:
		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
		return -EINVAL;
	}
}

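/*
 * Halt order below: first stop the QMANs so no new work is dispatched, then
 * stall the ARCs and the engines themselves, then stop the decoders, and
 * only afterwards disable the QMANs. Each stage is given wait_timeout_ms to
 * settle before the next one starts.
 */
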
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * in case of soft reset do a manual flush for QMANs (currently called
	 * only for NIC QMANs)
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}

static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
	pre_fw_load->wait_for_preboot_extended_timeout =
		GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
}

static void gaudi2_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DRAM_BAR_ID;
	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;

	/* here we update initial values for few specific dynamic regs (as
	 * before reading the first descriptor from FW those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data
	 * there will always be up-to-date
	 */
	dynamic_loader = &hdev->fw_loader.dynamic_loader;
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
}

static int gaudi2_init_cpu(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues  */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;

	return 0;
}

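/*
 * Each QMAN exposes NUM_OF_PQ_PER_QMAN physical queues; per PQ the driver
 * programs the base address, the size as log2 of the number of entries
 * (ilog2(HL_QUEUE_LENGTH)), and zeroes the PI/CI pointers.
 */
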
static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	struct hl_hw_queue *q;
	u32 pq_id, pq_offset;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		q = &hdev->kernel_queues[queue_id_base + pq_id];
		pq_offset = pq_id * 4;

		/* The PQ descriptor can reside either in device DRAM or in host memory */
		if (q->dram_bd) {
			WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
					lower_32_bits(q->pq_dram_address));
			WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
					upper_32_bits(q->pq_dram_address));
		} else {
			WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
					lower_32_bits(q->bus_address));
			WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
					upper_32_bits(q->bus_address));
		}

		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
	}
}

static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
{
	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
		cp_offset = cp_id * 4;

		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
	}

	/* allow QMANs to accept work from ARC CQF */
	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
}

static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 pq_id, pq_offset, so_base_lo, so_base_hi;

	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		pq_offset = pq_id * 4;

		/* Configure QMAN HBW to scratchpad as it is not needed */
		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));

		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
	}

	/* Enable QMAN H/W completion */
	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
}

static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 sp_reg_addr;

	switch (queue_id_base) {
	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		fallthrough;
	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
		break;
	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
		break;
	default:
		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
		return 0;
	}

	return sp_reg_addr;
}

static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
					u32 queue_id_base)
{
	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
	int map_table_entry;

	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);

	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);

	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);

	/* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow user to
	 * access some of the CPs.
	 * PDMA0: CP2/3 are reserved for the ARC usage.
	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
	 */
	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
	else
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
}

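/*
 * Full QMAN bring-up is done in four steps: PQ base/size/PI/CI setup
 * (gaudi2_init_qman_pq), CP message base registers (gaudi2_init_qman_cp),
 * PQC completion configuration (gaudi2_init_qman_pqc) and, last, the common
 * global error/arbitration configuration that also enables the QMAN
 * (gaudi2_init_qman_common).
 */
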
static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	u32 pq_id;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;

	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_cp(hdev, reg_base);
	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
}

static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
				u32 dma_core_id, bool is_secure)
{
	u32 prot, irq_handler_offset;
	struct cpu_dyn_regs *dyn_regs;
	int map_table_entry;

	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
	if (is_secure)
		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;

	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
			lower_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
			upper_32_bits(CFG_BASE + irq_handler_offset));

	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	/* Enable the DMA channel */
	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
}

static void gaudi2_init_kdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];

	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);

	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
}

static void gaudi2_init_pdma(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base;

	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
		return;

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);

	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);

	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);

	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
}

static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
{
	u32 reg_base, base_edma_core_id, base_edma_qman_id;

	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
	base_edma_qman_id = edma_stream_base[seq];

	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);

	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
}

static void gaudi2_init_edma(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int dcore, inst;

	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
		return;

	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;

			if (!(prop->edma_enabled_mask & BIT(seq)))
				continue;

			gaudi2_init_edma_instance(hdev, seq);

			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
		}
	}
}

/**
 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
 * @hdev: pointer to habanalabs device structure.
 * @sob_id: sync object ID.
 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
 * @interrupt_id: interrupt ID.
 *
 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
 * write directly to the HBW host memory of the virtual MSI-X doorbell.
 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
 *
 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
 * completion, by decrementing the sync object value and re-arming the monitor.
 */
static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
							u32 first_mon_id, u32 interrupt_id)
{
	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 addr;
	u8 mask;

	/* Reset the SOB value */
	sob_offset = sob_id * sizeof(u32);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure 3 monitors:
	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
	 * 2. Decrement SOB value by 1.
	 * 3. Re-arm the master monitor.
	 */

	first_mon_offset = first_mon_id * sizeof(u32);

	/* 2nd monitor: Decrement SOB value by 1 */
	mon_offset = first_mon_offset + sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 3rd monitor: Re-arm the master monitor */
	mon_offset = first_mon_offset + 2 * sizeof(u32);

	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	sob_group = sob_id / 8;
	mask = ~BIT(sob_id & 0x7);
	mode = 0; /* comparison mode is "greater than or equal to" */
	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);

	payload = arm;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
	mon_offset = first_mon_offset;

	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);

	addr = gaudi2->virt_msix_db_dma_addr;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = interrupt_id;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
}

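/*
 * Worked example for gaudi2_arm_monitors_for_virt_msix_db(): for sob_id = 10
 * the SOB sits in group 1 (10 / 8) and the arm mask is ~BIT(2) (10 & 0x7),
 * i.e. every SOB in the group except ours is masked out of the comparison.
 * With SOD = 1 and SOP = 0 ("greater than or equal to"), the master monitor
 * fires once the SOB value reaches 1; the 2nd monitor then increments the
 * SOB by the value the code annotates as "-1" (0x7FFF with the sign bit
 * set), bringing it back to 0, and the 3rd monitor re-arms the master for
 * the next completion.
 */
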
static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
{
	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* Decoder normal/abnormal interrupts */
	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
			continue;

		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);

		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
		interrupt_id += 1;
		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
	}
}

static void gaudi2_init_sm(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 cq_address;
	u32 reg_val;
	int i;

	/* Enable HBW/LBW CQ for completion monitors */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);

	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Enable only HBW CQ for KDMA completion monitor */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);

	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
		cq_address =
			hdev->completion_queue[i].bus_address;

		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
			lower_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
			upper_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
			ilog2(HL_CQ_SIZE_IN_BYTES));
	}

	/* Configure kernel ASID and MMU BP */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);

	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
	gaudi2_prepare_sm_for_virt_msix_db(hdev);
}

static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_val;
	int i;

	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);

	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);

	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
	}
}

static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
							bool config_qman_only)
{
	u32 queue_id_base, reg_base;

	switch (dcore_id) {
	case 0:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
		break;
	case 1:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
		break;
	case 2:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
		break;
	case 3:
		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
		break;
	default:
		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
		return;
	}

	if (!config_qman_only) {
		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
		gaudi2_init_mme_acc(hdev, reg_base);
	}

	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
	gaudi2_init_qman(hdev, reg_base, queue_id_base);
}

static void gaudi2_init_mme(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
		return;

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		gaudi2_init_dcore_mme(hdev, i, false);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
	}
}

static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
{
	/* Mask arithmetic and QM interrupts in TPC */
	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);

	/* Set 16 cache lines */
	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
}

struct gaudi2_tpc_init_cfg_data {
	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
};

static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
					u32 offset, struct iterate_module_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
	u32 queue_id_base;
	u8 seq;

	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);

	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
		/* gets last sequence number */
		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
	else
		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;

	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);

	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
}

static void gaudi2_init_tpc(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data init_cfg_data;
	struct iterate_module_ctx tpc_iter;

	if (!hdev->asic_prop.tpc_enabled_mask)
		return;

	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
		return;

	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
	tpc_iter.fn = &gaudi2_init_tpc_config;
	tpc_iter.data = &init_cfg_data;
	gaudi2_iterate_tpcs(hdev, &tpc_iter);
}

static void gaudi2_init_rotator(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 i, reg_base, queue_id;

	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;

	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
		reg_base = gaudi2_qm_blocks_bases[queue_id];
		gaudi2_init_qman(hdev, reg_base, queue_id);

		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
	}
}

static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
{
	u32 sob_id;

	/* VCMD normal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);

	/* VCMD abnormal interrupt */
	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
}

static void gaudi2_init_dec(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 dcore_id, dec_id, dec_bit;
	u64 base_addr;

	if (!hdev->asic_prop.decoder_enabled_mask)
		return;

	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
		return;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;

			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
				continue;

			base_addr = mmDCORE0_DEC0_CMD_BASE +
					BRDG_CTRL_BLOCK_OFFSET +
					dcore_id * DCORE_OFFSET +
					dec_id * DCORE_VDEC_OFFSET;

			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
		}

	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;

		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);

		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
	}
}

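/*
 * MMU hop0 programming handshake: the ASID and the hop0 physical address
 * (split across the PA43_12 and PA63_44 registers) are written first, then
 * bit 31 of the STLB busy register is set and polled until the MMU clears
 * it to signal that the new mapping was latched.
 */
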
static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 stlb_base, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_BUSY_OFFSET,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
						u32 start_offset, u32 inv_start_val,
						u32 flags)
{
	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
	if (flags & MMU_OP_CLEAR_MEMCACHE)
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);

	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return;

	WREG32(stlb_base + start_offset, inv_start_val);
}

static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 status, timeout_usec, start_offset;
	int rc;

	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* Lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}

bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* this function shall be called only for HMMUs whose capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}

static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the addresses range.
		 * Note: the start address we write to the register is, by design,
		 * not included in the invalidation range.
		 * That's why we write an address lower than the one we actually
		 * want to be included in the range invalidation.
		 */
		u64 start = inv_params->start_va - 1;

		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;

		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
				start >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
				start >> MMU_RANGE_INV_VA_MSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
	} else {
		start_offset = STLB_INV_ALL_START_OFFSET;
	}

	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
						inv_params->inv_start_val, inv_params->flags);
}

static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
}

static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);

	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
}

static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
						struct gaudi2_cache_invld_params *inv_params)
{
	int dcore_id, hmmu_id;

	/* first send all invalidation commands */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
		}
	}

	/* next, poll all invalidations status */
	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
			int rc;

			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
				continue;

			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
										inv_params);
			if (rc)
				return rc;
		}
	}

	return 0;
}

static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_cache_invld_params invld_params;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	invld_params.range_invalidation = false;
	invld_params.inv_start_val = 1;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		invld_params.flags = flags;
		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.flags = 0;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}

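/*
 * The range invalidation start value packs an enable bit, an ASID-filter
 * enable bit and the ASID itself (via the MMU_RANGE_INV_{EN,ASID_EN,ASID}
 * shifts), so a single register write both kicks the invalidation and
 * scopes it to one address space.
 */
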
static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
				u32 flags, u32 asid, u64 va, u64 size)
{
	struct gaudi2_cache_invld_params invld_params = {0};
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 start_va, end_va;
	u32 inv_start_val;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return rc;

	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
			asid << MMU_RANGE_INV_ASID_SHIFT);
	start_va = va;
	end_va = start_va + size;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support a zero address, we do
		 * full invalidation in this case
		 */
		if (start_va) {
			invld_params.range_invalidation = true;
			invld_params.start_va = start_va;
			invld_params.end_va = end_va;
			invld_params.inv_start_val = inv_start_val;
			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
		} else {
			invld_params.range_invalidation = false;
			invld_params.inv_start_val = 1;
			invld_params.flags = flags;
		}

		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
		if (rc)
			return rc;

	} else if (flags & MMU_OP_PHYS_PACK) {
		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
		invld_params.inv_start_val = inv_start_val;
		invld_params.flags = flags;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}

static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
					bool host_resident_pgt)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr;
	u32 asid, max_asid = prop->max_asid;
	int rc;

	/* it takes too much time to init all of the ASIDs on palladium */
	if (hdev->pldm)
		max_asid = min((u32) 8, max_asid);

	for (asid = 0 ; asid < max_asid ; asid++) {
		if (host_resident_pgt)
			hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
		else
			hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);

		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
		if (rc) {
			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
			return rc;
		}
	}

	return 0;
}

static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
					bool host_resident_pgt)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_SRAM_INIT_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");

	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
	if (rc)
		return rc;

	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);

	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_INV_ALL_START_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");

	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);

	return rc;
}

static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base, stlb_base;
	int rc;

	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
		return 0;

	mmu_base = mmPMMU_HBW_MMU_BASE;
	stlb_base = mmPMMU_HBW_STLB_BASE;

	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);

	if (PAGE_SIZE == SZ_64K) {
		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
			FIELD_PREP(
				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
				1),
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
	}

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;

	return 0;
}

static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
				int hmmu_id)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, mmu_base, stlb_base, hw_cap;
	u8 dmmu_seq;
	int rc;

	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;

	/*
	 * return if DMMU is already initialized or if it's not out of
	 * isolation (due to cluster binning)
	 */
	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
		return 0;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;

	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);

	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= hw_cap;

	return 0;
}

static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
{
	int rc, dcore_id, hmmu_id;

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
			if (rc)
				return rc;
		}

	return 0;
}

static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_pci_mmu_init(hdev);
	if (rc)
		return rc;

	rc = gaudi2_hbm_mmu_init(hdev);
	if (rc)
		return rc;

	return 0;
}

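/*
 * H/W init ordering: the CPU (firmware) comes up first, then the CPU queues
 * and cpucp info, then the MMUs, and only afterwards the engines
 * (PDMA/EDMA/SM/TPC/MME/rotator/decoder), so the engines come up behind an
 * already-initialized MMU. MSI-X is enabled last; a failure from coresight
 * or MSI-X init unwinds through the disable_queues label.
 */
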
static int gaudi2_hw_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	/* Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi2_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	gaudi2_init_scrambler_hbm(hdev);
	gaudi2_init_kdma(hdev);

	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
		return rc;
	}

	rc = gaudi2->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = gaudi2_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi2_init_pdma(hdev);
	gaudi2_init_edma(hdev);
	gaudi2_init_sm(hdev);
	gaudi2_init_tpc(hdev);
	gaudi2_init_mme(hdev);
	gaudi2_init_rotator(hdev);
	gaudi2_init_dec(hdev);
	gaudi2_enable_timestamp(hdev);

	rc = gaudi2_coresight_init(hdev);
	if (rc)
		goto disable_queues;

	rc = gaudi2_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_queues:
	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);

	gaudi2_disable_timestamp(hdev);

	return rc;
}

/**
 * gaudi2_send_hard_reset_cmd - common function to handle reset
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers if reset is handled by driver/FW and what FW components are loaded
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case where the failure was during the cpu management
	 * app load, and the driver didn't detect any failure while loading the FW.
	 * In such a scenario the driver will send only HALT_MACHINE, and no one
	 * will respond to this request since the FW is already back in preboot
	 * and cannot handle such a cmd.
	 * In this case, the next time the management app loads it will check the
	 * events register, which will still have the halt indication, and will
	 * reboot the device.
	 * The solution is to let preboot clear all relevant registers before the
	 * next boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * when Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start the HALT sequence (the preparations for
	 *                  the reset but not the reset itself, as it is not
	 *                  implemented on their part) and LKD will wait to let FW
	 *                  complete the sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Boot fit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return back to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, then this
	 * won't do any damage.
	 */
	if (heartbeat_reset || preboot_only || !cpu_initialized) {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}
}

/**
 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * This function executes hard reset based on if driver/FW should do the reset
 */
static void gaudi2_execute_hard_reset(struct hl_device *hdev)
{
	if (hdev->asic_prop.hard_reset_done_by_fw) {
		gaudi2_send_hard_reset_cmd(hdev);
		return;
	}

	/* Set device to handle FLR by H/W as we will put the device
	 * after reset
	 */
	WREG32(mmPCIE_AUX_FLR_CTRL,
			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	gaudi2_send_hard_reset_cmd(hdev);

	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}

static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmCPU_RST_STATUS_TO_HOST,
			reg_val,
			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
			1000,
			poll_timeout_us);

	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
				reg_val);

	return rc;
}

/**
 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @driver_performs_reset: true if driver should perform reset instead of f/w.
 * @poll_timeout_us: time to wait for response from f/w.
 *
 * This function executes soft reset based on if driver/FW should do the reset
 */
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
						u32 poll_timeout_us)
{
	int rc;

	if (!driver_performs_reset) {
		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
			/* set SP to indicate reset request sent to FW */
			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);

			WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);

			/* wait for f/w response */
			rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
		} else {
			rc = hl_fw_send_soft_reset(hdev);
		}
		return rc;
	}

	/* Block access to engines, QMANs and SM during reset, these
	 * RRs will be reconfigured after soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
					mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
					mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);

	return 0;
}

static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	u32 reg_val;
	int i, rc = 0;

	/* We poll the BTM done indication multiple times after reset due to
	 * a HW errata 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	if (reg_val)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}

6403 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6405 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6406 u32 poll_timeout_us, reset_sleep_ms;
6407 bool driver_performs_reset = false;
6411 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6412 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6413 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6415 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6416 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6422 gaudi2_reset_arcs(hdev);
6425 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6426 gaudi2_execute_hard_reset(hdev);
6429 * As we also have to support working with preboot only (which does not support
6430 * soft reset), we have to make sure that security is disabled before letting the
6431 * driver do the reset. The user shall control the BFE flags to avoid requesting a
6432 * soft reset on a secured device with preboot only.
6434 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6435 !hdev->asic_prop.fw_security_enabled);
6436 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6442 if (driver_performs_reset || hard_reset) {
6444 * Instead of waiting for BTM indication we should wait for preboot ready:
6445 * Consider the below scenario:
6446 * 1. FW update is being triggered
6447 * - setting the dirty bit
6448 * 2. hard reset will be triggered due to the dirty bit
6449 * 3. FW initiates the reset:
6450 * - dirty bit cleared
6451 * - BTM indication cleared
6452 * - preboot ready indication cleared
6453 * 4. during hard reset:
6454 * - BTM indication will be set
6455 * - BIST test performed and another reset triggered
6456 * 5. only after this reset the preboot will set the preboot ready
6458 * When polling on the BTM indication alone we can lose sync with the FW while
6459 * trying to communicate with a FW that is still in reset.
6460 * To overcome this we always wait for the preboot ready indication.
6463 /* without this sleep, the reset will not work */
6464 msleep(reset_sleep_ms);
6466 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6467 hl_fw_wait_preboot_ready(hdev);
6469 gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6475 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6476 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6479 * Clear NIC capability mask in order for the driver to re-configure
6480 * NIC QMANs. NIC ports will not be re-configured during soft
6481 * reset as we call gaudi2_nic_init only during hard reset
6483 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6486 gaudi2->hw_cap_initialized &=
6487 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6488 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6489 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6490 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6491 HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6493 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6495 gaudi2->hw_cap_initialized &=
6496 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6497 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6503 static int gaudi2_suspend(struct hl_device *hdev)
6507 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6509 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6514 static int gaudi2_resume(struct hl_device *hdev)
6516 return gaudi2_init_iatu(hdev);
6519 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6520 void *cpu_addr, dma_addr_t dma_addr, size_t size)
6524 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6525 VM_DONTCOPY | VM_NORESERVE);
6527 #ifdef _HAS_DMA_MMAP_COHERENT
6529 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6531 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6535 rc = remap_pfn_range(vma, vma->vm_start,
6536 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6537 size, vma->vm_page_prot);
6539 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
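/*
 * Rationale for the two mmap paths above (informal sketch): when the kernel
 * provides dma_mmap_coherent() we let the DMA layer map the coherent buffer,
 * since it knows the correct pgprot and physical layout of the allocation.
 * The remap_pfn_range() fallback assumes the coherent buffer is backed by
 * physically contiguous memory whose PFN can be derived with virt_to_phys(),
 * which holds for the allocator used here but not for arbitrary kernel
 * virtual addresses.
 */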
6546 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6548 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6549 u64 hw_cap_mask = 0;
6550 u64 hw_tpc_cap_bit = 0;
6551 u64 hw_nic_cap_bit = 0;
6552 u64 hw_test_cap_bit = 0;
6554 switch (hw_queue_id) {
6555 case GAUDI2_QUEUE_ID_PDMA_0_0:
6556 case GAUDI2_QUEUE_ID_PDMA_0_1:
6557 case GAUDI2_QUEUE_ID_PDMA_1_0:
6558 hw_cap_mask = HW_CAP_PDMA_MASK;
6560 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6561 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6562 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6564 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6565 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6566 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6568 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6569 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6570 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6572 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6573 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6574 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6577 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6578 hw_test_cap_bit = HW_CAP_MME_SHIFT;
6581 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6582 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6585 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6586 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6589 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6590 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6593 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6594 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6595 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6597 /* special case where cap bit refers to the first queue id */
6598 if (!hw_tpc_cap_bit)
6599 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6602 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6603 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6604 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6607 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6608 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6609 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6612 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6613 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6614 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6617 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6618 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6621 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6622 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6625 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6626 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6628 /* special case where cap bit refers to the first queue id */
6629 if (!hw_nic_cap_bit)
6630 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6633 case GAUDI2_QUEUE_ID_CPU_PQ:
6634 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6641 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6644 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6646 if (hw_test_cap_bit)
6647 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6649 return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
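/*
 * Worked example for the mapping above (informal): each engine exposes 4
 * consecutive H/W queues, hence the ">> 2" when translating a queue ID to a
 * capability bit. E.g. for GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2, the delta from
 * GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 is 6, so 6 >> 2 = 1 and the tested bit is
 * HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 1.
 */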
6652 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6654 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6657 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6658 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6659 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6661 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6662 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6664 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6665 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6672 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6674 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6677 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6678 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6679 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6682 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6683 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6686 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6687 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6695 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6697 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6700 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6701 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6702 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6705 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6706 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6709 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6710 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6718 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6720 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6721 u32 pq_offset, reg_base, db_reg_offset, db_value;
6723 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6725 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6726 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ index.
6729 pq_offset = (hw_queue_id & 0x3) * 4;
6730 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6731 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6733 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6738 /* ring the doorbell */
6739 WREG32(db_reg_offset, db_value);
6741 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6742 /* make sure device CPU will read latest data from host */
6744 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6745 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
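/*
 * Worked example (informal): a queue ID whose low two bits are 3 maps to the
 * QMAN's fourth internal PQ, so pq_offset = (hw_queue_id & 0x3) * 4 = 12 and
 * the doorbell write lands on the PQ_PI_3 register of that QMAN block.
 */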
6749 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6751 __le64 *pbd = (__le64 *) bd;
6753 /* The QMANs are in host memory so a simple copy suffices */
6754 pqe[0] = pbd[0];
6755 pqe[1] = pbd[1];
6758 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6759 dma_addr_t *dma_handle, gfp_t flags)
6761 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6764 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6765 void *cpu_addr, dma_addr_t dma_handle)
6767 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6770 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6771 u32 timeout, u64 *result)
6773 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6775 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6782 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6784 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6787 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6788 gfp_t mem_flags, dma_addr_t *dma_handle)
6790 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6793 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6796 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6798 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6801 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6802 dma_addr_t *dma_handle)
6804 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6807 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6809 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6812 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6814 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6815 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6817 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6818 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6822 /* Just check if CB address is valid */
6824 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6825 parser->user_cb_size,
6826 asic_prop->sram_user_base_address,
6827 asic_prop->sram_end_address))
6830 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6831 parser->user_cb_size,
6832 asic_prop->dram_user_base_address,
6833 asic_prop->dram_end_address))
6836 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6837 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6838 parser->user_cb_size,
6839 asic_prop->dmmu.start_addr,
6840 asic_prop->dmmu.end_addr))
6843 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6844 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6845 parser->user_cb_size,
6846 asic_prop->pmmu.start_addr,
6847 asic_prop->pmmu.end_addr) ||
6848 hl_mem_area_inside_range(
6849 (u64) (uintptr_t) parser->user_cb,
6850 parser->user_cb_size,
6851 asic_prop->pmmu_huge.start_addr,
6852 asic_prop->pmmu_huge.end_addr))
6855 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6859 if (!device_iommu_mapped(&hdev->pdev->dev))
6863 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6864 parser->user_cb, parser->user_cb_size);
6869 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6871 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6873 if (!parser->is_kernel_allocated_cb)
6874 return gaudi2_validate_cb_address(hdev, parser);
6876 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6877 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6884 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6886 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6888 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6891 return hl_fw_send_heartbeat(hdev);
6894 /* This is an internal helper function, used to update the KDMA MMU properties.
6895 * Should be called with the KDMA lock held.
6897 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6898 bool mmu_bypass, u32 asid)
6900 u32 rw_asid, rw_mmu_bp;
6902 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6903 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6905 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6906 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6908 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6909 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
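/*
 * Example of the packing above (informal): for asid = 5 and mmu_bypass =
 * false, rw_asid carries 5 in both the RD and WR ASID fields and rw_mmu_bp
 * is 0, i.e. the KDMA issues reads and writes with ASID 5 and MMU
 * translation enabled.
 */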
6912 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6913 u32 mon_payload, u32 sync_value)
6915 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6918 sob_offset = sob_id * 4;
6919 mon_offset = mon_id * 4;
6921 /* Reset the SOB value */
6922 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6924 /* Configure this address with CQ_ID 0 because CQ_EN is set */
6925 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6927 /* Configure this address with CS index because CQ_EN is set */
6928 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6930 sync_group_id = sob_id / 8;
6931 mask = ~(1 << (sob_id & 0x7));
6932 mode = 1; /* comparison mode is "equal to" */
6934 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6935 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6936 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6937 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6938 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
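/*
 * Worked example for the ARM fields (informal): for sob_id = 12,
 * sync_group_id = 12 / 8 = 1 and mask = ~(1 << (12 & 0x7)) = ~0x10, so the
 * monitor watches sync group 1 with only SOB 12 unmasked and fires when its
 * value equals sync_value ("equal to" mode).
 */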
6941 /* Internal helper: submit a single copy/memset job to the KDMA and busy-wait for its completion */
6942 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6943 u64 src_addr, u64 dst_addr,
6944 u32 size, bool is_memset)
6946 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6947 struct hl_cq_entry *cq_base;
6952 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6953 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6954 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6956 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6957 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6959 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6960 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6962 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6963 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6964 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6965 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6966 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6967 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6968 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6969 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6971 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6972 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6975 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6977 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6979 /* Wait for completion */
6980 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6981 cq_base = cq->kernel_address;
6982 polling_addr = (u32 *)&cq_base[cq->ci];
6985 /* 20 seconds of timeout for each 1MB of transfer size */
6986 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6988 timeout = KDMA_TIMEOUT_USEC;
6991 rc = hl_poll_timeout_memory(
7003 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
7004 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
7008 cq->ci = hl_cq_inc_ptr(cq->ci);
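/*
 * Timeout example (informal): the size-scaled path gives a 4MB job
 * ((4MB / SZ_1M) + 1) * 20 = 100 seconds, while the other path uses the
 * fixed KDMA_TIMEOUT_USEC regardless of transfer size.
 */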
7013 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
7017 for (i = 0 ; i < size ; i += sizeof(u32))
7018 WREG32(addr + i, val);
7021 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
7023 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
7026 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
7027 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
7029 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
7030 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
7034 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
7036 return hdev->asic_prop.first_available_user_sob[0] +
7037 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
7040 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
7042 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7043 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7045 /* Reset the SOB value */
7046 WREG32(sob_addr, 0);
7049 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
7050 struct gaudi2_queues_test_info *msg_info)
7052 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7053 u32 tmp, sob_base = 1;
7054 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
7055 size_t pkt_size = sizeof(struct packet_msg_short);
7058 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
7059 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
7060 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
7061 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
7062 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
7064 msg_short_pkt->value = cpu_to_le32(sob_val);
7065 msg_short_pkt->ctl = cpu_to_le32(tmp);
7067 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
7070 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
7075 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
7077 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7078 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7079 u32 timeout_usec, tmp;
7083 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
7085 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
7087 rc = hl_poll_timeout(
7095 if (rc == -ETIMEDOUT) {
7096 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
7104 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
7106 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7109 * Check the capability here as send_cpu_message() won't update the result
7110 * value if the capability is not set
7112 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7115 return hl_fw_test_cpu_queue(hdev);
7118 static int gaudi2_test_queues(struct hl_device *hdev)
7120 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7121 struct gaudi2_queues_test_info *msg_info;
7122 u32 sob_val = 0x5a5a;
7125 /* send test message on all enabled Qs */
7126 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7127 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7130 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
7131 gaudi2_qman_set_test_mode(hdev, i, true);
7132 gaudi2_test_queue_clear(hdev, i);
7133 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
7138 rc = gaudi2_test_cpu_queue(hdev);
7142 /* verify that all messages were processed */
7143 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7144 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7147 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
7149 /* chip is not usable, no need for cleanups, just bail out with an error */
7152 gaudi2_test_queue_clear(hdev, i);
7153 gaudi2_qman_set_test_mode(hdev, i, false);
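/*
 * Design note (informal): the test sends messages to all enabled queues
 * first and only then polls for completions, letting the queues execute in
 * parallel instead of serializing a full send/wait round-trip per queue.
 */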
7160 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
7162 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7163 size_t irq_arr_size;
7166 gaudi2_init_arcs(hdev);
7168 rc = gaudi2_scrub_arcs_dccm(hdev);
7170 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7174 gaudi2_init_security(hdev);
7176 /* Unmask all IRQs since some could have been received during the soft reset */
7177 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7178 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7181 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7182 struct engines_data *e)
7184 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7185 struct asic_fixed_properties *prop = &hdev->asic_prop;
7186 unsigned long *mask = (unsigned long *) mask_arr;
7187 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7188 bool is_idle = true, is_eng_idle;
7189 int engine_idx, i, j;
7193 hl_engine_data_sprintf(e,
7194 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7195 "---- ---- ------- ------------ ------------- -------------\n");
7197 for (i = 0; i < NUM_OF_DCORES; i++) {
7198 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7199 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7201 if (!(prop->edma_enabled_mask & BIT(seq)))
7204 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7205 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7206 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7208 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7209 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7211 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7212 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7213 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7215 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7216 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7217 is_idle &= is_eng_idle;
7219 if (mask && !is_eng_idle)
7220 set_bit(engine_idx, mask);
7223 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7224 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7231 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7232 struct engines_data *e)
7234 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7235 unsigned long *mask = (unsigned long *) mask_arr;
7236 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7237 bool is_idle = true, is_eng_idle;
7242 hl_engine_data_sprintf(e,
7243 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7244 "---- ------- ------------ ------------- -------------\n");
7246 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7247 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7248 offset = i * PDMA_OFFSET;
7249 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7250 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7252 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7253 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7254 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7256 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7257 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7258 is_idle &= is_eng_idle;
7260 if (mask && !is_eng_idle)
7261 set_bit(engine_idx, mask);
7264 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7265 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7271 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7272 struct engines_data *e)
7274 unsigned long *mask = (unsigned long *) mask_arr;
7275 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7276 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7277 bool is_idle = true, is_eng_idle;
7281 /* NIC, twelve macros in a full chip */
7282 if (e && hdev->nic_ports_mask)
7283 hl_engine_data_sprintf(e,
7284 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7285 "--- ------- ------------ ----------\n");
7287 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7289 offset = i / 2 * NIC_OFFSET;
7291 offset += NIC_QM_OFFSET;
7293 if (!(hdev->nic_ports_mask & BIT(i)))
7296 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7299 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7300 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7301 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7303 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7304 is_idle &= is_eng_idle;
7306 if (mask && !is_eng_idle)
7307 set_bit(engine_idx, mask);
7310 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7311 qm_glbl_sts0, qm_cgm_sts);
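/*
 * Offset note (informal): two engines share each NIC macro, so engine i
 * lives at (i / 2) * NIC_OFFSET, with the odd engine of each pair presumably
 * adding NIC_QM_OFFSET to reach the macro's second QMAN.
 */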
7317 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7318 struct engines_data *e)
7320 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7321 unsigned long *mask = (unsigned long *) mask_arr;
7322 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7323 bool is_idle = true, is_eng_idle;
7328 hl_engine_data_sprintf(e,
7329 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
7330 "--- ---- ------- ------------ ---------------\n");
7331 /* MME, one per Dcore */
7332 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7333 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7334 offset = i * DCORE_OFFSET;
7336 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7337 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7338 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7340 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7341 is_idle &= is_eng_idle;
7343 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7344 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7345 is_idle &= is_eng_idle;
7348 hl_engine_data_sprintf(e, mme_fmt, i, "N",
7349 is_eng_idle ? "Y" : "N",
7353 if (mask && !is_eng_idle)
7354 set_bit(engine_idx, mask);
7360 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7361 struct iterate_module_ctx *ctx)
7363 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7364 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7368 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7369 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7371 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7372 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7374 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7375 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7376 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7377 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7379 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7380 IS_TPC_IDLE(tpc_cfg_sts);
7381 *(idle_data->is_idle) &= is_eng_idle;
7383 if (idle_data->mask && !is_eng_idle)
7384 set_bit(engine_idx, idle_data->mask);
7387 hl_engine_data_sprintf(idle_data->e,
7388 idle_data->tpc_fmt, dcore, inst,
7389 is_eng_idle ? "Y" : "N",
7390 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7393 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7394 struct engines_data *e)
7396 struct asic_fixed_properties *prop = &hdev->asic_prop;
7397 unsigned long *mask = (unsigned long *) mask_arr;
7398 bool is_idle = true;
7400 struct gaudi2_tpc_idle_data tpc_idle_data = {
7401 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7404 .is_idle = &is_idle,
7406 struct iterate_module_ctx tpc_iter = {
7407 .fn = &gaudi2_is_tpc_engine_idle,
7408 .data = &tpc_idle_data,
7411 if (e && prop->tpc_enabled_mask)
7412 hl_engine_data_sprintf(e,
7413 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
7414 "---- --- ------- ------------ ---------- ------\n");
7416 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7418 return *tpc_idle_data.is_idle;
7421 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7422 struct engines_data *e)
7424 struct asic_fixed_properties *prop = &hdev->asic_prop;
7425 unsigned long *mask = (unsigned long *) mask_arr;
7426 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7427 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7428 bool is_idle = true, is_eng_idle;
7429 u32 dec_swreg15, dec_enabled_bit;
7430 int engine_idx, i, j;
7433 /* Decoders, two per Dcore and two shared PCIe decoders */
7434 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7435 hl_engine_data_sprintf(e,
7436 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
7437 "---- --- ------- ---------------\n");
7439 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7440 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7441 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7442 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7445 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7446 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7447 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7449 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7450 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7451 is_idle &= is_eng_idle;
7453 if (mask && !is_eng_idle)
7454 set_bit(engine_idx, mask);
7457 hl_engine_data_sprintf(e, dec_fmt, i, j,
7458 is_eng_idle ? "Y" : "N", dec_swreg15);
7462 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7463 hl_engine_data_sprintf(e,
7464 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
7465 "-------- ------- ---------------\n");
7467 /* Check shared (PCIe) decoders */
7468 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7469 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7470 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7473 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7474 offset = i * DCORE_DEC_OFFSET;
7475 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7476 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7477 is_idle &= is_eng_idle;
7479 if (mask && !is_eng_idle)
7480 set_bit(engine_idx, mask);
7483 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7484 is_eng_idle ? "Y" : "N", dec_swreg15);
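/*
 * Mask layout note (informal): decoder_enabled_mask packs the per-Dcore
 * decoders in its low bits (NUM_OF_DEC_PER_DCORE per Dcore), followed by the
 * two shared PCIe decoders starting at PCIE_DEC_SHIFT, which is why the
 * shared ones are tested with BIT(PCIE_DEC_SHIFT + i).
 */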
7490 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7491 struct engines_data *e)
7493 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7494 unsigned long *mask = (unsigned long *) mask_arr;
7495 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7496 bool is_idle = true, is_eng_idle;
7501 hl_engine_data_sprintf(e,
7502 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
7503 "---- --- ------- ------------ ------------ ----------\n");
7505 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7506 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7508 offset = i * ROT_OFFSET;
7510 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7511 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7512 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7514 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7515 is_idle &= is_eng_idle;
7517 if (mask && !is_eng_idle)
7518 set_bit(engine_idx, mask);
7521 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7522 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7528 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7529 struct engines_data *e)
7531 bool is_idle = true;
7533 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7534 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7535 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7536 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7537 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7538 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7539 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7544 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7545 __acquires(&gaudi2->hw_queues_lock)
7547 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7549 spin_lock(&gaudi2->hw_queues_lock);
7552 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7553 __releases(&gaudi2->hw_queues_lock)
7555 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7557 spin_unlock(&gaudi2->hw_queues_lock);
7560 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7562 return hdev->pdev->device;
7565 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7567 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7569 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7572 return hl_fw_get_eeprom_data(hdev, data, max_size);
7575 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7577 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7580 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7582 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7585 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7586 return gaudi2->events_stat_aggregate;
7589 *size = (u32) sizeof(gaudi2->events_stat);
7590 return gaudi2->events_stat;
7593 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7594 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7596 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7597 dcore_vdec_id + DCORE_OFFSET * dcore_id;
7599 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7600 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7602 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7603 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7605 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7606 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7608 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7609 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7611 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7612 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7615 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7617 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7618 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7619 struct asic_fixed_properties *prop = &hdev->asic_prop;
7620 u32 dcore_offset = dcore_id * DCORE_OFFSET;
7621 u32 vdec_id, i, ports_offset, reg_val;
7625 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7626 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7627 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7628 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7629 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7630 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7633 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7634 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7635 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7636 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7637 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7641 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7643 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID must
7644 * be used for any access type
7647 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7648 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7649 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7650 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7653 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7654 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7656 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7657 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7658 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7659 dcore_offset + ports_offset, 0);
7660 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7661 dcore_offset + ports_offset, rw_asid);
7664 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7665 ports_offset = i * DCORE_MME_WB_OFFSET;
7666 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7667 dcore_offset + ports_offset, 0);
7668 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7669 dcore_offset + ports_offset, rw_asid);
7672 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7673 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7678 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7679 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7680 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7684 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7685 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7687 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7689 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7690 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7692 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7693 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7695 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7696 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7698 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7699 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7701 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7702 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7705 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7706 u32 rw_asid, u32 rw_mmu_bp)
7708 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7710 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7711 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7714 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7716 u32 reg_base, reg_offset, reg_val = 0;
7718 reg_base = gaudi2_arc_blocks_bases[cpu_id];
7720 /* Enable MMU and configure asid for all relevant ARC regions */
7721 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7722 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7724 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7725 WREG32(reg_base + reg_offset, reg_val);
7727 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7728 WREG32(reg_base + reg_offset, reg_val);
7730 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7731 WREG32(reg_base + reg_offset, reg_val);
7733 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7734 WREG32(reg_base + reg_offset, reg_val);
7736 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7737 WREG32(reg_base + reg_offset, reg_val);
7739 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7740 WREG32(reg_base + reg_offset, reg_val);
7742 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7743 WREG32(reg_base + reg_offset, reg_val);
7745 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7746 WREG32(reg_base + reg_offset, reg_val);
7748 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7749 WREG32(reg_base + reg_offset, reg_val);
7751 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7752 WREG32(reg_base + reg_offset, reg_val);
7754 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7755 WREG32(reg_base + reg_offset, reg_val);
7758 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7762 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7763 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7765 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7766 gaudi2_arc_mmu_prepare(hdev, i, asid);
7768 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7769 if (!gaudi2_is_queue_enabled(hdev, i))
7772 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7778 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7780 struct asic_fixed_properties *prop = &hdev->asic_prop;
7781 u32 rw_asid, offset;
7784 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7785 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7787 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7788 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7789 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7790 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7792 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7793 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7794 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7795 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7798 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7799 offset = i * ROT_OFFSET;
7800 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7801 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7802 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7803 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7804 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7807 /* Shared Decoders are the last bits in the decoders mask */
7808 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7809 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7811 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7812 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7814 /* arc farm arc dup eng */
7815 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7816 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7818 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7825 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7826 struct iterate_module_ctx *ctx)
7828 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7830 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7831 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7832 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7833 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7836 /* zero the MMUBP and set the ASID */
7837 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7839 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7840 struct gaudi2_tpc_mmu_data tpc_mmu_data;
7841 struct iterate_module_ctx tpc_iter = {
7842 .fn = &gaudi2_tpc_mmu_prepare,
7843 .data = &tpc_mmu_data,
7847 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7848 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7852 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7855 rc = gaudi2_mmu_shared_prepare(hdev, asid);
7859 /* configure DCORE MMUs */
7860 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7861 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7862 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7863 for (i = 0 ; i < NUM_OF_DCORES ; i++)
7864 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7869 static inline bool is_info_event(u32 event)
7872 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7873 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7874 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
7876 /* return in case of NIC status event - these events are received periodically and not as
6877 * an indication of an error.
7879 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7880 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
7887 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7888 bool ratelimited, const char *fmt, ...)
7890 struct va_format vaf;
7893 va_start(args, fmt);
7898 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7899 gaudi2_irq_map_table[event_type].valid ?
7900 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7902 dev_err(hdev->dev, "%s: %pV\n",
7903 gaudi2_irq_map_table[event_type].valid ?
7904 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7909 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7910 struct hl_eq_ecc_data *ecc_data)
7912 u64 ecc_address = 0, ecc_syndrome = 0;
7913 u8 memory_wrapper_idx = 0;
7914 bool has_block_id = false;
7917 if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
7918 has_block_id = true;
7920 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7921 ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
7922 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7925 block_id = le16_to_cpu(ecc_data->block_id);
7926 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7927 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
7928 ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
7929 ecc_data->is_critical);
7931 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7932 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
7933 ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
7936 return !!ecc_data->is_critical;
7939 static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
7941 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
7942 u64 cq_ptr, cp_current_inst;
7943 u32 lo, hi, cq_size, cp_sts;
7946 cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
7947 is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
7950 lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
7951 hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
7952 cq_ptr = ((u64) hi) << 32 | lo;
7953 cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
7955 lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
7956 hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
7957 cq_ptr = ((u64) hi) << 32 | lo;
7958 cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
7961 lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7962 hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7963 cp_current_inst = ((u64) hi) << 32 | lo;
7966 "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
7967 is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
7969 if (undef_opcode->write_enable) {
7970 memset(undef_opcode, 0, sizeof(*undef_opcode));
7971 undef_opcode->timestamp = ktime_get();
7972 undef_opcode->cq_addr = cq_ptr;
7973 undef_opcode->cq_size = cq_size;
7974 undef_opcode->engine_id = engine_id;
7975 undef_opcode->stream_id = QMAN_STREAMS;
7976 undef_opcode->write_enable = 0;
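/*
 * Register note (informal): the CQ pointer is split across two 32-bit
 * registers and recombined as ((u64)hi << 32) | lo; which register pair is
 * sampled depends on whether the lower CP was fetching from the legacy CQ or
 * from the ARC CQ, as reported by the CUR_CQ field of CP_STS_4.
 */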
7980 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7981 u64 qman_base, u32 qid_base, u64 *event_mask)
7983 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7984 u64 glbl_sts_addr, arb_err_addr;
7987 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7988 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7990 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7991 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7992 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7997 if (i == QMAN_STREAMS) {
7998 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7999 num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
8001 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
8002 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
8005 for (j = 0 ; j < num_error_causes ; j++)
8006 if (glbl_sts_val & BIT(j)) {
8007 gaudi2_print_event(hdev, event_type, true,
8008 "%s. err cause: %s", reg_desc,
8010 gaudi2_lower_qman_error_cause[j] :
8011 gaudi2_qman_error_cause[j]);
8015 /* Check for undefined opcode error in lower QM */
8016 if ((i == QMAN_STREAMS) &&
8017 (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
8018 handle_lower_qman_data_on_err(hdev, qman_base,
8019 gaudi2_queue_id_to_engine_id[qid_base]);
8020 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
8024 arb_err_val = RREG32(arb_err_addr);
8029 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
8030 if (arb_err_val & BIT(j)) {
8031 gaudi2_print_event(hdev, event_type, true,
8032 "ARB_ERR. err cause: %s",
8033 gaudi2_qman_arb_error_cause[j]);
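/*
 * Loop note (informal): the error-status scan above runs QMAN_STREAMS + 1
 * times, covering one GLBL_ERR_STS register per upper-CP stream plus a final
 * iteration (i == QMAN_STREAMS) for the lower CP, which has its own distinct
 * set of error causes.
 */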
8042 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
8043 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8044 enum gaudi2_engine_id id, u64 *event_mask)
8046 u32 razwi_hi, razwi_lo, razwi_xy;
8051 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
8052 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
8053 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
8054 rd_wr_flag = HL_RAZWI_WRITE;
8056 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
8057 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
8058 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
8059 rd_wr_flag = HL_RAZWI_READ;
8062 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
8063 rd_wr_flag | HL_RAZWI_HBW, event_mask);
8065 dev_err_ratelimited(hdev->dev,
8066 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
8067 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
8070 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
8071 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8072 enum gaudi2_engine_id id, u64 *event_mask)
8074 u64 razwi_addr = CFG_BASE;
8080 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
8081 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
8082 rd_wr_flag = HL_RAZWI_WRITE;
8084 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
8085 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
8086 rd_wr_flag = HL_RAZWI_READ;
8089 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
8090 dev_err_ratelimited(hdev->dev,
8091 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
8092 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
8096 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
8097 enum razwi_event_sources module, u8 module_idx)
8101 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
8102 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
8103 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8104 (module_idx % NUM_OF_TPC_PER_DCORE) +
8105 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8108 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8109 (module_idx * ENGINE_ID_DCORE_OFFSET));
8112 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8113 (module_idx % NUM_OF_EDMA_PER_DCORE));
8116 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8119 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8122 if (module_idx == 8)
8123 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8125 if (module_idx == 9)
8126 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8128 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8129 (module_idx % NUM_OF_DEC_PER_DCORE) +
8130 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8133 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8135 case RAZWI_ARC_FARM:
8136 return GAUDI2_ENGINE_ID_ARC_FARM;
8139 return GAUDI2_ENGINE_ID_SIZE;
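/*
 * Worked example (informal): for a TPC with module_idx = 13 (assuming
 * NUM_OF_TPC_PER_DCORE = 6), the computation yields Dcore 13 / 6 = 2 and
 * instance 13 % 6 = 1, i.e. the engine ID of DCORE2's TPC_1; the single
 * extra TPC is special-cased to GAUDI2_DCORE0_ENGINE_ID_TPC_6.
 */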
8144 * This function handles RR (Range Register) hit events
8145 * raised by initiators, not PSOC RAZWI.
8147 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8148 enum razwi_event_sources module, u8 module_idx,
8149 u8 module_sub_idx, u64 *event_mask)
8151 bool via_sft = false;
8152 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
8153 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8154 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8155 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8156 char initiator_name[64];
8160 sprintf(initiator_name, "TPC_%u", module_idx);
8161 if (hdev->tpc_binning) {
8162 binned_idx = __ffs(hdev->tpc_binning);
8163 if (binned_idx == module_idx)
8164 module_idx = TPC_ID_DCORE0_TPC6;
8167 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8169 if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
8170 !hdev->asic_prop.fw_security_enabled &&
8171 ((module_idx == 0) || (module_idx == 1)))
8172 lbw_rtr_id = DCORE0_RTR0;
8174 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8177 sprintf(initiator_name, "MME_%u", module_idx);
8178 switch (module_sub_idx) {
8180 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8183 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8186 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8189 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8192 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8195 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8198 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8201 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8204 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8209 lbw_rtr_id = hbw_rtr_id;
8212 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8213 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8214 /* SFT has a separate MSTR_IF for LBW; only there can we
8215 * read the LBW RAZWI related registers
8217 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8218 dcore_id * SFT_DCORE_OFFSET;
8220 sprintf(initiator_name, "EDMA_%u", module_idx);
8223 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8224 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8225 sprintf(initiator_name, "PDMA_%u", module_idx);
8228 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8229 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8230 sprintf(initiator_name, "NIC_%u", module_idx);
8233 sprintf(initiator_name, "DEC_%u", module_idx);
8234 if (hdev->decoder_binning) {
8235 binned_idx = __ffs(hdev->decoder_binning);
8236 if (binned_idx == module_idx)
8237 module_idx = DEC_ID_PCIE_VDEC1;
8239 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8240 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8243 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8244 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8245 sprintf(initiator_name, "ROT_%u", module_idx);
8247 case RAZWI_ARC_FARM:
8248 lbw_rtr_id = DCORE1_RTR5;
8249 hbw_rtr_id = DCORE1_RTR7;
8250 sprintf(initiator_name, "ARC_FARM_%u", module_idx);
8256 /* Find router mstr_if register base */
8258 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8259 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8260 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8261 dcore_id * DCORE_OFFSET +
8262 dcore_rtr_id * DCORE_RTR_OFFSET +
8264 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8265 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8268 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8269 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8270 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8271 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8272 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8274 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8276 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8277 initiator_name, eng_id, event_mask);
8279 /* Clear event indication */
8280 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8284 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8285 initiator_name, eng_id, event_mask);
8287 /* Clear event indication */
8288 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8292 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8293 initiator_name, eng_id, event_mask);
8295 /* Clear event indication */
8296 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8300 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8301 initiator_name, eng_id, event_mask);
8303 /* Clear event indication */
8304 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
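
/*
 * Illustrative sketch (not driver code): the "RAZWI_HAPPENED" accesses
 * above follow a read-then-write-back (write-1-to-clear) idiom. Assuming a
 * W1C register at some base + offset, the pattern is:
 *
 *	u32 happened = RREG32(base + RAZWI_HAPPENED_OFFSET);
 *	if (happened) {
 *		report_razwi(happened);
 *		WREG32(base + RAZWI_HAPPENED_OFFSET, happened);
 *	}
 *
 * Writing back exactly the value that was read clears only the indication
 * bits that were observed, so an indication latched between the read and
 * the write is not lost.
 */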
static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 mod_idx, sub_mod;

	/* check all TPCs */
	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
		if (prop->tpc_enabled_mask & BIT(mod_idx))
			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
	}

	/* check all MMEs */
	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
									sub_mod, NULL);

	/* check all EDMAs */
	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
		if (prop->edma_enabled_mask & BIT(mod_idx))
			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);

	/* check all PDMAs */
	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);

	/* check all NICs */
	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
		if (hdev->nic_ports_mask & BIT(mod_idx))
			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
								NULL);

	/* check all DECs */
	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
		if (prop->decoder_enabled_mask & BIT(mod_idx))
			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);

	/* check all ROTs */
	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
}
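
/*
 * Illustrative sketch (not driver code): the enabled-mask loops above can
 * equivalently be written with the kernel's for_each_set_bit() helper,
 * which visits only the enabled instances without an explicit BIT() test:
 *
 *	unsigned long mask = prop->tpc_enabled_mask;
 *	unsigned int idx;
 *
 *	for_each_set_bit(idx, &mask, NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1)
 *		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, idx, 0, NULL);
 */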
static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
						u32 axuser_xy, u32 *base, u16 *eng_id,
						char *eng_name)
{
	int i, num_of_eng = 0;
	u16 str_size = 0;

	for (i = 0 ; i < array_size ; i++) {
		if (axuser_xy != razwi_info[i].axuser_xy)
			continue;

		eng_id[num_of_eng] = razwi_info[i].eng_id;
		base[num_of_eng] = razwi_info[i].rtr_ctrl;
		if (!num_of_eng)
			str_size += scnprintf(eng_name + str_size,
						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
						razwi_info[i].eng_name);
		else
			str_size += scnprintf(eng_name + str_size,
						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
						razwi_info[i].eng_name);
		num_of_eng++;
	}

	return num_of_eng;
}
static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
						u64 *event_mask)
{
	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
	bool razwi_happened = false;
	u64 addr;
	int i;

	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
							axuser_xy, base, eng_id, eng_name_str);

	/* If no match for XY coordinates, try to find it in MME razwi table */
	if (!num_of_eng) {
		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
								ARRAY_SIZE(mme_razwi_info),
								axuser_xy, base, eng_id,
								eng_name_str);
	}

	for (i = 0 ; i < num_of_eng ; i++) {
		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
			addr = ((u64)addr_hi << 32) + addr_lo;
			if (addr) {
				dev_err(hdev->dev,
					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
					eng_name_str, addr);
				hl_handle_razwi(hdev, addr, &eng_id[0],
					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
				razwi_happened = true;
			}
		}

		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
			addr = ((u64)addr_hi << 32) + addr_lo;
			if (addr) {
				dev_err(hdev->dev,
					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
					eng_name_str, addr);
				hl_handle_razwi(hdev, addr, &eng_id[0],
					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
				razwi_happened = true;
			}
		}

		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
			if (addr_lo) {
				dev_err(hdev->dev,
					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
					eng_name_str, addr_lo);
				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
				razwi_happened = true;
			}
		}

		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
			if (addr_lo) {
				dev_err(hdev->dev,
					"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
					eng_name_str, addr_lo);
				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
				razwi_happened = true;
			}
		}
		/* In the common case the loop breaks here, when there is only one
		 * engine id or several engines share the same router. The exception
		 * is a PSOC RAZWI from EDMA, where the AXUSER id can match two
		 * routers (the two interfaces of the SFT router). In that case the
		 * first router may not hold the info and we need to iterate over
		 * the other router.
		 */
		if (razwi_happened)
			break;
	}

	return razwi_happened;
}
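
/*
 * Illustrative sketch (not driver code): the HBW RAZWI capture registers
 * split the 64-bit failing address across two 32-bit registers, which the
 * handler above reassembles with:
 *
 *	u64 addr = ((u64)addr_hi << 32) + addr_lo;
 *
 * The cast to u64 before the shift matters: shifting a 32-bit value left
 * by 32 is undefined behavior in C and would discard the high half.
 */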
/* PSOC RAZWI interrupt occurs only when trying to access a bad address */
static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
{
	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;

	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
		if (!razwi_intr)
			return 0;
	}

	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);

	dev_err_ratelimited(hdev->dev,
		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));

	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
		error_count++;
	else
		dev_err_ratelimited(hdev->dev,
				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
				razwi_mask_info);

	/* Clear interrupts only on pldm or if f/w doesn't handle interrupts */
	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);

	return error_count;
}
static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
{
	u32 i, sts_val, sts_clr_val = 0, error_count = 0;

	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);

	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);

	return error_count;
}
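
/*
 * Illustrative sketch (not driver code): the status-decode loop above is a
 * generic "walk the set bits, print the matching cause string, accumulate
 * a write-1-to-clear mask" pattern. A self-contained user-space analogue,
 * with placeholder cause strings (the real names live in
 * gaudi2_qm_sei_error_cause[]):
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	static const char * const causes[] = { "AXI error", "illegal PQE" };
 *
 *	static uint32_t decode(uint32_t sts)
 *	{
 *		uint32_t i, clr = 0;
 *
 *		for (i = 0; i < 2; i++)
 *			if (sts & (1U << i)) {
 *				printf("err cause: %s\n", causes[i]);
 *				clr |= 1U << i;
 *			}
 *		return clr;	// value to write back to the W1C register
 *	}
 */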
static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
					bool extended_err_check, u64 *event_mask)
{
	enum razwi_event_sources module;
	u32 error_count = 0;
	u64 qman_base;
	u8 index;

	switch (event_type) {
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		qman_base = mmDCORE0_TPC0_QM_BASE +
				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
		module = RAZWI_TPC;
		break;
	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		qman_base = mmDCORE0_TPC6_QM_BASE;
		module = RAZWI_TPC;
		break;
	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
					GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
		module = RAZWI_MME;
		break;
	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
		module = RAZWI_PDMA;
		break;
	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
		module = RAZWI_ROT;
		break;
	default:
		return 0;
	}

	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);

	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
		error_count += _gaudi2_handle_qm_sei_err(hdev,
					qman_base + NIC_QM_OFFSET, event_type);

	if (extended_err_check) {
		/* check if RAZWI happened */
		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
		hl_check_for_glbl_errors(hdev);
	}

	return error_count;
}
static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u32 qid_base, error_count = 0;
	u64 qman_base;
	u8 index = 0;

	switch (event_type) {
	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
		index = event_type - GAUDI2_EVENT_TPC0_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
		index = event_type - GAUDI2_EVENT_TPC6_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
		index = event_type - GAUDI2_EVENT_TPC12_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
		index = event_type - GAUDI2_EVENT_TPC18_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC24_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
		qman_base = mmDCORE0_TPC6_QM_BASE;
		break;
	case GAUDI2_EVENT_MME0_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
		qman_base = mmDCORE0_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME1_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
		qman_base = mmDCORE1_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME2_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
		qman_base = mmDCORE2_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME3_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
		qman_base = mmDCORE3_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA0_QM:
		index = 0;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
		qman_base = mmDCORE0_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA1_QM:
		index = 1;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
		qman_base = mmDCORE0_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA2_QM:
		index = 2;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
		qman_base = mmDCORE1_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA3_QM:
		index = 3;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
		qman_base = mmDCORE1_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA4_QM:
		index = 4;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
		qman_base = mmDCORE2_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA5_QM:
		index = 5;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
		qman_base = mmDCORE2_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA6_QM:
		index = 6;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
		qman_base = mmDCORE3_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA7_QM:
		index = 7;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
		qman_base = mmDCORE3_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_PDMA0_QM:
		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
		qman_base = mmPDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_PDMA1_QM:
		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
		qman_base = mmPDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
		qman_base = mmROT0_QM_BASE;
		break;
	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
		qman_base = mmROT1_QM_BASE;
		break;
	default:
		return 0;
	}

	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
							qid_base, event_mask);

	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
	}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
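
/*
 * Illustrative sketch (not driver code): the per-engine bases above come
 * from plain stride arithmetic over a contiguous event range, e.g. for the
 * TPC QMs:
 *
 *	index = event_type - GAUDI2_EVENT_TPC0_QM;
 *	qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
 *
 * Worked example with hypothetical values: if GAUDI2_EVENT_TPC0_QM were
 * 100 and event_type were 103, index would be 3 and the base would land
 * three DCORE_TPC_OFFSET strides past TPC0's QM block.
 */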
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;

	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
		sts_clr_val = 0;
		sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
				(arc_farm * ARC_FARM_OFFSET));

		for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
			if (sts_val & BIT(i)) {
				gaudi2_print_event(hdev, event_type, true,
						"ARC FARM ARC %u err cause: %s",
						arc_farm, gaudi2_arc_sei_error_cause[i]);
				sts_clr_val |= BIT(i);
				error_count++;
			}
		}
		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
				sts_clr_val);
	}

	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
{
	u32 i, sts_val, sts_clr_val = 0, error_count = 0;

	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);

	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	hl_check_for_glbl_errors(hdev);

	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);

	return error_count;
}
static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
					u64 *event_mask)
{
	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_rot_error_cause[i]);
			error_count++;
		}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
					u64 *event_mask)
{
	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
			error_count++;
		}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
	int i;

	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
		/* DCORE DEC */
		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
	else
		/* PCIE DEC */
		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_dec_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	/* Write 1 to clear the errors */
	WREG32(sts_addr, sts_clr_val);

	return error_count;
}
static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
	int i;

	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened */
	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);

	hl_check_for_glbl_errors(hdev);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
	/*
	 * We have a single error cause here but the report mechanism is
	 * buggy. Hence there is no good reason to fetch the cause, so we
	 * just check for glbl_errors and exit.
	 */
	hl_check_for_glbl_errors(hdev);

	return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
	int i;

	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;

	sts_val = RREG32(sts_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
		if (sts_val & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", guadi2_mme_wap_error_cause[i]);
			sts_clr_val |= BIT(i);
			error_count++;
		}
	}

	/* check if RAZWI happened on WAP0/1 */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
	hl_check_for_glbl_errors(hdev);

	WREG32(sts_clr_addr, sts_clr_val);

	return error_count;
}
static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	/* If an AXI read or write error is received, an error is reported and
	 * an interrupt message is sent. Due to an HW errata, when reading the
	 * cause register of the KDMA engine, the reported error is always HBW,
	 * even if the actual error was caused by an LBW KDMA transaction.
	 */
	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
		if (intr_cause_data & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
			error_count++;
		}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
		if (intr_cause & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
			error_count++;
		}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
{
	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
							GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
							GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
							GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}

	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
	if (RREG32(razwi_happened_addr)) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
							GAUDI2_ENGINE_ID_PCIE, event_mask);
		WREG32(razwi_happened_addr, 0x1);
	}
}
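
/*
 * Illustrative note (not driver code): unlike the router handlers earlier,
 * which write back the value they read, the PCIE "RAZWI_HAPPENED"
 * registers here are cleared by writing the constant 0x1:
 *
 *	if (RREG32(addr))
 *		WREG32(addr, 0x1);
 *
 * which assumes a single indication bit at bit 0 rather than a per-source
 * bit mask.
 */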
static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data, u64 *event_mask)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
		if (!(intr_cause_data & BIT_ULL(i)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
		error_count++;

		switch (intr_cause_data & BIT_ULL(i)) {
		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
			hl_check_for_glbl_errors(hdev);
			break;
		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
			break;
		}
	}

	return error_count;
}
static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
		if (intr_cause_data & BIT_ULL(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
			error_count++;
		}
	}

	return error_count;
}
static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
		if (intr_cause_data & BIT_ULL(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
			error_count++;
		}
	}

	return error_count;
}
static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
					u64 *event_mask)
{
	u32 valid, val;
	u64 addr;

	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
		return;

	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));

	if (is_pmmu) {
		dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
	} else {
		addr = gaudi2_mmu_descramble_addr(hdev, addr);
		addr &= HW_UNSCRAMBLED_BITS_MASK;
		dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
				addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
	}

	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);

	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
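
/*
 * Illustrative sketch (not driver code): the capture register stores VA
 * bits 63:32 in its low word, so the faulting address is rebuilt as:
 *
 *	addr = val & VA_63_32_MASK;	// high half, still in bits 31:0
 *	addr <<= 32;			// move it into bits 63:32
 *	addr |= RREG32(...CAPTURE_VA);	// OR in VA bits 31:0
 *
 * addr must be a 64-bit type before the shift; shifting a u32 left by 32
 * would be undefined behavior.
 */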
static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
{
	u32 valid, val;
	u64 addr;

	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
		return;

	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));

	if (!is_pmmu)
		addr = gaudi2_mmu_descramble_addr(hdev, addr);

	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
				is_pmmu ? "PMMU" : "HMMU", addr);
	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
						u64 mmu_base, bool is_pmmu, u64 *event_mask)
{
	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
	int i;

	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);

	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
		if (spi_sei_cause & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);

			if (i == 0)
				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
			else if (i == 1)
				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);

			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);

			error_count++;
		}
	}

	/* Clear cause */
	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);

	/* Clear interrupt */
	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);

	return error_count;
}
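
/*
 * Illustrative sketch (not driver code): assuming the usual habanalabs
 * helper semantics, WREG32_AND(reg, ~mask) is a read-modify-write that
 * clears exactly the bits in mask:
 *
 *	u32 v = RREG32(reg);
 *	WREG32(reg, v & ~mask);
 *
 * Here the full spi_sei_cause snapshot is cleared in the cause register,
 * while the separate interrupt-clear register only receives the bits whose
 * table entry defines a valid clear_bit (clear_bit >= 0).
 */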
static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
{
	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
	int i;

	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;

	sei_cause_val = RREG32(sei_cause_addr);
	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
	cq_intr_val = RREG32(cq_intr_addr);

	/* SEI interrupt */
	if (sei_cause_cause) {
		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
					sei_cause_val);

		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
			if (!(sei_cause_cause & BIT(i)))
				continue;

			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s. %s: 0x%X",
				gaudi2_sm_sei_cause[i].cause_name,
				gaudi2_sm_sei_cause[i].log_name,
				sei_cause_log);
			error_count++;
			break;
		}

		/* Clear SM_SEI_CAUSE */
		WREG32(sei_cause_addr, 0);
	}

	/* CQ interrupt */
	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
		cq_intr_queue_index =
				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
					cq_intr_val);

		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
				sm_index, cq_intr_queue_index);
		error_count++;

		/* Clear CQ_INTR */
		WREG32(cq_intr_addr, 0);
	}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static u64 get_hmmu_base(u16 event_type)
{
	u8 dcore, index_in_dcore;

	switch (event_type) {
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
	default:
		return ULONG_MAX;
	}

	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
}
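
/*
 * Illustrative sketch (not driver code): once the event is mapped to a
 * (dcore, index_in_dcore) pair, the HMMU register base is plain stride
 * arithmetic:
 *
 *	base = mmDCORE0_HMMU0_MMU_BASE
 *	     + dcore * DCORE_OFFSET			// jump to the right dcore
 *	     + index_in_dcore * DCORE_HMMU_OFFSET;	// then the right HMMU
 *
 * With hypothetical strides DCORE_OFFSET == 0x2000000 and
 * DCORE_HMMU_OFFSET == 0x10000, HMMU (2, 1) would sit at
 * base + 0x4010000 relative to HMMU (0, 0).
 */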
static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	bool is_pmmu = false;
	u32 error_count = 0;
	u64 mmu_base;

	switch (event_type) {
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
		mmu_base = get_hmmu_base(event_type);
		break;
	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
		is_pmmu = true;
		mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return 0;
	}

	if (mmu_base == ULONG_MAX)
		return 0;

	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
							is_pmmu, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
/* returns true if hard reset is required (ECC DERR or read parity), false otherwise (ECC SERR) */
static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
{
	u32 addr, beat, beat_shift;
	bool rc = false;

	dev_err_ratelimited(hdev->dev,
			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));

	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
	dev_err_ratelimited(hdev->dev,
			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));

	/* For each beat (RDQS edge), look for possible errors and print relevant info */
	for (beat = 0 ; beat < 4 ; beat++) {
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
						beat,
						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));

		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
						beat,
						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
			rc |= true;
		}

		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
				(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
			dev_err_ratelimited(hdev->dev,
					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
					beat,
					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
			rc |= true;
		}

		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
	}

	return rc;
}
static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
				derr & 0x3, derr & 0xc);

	/* JIRA H6-3286 - the following prints may not be valid */
	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
		dev_err_ratelimited(hdev->dev,
				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
				i,
				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
	}
}
static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
	__le32 *col_cmd = ca_par_err_data->dbg_col;
	__le16 *row_cmd = ca_par_err_data->dbg_row;
	u32 i;

	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
				le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
				le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
}
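
/*
 * Illustrative sketch (not driver code): GENMASK(h, l) builds a contiguous
 * bit mask covering bits h down to l, so the prints above keep only the
 * 14-bit row and 18-bit column fields:
 *
 *	GENMASK(13, 0) == 0x3fff	// bits 13..0
 *	GENMASK(17, 0) == 0x3ffff	// bits 17..0
 *
 * The casts to u16/u32 match the operand width of the le16/le32 command
 * words being masked.
 */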
/* Returns true if hard reset is needed or false otherwise */
static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
					struct hl_eq_hbm_sei_data *sei_data)
{
	bool require_hard_reset = false;
	u32 hbm_id, mc_id, cause_idx;

	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;

	cause_idx = sei_data->hdr.sei_cause;
	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
		gaudi2_print_event(hdev, event_type, true,
			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
		return true;
	}

	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
		hbm_mc_sei_cause[cause_idx]);

	/* Print error-specific info */
	switch (cause_idx) {
	case HBM_SEI_CATTRIP:
		require_hard_reset = true;
		break;

	case HBM_SEI_CMD_PARITY_EVEN:
		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
						le32_to_cpu(sei_data->hdr.cnt));
		require_hard_reset = true;
		break;

	case HBM_SEI_CMD_PARITY_ODD:
		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
						le32_to_cpu(sei_data->hdr.cnt));
		require_hard_reset = true;
		break;

	case HBM_SEI_WRITE_DATA_PARITY_ERR:
		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
						le32_to_cpu(sei_data->hdr.cnt));
		require_hard_reset = true;
		break;

	case HBM_SEI_READ_ERR:
		/* Unlike other SEI events, a read error requires further processing of
		 * the raw data in order to determine the root cause.
		 */
		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
								&sei_data->read_err_info,
								le32_to_cpu(sei_data->hdr.cnt));
		break;

	default:
		break;
	}

	require_hard_reset |= !!sei_data->hdr.is_critical;

	return require_hard_reset;
}
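
/*
 * Illustrative sketch (not driver code): the SEI events are laid out as
 * four consecutive event IDs per HBM stack (two MCs, each with a SEVERE
 * and a NON_SEVERE event, per the GAUDI2_EVENT_HBM*_MC*_SEI_* names), so
 * for an offset N = event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE:
 *
 *	hbm_id = N / 4;		// four events per HBM stack
 *	mc_id  = (N / 2) % 2;	// two events per MC within the stack
 *
 * Worked example: N == 6 gives hbm_id == 1 and mc_id == 1, i.e. the
 * second MC of the second HBM.
 */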
static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
				u64 intr_cause_data)
{
	if (intr_cause_data) {
		gaudi2_print_event(hdev, event_type, true,
			"temperature error cause: %#llx", intr_cause_data);
		return 1;
	}

	return 0;
}
static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
{
	u32 i, error_count = 0;

	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
		if (intr_cause_data & hbm_mc_spi[i].mask) {
			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
				hbm_mc_spi[i].cause);
			error_count++;
		}

	return error_count;
}
static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	gaudi2_print_event(hdev, event_type, false,
			"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
			le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
			q->pi, atomic_read(&q->ci));
}
static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
{
	u32 p2p_intr, msix_gw_intr, error_count = 0;

	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);

	if (p2p_intr) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie p2p transaction terminated due to security, req_id(0x%x)",
			RREG32(mmPCIE_WRAP_P2P_REQ_ID));

		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
		error_count++;
	}

	if (msix_gw_intr) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));

		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
		error_count++;
	}

	return error_count;
}
static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
			struct hl_eq_pcie_drain_ind_data *drain_data)
{
	u64 cause, error_count = 0;

	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);

	if (cause & BIT_ULL(0)) {
		dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
		error_count++;
	}

	if (cause & BIT_ULL(1)) {
		dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
		error_count++;
	}

	return error_count;
}
static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
{
	u32 error_count = 0;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
		if (intr_cause_data & BIT_ULL(i)) {
			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
				gaudi2_psoc_axi_drain_interrupts_cause[i]);
			error_count++;
		}
	}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	gaudi2_print_event(hdev, event_type, false,
		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
					struct hl_eq_engine_arc_intr_data *data)
{
	struct hl_engine_arc_dccm_queue_full_irq *q;
	u32 intr_type, engine_id;
	u64 payload;

	intr_type = le32_to_cpu(data->intr_type);
	engine_id = le32_to_cpu(data->engine_id);
	payload = le64_to_cpu(data->payload);

	switch (intr_type) {
	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;

		gaudi2_print_event(hdev, event_type, true,
				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
				engine_id, intr_type, q->queue_index);
		return 1;
	default:
		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
		return 0;
	}
}
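
/*
 * Illustrative sketch (not driver code): the DCCM-full case reinterprets
 * the 64-bit payload in place by casting its address to a layout struct.
 * With a hypothetical layout (the real one is
 * struct hl_engine_arc_dccm_queue_full_irq):
 *
 *	struct queue_full_irq {
 *		u32 queue_index;
 *		u32 pad;
 *	};
 *
 *	u64 payload = le64_to_cpu(data->payload);
 *	struct queue_full_irq *q = (struct queue_full_irq *)&payload;
 *
 * The payload is converted to CPU endianness before the cast, so the
 * struct fields are read in native byte order.
 */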
static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
{
	enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
	u16 index;

	switch (event_type) {
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
		index = event_type - GAUDI2_EVENT_TPC0_QM;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME0_QM:
		index = 0;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_QM:
		index = 1;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_QM:
		index = 2;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_QM:
		index = 3;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_KDMA_BM_SPMU:
	case GAUDI2_EVENT_KDMA0_CORE:
		return GAUDI2_ENGINE_ID_KDMA;
	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA0_CORE:
	case GAUDI2_EVENT_PDMA0_BM_SPMU:
	case GAUDI2_EVENT_PDMA0_QM:
		return GAUDI2_ENGINE_ID_PDMA_0;
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA1_CORE:
	case GAUDI2_EVENT_PDMA1_BM_SPMU:
	case GAUDI2_EVENT_PDMA1_QM:
		return GAUDI2_ENGINE_ID_PDMA_1;
	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		type = GAUDI2_BLOCK_TYPE_DEC;
		break;
	case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
		index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
		type = GAUDI2_BLOCK_TYPE_DEC;
		break;
	case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
		index = event_type - GAUDI2_EVENT_NIC0_QM0;
		return GAUDI2_ENGINE_ID_NIC0_0 + index;
	case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
		index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
	case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
		index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
		return GAUDI2_ENGINE_ID_ROT_0;
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		return GAUDI2_ENGINE_ID_ROT_1;
	case GAUDI2_EVENT_HDMA0_BM_SPMU:
	case GAUDI2_EVENT_HDMA0_QM:
	case GAUDI2_EVENT_HDMA0_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA1_BM_SPMU:
	case GAUDI2_EVENT_HDMA1_QM:
	case GAUDI2_EVENT_HDMA1_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA2_BM_SPMU:
	case GAUDI2_EVENT_HDMA2_QM:
	case GAUDI2_EVENT_HDMA2_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA3_BM_SPMU:
	case GAUDI2_EVENT_HDMA3_QM:
	case GAUDI2_EVENT_HDMA3_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA4_BM_SPMU:
	case GAUDI2_EVENT_HDMA4_QM:
	case GAUDI2_EVENT_HDMA4_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA5_BM_SPMU:
	case GAUDI2_EVENT_HDMA5_QM:
	case GAUDI2_EVENT_HDMA5_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA6_BM_SPMU:
	case GAUDI2_EVENT_HDMA6_QM:
	case GAUDI2_EVENT_HDMA6_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA7_BM_SPMU:
	case GAUDI2_EVENT_HDMA7_QM:
	case GAUDI2_EVENT_HDMA7_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
	default:
		return GAUDI2_ENGINE_ID_SIZE;
	}

	switch (type) {
	case GAUDI2_BLOCK_TYPE_TPC:
		switch (index) {
		case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
			return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
		case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
			return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
		case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
			return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
		case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
			return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
		default:
			break;
		}
		break;
	case GAUDI2_BLOCK_TYPE_MME:
		switch (index) {
		case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
		case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
		case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
		case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
		default:
			break;
		}
		break;
	case GAUDI2_BLOCK_TYPE_DEC:
		switch (index) {
		case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
		case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
		case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
		default:
			break;
		}
		break;
	default:
		break;
	}

	return U16_MAX;
}
static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
{
	hdev->eq_heartbeat_received = true;
}
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	bool reset_required = false, is_critical = false;
	u32 index, ctl, reset_flags = 0, error_count = 0;
	u64 event_mask = 0;
	u16 event_type;

	ctl = le32_to_cpu(eq_entry->hdr.ctl);
	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);

	if (event_type >= GAUDI2_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI2_EVENT_SIZE - 1);
		return;
	}

	gaudi2->events_stat[event_type]++;
	gaudi2->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
		fallthrough;
	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		is_critical = eq_entry->ecc_data.is_critical;
		error_count++;
		break;

	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
		fallthrough;
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		fallthrough;
	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
		break;

	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
					&eq_entry->razwi_with_intr_cause, &event_mask);
		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
						&eq_entry->razwi_with_intr_cause, &event_mask);
		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
					&eq_entry->razwi_with_intr_cause, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_DEC0_SPI:
	case GAUDI2_EVENT_DEC1_SPI:
	case GAUDI2_EVENT_DEC2_SPI:
	case GAUDI2_EVENT_DEC3_SPI:
	case GAUDI2_EVENT_DEC4_SPI:
	case GAUDI2_EVENT_DEC5_SPI:
	case GAUDI2_EVENT_DEC6_SPI:
	case GAUDI2_EVENT_DEC7_SPI:
	case GAUDI2_EVENT_DEC8_SPI:
	case GAUDI2_EVENT_DEC9_SPI:
		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
					GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_KDMA0_CORE:
		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PMMU_FATAL_0:
		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
			reset_required = true;
			is_critical = eq_entry->sei_data.hdr.is_critical;
		}
		error_count++;
		break;

	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
		error_count = gaudi2_handle_hbm_mc_spi(hdev,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
			is_critical = true;
		break;

	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
		error_count = gaudi2_handle_psoc_drain(hdev,
				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI2_EVENT_CPU_AXI_ECC:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;
	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_PCIE_FATAL_ERR:
		error_count = GAUDI2_NA_EVENT_CAUSE;
		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;
	case GAUDI2_EVENT_TPC0_BMON_SPMU:
	case GAUDI2_EVENT_TPC1_BMON_SPMU:
	case GAUDI2_EVENT_TPC2_BMON_SPMU:
	case GAUDI2_EVENT_TPC3_BMON_SPMU:
	case GAUDI2_EVENT_TPC4_BMON_SPMU:
	case GAUDI2_EVENT_TPC5_BMON_SPMU:
	case GAUDI2_EVENT_TPC6_BMON_SPMU:
	case GAUDI2_EVENT_TPC7_BMON_SPMU:
	case GAUDI2_EVENT_TPC8_BMON_SPMU:
	case GAUDI2_EVENT_TPC9_BMON_SPMU:
	case GAUDI2_EVENT_TPC10_BMON_SPMU:
	case GAUDI2_EVENT_TPC11_BMON_SPMU:
	case GAUDI2_EVENT_TPC12_BMON_SPMU:
	case GAUDI2_EVENT_TPC13_BMON_SPMU:
	case GAUDI2_EVENT_TPC14_BMON_SPMU:
	case GAUDI2_EVENT_TPC15_BMON_SPMU:
	case GAUDI2_EVENT_TPC16_BMON_SPMU:
	case GAUDI2_EVENT_TPC17_BMON_SPMU:
	case GAUDI2_EVENT_TPC18_BMON_SPMU:
	case GAUDI2_EVENT_TPC19_BMON_SPMU:
	case GAUDI2_EVENT_TPC20_BMON_SPMU:
	case GAUDI2_EVENT_TPC21_BMON_SPMU:
10147 case GAUDI2_EVENT_TPC22_BMON_SPMU:
10148 case GAUDI2_EVENT_TPC23_BMON_SPMU:
10149 case GAUDI2_EVENT_TPC24_BMON_SPMU:
10150 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
10151 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
10152 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
10153 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
10154 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
10155 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
10156 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
10157 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
10158 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
10159 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
10160 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
10161 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
10162 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
10164 case GAUDI2_EVENT_DEC0_BMON_SPMU:
10165 case GAUDI2_EVENT_DEC1_BMON_SPMU:
10166 case GAUDI2_EVENT_DEC2_BMON_SPMU:
10167 case GAUDI2_EVENT_DEC3_BMON_SPMU:
10168 case GAUDI2_EVENT_DEC4_BMON_SPMU:
10169 case GAUDI2_EVENT_DEC5_BMON_SPMU:
10170 case GAUDI2_EVENT_DEC6_BMON_SPMU:
10171 case GAUDI2_EVENT_DEC7_BMON_SPMU:
10172 case GAUDI2_EVENT_DEC8_BMON_SPMU:
10173 case GAUDI2_EVENT_DEC9_BMON_SPMU:
10174 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
10175 error_count = GAUDI2_NA_EVENT_CAUSE;
10176 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10179 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
10180 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
10181 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
10182 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
10183 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
10184 error_count = GAUDI2_NA_EVENT_CAUSE;
10187 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
10188 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
10189 error_count = GAUDI2_NA_EVENT_CAUSE;
10190 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10191 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10194 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
10195 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10196 error_count = GAUDI2_NA_EVENT_CAUSE;
10197 /* Do nothing- FW will handle it */
10200 case GAUDI2_EVENT_PCIE_P2P_MSIX:
10201 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
10202 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10205 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
10206 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
10207 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
10208 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10211 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
10212 error_count = GAUDI2_NA_EVENT_CAUSE;
10213 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10216 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
10217 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
10218 le64_to_cpu(eq_entry->data[0]));
10219 error_count = GAUDI2_NA_EVENT_CAUSE;
10220 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10222 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
10223 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
10224 le64_to_cpu(eq_entry->data[0]));
10225 error_count = GAUDI2_NA_EVENT_CAUSE;
10226 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10229 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
10230 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
10231 error_count = GAUDI2_NA_EVENT_CAUSE;
10232 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10233 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10236 case GAUDI2_EVENT_ARC_DCCM_FULL:
10237 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
10238 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10241 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
10242 case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
10243 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10244 error_count = GAUDI2_NA_EVENT_CAUSE;
10245 is_critical = true;
10248 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
10249 case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
10250 case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
10251 case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
10252 case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
10253 case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
10254 error_count = GAUDI2_NA_EVENT_CAUSE;
10255 dev_info_ratelimited(hdev->dev, "%s event received\n",
10256 gaudi2_irq_map_table[event_type].name);
10259 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
10260 hl_eq_heartbeat_event_handle(hdev);
10261 error_count = GAUDI2_NA_EVENT_CAUSE;
10264 if (gaudi2_irq_map_table[event_type].valid) {
10265 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
10267 error_count = GAUDI2_NA_EVENT_CAUSE;
10271 if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
10272 hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
10274 /* Make sure to dump an error in case no error cause was printed so far.
10275 * Note that although we have counted the errors, we use this number as
10278 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
10279 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
10280 else if (error_count == 0)
10281 gaudi2_print_event(hdev, event_type, true,
10282 "No error cause for H/W event %u", event_type);
10284 if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
10286 if (reset_required ||
10287 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10288 reset_flags |= HL_DRV_RESET_HARD;
10290 if (hdev->hard_reset_on_fw_events ||
10291 (hdev->asic_prop.fw_security_enabled && is_critical))
10295 /* Send unmask irq only for interrupts not classified as MSG */
10296 if (!gaudi2_irq_map_table[event_type].msg)
10297 hl_fw_unmask_irq(hdev, event_type);
10300 hl_notifier_event_send_all(hdev, event_mask);
10305 if (hdev->asic_prop.fw_security_enabled && is_critical) {
10306 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10307 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10309 reset_flags |= HL_DRV_RESET_DELAY;
10311 /* escalate general hw errors to critical/fatal error */
10312 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10313 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10315 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10316 hl_device_cond_reset(hdev, reset_flags, event_mask);
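/*
 * Helper for the scrub path below: build one LIN_DMA "memset" packet
 * (MEMSET + WRCOMP + EB, so completion is reported through the SOB that was
 * configured in the EDMA WR_COMP registers), copy it quadword by quadword
 * into HBM at phys_addr through the device-memory access path, and hand that
 * address to the EDMA QM as a CB without waiting for completion here.
 */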
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
	struct packet_lin_dma *lin_dma_pkt,
	u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
	u32 ctl, pkt_size;
	int rc = 0, i;

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	pkt_size = sizeof(struct packet_lin_dma);

	/* copy the packet into HBM, one quadword at a time */
	for (i = 0; i < 3; i++) {
		rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
					phys_addr + (i * sizeof(u64)),
					((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
		if (rc) {
			dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
					phys_addr);
			return rc;
		}
	}

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

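/*
 * Scrub/memset flow for device memory: put every enabled EDMA core into MMU
 * bypass, point each core's write-completion at a single SOB, then
 * round-robin up-to-2GB LIN_DMA chunks across the enabled engines and poll
 * that SOB until it has counted every submitted packet. The packets
 * themselves are staged at the start of the DRAM user section, which is why
 * a scrub that starts inside that window is shifted past it and the staging
 * area is zeroed before returning.
 */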
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc = 0, dma_num = 0, i;
	void *lin_dma_pkts_arr;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);
	cb_len = pkt_size * num_of_pkts;

	/*
	 * if we're not scrubbing HMMU or NIC reserved sections in hbm,
	 * then it is the scrubbing of the user section, as we use the start of
	 * the user section to store the CB of the EDMA QM, so shift the start
	 * address of the scrubbing accordingly and scrub the CB section before
	 * leaving this function.
	 */
	if ((addr >= prop->dram_user_base_address) &&
				(addr < prop->dram_user_base_address + cb_len))
		cur_addr += (prop->dram_user_base_address + cb_len) - addr;

	lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * set mmu bypass for the scrubbing - all EDMAs are configured the same
	 * so save only the first one to restore later.
	 * also set the sob addr for all edma cores for completion.
	 * set QM as trusted to allow it to access physical address with MMU bp.
	 */
	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
					edma_offset, mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
					lower_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
					upper_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
					comp_val);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, true);
		}
	}

	WREG32(sob_addr, 0);

	while (cur_addr < end_addr) {
		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
					continue;

				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);

				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
					prop->dram_user_base_address + (dma_num * pkt_size),
					edma_queues_id[dcore] + edma_idx * 4,
					chunk_size, cur_addr, val);
				if (rc)
					goto end;

				dma_num++;
				cur_addr += chunk_size;
				if (cur_addr == end_addr)
					goto edma_wait;
			}
		}
	}

edma_wait:
	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
	if (rc) {
		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n",
				busy, dma_num);
		goto end;
	}
end:
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, false);
		}
	}

	/* reuse the first quadword of the (now idle) packet array as a zero source */
	memset(lin_dma_pkts_arr, 0, sizeof(u64));

	/* Zero the HBM area where we copied the CB */
	for (i = 0 ; i < cb_len ; i += sizeof(u64))
		rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
				prop->dram_user_base_address + i,
				(u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);

	WREG32(sob_addr, 0);

	kvfree(lin_dma_pkts_arr);

	return rc;
}

static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 size = prop->dram_end_address - prop->dram_user_base_address;
	int rc;

	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
	if (rc)
		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
				prop->dram_user_base_address, size);

	return rc;
}

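/*
 * Full device scrub: SRAM first (only 64KB under PLDM, to keep simulation
 * time reasonable), then the whole DRAM user range via the EDMA engines.
 */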
static int gaudi2_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 val = hdev->memory_scrub_val;
	u64 addr, size;
	int rc;

	if (!hdev->memory_scrub)
		return 0;

	/* scrub SRAM */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
		return rc;
	}

	/* scrub DRAM */
	rc = gaudi2_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
		return rc;
	}

	return 0;
}

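/*
 * The restore below clears only the user-available part of each sync
 * manager: for dcore0 the CQ/monitor/SOB ranges start at the first
 * user-available index (hence the "+ offset" in both the start address and
 * the range size), while dcores 1-3 are cleared in full, one DCORE_OFFSET
 * stride at a time.
 */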
static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
{
	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
	u32 val, size, offset;
	int dcore_id;

	offset = hdev->asic_prop.first_available_cq[0] * 4;
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);

	/* memset dcore0 CQ registers */
	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

		cq_lbw_l_addr += DCORE_OFFSET;
		cq_lbw_h_addr += DCORE_OFFSET;
		cq_lbw_data_addr += DCORE_OFFSET;
		cq_base_l_addr += DCORE_OFFSET;
		cq_base_h_addr += DCORE_OFFSET;
		cq_size_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}

static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0 ;
						hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
{
	u32 reg_base, hw_queue_id;

	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3 ;
						hw_queue_id += NUM_OF_PQ_PER_QMAN) {
		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
			continue;

		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);

		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
	}

	/* Flush all WREG to prevent race */
	RREG32(mmPDMA0_QM_ARB_CFG_0);
}

static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}

static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
						struct dup_block_ctx *cfg_ctx)
{
	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
	u64 seq;
	int i;

	for (i = 0 ; i < cfg_ctx->instances ; i++) {
		seq = block_idx * cfg_ctx->instances + i;

		/* skip disabled instance */
		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
			continue;

		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
					cfg_ctx->data);
	}
}

static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
						u64 mask)
{
	int i;

	cfg_ctx->enabled_mask = mask;

	for (i = 0 ; i < cfg_ctx->blocks ; i++)
		gaudi2_init_block_instances(hdev, i, cfg_ctx);
}

void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}

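/*
 * debugfs DMA read, KDMA based: allocate a 2MB host bounce buffer, reserve
 * and MMU-map a matching device VA for it, then copy the requested region in
 * up-to-2MB KDMA jobs, memcpy'ing every chunk from the bounce buffer into
 * the caller's blob before reusing the buffer for the next chunk.
 */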
static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	/* Fetch the ctx */
	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	rc = hl_mmu_invalidate_cache_range(hdev, false,
				MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				ctx->asid, reserved_va_base, SZ_2M);
	if (rc) {
		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
		goto unreserve_va;
	}

	mutex_unlock(&hdev->mmu_lock);

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	if (rc)
		goto unreserve_va;

	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				ctx->asid, reserved_va_base, SZ_2M);

unreserve_va:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}

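/*
 * The internal CB pool is a gen_pool carved out of a single coherent host
 * allocation. Its minimum allocation order is the smaller of the signal/wait
 * CB sizes so either kind of CB can be carved from it, and the whole pool is
 * mapped contiguously into the device MMU at a reserved host-range VA.
 */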
static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int min_alloc_order, rc;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
					gaudi2_get_wait_cb_size(hdev)));

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev, "Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
				HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}

static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}

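/*
 * Virtual MSI-X doorbell: a single host page (virt_msix_db_dma_addr) is
 * mapped at a fixed, reserved device VA for each user context, so engines
 * that go through the PMMU can reach it with a plain write. The interrupt
 * routing itself is configured elsewhere in this driver; these two helpers
 * only manage the per-context mapping.
 */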
static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
			gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}

static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
			prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}

static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear user registers if the device has just
	 * performed reset, we restore only nic qm registers
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}

static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
	 * cyclic index. The SOB value is increased when each of the CS jobs is
	 * completed. When the SOB reaches the number of CS jobs, the monitor
	 * generates MSI-X interrupt.
	 */

	sob_id = mon_id = index;
	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
			(1 << CQ_ENTRY_READY_SHIFT) | index;

	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
				cs->jobs_cnt);

	return 0;
}

static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}

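/*
 * Signal CB: a single MSG_SHORT packet that adds 1 to the target SOB
 * (SOB base, ADD mode). EB is taken from the caller, MB is always set.
 * These helpers return the accumulated CB size so callers can chain
 * packet writes one after the other.
 */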
static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
		return 0;
	}

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

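/*
 * Wait CB layout, in packet order: three MSG_SHORT writes that set the
 * monitor payload address (low/high words of the QM fence counter) and the
 * payload data (1), a fourth MSG_SHORT that arms the monitor against the
 * SOB group/value/mask in ">=" mode, and finally a FENCE packet that blocks
 * the stream until the armed monitor writes the payload.
 */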
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}

static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}

static u64 gaudi2_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

/*
 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned
 *                   address to DMMU page-size address (64MB) before mapping
 *                   it in the MMU.
 * The operation is performed on both the virtual and physical addresses.
 * for device with 6 HBMs the scramble is:
 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
 *
 * Example:
 * =============================================================================
 * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
 * Phys address                                                     in MMU last
 *                                                                     HOP
 * =============================================================================
 * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
 * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
 * =============================================================================
 */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}

static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
					VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}

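/*
 * Quick sanity check that descramble inverts scramble, for the 6-HBM case
 * (divisor = 48MB, DMMU page = 64MB):
 *   scramble:   sva = (va / 48M) * 64M + (va % 48M)
 *   since (va % 48M) < 64M:  sva / 64M == va / 48M  and  sva % 64M == va % 48M
 *   descramble: (sva / 64M) * 48M + (sva % 64M) == va
 */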
static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 base = 0, dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		goto out;
	}

	if (core_id < 8) {
		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
		dec_id = core_id % NUM_OF_DEC_PER_DCORE;

		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
				dec_id * DCORE_VDEC_OFFSET;
	} else {
		/* PCIe Shared Decoder */
		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}
out:
	return base;
}

static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int i;

	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
			*block_id = i;
			if (block_size)
				*block_size = gaudi2->mapped_blocks[i].size;
			return 0;
		}
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}

static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 offset_in_bar;
	u64 address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;

	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
			block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}

static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		WREG32(irq_handler_offset,
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
}

static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
	table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
}

static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * 4;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	/* Not implemented */
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

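/*
 * Page-size reconciliation: host mappings must be expressed in PMMU
 * page-size units, while DRAM mappings are expressed in DRAM page-size units
 * (e.g. a non-power-of-2 48MB page with 6 functional HBMs) and rely on
 * gaudi2_mmu_scramble_addr() above to land on the DMMU's 64MB pages.
 */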
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
					u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
	 * than DRAM page size).
	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
	 * this mismatch when calculating the address to place in the MMU page table.
	 * (in that case also make sure that the dram_page_size is not greater than the
	 * mmu page size)
	 */
	*real_page_size = prop->dram_page_size;
	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

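/*
 * PTE accessors: the page tables live in DRAM, so reads and writes go
 * through the DRAM BAR window, relative to the BAR base that was last set
 * (gaudi2->dram_bar_cur_addr). Both accessors bail out while a hard reset
 * is pending.
 */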
static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 val;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));

	return val;
}

static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
}

static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = gaudi2_read_pte,
	.write_pte = gaudi2_write_pte,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}