Merge remote-tracking branch 'torvalds/master' into perf/core
[sfrench/cifs-2.6.git] / drivers / misc / habanalabs / common / firmware_if.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "habanalabs.h"
9 #include "../include/common/hl_boot_if.h"
10
11 #include <linux/firmware.h>
12 #include <linux/slab.h>
13
14 #define FW_FILE_MAX_SIZE        0x1400000 /* maximum size of 20MB */
15 /**
16  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
17  *
18  * @hdev: pointer to hl_device structure.
19  * @fw_name: the firmware image name
20  * @dst: IO memory mapped address space to copy firmware to
21  * @src_offset: offset in src FW to copy from
22  * @size: amount of bytes to copy (0 to copy the whole binary)
23  *
24  * Copy fw code from firmware file to device memory.
25  *
26  * Return: 0 on success, non-zero for failure.
27  */
28 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
29                                 void __iomem *dst, u32 src_offset, u32 size)
30 {
31         const struct firmware *fw;
32         const void *fw_data;
33         size_t fw_size;
34         int rc;
35
36         rc = request_firmware(&fw, fw_name, hdev->dev);
37         if (rc) {
38                 dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
39                 goto out;
40         }
41
42         fw_size = fw->size;
43         if ((fw_size % 4) != 0) {
44                 dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
45                         fw_name, fw_size);
46                 rc = -EINVAL;
47                 goto out;
48         }
49
50         dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
51
52         if (fw_size > FW_FILE_MAX_SIZE) {
53                 dev_err(hdev->dev,
54                         "FW file size %zu exceeds maximum of %u bytes\n",
55                         fw_size, FW_FILE_MAX_SIZE);
56                 rc = -EINVAL;
57                 goto out;
58         }
59
60         if (size - src_offset > fw_size) {
61                 dev_err(hdev->dev,
62                         "size to copy(%u) and offset(%u) are invalid\n",
63                         size, src_offset);
64                 rc = -EINVAL;
65                 goto out;
66         }
67
68         if (size)
69                 fw_size = size;
70
71         fw_data = (const void *) fw->data;
72
73         memcpy_toio(dst, fw_data + src_offset, fw_size);
74
75 out:
76         release_firmware(fw);
77         return rc;
78 }
79
80 int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
81 {
82         struct cpucp_packet pkt = {};
83
84         pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
85
86         return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
87                                                 sizeof(pkt), 0, NULL);
88 }
89
90 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
91                                 u16 len, u32 timeout, u64 *result)
92 {
93         struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
94         struct cpucp_packet *pkt;
95         dma_addr_t pkt_dma_addr;
96         u32 tmp, expected_ack_val;
97         int rc = 0;
98
99         pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
100                                                                 &pkt_dma_addr);
101         if (!pkt) {
102                 dev_err(hdev->dev,
103                         "Failed to allocate DMA memory for packet to CPU\n");
104                 return -ENOMEM;
105         }
106
107         memcpy(pkt, msg, len);
108
109         mutex_lock(&hdev->send_cpu_message_lock);
110
111         if (hdev->disabled)
112                 goto out;
113
114         if (hdev->device_cpu_disabled) {
115                 rc = -EIO;
116                 goto out;
117         }
118
119         /* set fence to a non valid value */
120         pkt->fence = UINT_MAX;
121
122         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
123         if (rc) {
124                 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
125                 goto out;
126         }
127
128         if (hdev->asic_prop.fw_app_security_map &
129                         CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
130                 expected_ack_val = queue->pi;
131         else
132                 expected_ack_val = CPUCP_PACKET_FENCE_VAL;
133
134         rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
135                                 (tmp == expected_ack_val), 1000,
136                                 timeout, true);
137
138         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
139
140         if (rc == -ETIMEDOUT) {
141                 dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
142                 hdev->device_cpu_disabled = true;
143                 goto out;
144         }
145
146         tmp = le32_to_cpu(pkt->ctl);
147
148         rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
149         if (rc) {
150                 dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
151                         rc,
152                         (tmp & CPUCP_PKT_CTL_OPCODE_MASK)
153                                                 >> CPUCP_PKT_CTL_OPCODE_SHIFT);
154                 rc = -EIO;
155         } else if (result) {
156                 *result = le64_to_cpu(pkt->result);
157         }
158
159 out:
160         mutex_unlock(&hdev->send_cpu_message_lock);
161
162         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
163
164         return rc;
165 }
166
167 int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
168 {
169         struct cpucp_packet pkt;
170         u64 result;
171         int rc;
172
173         memset(&pkt, 0, sizeof(pkt));
174
175         pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
176                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
177         pkt.value = cpu_to_le64(event_type);
178
179         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
180                                                 0, &result);
181
182         if (rc)
183                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
184
185         return rc;
186 }
187
188 int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
189                 size_t irq_arr_size)
190 {
191         struct cpucp_unmask_irq_arr_packet *pkt;
192         size_t total_pkt_size;
193         u64 result;
194         int rc;
195
196         total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
197                         irq_arr_size;
198
199         /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
200         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
201
202         /* total_pkt_size is casted to u16 later on */
203         if (total_pkt_size > USHRT_MAX) {
204                 dev_err(hdev->dev, "too many elements in IRQ array\n");
205                 return -EINVAL;
206         }
207
208         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
209         if (!pkt)
210                 return -ENOMEM;
211
212         pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
213         memcpy(&pkt->irqs, irq_arr, irq_arr_size);
214
215         pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
216                                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
217
218         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
219                                                 total_pkt_size, 0, &result);
220
221         if (rc)
222                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
223
224         kfree(pkt);
225
226         return rc;
227 }
228
229 int hl_fw_test_cpu_queue(struct hl_device *hdev)
230 {
231         struct cpucp_packet test_pkt = {};
232         u64 result;
233         int rc;
234
235         test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
236                                         CPUCP_PKT_CTL_OPCODE_SHIFT);
237         test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
238
239         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
240                                                 sizeof(test_pkt), 0, &result);
241
242         if (!rc) {
243                 if (result != CPUCP_PACKET_FENCE_VAL)
244                         dev_err(hdev->dev,
245                                 "CPU queue test failed (%#08llx)\n", result);
246         } else {
247                 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
248         }
249
250         return rc;
251 }
252
253 void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
254                                                 dma_addr_t *dma_handle)
255 {
256         u64 kernel_addr;
257
258         kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
259
260         *dma_handle = hdev->cpu_accessible_dma_address +
261                 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
262
263         return (void *) (uintptr_t) kernel_addr;
264 }
265
266 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
267                                         void *vaddr)
268 {
269         gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
270                         size);
271 }
272
273 int hl_fw_send_heartbeat(struct hl_device *hdev)
274 {
275         struct cpucp_packet hb_pkt = {};
276         u64 result;
277         int rc;
278
279         hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
280                                         CPUCP_PKT_CTL_OPCODE_SHIFT);
281         hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
282
283         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
284                                                 sizeof(hb_pkt), 0, &result);
285
286         if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
287                 rc = -EIO;
288
289         return rc;
290 }
291
292 static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
293                 u32 cpu_security_boot_status_reg)
294 {
295         u32 err_val, security_val;
296
297         /* Some of the firmware status codes are deprecated in newer f/w
298          * versions. In those versions, the errors are reported
299          * in different registers. Therefore, we need to check those
300          * registers and print the exact errors. Moreover, there
301          * may be multiple errors, so we need to report on each error
302          * separately. Some of the error codes might indicate a state
303          * that is not an error per-se, but it is an error in production
304          * environment
305          */
306         err_val = RREG32(boot_err0_reg);
307         if (!(err_val & CPU_BOOT_ERR0_ENABLED))
308                 return 0;
309
310         if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
311                 dev_err(hdev->dev,
312                         "Device boot error - DRAM initialization failed\n");
313         if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
314                 dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
315         if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
316                 dev_err(hdev->dev,
317                         "Device boot error - Thermal Sensor initialization failed\n");
318         if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
319                 dev_warn(hdev->dev,
320                         "Device boot warning - Skipped DRAM initialization\n");
321
322         if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
323                 if (hdev->bmc_enable)
324                         dev_warn(hdev->dev,
325                                 "Device boot error - Skipped waiting for BMC\n");
326                 else
327                         err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
328         }
329
330         if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
331                 dev_err(hdev->dev,
332                         "Device boot error - Serdes data from BMC not available\n");
333         if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
334                 dev_err(hdev->dev,
335                         "Device boot error - NIC F/W initialization failed\n");
336         if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
337                 dev_warn(hdev->dev,
338                         "Device boot warning - security not ready\n");
339         if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
340                 dev_err(hdev->dev, "Device boot error - security failure\n");
341         if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
342                 dev_err(hdev->dev, "Device boot error - eFuse failure\n");
343         if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
344                 dev_err(hdev->dev, "Device boot error - PLL failure\n");
345
346         security_val = RREG32(cpu_security_boot_status_reg);
347         if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
348                 dev_dbg(hdev->dev, "Device security status %#x\n",
349                                 security_val);
350
351         if (err_val & ~CPU_BOOT_ERR0_ENABLED)
352                 return -EIO;
353
354         return 0;
355 }
356
357 int hl_fw_cpucp_info_get(struct hl_device *hdev,
358                         u32 cpu_security_boot_status_reg,
359                         u32 boot_err0_reg)
360 {
361         struct asic_fixed_properties *prop = &hdev->asic_prop;
362         struct cpucp_packet pkt = {};
363         void *cpucp_info_cpu_addr;
364         dma_addr_t cpucp_info_dma_addr;
365         u64 result;
366         int rc;
367
368         cpucp_info_cpu_addr =
369                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
370                                         sizeof(struct cpucp_info),
371                                         &cpucp_info_dma_addr);
372         if (!cpucp_info_cpu_addr) {
373                 dev_err(hdev->dev,
374                         "Failed to allocate DMA memory for CPU-CP info packet\n");
375                 return -ENOMEM;
376         }
377
378         memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
379
380         pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
381                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
382         pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
383         pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
384
385         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
386                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
387         if (rc) {
388                 dev_err(hdev->dev,
389                         "Failed to handle CPU-CP info pkt, error %d\n", rc);
390                 goto out;
391         }
392
393         rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
394         if (rc) {
395                 dev_err(hdev->dev, "Errors in device boot\n");
396                 goto out;
397         }
398
399         memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
400                         sizeof(prop->cpucp_info));
401
402         rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
403         if (rc) {
404                 dev_err(hdev->dev,
405                         "Failed to build hwmon channel info, error %d\n", rc);
406                 rc = -EFAULT;
407                 goto out;
408         }
409
410         /* Read FW application security bits again */
411         if (hdev->asic_prop.fw_security_status_valid)
412                 hdev->asic_prop.fw_app_security_map =
413                                 RREG32(cpu_security_boot_status_reg);
414
415 out:
416         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
417                         sizeof(struct cpucp_info), cpucp_info_cpu_addr);
418
419         return rc;
420 }
421
422 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
423 {
424         struct cpucp_packet pkt = {};
425         void *eeprom_info_cpu_addr;
426         dma_addr_t eeprom_info_dma_addr;
427         u64 result;
428         int rc;
429
430         eeprom_info_cpu_addr =
431                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
432                                         max_size, &eeprom_info_dma_addr);
433         if (!eeprom_info_cpu_addr) {
434                 dev_err(hdev->dev,
435                         "Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
436                 return -ENOMEM;
437         }
438
439         memset(eeprom_info_cpu_addr, 0, max_size);
440
441         pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
442                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
443         pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
444         pkt.data_max_size = cpu_to_le32(max_size);
445
446         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
447                         HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
448
449         if (rc) {
450                 dev_err(hdev->dev,
451                         "Failed to handle CPU-CP EEPROM packet, error %d\n",
452                         rc);
453                 goto out;
454         }
455
456         /* result contains the actual size */
457         memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
458
459 out:
460         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
461                         eeprom_info_cpu_addr);
462
463         return rc;
464 }
465
466 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
467                 struct hl_info_pci_counters *counters)
468 {
469         struct cpucp_packet pkt = {};
470         u64 result;
471         int rc;
472
473         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
474                         CPUCP_PKT_CTL_OPCODE_SHIFT);
475
476         /* Fetch PCI rx counter */
477         pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
478         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
479                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
480         if (rc) {
481                 dev_err(hdev->dev,
482                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
483                 return rc;
484         }
485         counters->rx_throughput = result;
486
487         memset(&pkt, 0, sizeof(pkt));
488         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
489                         CPUCP_PKT_CTL_OPCODE_SHIFT);
490
491         /* Fetch PCI tx counter */
492         pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
493         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
494                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
495         if (rc) {
496                 dev_err(hdev->dev,
497                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
498                 return rc;
499         }
500         counters->tx_throughput = result;
501
502         /* Fetch PCI replay counter */
503         memset(&pkt, 0, sizeof(pkt));
504         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
505                         CPUCP_PKT_CTL_OPCODE_SHIFT);
506
507         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
508                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
509         if (rc) {
510                 dev_err(hdev->dev,
511                         "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
512                 return rc;
513         }
514         counters->replay_cnt = (u32) result;
515
516         return rc;
517 }
518
519 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
520 {
521         struct cpucp_packet pkt = {};
522         u64 result;
523         int rc;
524
525         pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
526                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
527
528         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
529                                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
530         if (rc) {
531                 dev_err(hdev->dev,
532                         "Failed to handle CpuCP total energy pkt, error %d\n",
533                                 rc);
534                 return rc;
535         }
536
537         *total_energy = result;
538
539         return rc;
540 }
541
542 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
543                 u16 *pll_freq_arr)
544 {
545         struct cpucp_packet pkt;
546         u64 result;
547         int rc;
548
549         memset(&pkt, 0, sizeof(pkt));
550
551         pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
552                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
553         pkt.pll_type = __cpu_to_le16(pll_index);
554
555         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
556                         HL_CPUCP_INFO_TIMEOUT_USEC, &result);
557         if (rc)
558                 dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
559
560         pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
561         pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
562         pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
563         pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
564
565         return rc;
566 }
567
568 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
569 {
570         /* Some of the status codes below are deprecated in newer f/w
571          * versions but we keep them here for backward compatibility
572          */
573         switch (status) {
574         case CPU_BOOT_STATUS_NA:
575                 dev_err(hdev->dev,
576                         "Device boot error - BTL did NOT run\n");
577                 break;
578         case CPU_BOOT_STATUS_IN_WFE:
579                 dev_err(hdev->dev,
580                         "Device boot error - Stuck inside WFE loop\n");
581                 break;
582         case CPU_BOOT_STATUS_IN_BTL:
583                 dev_err(hdev->dev,
584                         "Device boot error - Stuck in BTL\n");
585                 break;
586         case CPU_BOOT_STATUS_IN_PREBOOT:
587                 dev_err(hdev->dev,
588                         "Device boot error - Stuck in Preboot\n");
589                 break;
590         case CPU_BOOT_STATUS_IN_SPL:
591                 dev_err(hdev->dev,
592                         "Device boot error - Stuck in SPL\n");
593                 break;
594         case CPU_BOOT_STATUS_IN_UBOOT:
595                 dev_err(hdev->dev,
596                         "Device boot error - Stuck in u-boot\n");
597                 break;
598         case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
599                 dev_err(hdev->dev,
600                         "Device boot error - DRAM initialization failed\n");
601                 break;
602         case CPU_BOOT_STATUS_UBOOT_NOT_READY:
603                 dev_err(hdev->dev,
604                         "Device boot error - u-boot stopped by user\n");
605                 break;
606         case CPU_BOOT_STATUS_TS_INIT_FAIL:
607                 dev_err(hdev->dev,
608                         "Device boot error - Thermal Sensor initialization failed\n");
609                 break;
610         default:
611                 dev_err(hdev->dev,
612                         "Device boot error - Invalid status code %d\n",
613                         status);
614                 break;
615         }
616 }
617
618 int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
619                 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
620                 u32 timeout)
621 {
622         struct asic_fixed_properties *prop = &hdev->asic_prop;
623         u32 status, security_status;
624         int rc;
625
626         if (!hdev->cpu_enable)
627                 return 0;
628
629         /* Need to check two possible scenarios:
630          *
631          * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
632          * the preboot is waiting for the boot fit
633          *
634          * All other status values - for older firmwares where the uboot was
635          * loaded from the FLASH
636          */
637         rc = hl_poll_timeout(
638                 hdev,
639                 cpu_boot_status_reg,
640                 status,
641                 (status == CPU_BOOT_STATUS_IN_UBOOT) ||
642                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
643                 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
644                 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
645                 (status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
646                 (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
647                 10000,
648                 timeout);
649
650         if (rc) {
651                 dev_err(hdev->dev, "Failed to read preboot version\n");
652                 detect_cpu_boot_status(hdev, status);
653                 fw_read_errors(hdev, boot_err0_reg,
654                                 cpu_security_boot_status_reg);
655                 return -EIO;
656         }
657
658         rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
659         if (rc)
660                 return rc;
661
662         security_status = RREG32(cpu_security_boot_status_reg);
663
664         /* We read security status multiple times during boot:
665          * 1. preboot - a. Check whether the security status bits are valid
666          *              b. Check whether fw security is enabled
667          *              c. Check whether hard reset is done by preboot
668          * 2. boot cpu - a. Fetch boot cpu security status
669          *               b. Check whether hard reset is done by boot cpu
670          * 3. FW application - a. Fetch fw application security status
671          *                     b. Check whether hard reset is done by fw app
672          *
673          * Preboot:
674          * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
675          * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
676          */
677         if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
678                 prop->fw_security_status_valid = 1;
679
680                 if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
681                         prop->fw_security_disabled = false;
682                 else
683                         prop->fw_security_disabled = true;
684
685                 if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
686                         prop->hard_reset_done_by_fw = true;
687         } else {
688                 prop->fw_security_status_valid = 0;
689                 prop->fw_security_disabled = true;
690         }
691
692         dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
693                         security_status);
694
695         dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
696                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
697
698         dev_info(hdev->dev, "firmware-level security is %s\n",
699                         prop->fw_security_disabled ? "disabled" : "enabled");
700
701         return 0;
702 }
703
704 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
705                         u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
706                         u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
707                         bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
708 {
709         struct asic_fixed_properties *prop = &hdev->asic_prop;
710         u32 status;
711         int rc;
712
713         if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
714                 return 0;
715
716         dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
717                 cpu_timeout / USEC_PER_SEC);
718
719         /* Wait for boot FIT request */
720         rc = hl_poll_timeout(
721                 hdev,
722                 cpu_boot_status_reg,
723                 status,
724                 status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
725                 10000,
726                 boot_fit_timeout);
727
728         if (rc) {
729                 dev_dbg(hdev->dev,
730                         "No boot fit request received, resuming boot\n");
731         } else {
732                 rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
733                 if (rc)
734                         goto out;
735
736                 /* Clear device CPU message status */
737                 WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
738
739                 /* Signal device CPU that boot loader is ready */
740                 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
741
742                 /* Poll for CPU device ack */
743                 rc = hl_poll_timeout(
744                         hdev,
745                         cpu_msg_status_reg,
746                         status,
747                         status == CPU_MSG_OK,
748                         10000,
749                         boot_fit_timeout);
750
751                 if (rc) {
752                         dev_err(hdev->dev,
753                                 "Timeout waiting for boot fit load ack\n");
754                         goto out;
755                 }
756
757                 /* Clear message */
758                 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
759         }
760
761         /* Make sure CPU boot-loader is running */
762         rc = hl_poll_timeout(
763                 hdev,
764                 cpu_boot_status_reg,
765                 status,
766                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
767                 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
768                 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
769                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
770                 10000,
771                 cpu_timeout);
772
773         dev_dbg(hdev->dev, "uboot status = %d\n", status);
774
775         /* Read U-Boot version now in case we will later fail */
776         hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
777
778         /* Clear reset status since we need to read it again from boot CPU */
779         prop->hard_reset_done_by_fw = false;
780
781         /* Read boot_cpu security bits */
782         if (prop->fw_security_status_valid) {
783                 prop->fw_boot_cpu_security_map =
784                                 RREG32(cpu_security_boot_status_reg);
785
786                 if (prop->fw_boot_cpu_security_map &
787                                 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
788                         prop->hard_reset_done_by_fw = true;
789
790                 dev_dbg(hdev->dev,
791                         "Firmware boot CPU security status %#x\n",
792                         prop->fw_boot_cpu_security_map);
793         }
794
795         dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
796                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
797
798         if (rc) {
799                 detect_cpu_boot_status(hdev, status);
800                 rc = -EIO;
801                 goto out;
802         }
803
804         if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
805                 dev_info(hdev->dev, "Skip loading Linux F/W\n");
806                 goto out;
807         }
808
809         if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
810                 goto out;
811
812         dev_info(hdev->dev,
813                 "Loading firmware to device, may take some time...\n");
814
815         rc = hdev->asic_funcs->load_firmware_to_device(hdev);
816         if (rc)
817                 goto out;
818
819         if (skip_bmc) {
820                 WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
821
822                 rc = hl_poll_timeout(
823                         hdev,
824                         cpu_boot_status_reg,
825                         status,
826                         (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
827                         10000,
828                         cpu_timeout);
829
830                 if (rc) {
831                         dev_err(hdev->dev,
832                                 "Failed to get ACK on skipping BMC, %d\n",
833                                 status);
834                         WREG32(msg_to_cpu_reg, KMD_MSG_NA);
835                         rc = -EIO;
836                         goto out;
837                 }
838         }
839
840         WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
841
842         rc = hl_poll_timeout(
843                 hdev,
844                 cpu_boot_status_reg,
845                 status,
846                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
847                 10000,
848                 cpu_timeout);
849
850         /* Clear message */
851         WREG32(msg_to_cpu_reg, KMD_MSG_NA);
852
853         if (rc) {
854                 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
855                         dev_err(hdev->dev,
856                                 "Device reports FIT image is corrupted\n");
857                 else
858                         dev_err(hdev->dev,
859                                 "Failed to load firmware to device, %d\n",
860                                 status);
861
862                 rc = -EIO;
863                 goto out;
864         }
865
866         rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
867         if (rc)
868                 return rc;
869
870         /* Clear reset status since we need to read again from app */
871         prop->hard_reset_done_by_fw = false;
872
873         /* Read FW application security bits */
874         if (prop->fw_security_status_valid) {
875                 prop->fw_app_security_map =
876                                 RREG32(cpu_security_boot_status_reg);
877
878                 if (prop->fw_app_security_map &
879                                 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
880                         prop->hard_reset_done_by_fw = true;
881
882                 dev_dbg(hdev->dev,
883                         "Firmware application CPU security status %#x\n",
884                         prop->fw_app_security_map);
885         }
886
887         dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
888                         prop->hard_reset_done_by_fw ? "enabled" : "disabled");
889
890         dev_info(hdev->dev, "Successfully loaded firmware to device\n");
891
892         return 0;
893
894 out:
895         fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
896
897         return rc;
898 }