// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>

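/*
 * hl_device_status - get the current status of the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Returns the device status: in-reset, needs-reset, malfunction (disabled)
 * or operational.
 */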
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (atomic_read(&hdev->in_reset))
		status = HL_DEVICE_STATUS_IN_RESET;
	else if (hdev->needs_reset)
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	else if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
}

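/*
 * hl_device_operational - check if the device is operational
 *
 * @hdev: pointer to habanalabs device structure
 * @status: optional pointer that will hold the current device status
 *
 * Returns true if the device is operational, false otherwise. If @status is
 * not NULL, the current status is stored in it as a side effect.
 */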
bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_MALFUNCTION:
	case HL_DEVICE_STATUS_NEEDS_RESET:
		return false;
	case HL_DEVICE_STATUS_OPERATIONAL:
	default:
		return true;
	}
}

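/*
 * hpriv_release - release function for a file's private data object
 *
 * @ref: pointer to the kref field of the object
 *
 * Called when the last reference to the object is dropped. Releases the task
 * pid, removes the debugfs file, destroys the restore-phase mutex and removes
 * the object from the device's file-private list before freeing it.
 */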
static void hpriv_release(struct kref *ref)
{
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->restore_phase_mutex);

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	hdev->compute_ctx = NULL;
	mutex_unlock(&hdev->fpriv_list_lock);

	kfree(hpriv);
}

void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

void hl_hpriv_put(struct hl_fpriv *hpriv)
{
	kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when a process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;

	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);

	filp->private_data = NULL;

	hl_hpriv_put(hpriv);

	return 0;
}

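/*
 * hl_device_release_ctrl - release function for habanalabs control device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when a process closes a habanalabs control device
 */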
static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev;

	filp->private_data = NULL;

	hdev = hpriv->hdev;

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	kfree(hpriv);

	return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when a process does an mmap on a habanalabs device. Call the
 * device's mmap function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	unsigned long vm_pgoff;

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_CB:
		return hl_cb_mmap(hpriv, vma);

	case HL_MMAP_TYPE_BLOCK:
		return hl_hw_block_mmap(hpriv, vma);
	}

	return -EINVAL;
}

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
	kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for a habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = hclass;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

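/*
 * device_cdev_sysfs_add - add char devices and sysfs nodes to the system
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Adds the compute and control char devices to the system and then creates
 * the sysfs nodes. Returns 0 on success or an error code on failure, after
 * unwinding whatever was already added.
 */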
static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

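/*
 * device_cdev_sysfs_del - remove what device_cdev_sysfs_add() added
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Removes the sysfs nodes and char devices (if they were created) and drops
 * the references that were taken on the device objects.
 */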
static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}

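/*
 * device_hard_reset_pending - work function for an asynchronous hard reset
 *
 * @work: pointer to the embedded delayed work structure
 *
 * Performs the pending hard reset. If the reset fails with -EBUSY because
 * open user processes have not exited yet, and the device is not being
 * removed, the work re-queues itself to try again later.
 */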
static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work,
				reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	int rc;

	rc = hl_device_reset(hdev, true, true);
	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		dev_info(hdev->dev,
			"Could not reset device. Will try again in %u seconds",
			HL_PENDING_RESET_PER_SEC);

		queue_delayed_work(device_reset_work->wq,
			&device_reset_work->reset_work,
			msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
				sizeof(*hdev->cq_wq),
				GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, sizeof(workq_name), "hl-free-jobs-%u",
				(u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (!hdev->cq_wq[i]) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
	if (!hdev->eq_wq) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
					sizeof(struct hl_device_idle_busy_ts),
					(GFP_KERNEL | __GFP_ZERO));
	if (!hdev->idle_busy_ts_arr) {
		rc = -ENOMEM;
		goto free_chip_info;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_idle_busy_ts_arr;

	hl_cb_mgr_init(&hdev->kernel_cb_mgr);

	hdev->device_reset_work.wq =
			create_singlethread_workqueue("hl_device_reset");
	if (!hdev->device_reset_work.wq) {
		rc = -ENOMEM;
		dev_err(hdev->dev, "Failed to create device reset WQ\n");
		goto free_cb_mgr;
	}

	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
			device_hard_reset_pending);
	hdev->device_reset_work.hdev = hdev;
	hdev->device_fini_pending = 0;

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	INIT_LIST_HEAD(&hdev->cs_mirror_list);
	spin_lock_init(&hdev->cs_mirror_lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	mutex_init(&hdev->fpriv_list_lock);
	atomic_set(&hdev->in_reset, 0);

	return 0;

free_cb_mgr:
	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_idle_busy_ts_arr:
	kfree(hdev->idle_busy_ts_arr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);

	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

	kfree(hdev->idle_busy_ts_arr);
	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}

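/*
 * set_freq_to_low_job - work function that lowers the device frequency
 *
 * @work: pointer to the embedded delayed work structure
 *
 * If there is no active compute context, set the device frequency to the low
 * PLL profile. The work then re-schedules itself to run again periodically.
 */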
static void set_freq_to_low_job(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_freq.work);

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->compute_ctx)
		hl_device_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

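/*
 * hl_device_heartbeat - work function for periodic device liveness checks
 *
 * @work: pointer to the embedded delayed work structure
 *
 * Sends a heartbeat message to the device F/W while the device is
 * operational. If the heartbeat fails, a hard reset is initiated; otherwise
 * the work re-schedules itself.
 */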
static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);

	if (!hl_device_operational(hdev, NULL))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	dev_err(hdev->dev, "Device heartbeat failed!\n");
	hl_device_reset(hdev, true, false);

	return;

reschedule:
	schedule_delayed_work(&hdev->work_heartbeat,
			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do initializations that either need the device H/W queues to be active or
 * need to happen after the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
	int rc;

	if (hdev->asic_funcs->late_init) {
		rc = hdev->asic_funcs->late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"failed late initialization for the H/W\n");
			return rc;
		}
	}

	hdev->high_pll = hdev->asic_prop.high_pll;

	/* force setting to low frequency */
	hdev->curr_pll_profile = PLL_LOW;

	if (hdev->pm_mng_profile == PM_AUTO)
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
	else
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

	INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	if (hdev->heartbeat) {
		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
		schedule_delayed_work(&hdev->work_heartbeat,
				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
	}

	hdev->late_init_done = true;

	return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
	if (!hdev->late_init_done)
		return;

	cancel_delayed_work_sync(&hdev->work_freq);
	if (hdev->heartbeat)
		cancel_delayed_work_sync(&hdev->work_heartbeat);

	if (hdev->asic_funcs->late_fini)
		hdev->asic_funcs->late_fini(hdev);

	hdev->late_init_done = false;
}

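/*
 * hl_device_utilization - calculate the device utilization
 *
 * @hdev: pointer to habanalabs device structure
 * @period_ms: the sampling period, in milliseconds
 *
 * Walk the idle/busy timestamps array backwards and accumulate the busy time
 * that overlaps the sampling period. Returns the utilization as a percentage.
 */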
uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
{
	struct hl_device_idle_busy_ts *ts;
	ktime_t zero_ktime, curr = ktime_get();
	u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
	s64 period_us, last_start_us, last_end_us, last_busy_time_us,
		total_busy_time_us = 0, total_busy_time_ms;

	zero_ktime = ktime_set(0, 0);
	period_us = period_ms * USEC_PER_MSEC;
	ts = &hdev->idle_busy_ts_arr[last_index];

	/* check the case where the device is currently idle */
	if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
			!ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {

		last_index--;
		/* Handle the case where idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

		ts = &hdev->idle_busy_ts_arr[last_index];
	}

	while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
		/* Check if we are in the last sample case, i.e. if the sample
		 * began before the sampling period. This could be a real
		 * sample or 0 so we need to handle both cases
		 */
		last_start_us = ktime_to_us(
				ktime_sub(curr, ts->idle_to_busy_ts));

		if (last_start_us > period_us) {

			/* First check two cases:
			 * 1. If the device is currently busy
			 * 2. If the device was idle during the whole sampling
			 *    period
			 */

			if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
				/* Check if the device is currently busy */
				if (ktime_compare(ts->idle_to_busy_ts,
						zero_ktime))
					return 100;

				/* We either didn't have any activity or we
				 * reached an entry which is 0. Either way,
				 * exit and return what was accumulated so far
				 */
				break;
			}

			/* If the sample has finished, check it is relevant */
			last_end_us = ktime_to_us(
					ktime_sub(curr, ts->busy_to_idle_ts));

			if (last_end_us > period_us)
				break;

			/* It is relevant so add it but with adjustment */
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
			total_busy_time_us += last_busy_time_us -
					(last_start_us - period_us);
			break;
		}

		/* Check if the sample is finished or still open */
		if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
			last_busy_time_us = ktime_to_us(
						ktime_sub(ts->busy_to_idle_ts,
						ts->idle_to_busy_ts));
		else
			last_busy_time_us = ktime_to_us(
					ktime_sub(curr, ts->idle_to_busy_ts));

		total_busy_time_us += last_busy_time_us;

		last_index--;
		/* Handle the case where idle_busy_ts_idx was 0 */
		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

		ts = &hdev->idle_busy_ts_arr[last_index];

		overlap_cnt++;
	}

	total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
						USEC_PER_MSEC);

	return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
}

/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, so it is assumed that the caller protects against calling it
 * from multiple threads with different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	if ((hdev->pm_mng_profile == PM_MANUAL) ||
			(hdev->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	hdev->asic_funcs->set_pll_profile(hdev, freq);

	hdev->curr_pll_profile = freq;

	return 1;
}

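/*
 * hl_device_set_debug_mode - enter/exit debug mode
 *
 * @hdev: pointer to habanalabs device structure
 * @enable: true to enter debug mode, false to exit it
 *
 * On exit, CoreSight is halted and clock gating is restored; on entry, clock
 * gating is disabled first. Returns 0 on success or -EFAULT if the device is
 * already in the requested mode.
 */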
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (!enable) {
		if (!hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to disable debug mode because device was not in debug mode\n");
			rc = -EFAULT;
			goto out;
		}

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->halt_coresight(hdev);

		hdev->in_debug = 0;

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->set_clock_gating(hdev);

		goto out;
	}

	if (hdev->in_debug) {
		dev_err(hdev->dev,
			"Failed to enable debug mode because device is already in debug mode\n");
		rc = -EFAULT;
		goto out;
	}

	hdev->asic_funcs->disable_clock_gating(hdev);
	hdev->in_debug = 1;

out:
	mutex_unlock(&hdev->debug_lock);

	return rc;
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the H/W in the suspend state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	/* Block future CS/VM/JOB completion operations */
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	if (rc) {
		dev_err(hdev->dev, "Can't suspend while in reset\n");
		return -EIO;
	}

	/* This blocks all other operations that are not blocked by in_reset */
	hdev->disabled = true;

	/*
	 * Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush processes that are sending messages to the CPU */
	mutex_lock(&hdev->send_cpu_message_lock);
	mutex_unlock(&hdev->send_cpu_message_lock);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);

	return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the H/W back to operating state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
	int rc;

	pci_set_power_state(hdev->pdev, PCI_D0);
	pci_restore_state(hdev->pdev);
	rc = pci_enable_device_mem(hdev->pdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI device in resume\n");
		return rc;
	}

	pci_set_master(hdev->pdev);

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to resume device after suspend\n");
		goto disable_device;
	}

	hdev->disabled = false;
	atomic_set(&hdev->in_reset, 0);

	rc = hl_device_reset(hdev, true, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to reset device during resume\n");
		goto disable_device;
	}

	return 0;

disable_device:
	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return rc;
}

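/*
 * device_kill_open_processes - kill all user processes that hold the device
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: time, in seconds, to wait for processes to exit after the kill
 *           signal was sent (0 means use the default grace period)
 *
 * Sends SIGKILL to every user process that still has the device open, then
 * waits for the file descriptors to actually be released. Returns 0 if all
 * processes exited, -EBUSY if the caller should retry later or -ETIME if we
 * gave up after the maximum number of trials.
 */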
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
{
	struct hl_fpriv *hpriv;
	struct task_struct *task = NULL;
	u32 pending_cnt;

	/* Give time for the user to close the FD, and for processes that are
	 * inside hl_device_open to finish
	 */
	if (!list_empty(&hdev->fpriv_list))
		ssleep(1);

	if (timeout) {
		pending_cnt = timeout;
	} else {
		if (hdev->process_kill_trial_cnt) {
			/* Processes have already been killed */
			pending_cnt = 1;
			goto wait_for_processes;
		} else {
			/* Wait a small period after process kill */
			pending_cnt = HL_PENDING_RESET_PER_SEC;
		}
	}

	mutex_lock(&hdev->fpriv_list_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		}
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/*
	 * We killed the open users, but that doesn't mean they are closed.
	 * It could be that they are running a long cleanup phase in the driver
	 * e.g. MMU unmappings, or running other long teardown flow even before
	 * our cleanup.
	 * Therefore we need to wait again to make sure they are closed before
	 * continuing with the reset.
	 */

wait_for_processes:
	while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(&hdev->fpriv_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 * @from_hard_reset_thread: is the caller the hard-reset thread
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
			bool from_hard_reset_thread)
{
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev,
			"Can't reset before initialization is done\n");
		return 0;
	}

	if ((!hard_reset) && (!hdev->supports_soft_reset)) {
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
		hard_reset = true;
	}

	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. We only need to perform this if we didn't
	 * get here from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (rc)
			return 0;

		if (hard_reset) {
			/* Disable PCI access from device F/W so it won't send
			 * us additional interrupts. We disable MSI/MSI-X at
			 * the halt_engines function and we can't have the F/W
			 * sending us interrupts after that. We need to disable
			 * the access here because if the device is marked as
			 * disabled, the message won't be sent. Also, in case
			 * of heartbeat, the device CPU is marked as disabled
			 * so this message won't be sent
			 */
			if (hl_fw_send_pci_access_msg(hdev,
					CPUCP_PACKET_DISABLE_PCI_ACCESS))
				dev_warn(hdev->dev,
					"Failed to disable PCI access by F/W\n");
		}

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		/* Flush anyone that is inside the critical section of enqueue
		 * jobs to the H/W
		 */
		hdev->asic_funcs->hw_queues_lock(hdev);
		hdev->asic_funcs->hw_queues_unlock(hdev);

		/* Flush anyone that is inside device open */
		mutex_lock(&hdev->fpriv_list_lock);
		mutex_unlock(&hdev->fpriv_list_lock);

		dev_err(hdev->dev, "Going to RESET device!\n");
	}

again:
	if ((hard_reset) && (!from_hard_reset_thread)) {
		hdev->hard_reset_pending = true;

		hdev->process_kill_trial_cnt = 0;

		/*
		 * Because the reset function can't run from interrupt or
		 * from heartbeat work, we need to call the reset function
		 * from a dedicated work item
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);

		return 0;
	}

	if (hard_reset) {
		device_late_fini(hdev);

		/*
		 * Now that the heartbeat thread is closed, flush processes
		 * which are sending messages to CPU
		 */
		mutex_lock(&hdev->send_cpu_message_lock);
		mutex_unlock(&hdev->send_cpu_message_lock);
	}

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

kill_processes:
	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in CS rollback
		 */
		rc = device_kill_open_processes(hdev, 0);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"Failed to kill all open processes, stopping hard reset\n");
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset);

	if (hard_reset) {
		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;
		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI, CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	hdev->idle_busy_ts_idx = 0;
	hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
	hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);

	if (hdev->cs_active_cnt)
		dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
			hdev->cs_active_cnt);

	mutex_lock(&hdev->fpriv_list_lock);

	/* Make sure the context switch phase will run again */
	if (hdev->compute_ctx) {
		atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
		hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->hard_reset_pending = false;

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->compute_ctx = NULL;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* The device is now enabled because part of the initialization
	 * requires communication with the device firmware to get information
	 * that is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->soft_reset_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after soft reset\n");
			goto out_err;
		}
	}

	atomic_set(&hdev->in_reset, 0);
	hdev->needs_reset = false;

	dev_notice(hdev->dev, "Successfully finished resetting the device\n");

	if (hard_reset) {
		hdev->hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else {
		hdev->soft_reset_cnt++;
	}

	return 0;

out_err:
	hdev->disabled = true;

	if (hard_reset) {
		dev_err(hdev->dev,
			"Failed to reset! Device is NOT usable\n");
		hdev->hard_reset_cnt++;
	} else {
		dev_err(hdev->dev,
			"Failed to do soft-reset, trying hard reset\n");
		hdev->soft_reset_cnt++;
		hard_reset = true;
		goto again;
	}

	atomic_set(&hdev->in_reset, 0);

	return rc;
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto early_fini;

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queues are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
				sizeof(*hdev->completion_queue),
				GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto cq_fini;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->compute_ctx = NULL;

	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	/*
	 * From this point, in case of an error, add char devices and create
	 * sysfs nodes as part of the error flow, to allow debugging.
	 */
	add_cdev_sysfs_on_err = true;

	/* The device is now enabled because part of the initialization
	 * requires communication with the device firmware to get information
	 * that is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / 1024 / 1024 / 1024);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	hl_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	hdev->init_done = true;

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable!\n",
			hdev->id / 2);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
			hdev->id / 2);

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	ktime_t timeout;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	/*
	 * This function competes with the reset function, so try to take the
	 * reset atomic and if we are already in the middle of a reset, wait
	 * until the reset function is finished. The reset function is designed
	 * to always finish. However, in Gaudi, because of all the network
	 * ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(),
				HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	while (rc) {
		usleep_range(50, 200);
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"Failed to remove device because reset function did not finish\n");
			return;
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked as disabled,
	 * the message won't be sent. Also, in case of heartbeat, the device
	 * CPU is marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

	/* Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush anyone that is inside device open */
	mutex_lock(&hdev->fpriv_list_lock);
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	device_late_fini(hdev);

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, true);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in CS rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)",
		HL_PENDING_RESET_LONG_SEC);

	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
	if (rc)
		dev_crit(hdev->dev, "Failed to kill all open processes\n");

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true);

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}