drivers/misc/habanalabs/device.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "habanalabs.h"
9
10 #include <linux/pci.h>
11 #include <linux/sched/signal.h>
12 #include <linux/hwmon.h>
13
14 #define HL_PLDM_PENDING_RESET_PER_SEC   (HL_PENDING_RESET_PER_SEC * 10)
15
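/*
 * hl_device_disabled_or_in_reset - check if the device is usable
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Returns true if the device is disabled or if a reset is currently in
 * progress, false otherwise.
 */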
16 bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
17 {
18         if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
19                 return true;
20         else
21                 return false;
22 }
23
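/*
 * hpriv_release - free the per-FD private data once its refcount drops to zero
 *
 * @ref: pointer to the kref embedded in the hl_fpriv structure
 *
 * Called from kref_put(). Releases the task pid, removes the debugfs entry,
 * frees the private data and marks the device as available for a new user
 * context.
 */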
24 static void hpriv_release(struct kref *ref)
25 {
26         struct hl_fpriv *hpriv;
27         struct hl_device *hdev;
28
29         hpriv = container_of(ref, struct hl_fpriv, refcount);
30
31         hdev = hpriv->hdev;
32
33         put_pid(hpriv->taskpid);
34
35         hl_debugfs_remove_file(hpriv);
36
37         mutex_destroy(&hpriv->restore_phase_mutex);
38
39         kfree(hpriv);
40
41         /* Now the FD is really closed */
42         atomic_dec(&hdev->fd_open_cnt);
43
44         /* This allows a new user context to open the device */
45         hdev->user_ctx = NULL;
46 }
47
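/*
 * Reference counting helpers for the per-FD private data. The last
 * hl_hpriv_put() triggers hpriv_release().
 */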
48 void hl_hpriv_get(struct hl_fpriv *hpriv)
49 {
50         kref_get(&hpriv->refcount);
51 }
52
53 void hl_hpriv_put(struct hl_fpriv *hpriv)
54 {
55         kref_put(&hpriv->refcount, hpriv_release);
56 }
57
58 /*
59  * hl_device_release - release function for habanalabs device
60  *
61  * @inode: pointer to inode structure
62  * @filp: pointer to file structure
63  *
64  * Called when a process closes a habanalabs device
65  */
66 static int hl_device_release(struct inode *inode, struct file *filp)
67 {
68         struct hl_fpriv *hpriv = filp->private_data;
69
70         hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
71         hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
72
73         filp->private_data = NULL;
74
75         hl_hpriv_put(hpriv);
76
77         return 0;
78 }
79
80 /*
81  * hl_mmap - mmap function for habanalabs device
82  *
83  * @filp: pointer to file structure
84  * @vma: pointer to vm_area_struct of the process
85  *
86  * Called when a process does an mmap on a habanalabs device. Call the device's mmap
87  * function at the end of the common code.
88  */
89 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
90 {
91         struct hl_fpriv *hpriv = filp->private_data;
92
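        /*
         * Command buffer mappings are tagged with HL_MMAP_CB_MASK in the page
         * offset; clear the tag before handing the VMA to the CB code.
         */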
93         if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
94                 vma->vm_pgoff ^= HL_MMAP_CB_MASK;
95                 return hl_cb_mmap(hpriv, vma);
96         }
97
98         return -EINVAL;
99 }
100
101 static const struct file_operations hl_ops = {
102         .owner = THIS_MODULE,
103         .open = hl_device_open,
104         .release = hl_device_release,
105         .mmap = hl_mmap,
106         .unlocked_ioctl = hl_ioctl,
107         .compat_ioctl = hl_ioctl
108 };
109
110 /*
111  * device_setup_cdev - setup cdev and device for habanalabs device
112  *
113  * @hdev: pointer to habanalabs device structure
114  * @hclass: pointer to the class object of the device
115  * @minor: minor number of the specific device
116  * @fops: file operations to install for this device
117  *
118  * Create a cdev and a Linux device for the habanalabs device. Must be
119  * called at the end of the habanalabs device initialization process,
120  * because this function exposes the device to the user
121  */
122 static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
123                                 int minor, const struct file_operations *fops)
124 {
125         int err, devno = MKDEV(hdev->major, minor);
126         struct cdev *hdev_cdev = &hdev->cdev;
127         char *name;
128
129         name = kasprintf(GFP_KERNEL, "hl%d", hdev->id);
130         if (!name)
131                 return -ENOMEM;
132
133         cdev_init(hdev_cdev, fops);
134         hdev_cdev->owner = THIS_MODULE;
135         err = cdev_add(hdev_cdev, devno, 1);
136         if (err) {
137                 pr_err("Failed to add char device %s\n", name);
138                 goto err_cdev_add;
139         }
140
141         hdev->dev = device_create(hclass, NULL, devno, NULL, "%s", name);
142         if (IS_ERR(hdev->dev)) {
143                 pr_err("Failed to create device %s\n", name);
144                 err = PTR_ERR(hdev->dev);
145                 goto err_device_create;
146         }
147
148         dev_set_drvdata(hdev->dev, hdev);
149
150         kfree(name);
151
152         return 0;
153
154 err_device_create:
155         cdev_del(hdev_cdev);
156 err_cdev_add:
157         kfree(name);
158         return err;
159 }
160
161 /*
162  * device_early_init - do some early initialization for the habanalabs device
163  *
164  * @hdev: pointer to habanalabs device structure
165  *
166  * Install the relevant function pointers and call the early_init function,
167  * if such a function exists
168  */
169 static int device_early_init(struct hl_device *hdev)
170 {
171         int rc;
172
173         switch (hdev->asic_type) {
174         case ASIC_GOYA:
175                 goya_set_asic_funcs(hdev);
176                 strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
177                 break;
178         default:
179                 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
180                         hdev->asic_type);
181                 return -EINVAL;
182         }
183
184         rc = hdev->asic_funcs->early_init(hdev);
185         if (rc)
186                 return rc;
187
188         rc = hl_asid_init(hdev);
189         if (rc)
190                 goto early_fini;
191
192         hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
193         if (hdev->cq_wq == NULL) {
194                 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
195                 rc = -ENOMEM;
196                 goto asid_fini;
197         }
198
199         hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
200         if (hdev->eq_wq == NULL) {
201                 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
202                 rc = -ENOMEM;
203                 goto free_cq_wq;
204         }
205
206         hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
207                                         GFP_KERNEL);
208         if (!hdev->hl_chip_info) {
209                 rc = -ENOMEM;
210                 goto free_eq_wq;
211         }
212
213         hl_cb_mgr_init(&hdev->kernel_cb_mgr);
214
215         mutex_init(&hdev->fd_open_cnt_lock);
216         mutex_init(&hdev->send_cpu_message_lock);
217         INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
218         spin_lock_init(&hdev->hw_queues_mirror_lock);
219         atomic_set(&hdev->in_reset, 0);
220         atomic_set(&hdev->fd_open_cnt, 0);
221         atomic_set(&hdev->cs_active_cnt, 0);
222
223         return 0;
224
225 free_eq_wq:
226         destroy_workqueue(hdev->eq_wq);
227 free_cq_wq:
228         destroy_workqueue(hdev->cq_wq);
229 asid_fini:
230         hl_asid_fini(hdev);
231 early_fini:
232         if (hdev->asic_funcs->early_fini)
233                 hdev->asic_funcs->early_fini(hdev);
234
235         return rc;
236 }
237
238 /*
239  * device_early_fini - finalize all that was done in device_early_init
240  *
241  * @hdev: pointer to habanalabs device structure
242  *
243  */
244 static void device_early_fini(struct hl_device *hdev)
245 {
246         mutex_destroy(&hdev->send_cpu_message_lock);
247
248         hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
249
250         kfree(hdev->hl_chip_info);
251
252         destroy_workqueue(hdev->eq_wq);
253         destroy_workqueue(hdev->cq_wq);
254
255         hl_asid_fini(hdev);
256
257         if (hdev->asic_funcs->early_fini)
258                 hdev->asic_funcs->early_fini(hdev);
259
260         mutex_destroy(&hdev->fd_open_cnt_lock);
261 }
262
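/*
 * set_freq_to_low_job - periodic work that lowers the device frequency
 *
 * @work: pointer to the embedded delayed work structure
 *
 * If no user currently has the device open, switch the PLL to the low
 * profile, then re-arm the delayed work.
 */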
263 static void set_freq_to_low_job(struct work_struct *work)
264 {
265         struct hl_device *hdev = container_of(work, struct hl_device,
266                                                 work_freq.work);
267
268         if (atomic_read(&hdev->fd_open_cnt) == 0)
269                 hl_device_set_frequency(hdev, PLL_LOW);
270
271         schedule_delayed_work(&hdev->work_freq,
272                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
273 }
274
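/*
 * hl_device_heartbeat - periodic work that checks the device is alive
 *
 * @work: pointer to the embedded delayed work structure
 *
 * Send a heartbeat message to the device CPU. On success re-arm the delayed
 * work; on failure report the error and trigger a hard reset.
 */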
275 static void hl_device_heartbeat(struct work_struct *work)
276 {
277         struct hl_device *hdev = container_of(work, struct hl_device,
278                                                 work_heartbeat.work);
279
280         if (hl_device_disabled_or_in_reset(hdev))
281                 goto reschedule;
282
283         if (!hdev->asic_funcs->send_heartbeat(hdev))
284                 goto reschedule;
285
286         dev_err(hdev->dev, "Device heartbeat failed!\n");
287         hl_device_reset(hdev, true, false);
288
289         return;
290
291 reschedule:
292         schedule_delayed_work(&hdev->work_heartbeat,
293                         usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
294 }
295
296 /*
297  * device_late_init - do late initialization for the habanalabs device
298  *
299  * @hdev: pointer to habanalabs device structure
300  *
301  * Perform initialization that either needs the device H/W queues to be
302  * active or must happen after the rest of the initialization is finished
303  */
304 static int device_late_init(struct hl_device *hdev)
305 {
306         int rc;
307
308         INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
309         hdev->high_pll = hdev->asic_prop.high_pll;
310
311         /* force setting to low frequency */
312         atomic_set(&hdev->curr_pll_profile, PLL_LOW);
313
314         if (hdev->pm_mng_profile == PM_AUTO)
315                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
316         else
317                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
318
319         if (hdev->asic_funcs->late_init) {
320                 rc = hdev->asic_funcs->late_init(hdev);
321                 if (rc) {
322                         dev_err(hdev->dev,
323                                 "failed late initialization for the H/W\n");
324                         return rc;
325                 }
326         }
327
328         schedule_delayed_work(&hdev->work_freq,
329                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
330
331         if (hdev->heartbeat) {
332                 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
333                 schedule_delayed_work(&hdev->work_heartbeat,
334                                 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
335         }
336
337         hdev->late_init_done = true;
338
339         return 0;
340 }
341
342 /*
343  * device_late_fini - finalize all that was done in device_late_init
344  *
345  * @hdev: pointer to habanalabs device structure
346  *
347  */
348 static void device_late_fini(struct hl_device *hdev)
349 {
350         if (!hdev->late_init_done)
351                 return;
352
353         cancel_delayed_work_sync(&hdev->work_freq);
354         if (hdev->heartbeat)
355                 cancel_delayed_work_sync(&hdev->work_heartbeat);
356
357         if (hdev->asic_funcs->late_fini)
358                 hdev->asic_funcs->late_fini(hdev);
359
360         hdev->late_init_done = false;
361 }
362
363 /*
364  * hl_device_set_frequency - set the frequency of the device
365  *
366  * @hdev: pointer to habanalabs device structure
367  * @freq: the new frequency value
368  *
369  * Change the frequency if needed.
370  * We allow setting the PLL to low only if there is no open user process.
371  * Returns 0 if no change was done, otherwise returns 1.
372  */
373 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
374 {
375         enum hl_pll_frequency old_freq =
376                         (freq == PLL_HIGH) ? PLL_LOW : PLL_HIGH;
377         int ret;
378
379         if (hdev->pm_mng_profile == PM_MANUAL)
380                 return 0;
381
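        /*
         * atomic_cmpxchg() returns the previous value of curr_pll_profile.
         * If the profile is already set to the requested frequency, there is
         * nothing to do.
         */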
382         ret = atomic_cmpxchg(&hdev->curr_pll_profile, old_freq, freq);
383         if (ret == freq)
384                 return 0;
385
386         /*
387          * In case we want to lower the frequency, check that the device is
388          * not open. We must have this check here to work around a race
389          * condition with hl_device_open
390          */
391         if ((freq == PLL_LOW) && (atomic_read(&hdev->fd_open_cnt) > 0)) {
392                 atomic_set(&hdev->curr_pll_profile, PLL_HIGH);
393                 return 0;
394         }
395
396         dev_dbg(hdev->dev, "Changing device frequency to %s\n",
397                 freq == PLL_HIGH ? "high" : "low");
398
399         hdev->asic_funcs->set_pll_profile(hdev, freq);
400
401         return 1;
402 }
403
404 /*
405  * hl_device_suspend - initiate device suspend
406  *
407  * @hdev: pointer to habanalabs device structure
408  *
409  * Puts the hw in the suspend state (all asics).
410  * Returns 0 for success or an error on failure.
411  * Called at driver suspend.
412  */
413 int hl_device_suspend(struct hl_device *hdev)
414 {
415         int rc;
416
417         pci_save_state(hdev->pdev);
418
419         /* Block future CS/VM/JOB completion operations */
420         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
421         if (rc) {
422                 dev_err(hdev->dev, "Can't suspend while in reset\n");
423                 return -EIO;
424         }
425
426         /* This blocks all other operations that are not blocked by in_reset */
427         hdev->disabled = true;
428
429         /*
430          * Flush anyone that is inside the critical section of enqueueing
431          * jobs to the H/W
432          */
433         hdev->asic_funcs->hw_queues_lock(hdev);
434         hdev->asic_funcs->hw_queues_unlock(hdev);
435
436         /* Flush processes that are sending message to CPU */
437         mutex_lock(&hdev->send_cpu_message_lock);
438         mutex_unlock(&hdev->send_cpu_message_lock);
439
440         rc = hdev->asic_funcs->suspend(hdev);
441         if (rc)
442                 dev_err(hdev->dev,
443                         "Failed to disable PCI access of device CPU\n");
444
445         /* Shut down the device */
446         pci_disable_device(hdev->pdev);
447         pci_set_power_state(hdev->pdev, PCI_D3hot);
448
449         return 0;
450 }
451
452 /*
453  * hl_device_resume - initiate device resume
454  *
455  * @hdev: pointer to habanalabs device structure
456  *
457  * Bring the hw back to operating state (all asics).
458  * Returns 0 for success or an error on failure.
459  * Called at driver resume.
460  */
461 int hl_device_resume(struct hl_device *hdev)
462 {
463         int rc;
464
465         pci_set_power_state(hdev->pdev, PCI_D0);
466         pci_restore_state(hdev->pdev);
467         rc = pci_enable_device_mem(hdev->pdev);
468         if (rc) {
469                 dev_err(hdev->dev,
470                         "Failed to enable PCI device in resume\n");
471                 return rc;
472         }
473
474         pci_set_master(hdev->pdev);
475
476         rc = hdev->asic_funcs->resume(hdev);
477         if (rc) {
478                 dev_err(hdev->dev, "Failed to resume device after suspend\n");
479                 goto disable_device;
480         }
481
482
483         hdev->disabled = false;
484         atomic_set(&hdev->in_reset, 0);
485
486         rc = hl_device_reset(hdev, true, false);
487         if (rc) {
488                 dev_err(hdev->dev, "Failed to reset device during resume\n");
489                 goto disable_device;
490         }
491
492         return 0;
493
494 disable_device:
495         pci_clear_master(hdev->pdev);
496         pci_disable_device(hdev->pdev);
497
498         return rc;
499 }
500
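/*
 * hl_device_hard_reset_pending - dedicated work for performing a hard reset
 *
 * @work: pointer to the embedded work structure
 *
 * Wait for user processes to close their file descriptors (killing them if
 * they don't do it in time) and then perform the actual hard reset.
 */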
501 static void hl_device_hard_reset_pending(struct work_struct *work)
502 {
503         struct hl_device_reset_work *device_reset_work =
504                 container_of(work, struct hl_device_reset_work, reset_work);
505         struct hl_device *hdev = device_reset_work->hdev;
506         u16 pending_total, pending_cnt;
507         struct task_struct *task = NULL;
508
509         if (hdev->pldm)
510                 pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
511         else
512                 pending_total = HL_PENDING_RESET_PER_SEC;
513
514         pending_cnt = pending_total;
515
516         /* Flush all processes that are inside hl_open */
517         mutex_lock(&hdev->fd_open_cnt_lock);
518
519         while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
520
521                 pending_cnt--;
522
523                 dev_info(hdev->dev,
524                         "Can't HARD reset, waiting for user to close FD\n");
525                 ssleep(1);
526         }
527
528         if (atomic_read(&hdev->fd_open_cnt)) {
529                 task = get_pid_task(hdev->user_ctx->hpriv->taskpid,
530                                         PIDTYPE_PID);
531                 if (task) {
532                         dev_info(hdev->dev, "Killing user processes\n");
533                         send_sig(SIGKILL, task, 1);
534                         msleep(100);
535
536                         put_task_struct(task);
537                 }
538         }
539
540         pending_cnt = pending_total;
541
542         while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
543
544                 pending_cnt--;
545
546                 ssleep(1);
547         }
548
549         if (atomic_read(&hdev->fd_open_cnt))
550                 dev_crit(hdev->dev,
551                         "Going to hard reset with open user contexts\n");
552
553         mutex_unlock(&hdev->fd_open_cnt_lock);
554
555         hl_device_reset(hdev, true, true);
556
557         kfree(device_reset_work);
558 }
559
560 /*
561  * hl_device_reset - reset the device
562  *
563  * @hdev: pointer to habanalabs device structure
564  * @hard_reset: should we do hard reset to all engines or just reset the
565  *              compute/dma engines
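 * @from_hard_reset_thread: is the caller the dedicated hard-reset work
 *                          (hl_device_hard_reset_pending)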
566  *
567  * Block future CS and wait for pending CS to be enqueued
568  * Call ASIC H/W fini
569  * Flush all completions
570  * Re-initialize all internal data structures
571  * Call ASIC H/W init, late_init
572  * Test queues
573  * Enable device
574  *
575  * Returns 0 for success or an error on failure.
576  */
577 int hl_device_reset(struct hl_device *hdev, bool hard_reset,
578                         bool from_hard_reset_thread)
579 {
580         int i, rc;
581
582         if (!hdev->init_done) {
583                 dev_err(hdev->dev,
584                         "Can't reset before initialization is done\n");
585                 return 0;
586         }
587
588         /*
589          * Prevent concurrency in this function - only one reset should be
590          * done at any given time. We only need to perform this if we didn't
591          * get here from the dedicated hard-reset thread
592          */
593         if (!from_hard_reset_thread) {
594                 /* Block future CS/VM/JOB completion operations */
595                 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
596                 if (rc)
597                         return 0;
598
599                 /* This also blocks future CS/VM/JOB completion operations */
600                 hdev->disabled = true;
601
602                 /*
603                  * Flush anyone that is inside the critical section of enqueueing
604                  * jobs to the H/W
605                  */
606                 hdev->asic_funcs->hw_queues_lock(hdev);
607                 hdev->asic_funcs->hw_queues_unlock(hdev);
608
609                 dev_err(hdev->dev, "Going to RESET device!\n");
610         }
611
612 again:
613         if ((hard_reset) && (!from_hard_reset_thread)) {
614                 struct hl_device_reset_work *device_reset_work;
615
616                 if (!hdev->pdev) {
617                         dev_err(hdev->dev,
618                                 "Reset action is NOT supported in simulator\n");
619                         rc = -EINVAL;
620                         goto out_err;
621                 }
622
623                 hdev->hard_reset_pending = true;
624
625                 device_reset_work = kzalloc(sizeof(*device_reset_work),
626                                                 GFP_ATOMIC);
627                 if (!device_reset_work) {
628                         rc = -ENOMEM;
629                         goto out_err;
630                 }
631
632                 /*
633                  * Because the reset function can't run from interrupt or
634                  * from heartbeat work, we need to call the reset function
635                  * from a dedicated work
636                  */
637                 INIT_WORK(&device_reset_work->reset_work,
638                                 hl_device_hard_reset_pending);
639                 device_reset_work->hdev = hdev;
640                 schedule_work(&device_reset_work->reset_work);
641
642                 return 0;
643         }
644
645         if (hard_reset) {
646                 device_late_fini(hdev);
647
648                 /*
649                  * Now that the heartbeat thread is closed, flush processes
650                  * which are sending messages to CPU
651                  */
652                 mutex_lock(&hdev->send_cpu_message_lock);
653                 mutex_unlock(&hdev->send_cpu_message_lock);
654         }
655
656         /*
657          * Halt the engines and disable interrupts so we won't get any more
658          * completions from H/W and we won't have any accesses from the
659          * H/W to the host machine
660          */
661         hdev->asic_funcs->halt_engines(hdev, hard_reset);
662
663         /* Go over all the queues, release all CS and their jobs */
664         hl_cs_rollback_all(hdev);
665
666         if (hard_reset) {
667                 /* Release kernel context */
668                 if (hl_ctx_put(hdev->kernel_ctx) != 1) {
669                         dev_err(hdev->dev,
670                                 "kernel ctx is alive during hard reset\n");
671                         rc = -EBUSY;
672                         goto out_err;
673                 }
674
675                 hdev->kernel_ctx = NULL;
676         }
677
678         /* Reset the H/W. It will be in idle state after this returns */
679         hdev->asic_funcs->hw_fini(hdev, hard_reset);
680
681         if (hard_reset) {
682                 hl_vm_fini(hdev);
683                 hl_eq_reset(hdev, &hdev->event_queue);
684         }
685
686         /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
687         hl_hw_queue_reset(hdev, hard_reset);
688         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
689                 hl_cq_reset(hdev, &hdev->completion_queue[i]);
690
691         /* Make sure the setup phase for the user context will run again */
692         if (hdev->user_ctx) {
693                 atomic_set(&hdev->user_ctx->thread_restore_token, 1);
694                 hdev->user_ctx->thread_restore_wait_token = 0;
695         }
696
697         /* Finished tear-down, starting to re-initialize */
698
699         if (hard_reset) {
700                 hdev->device_cpu_disabled = false;
701
702                 /* Allocate the kernel context */
703                 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
704                                                 GFP_KERNEL);
705                 if (!hdev->kernel_ctx) {
706                         rc = -ENOMEM;
707                         goto out_err;
708                 }
709
710                 hdev->user_ctx = NULL;
711
712                 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
713                 if (rc) {
714                         dev_err(hdev->dev,
715                                 "failed to init kernel ctx in hard reset\n");
716                         kfree(hdev->kernel_ctx);
717                         hdev->kernel_ctx = NULL;
718                         goto out_err;
719                 }
720         }
721
722         rc = hdev->asic_funcs->hw_init(hdev);
723         if (rc) {
724                 dev_err(hdev->dev,
725                         "failed to initialize the H/W after reset\n");
726                 goto out_err;
727         }
728
729         hdev->disabled = false;
730
731         /* Check that the communication with the device is working */
732         rc = hdev->asic_funcs->test_queues(hdev);
733         if (rc) {
734                 dev_err(hdev->dev,
735                         "Failed to detect if device is alive after reset\n");
736                 goto out_err;
737         }
738
739         if (hard_reset) {
740                 rc = device_late_init(hdev);
741                 if (rc) {
742                         dev_err(hdev->dev,
743                                 "Failed late init after hard reset\n");
744                         goto out_err;
745                 }
746
747                 rc = hl_vm_init(hdev);
748                 if (rc) {
749                         dev_err(hdev->dev,
750                                 "Failed to init memory module after hard reset\n");
751                         goto out_err;
752                 }
753
754                 hl_set_max_power(hdev, hdev->max_power);
755
756                 hdev->hard_reset_pending = false;
757         } else {
758                 rc = hdev->asic_funcs->soft_reset_late_init(hdev);
759                 if (rc) {
760                         dev_err(hdev->dev,
761                                 "Failed late init after soft reset\n");
762                         goto out_err;
763                 }
764         }
765
766         atomic_set(&hdev->in_reset, 0);
767
768         if (hard_reset)
769                 hdev->hard_reset_cnt++;
770         else
771                 hdev->soft_reset_cnt++;
772
773         return 0;
774
775 out_err:
776         hdev->disabled = true;
777
778         if (hard_reset) {
779                 dev_err(hdev->dev,
780                         "Failed to reset! Device is NOT usable\n");
781                 hdev->hard_reset_cnt++;
782         } else {
783                 dev_err(hdev->dev,
784                         "Failed to do soft-reset, trying hard reset\n");
785                 hdev->soft_reset_cnt++;
786                 hard_reset = true;
787                 goto again;
788         }
789
790         atomic_set(&hdev->in_reset, 0);
791
792         return rc;
793 }
794
795 /*
796  * hl_device_init - main initialization function for habanalabs device
797  *
798  * @hdev: pointer to habanalabs device structure
799  *
800  * Allocate an id for the device, do early initialization and then call the
801  * ASIC specific initialization functions. Finally, create the cdev and the
802  * Linux device to expose it to the user
803  */
804 int hl_device_init(struct hl_device *hdev, struct class *hclass)
805 {
806         int i, rc, cq_ready_cnt;
807
808         /* Create device */
809         rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
810
811         if (rc)
812                 goto out_disabled;
813
814         /* Initialize ASIC function pointers and perform early init */
815         rc = device_early_init(hdev);
816         if (rc)
817                 goto release_device;
818
819         /*
820          * Start calling ASIC initialization. First S/W then H/W and finally
821          * late init
822          */
823         rc = hdev->asic_funcs->sw_init(hdev);
824         if (rc)
825                 goto early_fini;
826
827         /*
828          * Initialize the H/W queues. Must be done before hw_init, because
829          * there the addresses of the kernel queue are being written to the
830          * registers of the device
831          */
832         rc = hl_hw_queues_create(hdev);
833         if (rc) {
834                 dev_err(hdev->dev, "failed to initialize kernel queues\n");
835                 goto sw_fini;
836         }
837
838         /*
839          * Initialize the completion queues. Must be done before hw_init,
840          * because there the addresses of the completion queues are being
841          * passed as arguments to request_irq
842          */
843         hdev->completion_queue =
844                         kcalloc(hdev->asic_prop.completion_queues_count,
845                                 sizeof(*hdev->completion_queue), GFP_KERNEL);
846
847         if (!hdev->completion_queue) {
848                 dev_err(hdev->dev, "failed to allocate completion queues\n");
849                 rc = -ENOMEM;
850                 goto hw_queues_destroy;
851         }
852
853         for (i = 0, cq_ready_cnt = 0;
854                         i < hdev->asic_prop.completion_queues_count;
855                         i++, cq_ready_cnt++) {
856                 rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
857                 if (rc) {
858                         dev_err(hdev->dev,
859                                 "failed to initialize completion queue\n");
860                         goto cq_fini;
861                 }
862         }
863
864         /*
865          * Initialize the event queue. Must be done before hw_init,
866          * because there the address of the event queue is being
867          * passed as argument to request_irq
868          */
869         rc = hl_eq_init(hdev, &hdev->event_queue);
870         if (rc) {
871                 dev_err(hdev->dev, "failed to initialize event queue\n");
872                 goto cq_fini;
873         }
874
875         /* Allocate the kernel context */
876         hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
877         if (!hdev->kernel_ctx) {
878                 rc = -ENOMEM;
879                 goto eq_fini;
880         }
881
882         hdev->user_ctx = NULL;
883
884         rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
885         if (rc) {
886                 dev_err(hdev->dev, "failed to initialize kernel context\n");
887                 goto free_ctx;
888         }
889
890         rc = hl_cb_pool_init(hdev);
891         if (rc) {
892                 dev_err(hdev->dev, "failed to initialize CB pool\n");
893                 goto release_ctx;
894         }
895
896         rc = hl_sysfs_init(hdev);
897         if (rc) {
898                 dev_err(hdev->dev, "failed to initialize sysfs\n");
899                 goto free_cb_pool;
900         }
901
902         hl_debugfs_add_device(hdev);
903
904         if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
905                 dev_info(hdev->dev,
906                         "H/W state is dirty, must reset before initializing\n");
907                 hdev->asic_funcs->hw_fini(hdev, true);
908         }
909
910         rc = hdev->asic_funcs->hw_init(hdev);
911         if (rc) {
912                 dev_err(hdev->dev, "failed to initialize the H/W\n");
913                 rc = 0;
914                 goto out_disabled;
915         }
916
917         hdev->disabled = false;
918
919         /* Check that the communication with the device is working */
920         rc = hdev->asic_funcs->test_queues(hdev);
921         if (rc) {
922                 dev_err(hdev->dev, "Failed to detect if device is alive\n");
923                 rc = 0;
924                 goto out_disabled;
925         }
926
927         /* After test_queues, KMD can start sending messages to device CPU */
928
929         rc = device_late_init(hdev);
930         if (rc) {
931                 dev_err(hdev->dev, "Failed late initialization\n");
932                 rc = 0;
933                 goto out_disabled;
934         }
935
936         dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
937                 hdev->asic_name,
938                 hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
939
940         rc = hl_vm_init(hdev);
941         if (rc) {
942                 dev_err(hdev->dev, "Failed to initialize memory module\n");
943                 rc = 0;
944                 goto out_disabled;
945         }
946
947         /*
948          * hl_hwmon_init must be called after device_late_init, because only
949          * there we get the information from the device about which
950          * hwmon-related sensors the device supports
951          */
952         rc = hl_hwmon_init(hdev);
953         if (rc) {
954                 dev_err(hdev->dev, "Failed to initialize hwmon\n");
955                 rc = 0;
956                 goto out_disabled;
957         }
958
959         dev_notice(hdev->dev,
960                 "Successfully added device to habanalabs driver\n");
961
962         hdev->init_done = true;
963
964         return 0;
965
966 free_cb_pool:
967         hl_cb_pool_fini(hdev);
968 release_ctx:
969         if (hl_ctx_put(hdev->kernel_ctx) != 1)
970                 dev_err(hdev->dev,
971                         "kernel ctx is still alive on initialization failure\n");
972 free_ctx:
973         kfree(hdev->kernel_ctx);
974 eq_fini:
975         hl_eq_fini(hdev, &hdev->event_queue);
976 cq_fini:
977         for (i = 0 ; i < cq_ready_cnt ; i++)
978                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
979         kfree(hdev->completion_queue);
980 hw_queues_destroy:
981         hl_hw_queues_destroy(hdev);
982 sw_fini:
983         hdev->asic_funcs->sw_fini(hdev);
984 early_fini:
985         device_early_fini(hdev);
986 release_device:
987         device_destroy(hclass, hdev->dev->devt);
988         cdev_del(&hdev->cdev);
989 out_disabled:
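        /*
         * Note: several paths above jump here with rc already cleared to 0,
         * so the probe still returns success and the (disabled) device node
         * stays exposed to the user.
         */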
990         hdev->disabled = true;
991         if (hdev->pdev)
992                 dev_err(&hdev->pdev->dev,
993                         "Failed to initialize hl%d. Device is NOT usable !\n",
994                         hdev->id);
995         else
996                 pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
997                         hdev->id);
998
999         return rc;
1000 }
1001
1002 /*
1003  * hl_device_fini - main tear-down function for habanalabs device
1004  *
1005  * @hdev: pointer to habanalabs device structure
1006  *
1007  * Destroy the device, call ASIC fini functions and release the id
1008  */
1009 void hl_device_fini(struct hl_device *hdev)
1010 {
1011         int i, rc;
1012         ktime_t timeout;
1013
1014         dev_info(hdev->dev, "Removing device\n");
1015
1016         /*
1017          * This function is competing with the reset function, so try to
1018          * take the reset atomic, and if we are already in the middle of a reset,
1019          * wait until the reset function is finished. The reset function is designed
1020          * to always finish (could take up to a few seconds in worst case).
1021          */
1022
1023         timeout = ktime_add_us(ktime_get(),
1024                                 HL_PENDING_RESET_PER_SEC * 1000 * 1000 * 4);
1025         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1026         while (rc) {
1027                 usleep_range(50, 200);
1028                 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1029                 if (ktime_compare(ktime_get(), timeout) > 0) {
1030                         WARN(1, "Failed to remove device because reset function did not finish\n");
1031                         return;
1032                 }
1033         }
1034
1035         /* Mark device as disabled */
1036         hdev->disabled = true;
1037
1038         hl_hwmon_fini(hdev);
1039
1040         device_late_fini(hdev);
1041
1042         hl_debugfs_remove_device(hdev);
1043
1044         hl_sysfs_fini(hdev);
1045
1046         /*
1047          * Halt the engines and disable interrupts so we won't get any more
1048          * completions from H/W and we won't have any accesses from the
1049          * H/W to the host machine
1050          */
1051         hdev->asic_funcs->halt_engines(hdev, true);
1052
1053         /* Go over all the queues, release all CS and their jobs */
1054         hl_cs_rollback_all(hdev);
1055
1056         hl_cb_pool_fini(hdev);
1057
1058         /* Release kernel context */
1059         if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
1060                 dev_err(hdev->dev, "kernel ctx is still alive\n");
1061
1062         /* Reset the H/W. It will be in idle state after this returns */
1063         hdev->asic_funcs->hw_fini(hdev, true);
1064
1065         hl_vm_fini(hdev);
1066
1067         hl_eq_fini(hdev, &hdev->event_queue);
1068
1069         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1070                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1071         kfree(hdev->completion_queue);
1072
1073         hl_hw_queues_destroy(hdev);
1074
1075         /* Call ASIC S/W finalize function */
1076         hdev->asic_funcs->sw_fini(hdev);
1077
1078         device_early_fini(hdev);
1079
1080         /* Hide device from user */
1081         device_destroy(hdev->dev->class, hdev->dev->devt);
1082         cdev_del(&hdev->cdev);
1083
1084         pr_info("removed device successfully\n");
1085 }
1086
1087 /*
1088  * hl_poll_timeout_memory - Periodically poll a host memory address
1089  *                              until it is not zero or a timeout occurs
1090  * @hdev: pointer to habanalabs device structure
1091  * @addr: Address to poll
1092  * @timeout_us: timeout in us
1093  * @val: Variable to read the value into
1094  *
1095  * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
1096  * case, the last read value at @addr is stored in @val. Must not
1097  * be called from atomic context since the function may sleep.
1098  *
1099  * The function sleeps for up to 100us between reads of @addr, until the
1100  * value becomes non-zero or timeout_us expires
1101  */
1102 int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
1103                                 u32 timeout_us, u32 *val)
1104 {
1105         /*
1106          * address in this function points always to a memory location in the
1107          * host's (server's) memory. That location is updated asynchronously
1108          * either by the direct access of the device or by another core
1109          */
1110         u32 *paddr = (u32 *) (uintptr_t) addr;
1111         ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
1112
1113         might_sleep();
1114
1115         for (;;) {
1116                 /*
1117                  * Flush CPU read/write buffers to make sure we read updates
1118                  * done by other cores or by the device
1119                  */
1120                 mb();
1121                 *val = *paddr;
1122                 if (*val)
1123                         break;
1124                 if (ktime_compare(ktime_get(), timeout) > 0) {
1125                         *val = *paddr;
1126                         break;
1127                 }
1128                 usleep_range((100 >> 2) + 1, 100);
1129         }
1130
1131         return *val ? 0 : -ETIMEDOUT;
1132 }
1133
1134 /*
1135  * hl_poll_timeout_device_memory - Periodically poll a device memory address
1136  *                                until it is not zero or a timeout occurs
1137  * @hdev: pointer to habanalabs device structure
1138  * @addr: Device address to poll
1139  * @timeout_us: timeout in us
1140  * @val: Variable to read the value into
1141  *
1142  * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
1143  * case, the last read value at @addr is stored in @val. Must not
1144  * be called from atomic context since the function may sleep.
1145  *
1146  * The function sleeps for up to 100us between reads of @addr, until the
1147  * value becomes non-zero or timeout_us expires
1148  */
1149 int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
1150                                 u32 timeout_us, u32 *val)
1151 {
1152         ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
1153
1154         might_sleep();
1155
1156         for (;;) {
1157                 *val = readl(addr);
1158                 if (*val)
1159                         break;
1160                 if (ktime_compare(ktime_get(), timeout) > 0) {
1161                         *val = readl(addr);
1162                         break;
1163                 }
1164                 usleep_range((100 >> 2) + 1, 100);
1165         }
1166
1167         return *val ? 0 : -ETIMEDOUT;
1168 }
1169
1170 /*
1171  * MMIO register access helper functions.
1172  */
1173
1174 /*
1175  * hl_rreg - Read an MMIO register
1176  *
1177  * @hdev: pointer to habanalabs device structure
1178  * @reg: MMIO register offset (in bytes)
1179  *
1180  * Returns the value of the MMIO register we are asked to read
1181  *
1182  */
1183 inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
1184 {
1185         return readl(hdev->rmmio + reg);
1186 }
1187
1188 /*
1189  * hl_wreg - Write to an MMIO register
1190  *
1191  * @hdev: pointer to habanalabs device structure
1192  * @reg: MMIO register offset (in bytes)
1193  * @val: 32-bit value
1194  *
1195  * Writes the 32-bit value into the MMIO register
1196  *
1197  */
1198 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
1199 {
1200         writel(val, hdev->rmmio + reg);
1201 }