drivers/misc/habanalabs/common/device.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Copyright 2016-2019 HabanaLabs, Ltd.
   5  * All Rights Reserved.
   6  */
   7
   8 #define pr_fmt(fmt)                     "habanalabs: " fmt
   9
  10 #include "habanalabs.h"
  11
  12 #include <linux/pci.h>
  13 #include <linux/hwmon.h>
  14 #include <uapi/misc/habanalabs.h>
  15
  16 enum hl_device_status hl_device_status(struct hl_device *hdev)
  17 {
  18         enum hl_device_status status;
  19
  20         if (atomic_read(&hdev->in_reset))
  21                 status = HL_DEVICE_STATUS_IN_RESET;
  22         else if (hdev->needs_reset)
  23                 status = HL_DEVICE_STATUS_NEEDS_RESET;
  24         else if (hdev->disabled)
  25                 status = HL_DEVICE_STATUS_MALFUNCTION;
  26         else
  27                 status = HL_DEVICE_STATUS_OPERATIONAL;
  28
  29         return status;
  30 }
  31
  32 bool hl_device_operational(struct hl_device *hdev,
  33                 enum hl_device_status *status)
  34 {
  35         enum hl_device_status current_status;
  36
  37         current_status = hl_device_status(hdev);
  38         if (status)
  39                 *status = current_status;
  40
  41         switch (current_status) {
  42         case HL_DEVICE_STATUS_IN_RESET:
  43         case HL_DEVICE_STATUS_MALFUNCTION:
  44         case HL_DEVICE_STATUS_NEEDS_RESET:
  45                 return false;
  46         case HL_DEVICE_STATUS_OPERATIONAL:
  47         default:
  48                 return true;
  49         }
  50 }
  51
  52 static void hpriv_release(struct kref *ref)
  53 {
  54         struct hl_fpriv *hpriv;
  55         struct hl_device *hdev;
  56
  57         hpriv = container_of(ref, struct hl_fpriv, refcount);
  58
  59         hdev = hpriv->hdev;
  60
  61         put_pid(hpriv->taskpid);
  62
  63         hl_debugfs_remove_file(hpriv);
  64
  65         mutex_destroy(&hpriv->restore_phase_mutex);
  66
  67         mutex_lock(&hdev->fpriv_list_lock);
  68         list_del(&hpriv->dev_node);
  69         hdev->compute_ctx = NULL;
  70         mutex_unlock(&hdev->fpriv_list_lock);
  71
  72         kfree(hpriv);
  73 }
  74
  75 void hl_hpriv_get(struct hl_fpriv *hpriv)
  76 {
  77         kref_get(&hpriv->refcount);
  78 }
  79
  80 void hl_hpriv_put(struct hl_fpriv *hpriv)
  81 {
  82         kref_put(&hpriv->refcount, hpriv_release);
  83 }
  84
  85 /*
  86  * hl_device_release - release function for habanalabs device
  87  *
  88  * @inode: pointer to inode structure
  89  * @filp: pointer to file structure
  90  *
  91  * Called when process closes an habanalabs device
  92  */
  93 static int hl_device_release(struct inode *inode, struct file *filp)
  94 {
  95         struct hl_fpriv *hpriv = filp->private_data;
  96
  97         hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
  98         hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
  99
 100         filp->private_data = NULL;
 101
 102         hl_hpriv_put(hpriv);
 103
 104         return 0;
 105 }
 106
 107 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 108 {
 109         struct hl_fpriv *hpriv = filp->private_data;
 110         struct hl_device *hdev;
 111
 112         filp->private_data = NULL;
 113
 114         hdev = hpriv->hdev;
 115
 116         mutex_lock(&hdev->fpriv_list_lock);
 117         list_del(&hpriv->dev_node);
 118         mutex_unlock(&hdev->fpriv_list_lock);
 119
 120         kfree(hpriv);
 121
 122         return 0;
 123 }
 124
 125 /*
 126  * hl_mmap - mmap function for habanalabs device
 127  *
 128  * @*filp: pointer to file structure
 129  * @*vma: pointer to vm_area_struct of the process
 130  *
 131  * Called when process does an mmap on habanalabs device. Call the device's mmap
 132  * function at the end of the common code.
 133  */
 134 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 135 {
 136         struct hl_fpriv *hpriv = filp->private_data;
 137         unsigned long vm_pgoff;
 138
 139         vm_pgoff = vma->vm_pgoff;
 140         vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
 141
 142         switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
 143         case HL_MMAP_TYPE_CB:
 144                 return hl_cb_mmap(hpriv, vma);
 145
 146         case HL_MMAP_TYPE_BLOCK:
 147                 return hl_hw_block_mmap(hpriv, vma);
 148         }
 149
 150         return -EINVAL;
 151 }
 152
 153 static const struct file_operations hl_ops = {
 154         .owner = THIS_MODULE,
 155         .open = hl_device_open,
 156         .release = hl_device_release,
 157         .mmap = hl_mmap,
 158         .unlocked_ioctl = hl_ioctl,
 159         .compat_ioctl = hl_ioctl
 160 };
 161
 162 static const struct file_operations hl_ctrl_ops = {
 163         .owner = THIS_MODULE,
 164         .open = hl_device_open_ctrl,
 165         .release = hl_device_release_ctrl,
 166         .unlocked_ioctl = hl_ioctl_control,
 167         .compat_ioctl = hl_ioctl_control
 168 };
 169
 170 static void device_release_func(struct device *dev)
 171 {
 172         kfree(dev);
 173 }
 174
 175 /*
 176  * device_init_cdev - Initialize cdev and device for habanalabs device
 177  *
 178  * @hdev: pointer to habanalabs device structure
 179  * @hclass: pointer to the class object of the device
 180  * @minor: minor number of the specific device
 181  * @fpos: file operations to install for this device
 182  * @name: name of the device as it will appear in the filesystem
 183  * @cdev: pointer to the char device object that will be initialized
 184  * @dev: pointer to the device object that will be initialized
 185  *
 186  * Initialize a cdev and a Linux device for habanalabs's device.
 187  */
 188 static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
 189                                 int minor, const struct file_operations *fops,
 190                                 char *name, struct cdev *cdev,
 191                                 struct device **dev)
 192 {
 193         cdev_init(cdev, fops);
 194         cdev->owner = THIS_MODULE;
 195
 196         *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
 197         if (!*dev)
 198                 return -ENOMEM;
 199
 200         device_initialize(*dev);
 201         (*dev)->devt = MKDEV(hdev->major, minor);
 202         (*dev)->class = hclass;
 203         (*dev)->release = device_release_func;
 204         dev_set_drvdata(*dev, hdev);
 205         dev_set_name(*dev, "%s", name);
 206
 207         return 0;
 208 }
 209
 210 static int device_cdev_sysfs_add(struct hl_device *hdev)
 211 {
 212         int rc;
 213
 214         rc = cdev_device_add(&hdev->cdev, hdev->dev);
 215         if (rc) {
 216                 dev_err(hdev->dev,
 217                         "failed to add a char device to the system\n");
 218                 return rc;
 219         }
 220
 221         rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
 222         if (rc) {
 223                 dev_err(hdev->dev,
 224                         "failed to add a control char device to the system\n");
 225                 goto delete_cdev_device;
 226         }
 227
 228         /* hl_sysfs_init() must be done after adding the device to the system */
 229         rc = hl_sysfs_init(hdev);
 230         if (rc) {
 231                 dev_err(hdev->dev, "failed to initialize sysfs\n");
 232                 goto delete_ctrl_cdev_device;
 233         }
 234
 235         hdev->cdev_sysfs_created = true;
 236
 237         return 0;
 238
 239 delete_ctrl_cdev_device:
 240         cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 241 delete_cdev_device:
 242         cdev_device_del(&hdev->cdev, hdev->dev);
 243         return rc;
 244 }
 245
 246 static void device_cdev_sysfs_del(struct hl_device *hdev)
 247 {
 248         if (!hdev->cdev_sysfs_created)
 249                 goto put_devices;
 250
 251         hl_sysfs_fini(hdev);
 252         cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 253         cdev_device_del(&hdev->cdev, hdev->dev);
 254
 255 put_devices:
 256         put_device(hdev->dev);
 257         put_device(hdev->dev_ctrl);
 258 }
 259
 260 static void device_hard_reset_pending(struct work_struct *work)
 261 {
 262         struct hl_device_reset_work *device_reset_work =
 263                 container_of(work, struct hl_device_reset_work,
 264                                 reset_work.work);
 265         struct hl_device *hdev = device_reset_work->hdev;
 266         int rc;
 267
 268         rc = hl_device_reset(hdev, true, true);
 269         if ((rc == -EBUSY) && !hdev->device_fini_pending) {
 270                 dev_info(hdev->dev,
 271                         "Could not reset device. will try again in %u seconds",
 272                         HL_PENDING_RESET_PER_SEC);
 273
 274                 queue_delayed_work(device_reset_work->wq,
 275                         &device_reset_work->reset_work,
 276                         msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
 277         }
 278 }
 279
 280 /*
 281  * device_early_init - do some early initialization for the habanalabs device
 282  *
 283  * @hdev: pointer to habanalabs device structure
 284  *
 285  * Install the relevant function pointers and call the early_init function,
 286  * if such a function exists
 287  */
 288 static int device_early_init(struct hl_device *hdev)
 289 {
 290         int i, rc;
 291         char workq_name[32];
 292
 293         switch (hdev->asic_type) {
 294         case ASIC_GOYA:
 295                 goya_set_asic_funcs(hdev);
 296                 strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
 297                 break;
 298         case ASIC_GAUDI:
 299                 gaudi_set_asic_funcs(hdev);
 300                 sprintf(hdev->asic_name, "GAUDI");
 301                 break;
 302         default:
 303                 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 304                         hdev->asic_type);
 305                 return -EINVAL;
 306         }
 307
 308         rc = hdev->asic_funcs->early_init(hdev);
 309         if (rc)
 310                 return rc;
 311
 312         rc = hl_asid_init(hdev);
 313         if (rc)
 314                 goto early_fini;
 315
 316         if (hdev->asic_prop.completion_queues_count) {
 317                 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
 318                                 sizeof(*hdev->cq_wq),
 319                                 GFP_ATOMIC);
 320                 if (!hdev->cq_wq) {
 321                         rc = -ENOMEM;
 322                         goto asid_fini;
 323                 }
 324         }
 325
 326         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
 327                 snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
 328                 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
 329                 if (hdev->cq_wq[i] == NULL) {
 330                         dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
 331                         rc = -ENOMEM;
 332                         goto free_cq_wq;
 333                 }
 334         }
 335
 336         hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
 337         if (hdev->eq_wq == NULL) {
 338                 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
 339                 rc = -ENOMEM;
 340                 goto free_cq_wq;
 341         }
 342
 343         hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
 344                                         GFP_KERNEL);
 345         if (!hdev->hl_chip_info) {
 346                 rc = -ENOMEM;
 347                 goto free_eq_wq;
 348         }
 349
 350         hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
 351                                         sizeof(struct hl_device_idle_busy_ts),
 352                                         (GFP_KERNEL | __GFP_ZERO));
 353         if (!hdev->idle_busy_ts_arr) {
 354                 rc = -ENOMEM;
 355                 goto free_chip_info;
 356         }
 357
 358         rc = hl_mmu_if_set_funcs(hdev);
 359         if (rc)
 360                 goto free_idle_busy_ts_arr;
 361
 362         hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 363
 364         hdev->device_reset_work.wq =
 365                         create_singlethread_workqueue("hl_device_reset");
 366         if (!hdev->device_reset_work.wq) {
 367                 rc = -ENOMEM;
 368                 dev_err(hdev->dev, "Failed to create device reset WQ\n");
 369                 goto free_cb_mgr;
 370         }
 371
 372         INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
 373                         device_hard_reset_pending);
 374         hdev->device_reset_work.hdev = hdev;
 375         hdev->device_fini_pending = 0;
 376
 377         mutex_init(&hdev->send_cpu_message_lock);
 378         mutex_init(&hdev->debug_lock);
 379         INIT_LIST_HEAD(&hdev->cs_mirror_list);
 380         spin_lock_init(&hdev->cs_mirror_lock);
 381         INIT_LIST_HEAD(&hdev->fpriv_list);
 382         mutex_init(&hdev->fpriv_list_lock);
 383         atomic_set(&hdev->in_reset, 0);
 384
 385         return 0;
 386
 387 free_cb_mgr:
 388         hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 389 free_idle_busy_ts_arr:
 390         kfree(hdev->idle_busy_ts_arr);
 391 free_chip_info:
 392         kfree(hdev->hl_chip_info);
 393 free_eq_wq:
 394         destroy_workqueue(hdev->eq_wq);
 395 free_cq_wq:
 396         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 397                 if (hdev->cq_wq[i])
 398                         destroy_workqueue(hdev->cq_wq[i]);
 399         kfree(hdev->cq_wq);
 400 asid_fini:
 401         hl_asid_fini(hdev);
 402 early_fini:
 403         if (hdev->asic_funcs->early_fini)
 404                 hdev->asic_funcs->early_fini(hdev);
 405
 406         return rc;
 407 }
 408
 409 /*
 410  * device_early_fini - finalize all that was done in device_early_init
 411  *
 412  * @hdev: pointer to habanalabs device structure
 413  *
 414  */
 415 static void device_early_fini(struct hl_device *hdev)
 416 {
 417         int i;
 418
 419         mutex_destroy(&hdev->debug_lock);
 420         mutex_destroy(&hdev->send_cpu_message_lock);
 421
 422         mutex_destroy(&hdev->fpriv_list_lock);
 423
 424         hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 425
 426         kfree(hdev->idle_busy_ts_arr);
 427         kfree(hdev->hl_chip_info);
 428
 429         destroy_workqueue(hdev->eq_wq);
 430         destroy_workqueue(hdev->device_reset_work.wq);
 431
 432         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 433                 destroy_workqueue(hdev->cq_wq[i]);
 434         kfree(hdev->cq_wq);
 435
 436         hl_asid_fini(hdev);
 437
 438         if (hdev->asic_funcs->early_fini)
 439                 hdev->asic_funcs->early_fini(hdev);
 440 }
 441
 442 static void set_freq_to_low_job(struct work_struct *work)
 443 {
 444         struct hl_device *hdev = container_of(work, struct hl_device,
 445                                                 work_freq.work);
 446
 447         mutex_lock(&hdev->fpriv_list_lock);
 448
 449         if (!hdev->compute_ctx)
 450                 hl_device_set_frequency(hdev, PLL_LOW);
 451
 452         mutex_unlock(&hdev->fpriv_list_lock);
 453
 454         schedule_delayed_work(&hdev->work_freq,
 455                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 456 }
 457
 458 static void hl_device_heartbeat(struct work_struct *work)
 459 {
 460         struct hl_device *hdev = container_of(work, struct hl_device,
 461                                                 work_heartbeat.work);
 462
 463         if (!hl_device_operational(hdev, NULL))
 464                 goto reschedule;
 465
 466         if (!hdev->asic_funcs->send_heartbeat(hdev))
 467                 goto reschedule;
 468
 469         dev_err(hdev->dev, "Device heartbeat failed!\n");
 470         hl_device_reset(hdev, true, false);
 471
 472         return;
 473
 474 reschedule:
 475         schedule_delayed_work(&hdev->work_heartbeat,
 476                         usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 477 }
 478
 479 /*
 480  * device_late_init - do late stuff initialization for the habanalabs device
 481  *
 482  * @hdev: pointer to habanalabs device structure
 483  *
 484  * Do stuff that either needs the device H/W queues to be active or needs
 485  * to happen after all the rest of the initialization is finished
 486  */
 487 static int device_late_init(struct hl_device *hdev)
 488 {
 489         int rc;
 490
 491         if (hdev->asic_funcs->late_init) {
 492                 rc = hdev->asic_funcs->late_init(hdev);
 493                 if (rc) {
 494                         dev_err(hdev->dev,
 495                                 "failed late initialization for the H/W\n");
 496                         return rc;
 497                 }
 498         }
 499
 500         hdev->high_pll = hdev->asic_prop.high_pll;
 501
 502         /* force setting to low frequency */
 503         hdev->curr_pll_profile = PLL_LOW;
 504
 505         if (hdev->pm_mng_profile == PM_AUTO)
 506                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
 507         else
 508                 hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
 509
 510         INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
 511         schedule_delayed_work(&hdev->work_freq,
 512         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 513
 514         if (hdev->heartbeat) {
 515                 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
 516                 schedule_delayed_work(&hdev->work_heartbeat,
 517                                 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 518         }
 519
 520         hdev->late_init_done = true;
 521
 522         return 0;
 523 }
 524
 525 /*
 526  * device_late_fini - finalize all that was done in device_late_init
 527  *
 528  * @hdev: pointer to habanalabs device structure
 529  *
 530  */
 531 static void device_late_fini(struct hl_device *hdev)
 532 {
 533         if (!hdev->late_init_done)
 534                 return;
 535
 536         cancel_delayed_work_sync(&hdev->work_freq);
 537         if (hdev->heartbeat)
 538                 cancel_delayed_work_sync(&hdev->work_heartbeat);
 539
 540         if (hdev->asic_funcs->late_fini)
 541                 hdev->asic_funcs->late_fini(hdev);
 542
 543         hdev->late_init_done = false;
 544 }
 545
 546 uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
 547 {
 548         struct hl_device_idle_busy_ts *ts;
 549         ktime_t zero_ktime, curr = ktime_get();
 550         u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
 551         s64 period_us, last_start_us, last_end_us, last_busy_time_us,
 552                 total_busy_time_us = 0, total_busy_time_ms;
 553
 554         zero_ktime = ktime_set(0, 0);
 555         period_us = period_ms * USEC_PER_MSEC;
 556         ts = &hdev->idle_busy_ts_arr[last_index];
 557
 558         /* check case that device is currently in idle */
 559         if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
 560                         !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
 561
 562                 last_index--;
 563                 /* Handle case idle_busy_ts_idx was 0 */
 564                 if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
 565                         last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
 566
 567                 ts = &hdev->idle_busy_ts_arr[last_index];
 568         }
 569
 570         while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
 571                 /* Check if we are in last sample case. i.e. if the sample
 572                  * begun before the sampling period. This could be a real
 573                  * sample or 0 so need to handle both cases
 574                  */
 575                 last_start_us = ktime_to_us(
 576                                 ktime_sub(curr, ts->idle_to_busy_ts));
 577
 578                 if (last_start_us > period_us) {
 579
 580                         /* First check two cases:
 581                          * 1. If the device is currently busy
 582                          * 2. If the device was idle during the whole sampling
 583                          *    period
 584                          */
 585
 586                         if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
 587                                 /* Check if the device is currently busy */
 588                                 if (ktime_compare(ts->idle_to_busy_ts,
 589                                                 zero_ktime))
 590                                         return 100;
 591
 592                                 /* We either didn't have any activity or we
 593                                  * reached an entry which is 0. Either way,
 594                                  * exit and return what was accumulated so far
 595                                  */
 596                                 break;
 597                         }
 598
 599                         /* If sample has finished, check it is relevant */
 600                         last_end_us = ktime_to_us(
 601                                         ktime_sub(curr, ts->busy_to_idle_ts));
 602
 603                         if (last_end_us > period_us)
 604                                 break;
 605
 606                         /* It is relevant so add it but with adjustment */
 607                         last_busy_time_us = ktime_to_us(
 608                                                 ktime_sub(ts->busy_to_idle_ts,
 609                                                 ts->idle_to_busy_ts));
 610                         total_busy_time_us += last_busy_time_us -
 611                                         (last_start_us - period_us);
 612                         break;
 613                 }
 614
 615                 /* Check if the sample is finished or still open */
 616                 if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
 617                         last_busy_time_us = ktime_to_us(
 618                                                 ktime_sub(ts->busy_to_idle_ts,
 619                                                 ts->idle_to_busy_ts));
 620                 else
 621                         last_busy_time_us = ktime_to_us(
 622                                         ktime_sub(curr, ts->idle_to_busy_ts));
 623
 624                 total_busy_time_us += last_busy_time_us;
 625
 626                 last_index--;
 627                 /* Handle case idle_busy_ts_idx was 0 */
 628                 if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
 629                         last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
 630
 631                 ts = &hdev->idle_busy_ts_arr[last_index];
 632
 633                 overlap_cnt++;
 634         }
 635
 636         total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
 637                                                 USEC_PER_MSEC);
 638
 639         return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
 640 }
 641
 642 /*
 643  * hl_device_set_frequency - set the frequency of the device
 644  *
 645  * @hdev: pointer to habanalabs device structure
 646  * @freq: the new frequency value
 647  *
 648  * Change the frequency if needed. This function has no protection against
 649  * concurrency, therefore it is assumed that the calling function has protected
 650  * itself against the case of calling this function from multiple threads with
 651  * different values
 652  *
 653  * Returns 0 if no change was done, otherwise returns 1
 654  */
 655 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
 656 {
 657         if ((hdev->pm_mng_profile == PM_MANUAL) ||
 658                         (hdev->curr_pll_profile == freq))
 659                 return 0;
 660
 661         dev_dbg(hdev->dev, "Changing device frequency to %s\n",
 662                 freq == PLL_HIGH ? "high" : "low");
 663
 664         hdev->asic_funcs->set_pll_profile(hdev, freq);
 665
 666         hdev->curr_pll_profile = freq;
 667
 668         return 1;
 669 }
 670
 671 int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
 672 {
 673         int rc = 0;
 674
 675         mutex_lock(&hdev->debug_lock);
 676
 677         if (!enable) {
 678                 if (!hdev->in_debug) {
 679                         dev_err(hdev->dev,
 680                                 "Failed to disable debug mode because device was not in debug mode\n");
 681                         rc = -EFAULT;
 682                         goto out;
 683                 }
 684
 685                 if (!hdev->hard_reset_pending)
 686                         hdev->asic_funcs->halt_coresight(hdev);
 687
 688                 hdev->in_debug = 0;
 689
 690                 if (!hdev->hard_reset_pending)
 691                         hdev->asic_funcs->set_clock_gating(hdev);
 692
 693                 goto out;
 694         }
 695
 696         if (hdev->in_debug) {
 697                 dev_err(hdev->dev,
 698                         "Failed to enable debug mode because device is already in debug mode\n");
 699                 rc = -EFAULT;
 700                 goto out;
 701         }
 702
 703         hdev->asic_funcs->disable_clock_gating(hdev);
 704         hdev->in_debug = 1;
 705
 706 out:
 707         mutex_unlock(&hdev->debug_lock);
 708
 709         return rc;
 710 }
 711
 712 /*
 713  * hl_device_suspend - initiate device suspend
 714  *
 715  * @hdev: pointer to habanalabs device structure
 716  *
 717  * Puts the hw in the suspend state (all asics).
 718  * Returns 0 for success or an error on failure.
 719  * Called at driver suspend.
 720  */
 721 int hl_device_suspend(struct hl_device *hdev)
 722 {
 723         int rc;
 724
 725         pci_save_state(hdev->pdev);
 726
 727         /* Block future CS/VM/JOB completion operations */
 728         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 729         if (rc) {
 730                 dev_err(hdev->dev, "Can't suspend while in reset\n");
 731                 return -EIO;
 732         }
 733
 734         /* This blocks all other stuff that is not blocked by in_reset */
 735         hdev->disabled = true;
 736
 737         /*
 738          * Flush anyone that is inside the critical section of enqueue
 739          * jobs to the H/W
 740          */
 741         hdev->asic_funcs->hw_queues_lock(hdev);
 742         hdev->asic_funcs->hw_queues_unlock(hdev);
 743
 744         /* Flush processes that are sending message to CPU */
 745         mutex_lock(&hdev->send_cpu_message_lock);
 746         mutex_unlock(&hdev->send_cpu_message_lock);
 747
 748         rc = hdev->asic_funcs->suspend(hdev);
 749         if (rc)
 750                 dev_err(hdev->dev,
 751                         "Failed to disable PCI access of device CPU\n");
 752
 753         /* Shut down the device */
 754         pci_disable_device(hdev->pdev);
 755         pci_set_power_state(hdev->pdev, PCI_D3hot);
 756
 757         return 0;
 758 }
 759
 760 /*
 761  * hl_device_resume - initiate device resume
 762  *
 763  * @hdev: pointer to habanalabs device structure
 764  *
 765  * Bring the hw back to operating state (all asics).
 766  * Returns 0 for success or an error on failure.
 767  * Called at driver resume.
 768  */
 769 int hl_device_resume(struct hl_device *hdev)
 770 {
 771         int rc;
 772
 773         pci_set_power_state(hdev->pdev, PCI_D0);
 774         pci_restore_state(hdev->pdev);
 775         rc = pci_enable_device_mem(hdev->pdev);
 776         if (rc) {
 777                 dev_err(hdev->dev,
 778                         "Failed to enable PCI device in resume\n");
 779                 return rc;
 780         }
 781
 782         pci_set_master(hdev->pdev);
 783
 784         rc = hdev->asic_funcs->resume(hdev);
 785         if (rc) {
 786                 dev_err(hdev->dev, "Failed to resume device after suspend\n");
 787                 goto disable_device;
 788         }
 789
 790
 791         hdev->disabled = false;
 792         atomic_set(&hdev->in_reset, 0);
 793
 794         rc = hl_device_reset(hdev, true, false);
 795         if (rc) {
 796                 dev_err(hdev->dev, "Failed to reset device during resume\n");
 797                 goto disable_device;
 798         }
 799
 800         return 0;
 801
 802 disable_device:
 803         pci_clear_master(hdev->pdev);
 804         pci_disable_device(hdev->pdev);
 805
 806         return rc;
 807 }
 808
 809 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
 810 {
 811         struct hl_fpriv *hpriv;
 812         struct task_struct *task = NULL;
 813         u32 pending_cnt;
 814
 815
 816         /* Giving time for user to close FD, and for processes that are inside
 817          * hl_device_open to finish
 818          */
 819         if (!list_empty(&hdev->fpriv_list))
 820                 ssleep(1);
 821
 822         if (timeout) {
 823                 pending_cnt = timeout;
 824         } else {
 825                 if (hdev->process_kill_trial_cnt) {
 826                         /* Processes have been already killed */
 827                         pending_cnt = 1;
 828                         goto wait_for_processes;
 829                 } else {
 830                         /* Wait a small period after process kill */
 831                         pending_cnt = HL_PENDING_RESET_PER_SEC;
 832                 }
 833         }
 834
 835         mutex_lock(&hdev->fpriv_list_lock);
 836
 837         /* This section must be protected because we are dereferencing
 838          * pointers that are freed if the process exits
 839          */
 840         list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
 841                 task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
 842                 if (task) {
 843                         dev_info(hdev->dev, "Killing user process pid=%d\n",
 844                                 task_pid_nr(task));
 845                         send_sig(SIGKILL, task, 1);
 846                         usleep_range(1000, 10000);
 847
 848                         put_task_struct(task);
 849                 }
 850         }
 851
 852         mutex_unlock(&hdev->fpriv_list_lock);
 853
 854         /*
 855          * We killed the open users, but that doesn't mean they are closed.
 856          * It could be that they are running a long cleanup phase in the driver
 857          * e.g. MMU unmappings, or running other long teardown flow even before
 858          * our cleanup.
 859          * Therefore we need to wait again to make sure they are closed before
 860          * continuing with the reset.
 861          */
 862
 863 wait_for_processes:
 864         while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
 865                 dev_dbg(hdev->dev,
 866                         "Waiting for all unmap operations to finish before hard reset\n");
 867
 868                 pending_cnt--;
 869
 870                 ssleep(1);
 871         }
 872
 873         /* All processes exited successfully */
 874         if (list_empty(&hdev->fpriv_list))
 875                 return 0;
 876
 877         /* Give up waiting for processes to exit */
 878         if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
 879                 return -ETIME;
 880
 881         hdev->process_kill_trial_cnt++;
 882
 883         return -EBUSY;
 884 }
 885
/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 * @from_hard_reset_thread: is the caller the hard-reset thread
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * A soft reset is turned into a hard reset when the device does not support
 * soft reset, and also when the soft reset itself fails.
 *
 * A hard reset that is not requested by the hard-reset thread is not executed
 * inline. It is queued on hdev->device_reset_work and this function returns.
 *
 * Returns:
 *   0      - the reset succeeded, the hard reset was queued, the device
 *            initialization is not done yet, or another reset is already in
 *            progress
 *   -EBUSY - device_kill_open_processes() returned -EBUSY and no device
 *            removal is pending, i.e. the reset thread should reschedule
 *   other  - error code of the step that failed. The device is left disabled
 */
int hl_device_reset(struct hl_device *hdev, bool hard_reset,
			bool from_hard_reset_thread)
{
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev,
			"Can't reset before initialization is done\n");
		return 0;
	}

	if ((!hard_reset) && (!hdev->supports_soft_reset)) {
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
		hard_reset = true;
	}

	/*
	 * Re-entry of reset thread. process_kill_trial_cnt is zeroed when the
	 * hard reset is queued and is incremented each time the kill stage
	 * returns -EBUSY, so a non-zero value means the tear-down steps that
	 * precede kill_processes were already executed by a previous pass
	 */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. Only need to perform this if we didn't
	 * get from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		/* in_reset was already set, a reset is already in progress */
		if (rc)
			return 0;

		if (hard_reset) {
			/* Disable PCI access from device F/W so it won't send
			 * us additional interrupts. We disable MSI/MSI-X at
			 * the halt_engines function and we can't have the F/W
			 * sending us interrupts after that. We need to disable
			 * the access here because if the device is marked
			 * disabled, the message won't be sent. Also, in case
			 * of heartbeat, the device CPU is marked as disabled
			 * so this message won't be sent
			 */
			if (hl_fw_send_pci_access_msg(hdev,
					CPUCP_PACKET_DISABLE_PCI_ACCESS))
				dev_warn(hdev->dev,
					"Failed to disable PCI access by F/W\n");
		}

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		/* Flush anyone that is inside the critical section of enqueue
		 * jobs to the H/W
		 */
		hdev->asic_funcs->hw_queues_lock(hdev);
		hdev->asic_funcs->hw_queues_unlock(hdev);

		/* Flush anyone that is inside device open */
		mutex_lock(&hdev->fpriv_list_lock);
		mutex_unlock(&hdev->fpriv_list_lock);

		dev_err(hdev->dev, "Going to RESET device!\n");
	}

	/* Also the re-entry point of a failed soft reset, see out_err */
again:
	if ((hard_reset) && (!from_hard_reset_thread)) {
		hdev->hard_reset_pending = true;

		hdev->process_kill_trial_cnt = 0;

		/*
		 * Because the reset function can't run from interrupt or
		 * from heartbeat work, we need to call the reset function
		 * from a dedicated work
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);

		return 0;
	}

	if (hard_reset) {
		device_late_fini(hdev);

		/*
		 * Now that the heartbeat thread is closed, flush processes
		 * which are sending messages to CPU
		 */
		mutex_lock(&hdev->send_cpu_message_lock);
		mutex_unlock(&hdev->send_cpu_message_lock);
	}

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

kill_processes:
	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0);

		if (rc == -EBUSY) {
			/* Device removal is pending, so don't ask for another
			 * trial and fail the reset instead
			 */
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"Failed to kill all open processes, stopping hard reset\n");
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset);

	if (hard_reset) {
		/* Release kernel context. The pointer is cleared only when
		 * hl_ctx_put() returns 1. A pointer that is still set is
		 * reported as an error in the re-initialization stage below
		 */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;
		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Restart the idle/busy timestamps bookkeeping from its first entry */
	hdev->idle_busy_ts_idx = 0;
	hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
	hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);

	if (hdev->cs_active_cnt)
		dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
			hdev->cs_active_cnt);

	mutex_lock(&hdev->fpriv_list_lock);

	/* Make sure the context switch phase will run again */
	if (hdev->compute_ctx) {
		atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
		hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->hard_reset_pending = false;

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->compute_ctx = NULL;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			/* Undo the allocation and the MMU S/W init done above */
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->soft_reset_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after soft reset\n");
			goto out_err;
		}
	}

	/* Reset is done, allow new resets and clear the needs-reset mark */
	atomic_set(&hdev->in_reset, 0);
	hdev->needs_reset = false;

	dev_notice(hdev->dev, "Successfully finished resetting the device\n");

	if (hard_reset) {
		hdev->hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else {
		hdev->soft_reset_cnt++;
	}

	return 0;

out_err:
	hdev->disabled = true;

	if (hard_reset) {
		dev_err(hdev->dev,
			"Failed to reset! Device is NOT usable\n");
		/* Failed hard resets are counted as well */
		hdev->hard_reset_cnt++;
	} else {
		dev_err(hdev->dev,
			"Failed to do soft-reset, trying hard reset\n");
		hdev->soft_reset_cnt++;
		/* Escalate to hard reset. in_reset is not cleared on this
		 * path, so the device stays marked as in reset meanwhile
		 */
		hard_reset = true;
		goto again;
	}

	atomic_set(&hdev->in_reset, 0);

	return rc;
}
1198
1199 /*
1200  * hl_device_init - main initialization function for habanalabs device
1201  *
1202  * @hdev: pointer to habanalabs device structure
1203  *
1204  * Allocate an id for the device, do early initialization and then call the
1205  * ASIC specific initialization functions. Finally, create the cdev and the
1206  * Linux device to expose it to the user
1207  */
1208 int hl_device_init(struct hl_device *hdev, struct class *hclass)
1209 {
1210         int i, rc, cq_cnt, cq_ready_cnt;
1211         char *name;
1212         bool add_cdev_sysfs_on_err = false;
1213
1214         name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
1215         if (!name) {
1216                 rc = -ENOMEM;
1217                 goto out_disabled;
1218         }
1219
1220         /* Initialize cdev and device structures */
1221         rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
1222                                 &hdev->cdev, &hdev->dev);
1223
1224         kfree(name);
1225
1226         if (rc)
1227                 goto out_disabled;
1228
1229         name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
1230         if (!name) {
1231                 rc = -ENOMEM;
1232                 goto free_dev;
1233         }
1234
1235         /* Initialize cdev and device structures for control device */
1236         rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
1237                                 name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
1238
1239         kfree(name);
1240
1241         if (rc)
1242                 goto free_dev;
1243
1244         /* Initialize ASIC function pointers and perform early init */
1245         rc = device_early_init(hdev);
1246         if (rc)
1247                 goto free_dev_ctrl;
1248
1249         /*
1250          * Start calling ASIC initialization. First S/W then H/W and finally
1251          * late init
1252          */
1253         rc = hdev->asic_funcs->sw_init(hdev);
1254         if (rc)
1255                 goto early_fini;
1256
1257         /*
1258          * Initialize the H/W queues. Must be done before hw_init, because
1259          * there the addresses of the kernel queue are being written to the
1260          * registers of the device
1261          */
1262         rc = hl_hw_queues_create(hdev);
1263         if (rc) {
1264                 dev_err(hdev->dev, "failed to initialize kernel queues\n");
1265                 goto sw_fini;
1266         }
1267
1268         cq_cnt = hdev->asic_prop.completion_queues_count;
1269
1270         /*
1271          * Initialize the completion queues. Must be done before hw_init,
1272          * because there the addresses of the completion queues are being
1273          * passed as arguments to request_irq
1274          */
1275         if (cq_cnt) {
1276                 hdev->completion_queue = kcalloc(cq_cnt,
1277                                 sizeof(*hdev->completion_queue),
1278                                 GFP_KERNEL);
1279
1280                 if (!hdev->completion_queue) {
1281                         dev_err(hdev->dev,
1282                                 "failed to allocate completion queues\n");
1283                         rc = -ENOMEM;
1284                         goto hw_queues_destroy;
1285                 }
1286         }
1287
1288         for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
1289                 rc = hl_cq_init(hdev, &hdev->completion_queue[i],
1290                                 hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
1291                 if (rc) {
1292                         dev_err(hdev->dev,
1293                                 "failed to initialize completion queue\n");
1294                         goto cq_fini;
1295                 }
1296                 hdev->completion_queue[i].cq_idx = i;
1297         }
1298
1299         /*
1300          * Initialize the event queue. Must be done before hw_init,
1301          * because there the address of the event queue is being
1302          * passed as argument to request_irq
1303          */
1304         rc = hl_eq_init(hdev, &hdev->event_queue);
1305         if (rc) {
1306                 dev_err(hdev->dev, "failed to initialize event queue\n");
1307                 goto cq_fini;
1308         }
1309
1310         /* MMU S/W must be initialized before kernel context is created */
1311         rc = hl_mmu_init(hdev);
1312         if (rc) {
1313                 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
1314                 goto eq_fini;
1315         }
1316
1317         /* Allocate the kernel context */
1318         hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
1319         if (!hdev->kernel_ctx) {
1320                 rc = -ENOMEM;
1321                 goto mmu_fini;
1322         }
1323
1324         hdev->compute_ctx = NULL;
1325
1326         hl_debugfs_add_device(hdev);
1327
1328         /* debugfs nodes are created in hl_ctx_init so it must be called after
1329          * hl_debugfs_add_device.
1330          */
1331         rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1332         if (rc) {
1333                 dev_err(hdev->dev, "failed to initialize kernel context\n");
1334                 kfree(hdev->kernel_ctx);
1335                 goto remove_device_from_debugfs;
1336         }
1337
1338         rc = hl_cb_pool_init(hdev);
1339         if (rc) {
1340                 dev_err(hdev->dev, "failed to initialize CB pool\n");
1341                 goto release_ctx;
1342         }
1343
1344         /*
1345          * From this point, in case of an error, add char devices and create
1346          * sysfs nodes as part of the error flow, to allow debugging.
1347          */
1348         add_cdev_sysfs_on_err = true;
1349
1350         /* Device is now enabled as part of the initialization requires
1351          * communication with the device firmware to get information that
1352          * is required for the initialization itself
1353          */
1354         hdev->disabled = false;
1355
1356         rc = hdev->asic_funcs->hw_init(hdev);
1357         if (rc) {
1358                 dev_err(hdev->dev, "failed to initialize the H/W\n");
1359                 rc = 0;
1360                 goto out_disabled;
1361         }
1362
1363         /* Check that the communication with the device is working */
1364         rc = hdev->asic_funcs->test_queues(hdev);
1365         if (rc) {
1366                 dev_err(hdev->dev, "Failed to detect if device is alive\n");
1367                 rc = 0;
1368                 goto out_disabled;
1369         }
1370
1371         rc = device_late_init(hdev);
1372         if (rc) {
1373                 dev_err(hdev->dev, "Failed late initialization\n");
1374                 rc = 0;
1375                 goto out_disabled;
1376         }
1377
1378         dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
1379                 hdev->asic_name,
1380                 hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
1381
1382         rc = hl_vm_init(hdev);
1383         if (rc) {
1384                 dev_err(hdev->dev, "Failed to initialize memory module\n");
1385                 rc = 0;
1386                 goto out_disabled;
1387         }
1388
1389         /*
1390          * Expose devices and sysfs nodes to user.
1391          * From here there is no need to add char devices and create sysfs nodes
1392          * in case of an error.
1393          */
1394         add_cdev_sysfs_on_err = false;
1395         rc = device_cdev_sysfs_add(hdev);
1396         if (rc) {
1397                 dev_err(hdev->dev,
1398                         "Failed to add char devices and sysfs nodes\n");
1399                 rc = 0;
1400                 goto out_disabled;
1401         }
1402
1403         /* Need to call this again because the max power might change,
1404          * depending on card type for certain ASICs
1405          */
1406         hl_set_max_power(hdev);
1407
1408         /*
1409          * hl_hwmon_init() must be called after device_late_init(), because only
1410          * there we get the information from the device about which
1411          * hwmon-related sensors the device supports.
1412          * Furthermore, it must be done after adding the device to the system.
1413          */
1414         rc = hl_hwmon_init(hdev);
1415         if (rc) {
1416                 dev_err(hdev->dev, "Failed to initialize hwmon\n");
1417                 rc = 0;
1418                 goto out_disabled;
1419         }
1420
1421         dev_notice(hdev->dev,
1422                 "Successfully added device to habanalabs driver\n");
1423
1424         hdev->init_done = true;
1425
1426         /* After initialization is done, we are ready to receive events from
1427          * the F/W. We can't do it before because we will ignore events and if
1428          * those events are fatal, we won't know about it and the device will
1429          * be operational although it shouldn't be
1430          */
1431         hdev->asic_funcs->enable_events_from_fw(hdev);
1432
1433         return 0;
1434
1435 release_ctx:
1436         if (hl_ctx_put(hdev->kernel_ctx) != 1)
1437                 dev_err(hdev->dev,
1438                         "kernel ctx is still alive on initialization failure\n");
1439 remove_device_from_debugfs:
1440         hl_debugfs_remove_device(hdev);
1441 mmu_fini:
1442         hl_mmu_fini(hdev);
1443 eq_fini:
1444         hl_eq_fini(hdev, &hdev->event_queue);
1445 cq_fini:
1446         for (i = 0 ; i < cq_ready_cnt ; i++)
1447                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1448         kfree(hdev->completion_queue);
1449 hw_queues_destroy:
1450         hl_hw_queues_destroy(hdev);
1451 sw_fini:
1452         hdev->asic_funcs->sw_fini(hdev);
1453 early_fini:
1454         device_early_fini(hdev);
1455 free_dev_ctrl:
1456         put_device(hdev->dev_ctrl);
1457 free_dev:
1458         put_device(hdev->dev);
1459 out_disabled:
1460         hdev->disabled = true;
1461         if (add_cdev_sysfs_on_err)
1462                 device_cdev_sysfs_add(hdev);
1463         if (hdev->pdev)
1464                 dev_err(&hdev->pdev->dev,
1465                         "Failed to initialize hl%d. Device is NOT usable !\n",
1466                         hdev->id / 2);
1467         else
1468                 pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
1469                         hdev->id / 2);
1470
1471         return rc;
1472 }
1473
1474 /*
1475  * hl_device_fini - main tear-down function for habanalabs device
1476  *
1477  * @hdev: pointer to habanalabs device structure
1478  *
1479  * Destroy the device, call ASIC fini functions and release the id
1480  */
1481 void hl_device_fini(struct hl_device *hdev)
1482 {
1483         ktime_t timeout;
1484         int i, rc;
1485
1486         dev_info(hdev->dev, "Removing device\n");
1487
1488         hdev->device_fini_pending = 1;
1489         flush_delayed_work(&hdev->device_reset_work.reset_work);
1490
1491         /*
1492          * This function is competing with the reset function, so try to
1493          * take the reset atomic and if we are already in middle of reset,
1494          * wait until reset function is finished. Reset function is designed
1495          * to always finish. However, in Gaudi, because of all the network
1496          * ports, the hard reset could take between 10-30 seconds
1497          */
1498
1499         timeout = ktime_add_us(ktime_get(),
1500                                 HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
1501         rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1502         while (rc) {
1503                 usleep_range(50, 200);
1504                 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1505                 if (ktime_compare(ktime_get(), timeout) > 0) {
1506                         dev_crit(hdev->dev,
1507                                 "Failed to remove device because reset function did not finish\n");
1508                         return;
1509                 }
1510         }
1511
1512         /* Disable PCI access from device F/W so it won't send us additional
1513          * interrupts. We disable MSI/MSI-X at the halt_engines function and we
1514          * can't have the F/W sending us interrupts after that. We need to
1515          * disable the access here because if the device is marked disable, the
1516          * message won't be send. Also, in case of heartbeat, the device CPU is
1517          * marked as disable so this message won't be sent
1518          */
1519         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1520
1521         /* Mark device as disabled */
1522         hdev->disabled = true;
1523
1524         /* Flush anyone that is inside the critical section of enqueue
1525          * jobs to the H/W
1526          */
1527         hdev->asic_funcs->hw_queues_lock(hdev);
1528         hdev->asic_funcs->hw_queues_unlock(hdev);
1529
1530         /* Flush anyone that is inside device open */
1531         mutex_lock(&hdev->fpriv_list_lock);
1532         mutex_unlock(&hdev->fpriv_list_lock);
1533
1534         hdev->hard_reset_pending = true;
1535
1536         hl_hwmon_fini(hdev);
1537
1538         device_late_fini(hdev);
1539
1540         /*
1541          * Halt the engines and disable interrupts so we won't get any more
1542          * completions from H/W and we won't have any accesses from the
1543          * H/W to the host machine
1544          */
1545         hdev->asic_funcs->halt_engines(hdev, true);
1546
1547         /* Go over all the queues, release all CS and their jobs */
1548         hl_cs_rollback_all(hdev);
1549
1550         /* Kill processes here after CS rollback. This is because the process
1551          * can't really exit until all its CSs are done, which is what we
1552          * do in cs rollback
1553          */
1554         dev_info(hdev->dev,
1555                 "Waiting for all processes to exit (timeout of %u seconds)",
1556                 HL_PENDING_RESET_LONG_SEC);
1557
1558         rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
1559         if (rc)
1560                 dev_crit(hdev->dev, "Failed to kill all open processes\n");
1561
1562         hl_cb_pool_fini(hdev);
1563
1564         /* Reset the H/W. It will be in idle state after this returns */
1565         hdev->asic_funcs->hw_fini(hdev, true);
1566
1567         /* Release kernel context */
1568         if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
1569                 dev_err(hdev->dev, "kernel ctx is still alive\n");
1570
1571         hl_debugfs_remove_device(hdev);
1572
1573         hl_vm_fini(hdev);
1574
1575         hl_mmu_fini(hdev);
1576
1577         hl_eq_fini(hdev, &hdev->event_queue);
1578
1579         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1580                 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1581         kfree(hdev->completion_queue);
1582
1583         hl_hw_queues_destroy(hdev);
1584
1585         /* Call ASIC S/W finalize function */
1586         hdev->asic_funcs->sw_fini(hdev);
1587
1588         device_early_fini(hdev);
1589
1590         /* Hide devices and sysfs nodes from user */
1591         device_cdev_sysfs_del(hdev);
1592
1593         pr_info("removed device successfully\n");
1594 }
1595
1596 /*
1597  * MMIO register access helper functions.
1598  */
1599
1600 /*
1601  * hl_rreg - Read an MMIO register
1602  *
1603  * @hdev: pointer to habanalabs device structure
1604  * @reg: MMIO register offset (in bytes)
1605  *
1606  * Returns the value of the MMIO register we are asked to read
1607  *
1608  */
1609 inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
1610 {
1611         return readl(hdev->rmmio + reg);
1612 }
1613
1614 /*
1615  * hl_wreg - Write to an MMIO register
1616  *
1617  * @hdev: pointer to habanalabs device structure
1618  * @reg: MMIO register offset (in bytes)
1619  * @val: 32-bit value
1620  *
1621  * Writes the 32-bit value into the MMIO register
1622  *
1623  */
1624 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
1625 {
1626         writel(val, hdev->rmmio + reg);
1627 }