arch/powerpc/platforms/powernv/npu-dma.c

   1 /*
   2  * This file implements the DMA operations for NVLink devices. The NPU
   3  * devices all point to the same iommu table as the parent PCI device.
   4  *
   5  * Copyright Alistair Popple, IBM Corporation 2015.
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of version 2 of the GNU General Public
   9  * License as published by the Free Software Foundation.
  10  */
  11
  12 #include <linux/mmu_notifier.h>
  13 #include <linux/mmu_context.h>
  14 #include <linux/of.h>
  15 #include <linux/pci.h>
  16 #include <linux/memblock.h>
  17 #include <linux/sizes.h>
  18
  19 #include <asm/debugfs.h>
  20 #include <asm/powernv.h>
  21 #include <asm/opal.h>
  22
  23 #include "pci.h"
  24
  25 /*
  26  * spinlock to protect initialisation of an npu_context for a particular
  27  * mm_struct.
  28  */
  29 static DEFINE_SPINLOCK(npu_context_lock);
  30
  31 /*
  32  * Other types of TCE cache invalidation are not functional in the
  33  * hardware.
  34  */
  35 static struct pci_dev *get_pci_dev(struct device_node *dn)
  36 {
  37         struct pci_dn *pdn = PCI_DN(dn);
  38
  39         return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
  40                                            pdn->busno, pdn->devfn);
  41 }
  42
  43 /* Given a NPU device get the associated PCI device. */
  44 struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
  45 {
  46         struct device_node *dn;
  47         struct pci_dev *gpdev;
  48
  49         if (WARN_ON(!npdev))
  50                 return NULL;
  51
  52         if (WARN_ON(!npdev->dev.of_node))
  53                 return NULL;
  54
  55         /* Get assoicated PCI device */
  56         dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
  57         if (!dn)
  58                 return NULL;
  59
  60         gpdev = get_pci_dev(dn);
  61         of_node_put(dn);
  62
  63         return gpdev;
  64 }
  65 EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
  66
  67 /* Given the real PCI device get a linked NPU device. */
  68 struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
  69 {
  70         struct device_node *dn;
  71         struct pci_dev *npdev;
  72
  73         if (WARN_ON(!gpdev))
  74                 return NULL;
  75
  76         /* Not all PCI devices have device-tree nodes */
  77         if (!gpdev->dev.of_node)
  78                 return NULL;
  79
  80         /* Get assoicated PCI device */
  81         dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
  82         if (!dn)
  83                 return NULL;
  84
  85         npdev = get_pci_dev(dn);
  86         of_node_put(dn);
  87
  88         return npdev;
  89 }
  90 EXPORT_SYMBOL(pnv_pci_get_npu_dev);
  91
  92 /*
  93  * Returns the PE assoicated with the PCI device of the given
  94  * NPU. Returns the linked pci device if pci_dev != NULL.
  95  */
  96 static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
  97                                                   struct pci_dev **gpdev)
  98 {
  99         struct pnv_phb *phb;
 100         struct pci_controller *hose;
 101         struct pci_dev *pdev;
 102         struct pnv_ioda_pe *pe;
 103         struct pci_dn *pdn;
 104
 105         pdev = pnv_pci_get_gpu_dev(npe->pdev);
 106         if (!pdev)
 107                 return NULL;
 108
 109         pdn = pci_get_pdn(pdev);
 110         if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
 111                 return NULL;
 112
 113         hose = pci_bus_to_host(pdev->bus);
 114         phb = hose->private_data;
 115         pe = &phb->ioda.pe_array[pdn->pe_number];
 116
 117         if (gpdev)
 118                 *gpdev = pdev;
 119
 120         return pe;
 121 }
 122
 123 static long pnv_npu_unset_window(struct iommu_table_group *table_group,
 124                 int num);
 125
 126 static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
 127                 struct iommu_table *tbl)
 128 {
 129         struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
 130                         table_group);
 131         struct pnv_phb *phb = npe->phb;
 132         int64_t rc;
 133         const unsigned long size = tbl->it_indirect_levels ?
 134                 tbl->it_level_size : tbl->it_size;
 135         const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
 136         const __u64 win_size = tbl->it_size << tbl->it_page_shift;
 137         int num2 = (num == 0) ? 1 : 0;
 138
 139         /* NPU has just one TVE so if there is another table, remove it first */
 140         if (npe->table_group.tables[num2])
 141                 pnv_npu_unset_window(&npe->table_group, num2);
 142
 143         pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
 144                         start_addr, start_addr + win_size - 1,
 145                         IOMMU_PAGE_SIZE(tbl));
 146
 147         rc = opal_pci_map_pe_dma_window(phb->opal_id,
 148                         npe->pe_number,
 149                         npe->pe_number,
 150                         tbl->it_indirect_levels + 1,
 151                         __pa(tbl->it_base),
 152                         size << 3,
 153                         IOMMU_PAGE_SIZE(tbl));
 154         if (rc) {
 155                 pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
 156                 return rc;
 157         }
 158         pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 159
 160         /* Add the table to the list so its TCE cache will get invalidated */
 161         pnv_pci_link_table_and_group(phb->hose->node, num,
 162                         tbl, &npe->table_group);
 163
 164         return 0;
 165 }
 166
 167 static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
 168 {
 169         struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
 170                         table_group);
 171         struct pnv_phb *phb = npe->phb;
 172         int64_t rc;
 173
 174         if (!npe->table_group.tables[num])
 175                 return 0;
 176
 177         pe_info(npe, "Removing DMA window\n");
 178
 179         rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
 180                         npe->pe_number,
 181                         0/* levels */, 0/* table address */,
 182                         0/* table size */, 0/* page size */);
 183         if (rc) {
 184                 pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
 185                 return rc;
 186         }
 187         pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 188
 189         pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
 190                         &npe->table_group);
 191
 192         return 0;
 193 }
 194
 195 /*
 196  * Enables 32 bit DMA on NPU.
 197  */
 198 static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
 199 {
 200         struct pci_dev *gpdev;
 201         struct pnv_ioda_pe *gpe;
 202         int64_t rc;
 203
 204         /*
 205          * Find the assoicated PCI devices and get the dma window
 206          * information from there.
 207          */
 208         if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
 209                 return;
 210
 211         gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
 212         if (!gpe)
 213                 return;
 214
 215         rc = pnv_npu_set_window(&npe->table_group, 0,
 216                         gpe->table_group.tables[0]);
 217
 218         /*
 219          * NVLink devices use the same TCE table configuration as
 220          * their parent device so drivers shouldn't be doing DMA
 221          * operations directly on these devices.
 222          */
 223         set_dma_ops(&npe->pdev->dev, NULL);
 224 }
 225
 226 /*
 227  * Enables bypass mode on the NPU. The NPU only supports one
 228  * window per link, so bypass needs to be explicitly enabled or
 229  * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
 230  * active at the same time.
 231  */
 232 static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
 233 {
 234         struct pnv_phb *phb = npe->phb;
 235         int64_t rc = 0;
 236         phys_addr_t top = memblock_end_of_DRAM();
 237
 238         if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
 239                 return -EINVAL;
 240
 241         rc = pnv_npu_unset_window(&npe->table_group, 0);
 242         if (rc != OPAL_SUCCESS)
 243                 return rc;
 244
 245         /* Enable the bypass window */
 246
 247         top = roundup_pow_of_two(top);
 248         dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
 249                         npe->pe_number);
 250         rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
 251                         npe->pe_number, npe->pe_number,
 252                         0 /* bypass base */, top);
 253
 254         if (rc == OPAL_SUCCESS)
 255                 pnv_pci_ioda2_tce_invalidate_entire(phb, false);
 256
 257         return rc;
 258 }
 259
 260 void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
 261 {
 262         int i;
 263         struct pnv_phb *phb;
 264         struct pci_dn *pdn;
 265         struct pnv_ioda_pe *npe;
 266         struct pci_dev *npdev;
 267
 268         for (i = 0; ; ++i) {
 269                 npdev = pnv_pci_get_npu_dev(gpdev, i);
 270
 271                 if (!npdev)
 272                         break;
 273
 274                 pdn = pci_get_pdn(npdev);
 275                 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
 276                         return;
 277
 278                 phb = pci_bus_to_host(npdev->bus)->private_data;
 279
 280                 /* We only do bypass if it's enabled on the linked device */
 281                 npe = &phb->ioda.pe_array[pdn->pe_number];
 282
 283                 if (bypass) {
 284                         dev_info(&npdev->dev,
 285                                         "Using 64-bit DMA iommu bypass\n");
 286                         pnv_npu_dma_set_bypass(npe);
 287                 } else {
 288                         dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
 289                         pnv_npu_dma_set_32(npe);
 290                 }
 291         }
 292 }
 293
 294 #ifdef CONFIG_IOMMU_API
 295 /* Switch ownership from platform code to external user (e.g. VFIO) */
 296 static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
 297 {
 298         struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
 299                         table_group);
 300         struct pnv_phb *phb = npe->phb;
 301         int64_t rc;
 302         struct pci_dev *gpdev = NULL;
 303
 304         /*
 305          * Note: NPU has just a single TVE in the hardware which means that
 306          * while used by the kernel, it can have either 32bit window or
 307          * DMA bypass but never both. So we deconfigure 32bit window only
 308          * if it was enabled at the moment of ownership change.
 309          */
 310         if (npe->table_group.tables[0]) {
 311                 pnv_npu_unset_window(&npe->table_group, 0);
 312                 return;
 313         }
 314
 315         /* Disable bypass */
 316         rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
 317                         npe->pe_number, npe->pe_number,
 318                         0 /* bypass base */, 0);
 319         if (rc) {
 320                 pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
 321                 return;
 322         }
 323         pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
 324
 325         get_gpu_pci_dev_and_pe(npe, &gpdev);
 326         if (gpdev)
 327                 pnv_npu2_unmap_lpar_dev(gpdev);
 328 }
 329
 330 static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
 331 {
 332         struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
 333                         table_group);
 334         struct pci_dev *gpdev = NULL;
 335
 336         get_gpu_pci_dev_and_pe(npe, &gpdev);
 337         if (gpdev)
 338                 pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
 339 }
 340
 341 static struct iommu_table_group_ops pnv_pci_npu_ops = {
 342         .set_window = pnv_npu_set_window,
 343         .unset_window = pnv_npu_unset_window,
 344         .take_ownership = pnv_npu_take_ownership,
 345         .release_ownership = pnv_npu_release_ownership,
 346 };
 347 #endif /* !CONFIG_IOMMU_API */
 348
 349 /*
 350  * NPU2 ATS
 351  */
 352 /* Maximum possible number of ATSD MMIO registers per NPU */
 353 #define NV_NMMU_ATSD_REGS 8
 354 #define NV_NPU_MAX_PE_NUM       16
 355
 356 /*
 357  * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
 358  * up to 3 x (GPU + 2xNPUs) (POWER9).
 359  */
 360 struct npu_comp {
 361         struct iommu_table_group table_group;
 362         int pe_num;
 363         struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
 364 };
 365
 366 /* An NPU descriptor, valid for POWER9 only */
 367 struct npu {
 368         int index;
 369         __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
 370         unsigned int mmio_atsd_count;
 371
 372         /* Bitmask for MMIO register usage */
 373         unsigned long mmio_atsd_usage;
 374
 375         /* Do we need to explicitly flush the nest mmu? */
 376         bool nmmu_flush;
 377
 378         struct npu_comp npucomp;
 379 };
 380
 381 #ifdef CONFIG_IOMMU_API
 382 static long pnv_npu_peers_create_table_userspace(
 383                 struct iommu_table_group *table_group,
 384                 int num, __u32 page_shift, __u64 window_size, __u32 levels,
 385                 struct iommu_table **ptbl)
 386 {
 387         struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
 388                         table_group);
 389
 390         if (!npucomp->pe_num || !npucomp->pe[0] ||
 391                         !npucomp->pe[0]->table_group.ops ||
 392                         !npucomp->pe[0]->table_group.ops->create_table)
 393                 return -EFAULT;
 394
 395         return npucomp->pe[0]->table_group.ops->create_table(
 396                         &npucomp->pe[0]->table_group, num, page_shift,
 397                         window_size, levels, ptbl);
 398 }
 399
 400 static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
 401                 int num, struct iommu_table *tbl)
 402 {
 403         int i, j;
 404         long ret = 0;
 405         struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
 406                         table_group);
 407
 408         for (i = 0; i < npucomp->pe_num; ++i) {
 409                 struct pnv_ioda_pe *pe = npucomp->pe[i];
 410
 411                 if (!pe->table_group.ops->set_window)
 412                         continue;
 413
 414                 ret = pe->table_group.ops->set_window(&pe->table_group,
 415                                 num, tbl);
 416                 if (ret)
 417                         break;
 418         }
 419
 420         if (ret) {
 421                 for (j = 0; j < i; ++j) {
 422                         struct pnv_ioda_pe *pe = npucomp->pe[j];
 423
 424                         if (!pe->table_group.ops->unset_window)
 425                                 continue;
 426
 427                         ret = pe->table_group.ops->unset_window(
 428                                         &pe->table_group, num);
 429                         if (ret)
 430                                 break;
 431                 }
 432         } else {
 433                 table_group->tables[num] = iommu_tce_table_get(tbl);
 434         }
 435
 436         return ret;
 437 }
 438
 439 static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
 440                 int num)
 441 {
 442         int i, j;
 443         long ret = 0;
 444         struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
 445                         table_group);
 446
 447         for (i = 0; i < npucomp->pe_num; ++i) {
 448                 struct pnv_ioda_pe *pe = npucomp->pe[i];
 449
 450                 WARN_ON(npucomp->table_group.tables[num] !=
 451                                 table_group->tables[num]);
 452                 if (!npucomp->table_group.tables[num])
 453                         continue;
 454
 455                 if (!pe->table_group.ops->unset_window)
 456                         continue;
 457
 458                 ret = pe->table_group.ops->unset_window(&pe->table_group, num);
 459                 if (ret)
 460                         break;
 461         }
 462
 463         if (ret) {
 464                 for (j = 0; j < i; ++j) {
 465                         struct pnv_ioda_pe *pe = npucomp->pe[j];
 466
 467                         if (!npucomp->table_group.tables[num])
 468                                 continue;
 469
 470                         if (!pe->table_group.ops->set_window)
 471                                 continue;
 472
 473                         ret = pe->table_group.ops->set_window(&pe->table_group,
 474                                         num, table_group->tables[num]);
 475                         if (ret)
 476                                 break;
 477                 }
 478         } else if (table_group->tables[num]) {
 479                 iommu_tce_table_put(table_group->tables[num]);
 480                 table_group->tables[num] = NULL;
 481         }
 482
 483         return ret;
 484 }
 485
 486 static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
 487 {
 488         int i;
 489         struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
 490                         table_group);
 491
 492         for (i = 0; i < npucomp->pe_num; ++i) {
 493                 struct pnv_ioda_pe *pe = npucomp->pe[i];
 494
 495                 if (!pe->table_group.ops->take_ownership)
 496                         continue;
 497                 pe->table_group.ops->take_ownership(&pe->table_group);
 498         }
 499 }
 500
 501 static void pnv_npu_peers_release_ownership(
 502                 struct iommu_table_group *table_group)
 503 {
 504         int i;
 505         struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
 506                         table_group);
 507
 508         for (i = 0; i < npucomp->pe_num; ++i) {
 509                 struct pnv_ioda_pe *pe = npucomp->pe[i];
 510
 511                 if (!pe->table_group.ops->release_ownership)
 512                         continue;
 513                 pe->table_group.ops->release_ownership(&pe->table_group);
 514         }
 515 }
 516
 517 static struct iommu_table_group_ops pnv_npu_peers_ops = {
 518         .get_table_size = pnv_pci_ioda2_get_table_size,
 519         .create_table = pnv_npu_peers_create_table_userspace,
 520         .set_window = pnv_npu_peers_set_window,
 521         .unset_window = pnv_npu_peers_unset_window,
 522         .take_ownership = pnv_npu_peers_take_ownership,
 523         .release_ownership = pnv_npu_peers_release_ownership,
 524 };
 525
 526 static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
 527                 struct pnv_ioda_pe *pe)
 528 {
 529         if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
 530                 return;
 531
 532         npucomp->pe[npucomp->pe_num] = pe;
 533         ++npucomp->pe_num;
 534 }
 535
 536 struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
 537 {
 538         struct iommu_table_group *table_group;
 539         struct npu_comp *npucomp;
 540         struct pci_dev *gpdev = NULL;
 541         struct pci_controller *hose;
 542         struct pci_dev *npdev = NULL;
 543
 544         list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
 545                 npdev = pnv_pci_get_npu_dev(gpdev, 0);
 546                 if (npdev)
 547                         break;
 548         }
 549
 550         if (!npdev)
 551                 /* It is not an NPU attached device, skip */
 552                 return NULL;
 553
 554         hose = pci_bus_to_host(npdev->bus);
 555
 556         if (hose->npu) {
 557                 table_group = &hose->npu->npucomp.table_group;
 558
 559                 if (!table_group->group) {
 560                         table_group->ops = &pnv_npu_peers_ops;
 561                         iommu_register_group(table_group,
 562                                         hose->global_number,
 563                                         pe->pe_number);
 564                 }
 565         } else {
 566                 /* Create a group for 1 GPU and attached NPUs for POWER8 */
 567                 pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
 568                 table_group = &pe->npucomp->table_group;
 569                 table_group->ops = &pnv_npu_peers_ops;
 570                 iommu_register_group(table_group, hose->global_number,
 571                                 pe->pe_number);
 572         }
 573
 574         /* Steal capabilities from a GPU PE */
 575         table_group->max_dynamic_windows_supported =
 576                 pe->table_group.max_dynamic_windows_supported;
 577         table_group->tce32_start = pe->table_group.tce32_start;
 578         table_group->tce32_size = pe->table_group.tce32_size;
 579         table_group->max_levels = pe->table_group.max_levels;
 580         if (!table_group->pgsizes)
 581                 table_group->pgsizes = pe->table_group.pgsizes;
 582
 583         npucomp = container_of(table_group, struct npu_comp, table_group);
 584         pnv_comp_attach_table_group(npucomp, pe);
 585
 586         return table_group;
 587 }
 588
 589 struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
 590 {
 591         struct iommu_table_group *table_group;
 592         struct npu_comp *npucomp;
 593         struct pci_dev *gpdev = NULL;
 594         struct pci_dev *npdev;
 595         struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
 596
 597         WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
 598         if (!gpe)
 599                 return NULL;
 600
 601         /*
 602          * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
 603          * but NPU bridges do not have this hook defined so we do it here.
 604          * We do not setup other table group parameters as they won't be used
 605          * anyway - NVLink bridges are subordinate PEs.
 606          */
 607         pe->table_group.ops = &pnv_pci_npu_ops;
 608
 609         table_group = iommu_group_get_iommudata(
 610                         iommu_group_get(&gpdev->dev));
 611
 612         /*
 613          * On P9 NPU PHB and PCI PHB support different page sizes,
 614          * keep only matching. We expect here that NVLink bridge PE pgsizes is
 615          * initialized by the caller.
 616          */
 617         table_group->pgsizes &= pe->table_group.pgsizes;
 618         npucomp = container_of(table_group, struct npu_comp, table_group);
 619         pnv_comp_attach_table_group(npucomp, pe);
 620
 621         list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
 622                 struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
 623
 624                 if (gpdevtmp != gpdev)
 625                         continue;
 626
 627                 iommu_add_device(table_group, &npdev->dev);
 628         }
 629
 630         return table_group;
 631 }
 632 #endif /* CONFIG_IOMMU_API */
 633
 634 /* Maximum number of nvlinks per npu */
 635 #define NV_MAX_LINKS 6
 636
 637 /* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
 638 static int max_npu2_index;
 639
 640 struct npu_context {
 641         struct mm_struct *mm;
 642         struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
 643         struct mmu_notifier mn;
 644         struct kref kref;
 645         bool nmmu_flush;
 646
 647         /* Callback to stop translation requests on a given GPU */
 648         void (*release_cb)(struct npu_context *context, void *priv);
 649
 650         /*
 651          * Private pointer passed to the above callback for usage by
 652          * device drivers.
 653          */
 654         void *priv;
 655 };
 656
 657 struct mmio_atsd_reg {
 658         struct npu *npu;
 659         int reg;
 660 };
 661
 662 /*
 663  * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
 664  * if none are available.
 665  */
 666 static int get_mmio_atsd_reg(struct npu *npu)
 667 {
 668         int i;
 669
 670         for (i = 0; i < npu->mmio_atsd_count; i++) {
 671                 if (!test_bit(i, &npu->mmio_atsd_usage))
 672                         if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
 673                                 return i;
 674         }
 675
 676         return -ENOSPC;
 677 }
 678
 679 static void put_mmio_atsd_reg(struct npu *npu, int reg)
 680 {
 681         clear_bit_unlock(reg, &npu->mmio_atsd_usage);
 682 }
 683
 684 /* MMIO ATSD register offsets */
 685 #define XTS_ATSD_LAUNCH 0
 686 #define XTS_ATSD_AVA    1
 687 #define XTS_ATSD_STAT   2
 688
 689 static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
 690 {
 691         unsigned long launch = 0;
 692
 693         if (psize == MMU_PAGE_COUNT) {
 694                 /* IS set to invalidate entire matching PID */
 695                 launch |= PPC_BIT(12);
 696         } else {
 697                 /* AP set to invalidate region of psize */
 698                 launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
 699         }
 700
 701         /* PRS set to process-scoped */
 702         launch |= PPC_BIT(13);
 703
 704         /* PID */
 705         launch |= pid << PPC_BITLSHIFT(38);
 706
 707         /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
 708
 709         return launch;
 710 }
 711
 712 static void mmio_atsd_regs_write(struct mmio_atsd_reg
 713                         mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
 714                         unsigned long val)
 715 {
 716         struct npu *npu;
 717         int i, reg;
 718
 719         for (i = 0; i <= max_npu2_index; i++) {
 720                 reg = mmio_atsd_reg[i].reg;
 721                 if (reg < 0)
 722                         continue;
 723
 724                 npu = mmio_atsd_reg[i].npu;
 725                 __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
 726         }
 727 }
 728
 729 static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
 730                                 unsigned long pid)
 731 {
 732         unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
 733
 734         /* Invalidating the entire process doesn't use a va */
 735         mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
 736 }
 737
 738 static void mmio_invalidate_range(struct mmio_atsd_reg
 739                         mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
 740                         unsigned long start, unsigned long psize)
 741 {
 742         unsigned long launch = get_atsd_launch_val(pid, psize);
 743
 744         /* Write all VAs first */
 745         mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
 746
 747         /* Issue one barrier for all address writes */
 748         eieio();
 749
 750         /* Launch */
 751         mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
 752 }
 753
 754 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 755
 756 static void mmio_invalidate_wait(
 757         struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 758 {
 759         struct npu *npu;
 760         int i, reg;
 761
 762         /* Wait for all invalidations to complete */
 763         for (i = 0; i <= max_npu2_index; i++) {
 764                 if (mmio_atsd_reg[i].reg < 0)
 765                         continue;
 766
 767                 /* Wait for completion */
 768                 npu = mmio_atsd_reg[i].npu;
 769                 reg = mmio_atsd_reg[i].reg;
 770                 while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
 771                         cpu_relax();
 772         }
 773 }
 774
 775 /*
 776  * Acquires all the address translation shootdown (ATSD) registers required to
 777  * launch an ATSD on all links this npu_context is active on.
 778  */
 779 static void acquire_atsd_reg(struct npu_context *npu_context,
 780                         struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 781 {
 782         int i, j;
 783         struct npu *npu;
 784         struct pci_dev *npdev;
 785
 786         for (i = 0; i <= max_npu2_index; i++) {
 787                 mmio_atsd_reg[i].reg = -1;
 788                 for (j = 0; j < NV_MAX_LINKS; j++) {
 789                         /*
 790                          * There are no ordering requirements with respect to
 791                          * the setup of struct npu_context, but to ensure
 792                          * consistent behaviour we need to ensure npdev[][] is
 793                          * only read once.
 794                          */
 795                         npdev = READ_ONCE(npu_context->npdev[i][j]);
 796                         if (!npdev)
 797                                 continue;
 798
 799                         npu = pci_bus_to_host(npdev->bus)->npu;
 800                         if (!npu)
 801                                 continue;
 802
 803                         mmio_atsd_reg[i].npu = npu;
 804                         mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
 805                         while (mmio_atsd_reg[i].reg < 0) {
 806                                 mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
 807                                 cpu_relax();
 808                         }
 809                         break;
 810                 }
 811         }
 812 }
 813
 814 /*
 815  * Release previously acquired ATSD registers. To avoid deadlocks the registers
 816  * must be released in the same order they were acquired above in
 817  * acquire_atsd_reg.
 818  */
 819 static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 820 {
 821         int i;
 822
 823         for (i = 0; i <= max_npu2_index; i++) {
 824                 /*
 825                  * We can't rely on npu_context->npdev[][] being the same here
 826                  * as when acquire_atsd_reg() was called, hence we use the
 827                  * values stored in mmio_atsd_reg during the acquire phase
 828                  * rather than re-reading npdev[][].
 829                  */
 830                 if (mmio_atsd_reg[i].reg < 0)
 831                         continue;
 832
 833                 put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
 834         }
 835 }
 836
 837 /*
 838  * Invalidate a virtual address range
 839  */
 840 static void mmio_invalidate(struct npu_context *npu_context,
 841                         unsigned long start, unsigned long size)
 842 {
 843         struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 844         unsigned long pid = npu_context->mm->context.id;
 845         unsigned long atsd_start = 0;
 846         unsigned long end = start + size - 1;
 847         int atsd_psize = MMU_PAGE_COUNT;
 848
 849         /*
 850          * Convert the input range into one of the supported sizes. If the range
 851          * doesn't fit, use the next larger supported size. Invalidation latency
 852          * is high, so over-invalidation is preferred to issuing multiple
 853          * invalidates.
 854          *
 855          * A 4K page size isn't supported by NPU/GPU ATS, so that case is
 856          * ignored.
 857          */
 858         if (size == SZ_64K) {
 859                 atsd_start = start;
 860                 atsd_psize = MMU_PAGE_64K;
 861         } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
 862                 atsd_start = ALIGN_DOWN(start, SZ_2M);
 863                 atsd_psize = MMU_PAGE_2M;
 864         } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
 865                 atsd_start = ALIGN_DOWN(start, SZ_1G);
 866                 atsd_psize = MMU_PAGE_1G;
 867         }
 868
 869         if (npu_context->nmmu_flush)
 870                 /*
 871                  * Unfortunately the nest mmu does not support flushing specific
 872                  * addresses so we have to flush the whole mm once before
 873                  * shooting down the GPU translation.
 874                  */
 875                 flush_all_mm(npu_context->mm);
 876
 877         /*
 878          * Loop over all the NPUs this process is active on and launch
 879          * an invalidate.
 880          */
 881         acquire_atsd_reg(npu_context, mmio_atsd_reg);
 882
 883         if (atsd_psize == MMU_PAGE_COUNT)
 884                 mmio_invalidate_pid(mmio_atsd_reg, pid);
 885         else
 886                 mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
 887                                         atsd_psize);
 888
 889         mmio_invalidate_wait(mmio_atsd_reg);
 890
 891         /*
 892          * The GPU requires two flush ATSDs to ensure all entries have been
 893          * flushed. We use PID 0 as it will never be used for a process on the
 894          * GPU.
 895          */
 896         mmio_invalidate_pid(mmio_atsd_reg, 0);
 897         mmio_invalidate_wait(mmio_atsd_reg);
 898         mmio_invalidate_pid(mmio_atsd_reg, 0);
 899         mmio_invalidate_wait(mmio_atsd_reg);
 900
 901         release_atsd_reg(mmio_atsd_reg);
 902 }
 903
 904 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 905                                 struct mm_struct *mm)
 906 {
 907         struct npu_context *npu_context = mn_to_npu_context(mn);
 908
 909         /* Call into device driver to stop requests to the NMMU */
 910         if (npu_context->release_cb)
 911                 npu_context->release_cb(npu_context, npu_context->priv);
 912
 913         /*
 914          * There should be no more translation requests for this PID, but we
 915          * need to ensure any entries for it are removed from the TLB.
 916          */
 917         mmio_invalidate(npu_context, 0, ~0UL);
 918 }
 919
 920 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 921                                 struct mm_struct *mm,
 922                                 unsigned long address,
 923                                 pte_t pte)
 924 {
 925         struct npu_context *npu_context = mn_to_npu_context(mn);
 926         mmio_invalidate(npu_context, address, PAGE_SIZE);
 927 }
 928
 929 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 930                                         struct mm_struct *mm,
 931                                         unsigned long start, unsigned long end)
 932 {
 933         struct npu_context *npu_context = mn_to_npu_context(mn);
 934         mmio_invalidate(npu_context, start, end - start);
 935 }
 936
 937 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
 938         .release = pnv_npu2_mn_release,
 939         .change_pte = pnv_npu2_mn_change_pte,
 940         .invalidate_range = pnv_npu2_mn_invalidate_range,
 941 };
 942
 943 /*
 944  * Call into OPAL to setup the nmmu context for the current task in
 945  * the NPU. This must be called to setup the context tables before the
 946  * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
 947  *
 948  * A release callback should be registered to allow a device driver to
 949  * be notified that it should not launch any new translation requests
 950  * as the final TLB invalidate is about to occur.
 951  *
 952  * Returns an error if there no contexts are currently available or a
 953  * npu_context which should be passed to pnv_npu2_handle_fault().
 954  *
 955  * mmap_sem must be held in write mode and must not be called from interrupt
 956  * context.
 957  */
 958 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 959                         unsigned long flags,
 960                         void (*cb)(struct npu_context *, void *),
 961                         void *priv)
 962 {
 963         int rc;
 964         u32 nvlink_index;
 965         struct device_node *nvlink_dn;
 966         struct mm_struct *mm = current->mm;
 967         struct npu *npu;
 968         struct npu_context *npu_context;
 969         struct pci_controller *hose;
 970
 971         /*
 972          * At present we don't support GPUs connected to multiple NPUs and I'm
 973          * not sure the hardware does either.
 974          */
 975         struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
 976
 977         if (!npdev)
 978                 /* No nvlink associated with this GPU device */
 979                 return ERR_PTR(-ENODEV);
 980
 981         /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
 982         if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
 983                 return ERR_PTR(-EINVAL);
 984
 985         nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
 986         if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 987                                                         &nvlink_index)))
 988                 return ERR_PTR(-ENODEV);
 989
 990         if (!mm || mm->context.id == 0) {
 991                 /*
 992                  * Kernel thread contexts are not supported and context id 0 is
 993                  * reserved on the GPU.
 994                  */
 995                 return ERR_PTR(-EINVAL);
 996         }
 997
 998         hose = pci_bus_to_host(npdev->bus);
 999         npu = hose->npu;
1000         if (!npu)
1001                 return ERR_PTR(-ENODEV);
1002
1003         /*
1004          * We store the npu pci device so we can more easily get at the
1005          * associated npus.
1006          */
1007         spin_lock(&npu_context_lock);
1008         npu_context = mm->context.npu_context;
1009         if (npu_context) {
1010                 if (npu_context->release_cb != cb ||
1011                         npu_context->priv != priv) {
1012                         spin_unlock(&npu_context_lock);
1013                         return ERR_PTR(-EINVAL);
1014                 }
1015
1016                 WARN_ON(!kref_get_unless_zero(&npu_context->kref));
1017         }
1018         spin_unlock(&npu_context_lock);
1019
1020         if (!npu_context) {
1021                 /*
1022                  * We can set up these fields without holding the
1023                  * npu_context_lock as the npu_context hasn't been returned to
1024                  * the caller meaning it can't be destroyed. Parallel allocation
1025                  * is protected against by mmap_sem.
1026                  */
1027                 rc = -ENOMEM;
1028                 npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
1029                 if (npu_context) {
1030                         kref_init(&npu_context->kref);
1031                         npu_context->mm = mm;
1032                         npu_context->mn.ops = &nv_nmmu_notifier_ops;
1033                         rc = __mmu_notifier_register(&npu_context->mn, mm);
1034                 }
1035
1036                 if (rc) {
1037                         kfree(npu_context);
1038                         return ERR_PTR(rc);
1039                 }
1040
1041                 mm->context.npu_context = npu_context;
1042         }
1043
1044         npu_context->release_cb = cb;
1045         npu_context->priv = priv;
1046
1047         /*
1048          * npdev is a pci_dev pointer setup by the PCI code. We assign it to
1049          * npdev[][] to indicate to the mmu notifiers that an invalidation
1050          * should also be sent over this nvlink. The notifiers don't use any
1051          * other fields in npu_context, so we just need to ensure that when they
1052          * deference npu_context->npdev[][] it is either a valid pointer or
1053          * NULL.
1054          */
1055         WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
1056
1057         if (!npu->nmmu_flush) {
1058                 /*
1059                  * If we're not explicitly flushing ourselves we need to mark
1060                  * the thread for global flushes
1061                  */
1062                 npu_context->nmmu_flush = false;
1063                 mm_context_add_copro(mm);
1064         } else
1065                 npu_context->nmmu_flush = true;
1066
1067         return npu_context;
1068 }
1069 EXPORT_SYMBOL(pnv_npu2_init_context);
1070
1071 static void pnv_npu2_release_context(struct kref *kref)
1072 {
1073         struct npu_context *npu_context =
1074                 container_of(kref, struct npu_context, kref);
1075
1076         if (!npu_context->nmmu_flush)
1077                 mm_context_remove_copro(npu_context->mm);
1078
1079         npu_context->mm->context.npu_context = NULL;
1080 }
1081
1082 /*
1083  * Destroy a context on the given GPU. May free the npu_context if it is no
1084  * longer active on any GPUs. Must not be called from interrupt context.
1085  */
1086 void pnv_npu2_destroy_context(struct npu_context *npu_context,
1087                         struct pci_dev *gpdev)
1088 {
1089         int removed;
1090         struct npu *npu;
1091         struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
1092         struct device_node *nvlink_dn;
1093         u32 nvlink_index;
1094         struct pci_controller *hose;
1095
1096         if (WARN_ON(!npdev))
1097                 return;
1098
1099         hose = pci_bus_to_host(npdev->bus);
1100         npu = hose->npu;
1101         if (!npu)
1102                 return;
1103         nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
1104         if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
1105                                                         &nvlink_index)))
1106                 return;
1107         WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
1108         spin_lock(&npu_context_lock);
1109         removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
1110         spin_unlock(&npu_context_lock);
1111
1112         /*
1113          * We need to do this outside of pnv_npu2_release_context so that it is
1114          * outside the spinlock as mmu_notifier_destroy uses SRCU.
1115          */
1116         if (removed) {
1117                 mmu_notifier_unregister(&npu_context->mn,
1118                                         npu_context->mm);
1119
1120                 kfree(npu_context);
1121         }
1122
1123 }
1124 EXPORT_SYMBOL(pnv_npu2_destroy_context);
1125
1126 /*
1127  * Assumes mmap_sem is held for the contexts associated mm.
1128  */
1129 int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
1130                         unsigned long *flags, unsigned long *status, int count)
1131 {
1132         u64 rc = 0, result = 0;
1133         int i, is_write;
1134         struct page *page[1];
1135         const char __user *u;
1136         char c;
1137
1138         /* mmap_sem should be held so the struct_mm must be present */
1139         struct mm_struct *mm = context->mm;
1140
1141         WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
1142
1143         for (i = 0; i < count; i++) {
1144                 is_write = flags[i] & NPU2_WRITE;
1145                 rc = get_user_pages_remote(NULL, mm, ea[i], 1,
1146                                         is_write ? FOLL_WRITE : 0,
1147                                         page, NULL, NULL);
1148
1149                 if (rc != 1) {
1150                         status[i] = rc;
1151                         result = -EFAULT;
1152                         continue;
1153                 }
1154
1155                 /* Make sure partition scoped tree gets a pte */
1156                 u = page_address(page[0]);
1157                 if (__get_user(c, u))
1158                         result = -EFAULT;
1159
1160                 status[i] = 0;
1161                 put_page(page[0]);
1162         }
1163
1164         return result;
1165 }
1166 EXPORT_SYMBOL(pnv_npu2_handle_fault);
1167
1168 int pnv_npu2_init(struct pci_controller *hose)
1169 {
1170         unsigned int i;
1171         u64 mmio_atsd;
1172         static int npu_index;
1173         struct npu *npu;
1174         int ret;
1175
1176         npu = kzalloc(sizeof(*npu), GFP_KERNEL);
1177         if (!npu)
1178                 return -ENOMEM;
1179
1180         npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
1181
1182         for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
1183                         !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
1184                                 i, &mmio_atsd); i++)
1185                 npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
1186
1187         pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
1188         npu->mmio_atsd_count = i;
1189         npu->mmio_atsd_usage = 0;
1190         npu_index++;
1191         if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
1192                 ret = -ENOSPC;
1193                 goto fail_exit;
1194         }
1195         max_npu2_index = npu_index;
1196         npu->index = npu_index;
1197         hose->npu = npu;
1198
1199         return 0;
1200
1201 fail_exit:
1202         for (i = 0; i < npu->mmio_atsd_count; ++i)
1203                 iounmap(npu->mmio_atsd_regs[i]);
1204
1205         kfree(npu);
1206
1207         return ret;
1208 }
1209
1210 int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
1211                 unsigned long msr)
1212 {
1213         int ret;
1214         struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
1215         struct pci_controller *hose;
1216         struct pnv_phb *nphb;
1217
1218         if (!npdev)
1219                 return -ENODEV;
1220
1221         hose = pci_bus_to_host(npdev->bus);
1222         nphb = hose->private_data;
1223
1224         dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
1225                         nphb->opal_id, lparid);
1226         /*
1227          * Currently we only support radix and non-zero LPCR only makes sense
1228          * for hash tables so skiboot expects the LPCR parameter to be a zero.
1229          */
1230         ret = opal_npu_map_lpar(nphb->opal_id,
1231                         PCI_DEVID(gpdev->bus->number, gpdev->devfn), lparid,
1232                         0 /* LPCR bits */);
1233         if (ret) {
1234                 dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
1235                 return ret;
1236         }
1237
1238         dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
1239                         nphb->opal_id, msr);
1240         ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
1241                         PCI_DEVID(gpdev->bus->number, gpdev->devfn));
1242         if (ret < 0)
1243                 dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
1244         else
1245                 ret = 0;
1246
1247         return 0;
1248 }
1249 EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
1250
1251 void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
1252 {
1253         struct pci_dev *gpdev;
1254
1255         list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
1256                 pnv_npu2_map_lpar_dev(gpdev, 0, msr);
1257 }
1258
1259 int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
1260 {
1261         int ret;
1262         struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
1263         struct pci_controller *hose;
1264         struct pnv_phb *nphb;
1265
1266         if (!npdev)
1267                 return -ENODEV;
1268
1269         hose = pci_bus_to_host(npdev->bus);
1270         nphb = hose->private_data;
1271
1272         dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
1273                         nphb->opal_id);
1274         ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
1275                         PCI_DEVID(gpdev->bus->number, gpdev->devfn));
1276         if (ret < 0) {
1277                 dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
1278                 return ret;
1279         }
1280
1281         /* Set LPID to 0 anyway, just to be safe */
1282         dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
1283         ret = opal_npu_map_lpar(nphb->opal_id,
1284                         PCI_DEVID(gpdev->bus->number, gpdev->devfn), 0 /*LPID*/,
1285                         0 /* LPCR bits */);
1286         if (ret)
1287                 dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
1288
1289         return ret;
1290 }
1291 EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);