arch/powerpc/kernel/eeh_driver.c

   1 /*
   2  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
   3  * Copyright IBM Corp. 2004 2005
   4  * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
   5  *
   6  * All rights reserved.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or (at
  11  * your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  16  * NON INFRINGEMENT.  See the GNU General Public License for more
  17  * details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22  *
  23  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  24  */
  25 #include <linux/delay.h>
  26 #include <linux/interrupt.h>
  27 #include <linux/irq.h>
  28 #include <linux/module.h>
  29 #include <linux/pci.h>
  30 #include <asm/eeh.h>
  31 #include <asm/eeh_event.h>
  32 #include <asm/ppc-pci.h>
  33 #include <asm/pci-bridge.h>
  34 #include <asm/prom.h>
  35 #include <asm/rtas.h>
  36
  37 struct eeh_rmv_data {	/* bookkeeping handed to eeh_rmv_device() through its userdata argument */
  38         struct list_head edev_list;	/* eeh_devs with a physfn that eeh_rmv_device() removed, linked via edev->rmv_list */
  39         int removed;	/* incremented once for every device eeh_rmv_device() removes */
  40 };
  41
  42 static int eeh_result_priority(enum pci_ers_result result)
  43 {
  44         switch (result) {
  45         case PCI_ERS_RESULT_NONE:
  46                 return 1;
  47         case PCI_ERS_RESULT_NO_AER_DRIVER:
  48                 return 2;
  49         case PCI_ERS_RESULT_RECOVERED:
  50                 return 3;
  51         case PCI_ERS_RESULT_CAN_RECOVER:
  52                 return 4;
  53         case PCI_ERS_RESULT_DISCONNECT:
  54                 return 5;
  55         case PCI_ERS_RESULT_NEED_RESET:
  56                 return 6;
  57         default:
  58                 WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
  59                 return 0;
  60         }
  61 };
  62
  63 const char *pci_ers_result_name(enum pci_ers_result result)
  64 {
  65         switch (result) {
  66         case PCI_ERS_RESULT_NONE:
  67                 return "none";
  68         case PCI_ERS_RESULT_CAN_RECOVER:
  69                 return "can recover";
  70         case PCI_ERS_RESULT_NEED_RESET:
  71                 return "need reset";
  72         case PCI_ERS_RESULT_DISCONNECT:
  73                 return "disconnect";
  74         case PCI_ERS_RESULT_RECOVERED:
  75                 return "recovered";
  76         case PCI_ERS_RESULT_NO_AER_DRIVER:
  77                 return "no AER driver";
  78         default:
  79                 WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
  80                 return "unknown";
  81         }
  82 };
  83
  84 static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
  85                                          const char *fmt, ...)
  86 {
  87         struct va_format vaf;
  88         va_list args;
  89
  90         va_start(args, fmt);
  91
  92         vaf.fmt = fmt;
  93         vaf.va = &args;
  94
  95         printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
  96                edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
  97
  98         va_end(args);
  99 }
 100
 101 static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
 102                                                 enum pci_ers_result new)
 103 {
 104         if (eeh_result_priority(new) > eeh_result_priority(old))
 105                 return new;
 106         return old;
 107 }
 108
 109 static bool eeh_dev_removed(struct eeh_dev *edev)
 110 {
 111         return !edev || (edev->mode & EEH_DEV_REMOVED);
 112 }
 113
 114 static bool eeh_edev_actionable(struct eeh_dev *edev)
 115 {
 116         return (edev->pdev && !eeh_dev_removed(edev) &&
 117                 !eeh_pe_passed(edev->pe));
 118 }
 119
 120 /**
 121  * eeh_pcid_get - Get the PCI device driver
 122  * @pdev: PCI device
 123  *
 124  * The function is used to retrieve the PCI device driver for
 125  * the indicated PCI device. Besides, we will increase the reference
 126  * of the PCI device driver to prevent that being unloaded on
 127  * the fly. Otherwise, kernel crash would be seen.
 128  */
 129 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
 130 {
 131         if (!pdev || !pdev->driver)
 132                 return NULL;
 133
 134         if (!try_module_get(pdev->driver->driver.owner))
 135                 return NULL;
 136
 137         return pdev->driver;
 138 }
 139
 140 /**
 141  * eeh_pcid_put - Dereference on the PCI device driver
 142  * @pdev: PCI device
 143  *
 144  * The function is called to do dereference on the PCI device
 145  * driver of the indicated PCI device.
 146  */
 147 static inline void eeh_pcid_put(struct pci_dev *pdev)
 148 {
 149         if (!pdev || !pdev->driver)
 150                 return;
 151
 152         module_put(pdev->driver->driver.owner);
 153 }
 154
 155 /**
 156  * eeh_disable_irq - Disable interrupt for the recovering device
 157  * @dev: PCI device
 158  *
 159  * This routine must be called when reporting temporary or permanent
 160  * error to the particular PCI device to disable interrupt of that
 161  * device. If the device has enabled MSI or MSI-X interrupt, we needn't
 162  * do real work because EEH should freeze DMA transfers for those PCI
 163  * devices encountering EEH errors, which includes MSI or MSI-X.
 164  */
 165 static void eeh_disable_irq(struct eeh_dev *edev)
 166 {
 167         /* Don't disable MSI and MSI-X interrupts. They are
 168          * effectively disabled by the DMA Stopped state
 169          * when an EEH error occurs.
 170          */
 171         if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
 172                 return;
 173
 174         if (!irq_has_action(edev->pdev->irq))
 175                 return;
 176
 177         edev->mode |= EEH_DEV_IRQ_DISABLED;
 178         disable_irq_nosync(edev->pdev->irq);
 179 }
 180
 181 /**
 182  * eeh_enable_irq - Enable interrupt for the recovering device
 183  * @dev: PCI device
 184  *
 185  * This routine must be called to enable interrupt while failed
 186  * device could be resumed.
 187  */
 188 static void eeh_enable_irq(struct eeh_dev *edev)
 189 {
 190         if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 191                 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
 192                 /*
 193                  * FIXME !!!!!
 194                  *
 195                  * This is just ass backwards. This maze has
 196                  * unbalanced irq_enable/disable calls. So instead of
 197                  * finding the root cause it works around the warning
 198                  * in the irq_enable code by conditionally calling
 199                  * into it.
 200                  *
 201                  * That's just wrong.The warning in the core code is
 202                  * there to tell people to fix their asymmetries in
 203                  * their own code, not by abusing the core information
 204                  * to avoid it.
 205                  *
 206                  * I so wish that the assymetry would be the other way
 207                  * round and a few more irq_disable calls render that
 208                  * shit unusable forever.
 209                  *
 210                  *      tglx
 211                  */
 212                 if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
 213                         enable_irq(edev->pdev->irq);
 214         }
 215 }
 216
 217 static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
 218 {
 219         struct pci_dev *pdev;
 220
 221         if (!edev)
 222                 return NULL;
 223
 224         /*
 225          * We cannot access the config space on some adapters.
 226          * Otherwise, it will cause fenced PHB. We don't save
 227          * the content in their config space and will restore
 228          * from the initial config space saved when the EEH
 229          * device is created.
 230          */
 231         if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
 232                 return NULL;
 233
 234         pdev = eeh_dev_to_pci_dev(edev);
 235         if (!pdev)
 236                 return NULL;
 237
 238         pci_save_state(pdev);
 239         return NULL;
 240 }
 241
 242 static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
 243 {
 244         struct eeh_pe *pe;
 245         struct eeh_dev *edev, *tmp;
 246
 247         eeh_for_each_pe(root, pe)
 248                 eeh_pe_for_each_dev(pe, edev, tmp)
 249                         if (eeh_edev_actionable(edev))
 250                                 edev->pdev->error_state = s;
 251 }
 252
 253 static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
 254 {
 255         struct eeh_pe *pe;
 256         struct eeh_dev *edev, *tmp;
 257
 258         eeh_for_each_pe(root, pe) {
 259                 eeh_pe_for_each_dev(pe, edev, tmp) {
 260                         if (!eeh_edev_actionable(edev))
 261                                 continue;
 262
 263                         if (!eeh_pcid_get(edev->pdev))
 264                                 continue;
 265
 266                         if (enable)
 267                                 eeh_enable_irq(edev);
 268                         else
 269                                 eeh_disable_irq(edev);
 270
 271                         eeh_pcid_put(edev->pdev);
 272                 }
 273         }
 274 }
 275
 276 typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
 277                                              struct pci_driver *);
 278 static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
 279                                enum pci_ers_result *result)
 280 {
 281         struct pci_driver *driver;
 282         enum pci_ers_result new_result;
 283
 284         device_lock(&edev->pdev->dev);
 285         if (eeh_edev_actionable(edev)) {
 286                 driver = eeh_pcid_get(edev->pdev);
 287
 288                 if (!driver)
 289                         eeh_edev_info(edev, "no driver");
 290                 else if (!driver->err_handler)
 291                         eeh_edev_info(edev, "driver not EEH aware");
 292                 else if (edev->mode & EEH_DEV_NO_HANDLER)
 293                         eeh_edev_info(edev, "driver bound too late");
 294                 else {
 295                         new_result = fn(edev, driver);
 296                         eeh_edev_info(edev, "%s driver reports: '%s'",
 297                                       driver->name,
 298                                       pci_ers_result_name(new_result));
 299                         if (result)
 300                                 *result = pci_ers_merge_result(*result,
 301                                                                new_result);
 302                 }
 303                 if (driver)
 304                         eeh_pcid_put(edev->pdev);
 305         } else {
 306                 eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
 307                               !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
 308         }
 309         device_unlock(&edev->pdev->dev);
 310 }
 311
 312 static void eeh_pe_report(const char *name, struct eeh_pe *root,
 313                           eeh_report_fn fn, enum pci_ers_result *result)
 314 {
 315         struct eeh_pe *pe;
 316         struct eeh_dev *edev, *tmp;
 317
 318         pr_info("EEH: Beginning: '%s'\n", name);
 319         eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
 320                 eeh_pe_report_edev(edev, fn, result);
 321         if (result)
 322                 pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
 323                         name, pci_ers_result_name(*result));
 324         else
 325                 pr_info("EEH: Finished:'%s'", name);
 326 }
 327
 328 /**
 329  * eeh_report_error - Report pci error to each device driver
 330  * @edev: eeh device
 331  * @driver: device's PCI driver
 332  *
 333  * Report an EEH error to each device driver.
 334  */
 335 static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
 336                                             struct pci_driver *driver)
 337 {
 338         enum pci_ers_result rc;
 339         struct pci_dev *dev = edev->pdev;
 340
 341         if (!driver->err_handler->error_detected)
 342                 return PCI_ERS_RESULT_NONE;
 343
 344         eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
 345                       driver->name);
 346         rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 347
 348         edev->in_error = true;
 349         pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
 350         return rc;
 351 }
 352
 353 /**
 354  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
 355  * @edev: eeh device
 356  * @driver: device's PCI driver
 357  *
 358  * Tells each device driver that IO ports, MMIO and config space I/O
 359  * are now enabled.
 360  */
 361 static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
 362                                                    struct pci_driver *driver)
 363 {
 364         if (!driver->err_handler->mmio_enabled)
 365                 return PCI_ERS_RESULT_NONE;
 366         eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
 367         return driver->err_handler->mmio_enabled(edev->pdev);
 368 }
 369
 370 /**
 371  * eeh_report_reset - Tell device that slot has been reset
 372  * @edev: eeh device
 373  * @driver: device's PCI driver
 374  *
 375  * This routine must be called while EEH tries to reset particular
 376  * PCI device so that the associated PCI device driver could take
 377  * some actions, usually to save data the driver needs so that the
 378  * driver can work again while the device is recovered.
 379  */
 380 static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
 381                                             struct pci_driver *driver)
 382 {
 383         if (!driver->err_handler->slot_reset || !edev->in_error)
 384                 return PCI_ERS_RESULT_NONE;
 385         eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
 386         return driver->err_handler->slot_reset(edev->pdev);
 387 }
 388
 389 static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
 390 {
 391         struct pci_dev *pdev;
 392
 393         if (!edev)
 394                 return NULL;
 395
 396         /*
 397          * The content in the config space isn't saved because
 398          * the blocked config space on some adapters. We have
 399          * to restore the initial saved config space when the
 400          * EEH device is created.
 401          */
 402         if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
 403                 if (list_is_last(&edev->list, &edev->pe->edevs))
 404                         eeh_pe_restore_bars(edev->pe);
 405
 406                 return NULL;
 407         }
 408
 409         pdev = eeh_dev_to_pci_dev(edev);
 410         if (!pdev)
 411                 return NULL;
 412
 413         pci_restore_state(pdev);
 414         return NULL;
 415 }
 416
 417 /**
 418  * eeh_report_resume - Tell device to resume normal operations
 419  * @edev: eeh device
 420  * @driver: device's PCI driver
 421  *
 422  * This routine must be called to notify the device driver that it
 423  * could resume so that the device driver can do some initialization
 424  * to make the recovered device work again.
 425  */
 426 static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
 427                                              struct pci_driver *driver)
 428 {
 429         if (!driver->err_handler->resume || !edev->in_error)
 430                 return PCI_ERS_RESULT_NONE;
 431
 432         eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
 433         driver->err_handler->resume(edev->pdev);
 434
 435         pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
 436 #ifdef CONFIG_PCI_IOV
 437         if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
 438                 eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 439 #endif
 440         return PCI_ERS_RESULT_NONE;
 441 }
 442
 443 /**
 444  * eeh_report_failure - Tell device driver that device is dead.
 445  * @edev: eeh device
 446  * @driver: device's PCI driver
 447  *
 448  * This informs the device driver that the device is permanently
 449  * dead, and that no further recovery attempts will be made on it.
 450  */
 451 static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
 452                                               struct pci_driver *driver)
 453 {
 454         enum pci_ers_result rc;
 455
 456         if (!driver->err_handler->error_detected)
 457                 return PCI_ERS_RESULT_NONE;
 458
 459         eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
 460                       driver->name);
 461         rc = driver->err_handler->error_detected(edev->pdev,
 462                                                  pci_channel_io_perm_failure);
 463
 464         pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
 465         return rc;
 466 }
 467
 468 static void *eeh_add_virt_device(void *data, void *userdata)
 469 {
 470         struct pci_driver *driver;
 471         struct eeh_dev *edev = (struct eeh_dev *)data;
 472         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 473         struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 474
 475         if (!(edev->physfn)) {
 476                 pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
 477                         __func__, pdn->phb->global_number, pdn->busno,
 478                         PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 479                 return NULL;
 480         }
 481
 482         driver = eeh_pcid_get(dev);
 483         if (driver) {
 484                 if (driver->err_handler) {
 485                         eeh_pcid_put(dev);
 486                         return NULL;
 487                 }
 488                 eeh_pcid_put(dev);
 489         }
 490
 491 #ifdef CONFIG_PCI_IOV
 492         pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
 493 #endif
 494         return NULL;
 495 }
 496
 497 static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 498 {
 499         struct pci_driver *driver;
 500         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 501         struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
 502         int *removed = rmv_data ? &rmv_data->removed : NULL;
 503
 504         /*
 505          * Actually, we should remove the PCI bridges as well.
 506          * However, that's lots of complexity to do that,
 507          * particularly some of devices under the bridge might
 508          * support EEH. So we just care about PCI devices for
 509          * simplicity here.
 510          */
 511         if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
 512                 return NULL;
 513
 514         /*
 515          * We rely on count-based pcibios_release_device() to
 516          * detach permanently offlined PEs. Unfortunately, that's
 517          * not reliable enough. We might have the permanently
 518          * offlined PEs attached, but we needn't take care of
 519          * them and their child devices.
 520          */
 521         if (eeh_dev_removed(edev))
 522                 return NULL;
 523
 524         if (removed) {
 525                 if (eeh_pe_passed(edev->pe))
 526                         return NULL;
 527                 driver = eeh_pcid_get(dev);
 528                 if (driver) {
 529                         if (driver->err_handler &&
 530                             driver->err_handler->error_detected &&
 531                             driver->err_handler->slot_reset) {
 532                                 eeh_pcid_put(dev);
 533                                 return NULL;
 534                         }
 535                         eeh_pcid_put(dev);
 536                 }
 537         }
 538
 539         /* Remove it from PCI subsystem */
 540         pr_debug("EEH: Removing %s without EEH sensitive driver\n",
 541                  pci_name(dev));
 542         edev->bus = dev->bus;
 543         edev->mode |= EEH_DEV_DISCONNECTED;
 544         if (removed)
 545                 (*removed)++;
 546
 547         if (edev->physfn) {
 548 #ifdef CONFIG_PCI_IOV
 549                 struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 550
 551                 pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
 552                 edev->pdev = NULL;
 553
 554                 /*
 555                  * We have to set the VF PE number to invalid one, which is
 556                  * required to plug the VF successfully.
 557                  */
 558                 pdn->pe_number = IODA_INVALID_PE;
 559 #endif
 560                 if (rmv_data)
 561                         list_add(&edev->rmv_list, &rmv_data->edev_list);
 562         } else {
 563                 pci_lock_rescan_remove();
 564                 pci_stop_and_remove_bus_device(dev);
 565                 pci_unlock_rescan_remove();
 566         }
 567
 568         return NULL;
 569 }
 570
 571 static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
 572 {
 573         struct eeh_dev *edev, *tmp;
 574
 575         eeh_pe_for_each_dev(pe, edev, tmp) {
 576                 if (!(edev->mode & EEH_DEV_DISCONNECTED))
 577                         continue;
 578
 579                 edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
 580                 eeh_rmv_from_parent_pe(edev);
 581         }
 582
 583         return NULL;
 584 }
 585
 586 /*
 587  * Explicitly clear PE's frozen state for PowerNV where
 588  * we have frozen PE until BAR restore is completed. It's
 589  * harmless to clear it for pSeries. To be consistent with
 590  * PE reset (for 3 times), we try to clear the frozen state
 591  * for 3 times as well.
 592  */
 593 static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
 594 {
 595         bool clear_sw_state = *(bool *)flag;
 596         int i, rc = 1;
 597
 598         for (i = 0; rc && i < 3; i++)
 599                 rc = eeh_unfreeze_pe(pe, clear_sw_state);
 600
 601         /* Stop immediately on any errors */
 602         if (rc) {
 603                 pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
 604                         __func__, rc, pe->phb->global_number, pe->addr);
 605                 return (void *)pe;
 606         }
 607
 608         return NULL;
 609 }
 610
 611 static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
 612                                      bool clear_sw_state)
 613 {
 614         void *rc;
 615
 616         rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
 617         if (!rc)
 618                 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 619
 620         return rc ? -EIO : 0;
 621 }
 622
 623 int eeh_pe_reset_and_recover(struct eeh_pe *pe)
 624 {
 625         int ret;
 626
 627         /* Bail if the PE is being recovered */
 628         if (pe->state & EEH_PE_RECOVERING)
 629                 return 0;
 630
 631         /* Put the PE into recovery mode */
 632         eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 633
 634         /* Save states */
 635         eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
 636
 637         /* Issue reset */
 638         ret = eeh_pe_reset_full(pe);
 639         if (ret) {
 640                 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 641                 return ret;
 642         }
 643
 644         /* Unfreeze the PE */
 645         ret = eeh_clear_pe_frozen_state(pe, true);
 646         if (ret) {
 647                 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 648                 return ret;
 649         }
 650
 651         /* Restore device state */
 652         eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
 653
 654         /* Clear recovery mode */
 655         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 656
 657         return 0;
 658 }
 659
 660 /**
 661  * eeh_reset_device - Perform actual reset of a pci slot
 662  * @driver_eeh_aware: Does the device's driver provide EEH support?
 663  * @pe: EEH PE
 664  * @bus: PCI bus corresponding to the isolcated slot
 665  * @rmv_data: Optional, list to record removed devices
 666  *
 667  * This routine must be called to do reset on the indicated PE.
 668  * During the reset, udev might be invoked because those affected
 669  * PCI devices will be removed and then added.
 670  */
 671 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 672                             struct eeh_rmv_data *rmv_data,
 673                             bool driver_eeh_aware)
 674 {
 675         time64_t tstamp;
 676         int cnt, rc;
 677         struct eeh_dev *edev;
 678
 679         /* pcibios will clear the counter; save the value */
 680         cnt = pe->freeze_count;
 681         tstamp = pe->tstamp;
 682
 683         /*
 684          * We don't remove the corresponding PE instances because
 685          * we need the information afterwords. The attached EEH
 686          * devices are expected to be attached soon when calling
 687          * into pci_hp_add_devices().
 688          */
 689         eeh_pe_state_mark(pe, EEH_PE_KEEP);
 690         if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
 691                 eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 692         } else {
 693                 pci_lock_rescan_remove();
 694                 pci_hp_remove_devices(bus);
 695                 pci_unlock_rescan_remove();
 696         }
 697
 698         /*
 699          * Reset the pci controller. (Asserts RST#; resets config space).
 700          * Reconfigure bridges and devices. Don't try to bring the system
 701          * up if the reset failed for some reason.
 702          *
 703          * During the reset, it's very dangerous to have uncontrolled PCI
 704          * config accesses. So we prefer to block them. However, controlled
 705          * PCI config accesses initiated from EEH itself are allowed.
 706          */
 707         rc = eeh_pe_reset_full(pe);
 708         if (rc)
 709                 return rc;
 710
 711         pci_lock_rescan_remove();
 712
 713         /* Restore PE */
 714         eeh_ops->configure_bridge(pe);
 715         eeh_pe_restore_bars(pe);
 716
 717         /* Clear frozen state */
 718         rc = eeh_clear_pe_frozen_state(pe, false);
 719         if (rc) {
 720                 pci_unlock_rescan_remove();
 721                 return rc;
 722         }
 723
 724         /* Give the system 5 seconds to finish running the user-space
 725          * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
 726          * this is a hack, but if we don't do this, and try to bring
 727          * the device up before the scripts have taken it down,
 728          * potentially weird things happen.
 729          */
 730         if (!driver_eeh_aware || rmv_data->removed) {
 731                 pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
 732                         (driver_eeh_aware ? "partial" : "complete"));
 733                 ssleep(5);
 734
 735                 /*
 736                  * The EEH device is still connected with its parent
 737                  * PE. We should disconnect it so the binding can be
 738                  * rebuilt when adding PCI devices.
 739                  */
 740                 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 741                 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
 742                 if (pe->type & EEH_PE_VF) {
 743                         eeh_add_virt_device(edev, NULL);
 744                 } else {
 745                         if (!driver_eeh_aware)
 746                                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 747                         pci_hp_add_devices(bus);
 748                 }
 749         }
 750         eeh_pe_state_clear(pe, EEH_PE_KEEP);
 751
 752         pe->tstamp = tstamp;
 753         pe->freeze_count = cnt;
 754
 755         pci_unlock_rescan_remove();
 756         return 0;
 757 }
 758
 759 /* The longest amount of time to wait for a pci device
 760  * to come back on line, in seconds. It is multiplied by 1000 where it is passed to eeh_ops->wait_state().
 761  */
 762 #define MAX_WAIT_FOR_RECOVERY 300
 763
 764 /**
 765  * eeh_handle_normal_event - Handle EEH events on a specific PE
 766  * @pe: EEH PE - which should not be used after we return, as it may
 767  * have been invalidated.
 768  *
 769  * Attempts to recover the given PE.  If recovery fails or the PE has failed
 770  * too many times, remove the PE.
 771  *
 772  * While PHB detects address or data parity errors on particular PCI
 773  * slot, the associated PE will be frozen. Besides, DMA's occurring
 774  * to wild addresses (which usually happen due to bugs in device
 775  * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
 776  * #PERR or other misc PCI-related errors also can trigger EEH errors.
 777  *
 778  * Recovery process consists of unplugging the device driver (which
 779  * generated hotplug events to userspace), then issuing a PCI #RST to
 780  * the device, then reconfiguring the PCI config space for all bridges
 781  * & devices under this slot, and then finally restarting the device
 782  * drivers (which cause a second set of hotplug events to go out to
 783  * userspace).
 784  */
 785 void eeh_handle_normal_event(struct eeh_pe *pe)
 786 {
 787         struct pci_bus *bus;
 788         struct eeh_dev *edev, *tmp;
 789         struct eeh_pe *tmp_pe;
 790         int rc = 0;
 791         enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 792         struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
 793
 794         bus = eeh_pe_bus_get(pe);
 795         if (!bus) {
 796                 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
 797                         __func__, pe->phb->global_number, pe->addr);
 798                 return;
 799         }
 800
 801         eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 802
 803         eeh_pe_update_time_stamp(pe);
 804         pe->freeze_count++;
 805         if (pe->freeze_count > eeh_max_freezes) {
 806                 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
 807                        pe->phb->global_number, pe->addr,
 808                        pe->freeze_count);
 809                 goto hard_fail;
 810         }
 811         pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
 812                 pe->freeze_count, eeh_max_freezes);
 813
 814         /* Walk the various device drivers attached to this slot through
 815          * a reset sequence, giving each an opportunity to do what it needs
 816          * to accomplish the reset.  Each child gets a report of the
 817          * status ... if any child can't handle the reset, then the entire
 818          * slot is dlpar removed and added.
 819          *
 820          * When the PHB is fenced, we have to issue a reset to recover from
 821          * the error. Override the result if necessary to have partially
 822          * hotplug for this case.
 823          */
 824         pr_info("EEH: Notify device drivers to shutdown\n");
 825         eeh_set_channel_state(pe, pci_channel_io_frozen);
 826         eeh_set_irq_state(pe, false);
 827         eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
 828                       &result);
 829         if ((pe->type & EEH_PE_PHB) &&
 830             result != PCI_ERS_RESULT_NONE &&
 831             result != PCI_ERS_RESULT_NEED_RESET)
 832                 result = PCI_ERS_RESULT_NEED_RESET;
 833
 834         /* Get the current PCI slot state. This can take a long time,
 835          * sometimes over 300 seconds for certain systems.
 836          */
 837         rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
 838         if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
 839                 pr_warn("EEH: Permanent failure\n");
 840                 goto hard_fail;
 841         }
 842
 843         /* Since rtas may enable MMIO when posting the error log,
 844          * don't post the error log until after all dev drivers
 845          * have been informed.
 846          */
 847         pr_info("EEH: Collect temporary log\n");
 848         eeh_slot_error_detail(pe, EEH_LOG_TEMP);
 849
 850         /* If all device drivers were EEH-unaware, then shut
 851          * down all of the device drivers, and hope they
 852          * go down willingly, without panicing the system.
 853          */
 854         if (result == PCI_ERS_RESULT_NONE) {
 855                 pr_info("EEH: Reset with hotplug activity\n");
 856                 rc = eeh_reset_device(pe, bus, NULL, false);
 857                 if (rc) {
 858                         pr_warn("%s: Unable to reset, err=%d\n",
 859                                 __func__, rc);
 860                         goto hard_fail;
 861                 }
 862         }
 863
 864         /* If all devices reported they can proceed, then re-enable MMIO */
 865         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
 866                 pr_info("EEH: Enable I/O for affected devices\n");
 867                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 868
 869                 if (rc < 0)
 870                         goto hard_fail;
 871                 if (rc) {
 872                         result = PCI_ERS_RESULT_NEED_RESET;
 873                 } else {
 874                         pr_info("EEH: Notify device drivers to resume I/O\n");
 875                         eeh_pe_report("mmio_enabled", pe,
 876                                       eeh_report_mmio_enabled, &result);
 877                 }
 878         }
 879
 880         /* If all devices reported they can proceed, then re-enable DMA */
 881         if (result == PCI_ERS_RESULT_CAN_RECOVER) {
 882                 pr_info("EEH: Enabled DMA for affected devices\n");
 883                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 884
 885                 if (rc < 0)
 886                         goto hard_fail;
 887                 if (rc) {
 888                         result = PCI_ERS_RESULT_NEED_RESET;
 889                 } else {
 890                         /*
 891                          * We didn't do PE reset for the case. The PE
 892                          * is still in frozen state. Clear it before
 893                          * resuming the PE.
 894                          */
 895                         eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 896                         result = PCI_ERS_RESULT_RECOVERED;
 897                 }
 898         }
 899
 900         /* If any device has a hard failure, then shut off everything. */
 901         if (result == PCI_ERS_RESULT_DISCONNECT) {
 902                 pr_warn("EEH: Device driver gave up\n");
 903                 goto hard_fail;
 904         }
 905
 906         /* If any device called out for a reset, then reset the slot */
 907         if (result == PCI_ERS_RESULT_NEED_RESET) {
 908                 pr_info("EEH: Reset without hotplug activity\n");
 909                 rc = eeh_reset_device(pe, bus, &rmv_data, true);
 910                 if (rc) {
 911                         pr_warn("%s: Cannot reset, err=%d\n",
 912                                 __func__, rc);
 913                         goto hard_fail;
 914                 }
 915
 916                 pr_info("EEH: Notify device drivers "
 917                         "the completion of reset\n");
 918                 result = PCI_ERS_RESULT_NONE;
 919                 eeh_set_channel_state(pe, pci_channel_io_normal);
 920                 eeh_set_irq_state(pe, true);
 921                 eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
 922         }
 923
 924         /* All devices should claim they have recovered by now. */
 925         if ((result != PCI_ERS_RESULT_RECOVERED) &&
 926             (result != PCI_ERS_RESULT_NONE)) {
 927                 pr_warn("EEH: Not recovered\n");
 928                 goto hard_fail;
 929         }
 930
 931         /*
 932          * For those hot removed VFs, we should add back them after PF get
 933          * recovered properly.
 934          */
 935         list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
 936                 eeh_add_virt_device(edev, NULL);
 937                 list_del(&edev->rmv_list);
 938         }
 939
 940         /* Tell all device drivers that they can resume operations */
 941         pr_info("EEH: Notify device driver to resume\n");
 942         eeh_set_channel_state(pe, pci_channel_io_normal);
 943         eeh_set_irq_state(pe, true);
 944         eeh_pe_report("resume", pe, eeh_report_resume, NULL);
 945         eeh_for_each_pe(pe, tmp_pe) {
 946                 eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
 947                         edev->mode &= ~EEH_DEV_NO_HANDLER;
 948                         edev->in_error = false;
 949                 }
 950         }
 951
 952         pr_info("EEH: Recovery successful.\n");
 953         goto final;
 954
 955 hard_fail:
 956         /*
 957          * About 90% of all real-life EEH failures in the field
 958          * are due to poorly seated PCI cards. Only 10% or so are
 959          * due to actual, failed cards.
 960          */
 961         pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
 962                "Please try reseating or replacing it\n",
 963                 pe->phb->global_number, pe->addr);
 964
 965         eeh_slot_error_detail(pe, EEH_LOG_PERM);
 966
 967         /* Notify all devices that they're about to go down. */
 968         eeh_set_channel_state(pe, pci_channel_io_perm_failure);
 969         eeh_set_irq_state(pe, false);
 970         eeh_pe_report("error_detected(permanent failure)", pe,
 971                       eeh_report_failure, NULL);
 972
 973         /* Mark the PE to be removed permanently */
 974         eeh_pe_state_mark(pe, EEH_PE_REMOVED);
 975
 976         /*
 977          * Shut down the device drivers for good. We mark
 978          * all removed devices correctly to avoid access
 979          * the their PCI config any more.
 980          */
 981         if (pe->type & EEH_PE_VF) {
 982                 eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
 983                 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 984         } else {
 985                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 986                 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 987
 988                 pci_lock_rescan_remove();
 989                 pci_hp_remove_devices(bus);
 990                 pci_unlock_rescan_remove();
 991                 /* The passed PE should no longer be used */
 992                 return;
 993         }
 994 final:
 995         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 996 }
 997
 998 /**
 999  * eeh_handle_special_event - Handle EEH events without a specific failing PE
1000  *
1001  * Called when an EEH event is detected but can't be narrowed down to a
1002  * specific PE.  Iterates through possible failures and handles them as
1003  * necessary.
1004  */
1005 void eeh_handle_special_event(void)
1006 {
1007         struct eeh_pe *pe, *phb_pe;
1008         struct pci_bus *bus;
1009         struct pci_controller *hose;
1010         unsigned long flags;
1011         int rc;
1012
1013
1014         do {
1015                 rc = eeh_ops->next_error(&pe);
1016
1017                 switch (rc) {
1018                 case EEH_NEXT_ERR_DEAD_IOC:
1019                         /* Mark all PHBs in dead state */
1020                         eeh_serialize_lock(&flags);
1021
1022                         /* Purge all events */
1023                         eeh_remove_event(NULL, true);
1024
1025                         list_for_each_entry(hose, &hose_list, list_node) {
1026                                 phb_pe = eeh_phb_pe_get(hose);
1027                                 if (!phb_pe) continue;
1028
1029                                 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
1030                         }
1031
1032                         eeh_serialize_unlock(flags);
1033
1034                         break;
1035                 case EEH_NEXT_ERR_FROZEN_PE:
1036                 case EEH_NEXT_ERR_FENCED_PHB:
1037                 case EEH_NEXT_ERR_DEAD_PHB:
1038                         /* Mark the PE in fenced state */
1039                         eeh_serialize_lock(&flags);
1040
1041                         /* Purge all events of the PHB */
1042                         eeh_remove_event(pe, true);
1043
1044                         if (rc == EEH_NEXT_ERR_DEAD_PHB)
1045                                 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
1046                         else
1047                                 eeh_pe_state_mark(pe,
1048                                         EEH_PE_ISOLATED | EEH_PE_RECOVERING);
1049
1050                         eeh_serialize_unlock(flags);
1051
1052                         break;
1053                 case EEH_NEXT_ERR_NONE:
1054                         return;
1055                 default:
1056                         pr_warn("%s: Invalid value %d from next_error()\n",
1057                                 __func__, rc);
1058                         return;
1059                 }
1060
1061                 /*
1062                  * For fenced PHB and frozen PE, it's handled as normal
1063                  * event. We have to remove the affected PHBs for dead
1064                  * PHB and IOC
1065                  */
1066                 if (rc == EEH_NEXT_ERR_FROZEN_PE ||
1067                     rc == EEH_NEXT_ERR_FENCED_PHB) {
1068                         eeh_handle_normal_event(pe);
1069                 } else {
1070                         pci_lock_rescan_remove();
1071                         list_for_each_entry(hose, &hose_list, list_node) {
1072                                 phb_pe = eeh_phb_pe_get(hose);
1073                                 if (!phb_pe ||
1074                                     !(phb_pe->state & EEH_PE_ISOLATED) ||
1075                                     (phb_pe->state & EEH_PE_RECOVERING))
1076                                         continue;
1077
1078                                 /* Notify all devices to be down */
1079                                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
1080                                 eeh_set_channel_state(pe, pci_channel_io_perm_failure);
1081                                 eeh_pe_report(
1082                                         "error_detected(permanent failure)", pe,
1083                                         eeh_report_failure, NULL);
1084                                 bus = eeh_pe_bus_get(phb_pe);
1085                                 if (!bus) {
1086                                         pr_err("%s: Cannot find PCI bus for "
1087                                                "PHB#%x-PE#%x\n",
1088                                                __func__,
1089                                                pe->phb->global_number,
1090                                                pe->addr);
1091                                         break;
1092                                 }
1093                                 pci_hp_remove_devices(bus);
1094                         }
1095                         pci_unlock_rescan_remove();
1096                 }
1097
1098                 /*
1099                  * If we have detected dead IOC, we needn't proceed
1100                  * any more since all PHBs would have been removed
1101                  */
1102                 if (rc == EEH_NEXT_ERR_DEAD_IOC)
1103                         break;
1104         } while (rc != EEH_NEXT_ERR_NONE);
1105 }