arch/powerpc/platforms/powernv/eeh-ioda.c
1 /*
2  * This file implements the functions needed by EEH on IODA-compliant
3  * chips. Most of the EEH functionality here is built on top of the
4  * OPAL APIs.
5  *
6  * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  */
13
14 #include <linux/bootmem.h>
15 #include <linux/debugfs.h>
16 #include <linux/delay.h>
17 #include <linux/io.h>
18 #include <linux/irq.h>
19 #include <linux/kernel.h>
20 #include <linux/msi.h>
21 #include <linux/notifier.h>
22 #include <linux/pci.h>
23 #include <linux/string.h>
24
25 #include <asm/eeh.h>
26 #include <asm/eeh_event.h>
27 #include <asm/io.h>
28 #include <asm/iommu.h>
29 #include <asm/msi_bitmap.h>
30 #include <asm/opal.h>
31 #include <asm/pci-bridge.h>
32 #include <asm/ppc-pci.h>
33 #include <asm/tce.h>
34
35 #include "powernv.h"
36 #include "pci.h"
37
38 static int ioda_eeh_nb_init = 0;
39
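/*
 * OPAL event notifier callback, invoked for OPAL_EVENT_PCI_ERROR: if EEH
 * is enabled, queue a special (PHB-less) EEH event for the EEH core;
 * otherwise clear the pending PCI error event.
 */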
40 static int ioda_eeh_event(struct notifier_block *nb,
41                           unsigned long events, void *change)
42 {
43         uint64_t changed_evts = (uint64_t)change;
44
45         /*
46          * If EEH has been enabled, simply send a special EEH
47          * event; otherwise, clear the pending events in case
48          * EEH is enabled soon.
49          */
50         if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
51             !(events & OPAL_EVENT_PCI_ERROR))
52                 return 0;
53
54         if (eeh_enabled())
55                 eeh_send_failure_event(NULL);
56         else
57                 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
58
59         return 0;
60 }
61
62 static struct notifier_block ioda_eeh_nb = {
63         .notifier_call  = ioda_eeh_event,
64         .next           = NULL,
65         .priority       = 0
66 };
67
68 #ifdef CONFIG_DEBUG_FS
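/*
 * debugfs write handler for the "err_injct" file. The input is parsed
 * below as "pe_no:type:func:addr:mask" (hex values) and handed to the
 * platform's err_inject() callback for the matching PE.
 */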
69 static ssize_t ioda_eeh_ei_write(struct file *filp,
70                                  const char __user *user_buf,
71                                  size_t count, loff_t *ppos)
72 {
73         struct pci_controller *hose = filp->private_data;
74         struct pnv_phb *phb = hose->private_data;
75         struct eeh_dev *edev;
76         struct eeh_pe *pe;
77         int pe_no, type, func;
78         unsigned long addr, mask;
79         char buf[50];
80         int ret;
81
82         if (!phb->eeh_ops || !phb->eeh_ops->err_inject)
83                 return -ENXIO;
84
85         ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
86         if (!ret)
87                 return -EFAULT;
88
89         /* Retrieve parameters */
90         ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
91                      &pe_no, &type, &func, &addr, &mask);
92         if (ret != 5)
93                 return -EINVAL;
94
95         /* Retrieve PE */
96         edev = kzalloc(sizeof(*edev), GFP_KERNEL);
97         if (!edev)
98                 return -ENOMEM;
99         edev->phb = hose;
100         edev->pe_config_addr = pe_no;
101         pe = eeh_pe_get(edev);
102         kfree(edev);
103         if (!pe)
104                 return -ENODEV;
105
106         /* Do error injection */
107         ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
108         return ret < 0 ? ret : count;
109 }
110
111 static const struct file_operations ioda_eeh_ei_fops = {
112         .open   = simple_open,
113         .llseek = no_llseek,
114         .write  = ioda_eeh_ei_write,
115 };
116
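/*
 * Generic debugfs accessors: read or write the 64-bit big-endian PHB
 * register at the given offset. The wrappers below bind offsets 0xD10,
 * 0xD90 and 0xE10 to the err_injct_outbound, err_injct_inboundA and
 * err_injct_inboundB debugfs files respectively.
 */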
117 static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
118 {
119         struct pci_controller *hose = data;
120         struct pnv_phb *phb = hose->private_data;
121
122         out_be64(phb->regs + offset, val);
123         return 0;
124 }
125
126 static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val)
127 {
128         struct pci_controller *hose = data;
129         struct pnv_phb *phb = hose->private_data;
130
131         *val = in_be64(phb->regs + offset);
132         return 0;
133 }
134
135 static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
136 {
137         return ioda_eeh_dbgfs_set(data, 0xD10, val);
138 }
139
140 static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val)
141 {
142         return ioda_eeh_dbgfs_get(data, 0xD10, val);
143 }
144
145 static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
146 {
147         return ioda_eeh_dbgfs_set(data, 0xD90, val);
148 }
149
150 static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val)
151 {
152         return ioda_eeh_dbgfs_get(data, 0xD90, val);
153 }
154
155 static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
156 {
157         return ioda_eeh_dbgfs_set(data, 0xE10, val);
158 }
159
160 static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val)
161 {
162         return ioda_eeh_dbgfs_get(data, 0xE10, val);
163 }
164
165 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
166                         ioda_eeh_outb_dbgfs_set, "0x%llx\n");
167 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
168                         ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
169 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
170                         ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
171 #endif /* CONFIG_DEBUG_FS */
172
174 /**
175  * ioda_eeh_post_init - Chip dependent post initialization
176  * @hose: PCI controller
177  *
178  * This function is called after the EEH PEs and devices have
179  * been built, i.e. once the EEH core, including its I/O address
180  * cache, is ready to provide service.
181  */
182 static int ioda_eeh_post_init(struct pci_controller *hose)
183 {
184         struct pnv_phb *phb = hose->private_data;
185         int ret;
186
187         /* Register OPAL event notifier */
188         if (!ioda_eeh_nb_init) {
189                 ret = opal_notifier_register(&ioda_eeh_nb);
190                 if (ret) {
191                         pr_err("%s: Can't register OPAL event notifier (%d)\n",
192                                __func__, ret);
193                         return ret;
194                 }
195
196                 ioda_eeh_nb_init = 1;
197         }
198
199 #ifdef CONFIG_DEBUG_FS
200         if (!phb->has_dbgfs && phb->dbgfs) {
201                 phb->has_dbgfs = 1;
202
203                 debugfs_create_file("err_injct", 0200,
204                                     phb->dbgfs, hose,
205                                     &ioda_eeh_ei_fops);
206
207                 debugfs_create_file("err_injct_outbound", 0600,
208                                     phb->dbgfs, hose,
209                                     &ioda_eeh_outb_dbgfs_ops);
210                 debugfs_create_file("err_injct_inboundA", 0600,
211                                     phb->dbgfs, hose,
212                                     &ioda_eeh_inbA_dbgfs_ops);
213                 debugfs_create_file("err_injct_inboundB", 0600,
214                                     phb->dbgfs, hose,
215                                     &ioda_eeh_inbB_dbgfs_ops);
216         }
217 #endif
218
219         /* If EEH is enabled, rely on it to handle frozen PEs.
220          * Otherwise, fall back to the conventional mechanism of
221          * clearing frozen PEs during PCI config accesses.
222          */
223         if (eeh_enabled())
224                 phb->flags |= PNV_PHB_FLAG_EEH;
225         else
226                 phb->flags &= ~PNV_PHB_FLAG_EEH;
227
228         return 0;
229 }
230
231 /**
232  * ioda_eeh_set_option - Set EEH operation or I/O setting
233  * @pe: EEH PE
234  * @option: options
235  *
236  * Enable or disable EEH for the indicated PE. The function
237  * can also be used to re-enable (thaw) I/O or DMA for a
238  * frozen PE.
239  */
240 static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
241 {
242         struct pci_controller *hose = pe->phb;
243         struct pnv_phb *phb = hose->private_data;
244         bool freeze_pe = false;
245         int enable, ret = 0;
246         s64 rc;
247
248         /* Check on PE number */
249         if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
250                 pr_err("%s: PE address %x out of range [0, %x] "
251                        "on PHB#%x\n",
252                         __func__, pe->addr, phb->ioda.total_pe,
253                         hose->global_number);
254                 return -EINVAL;
255         }
256
257         switch (option) {
258         case EEH_OPT_DISABLE:
259                 return -EPERM;
260         case EEH_OPT_ENABLE:
261                 return 0;
262         case EEH_OPT_THAW_MMIO:
263                 enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
264                 break;
265         case EEH_OPT_THAW_DMA:
266                 enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
267                 break;
268         case EEH_OPT_FREEZE_PE:
269                 freeze_pe = true;
270                 enable = OPAL_EEH_ACTION_SET_FREEZE_ALL;
271                 break;
272         default:
273                 pr_warn("%s: Invalid option %d\n",
274                         __func__, option);
275                 return -EINVAL;
276         }
277
278         /* If the PHB supports compound PEs, let it handle the request */
279         if (freeze_pe) {
280                 if (phb->freeze_pe) {
281                         phb->freeze_pe(phb, pe->addr);
282                 } else {
283                         rc = opal_pci_eeh_freeze_set(phb->opal_id,
284                                                      pe->addr,
285                                                      enable);
286                         if (rc != OPAL_SUCCESS) {
287                                 pr_warn("%s: Failure %lld freezing "
288                                         "PHB#%x-PE#%x\n",
289                                         __func__, rc,
290                                         phb->hose->global_number, pe->addr);
291                                 ret = -EIO;
292                         }
293                 }
294         } else {
295                 if (phb->unfreeze_pe) {
296                         ret = phb->unfreeze_pe(phb, pe->addr, enable);
297                 } else {
298                         rc = opal_pci_eeh_freeze_clear(phb->opal_id,
299                                                        pe->addr,
300                                                        enable);
301                         if (rc != OPAL_SUCCESS) {
302                                 pr_warn("%s: Failure %lld enable %d "
303                                         "for PHB#%x-PE#%x\n",
304                                         __func__, rc, option,
305                                         phb->hose->global_number, pe->addr);
306                                 ret = -EIO;
307                         }
308                 }
309         }
310
311         return ret;
312 }
313
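/* Fetch the PHB diag-data from OPAL into the PE's buffer (pe->data) */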
314 static void ioda_eeh_phb_diag(struct eeh_pe *pe)
315 {
316         struct pnv_phb *phb = pe->phb->private_data;
317         long rc;
318
319         rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
320                                          PNV_PCI_DIAG_BUF_SIZE);
321         if (rc != OPAL_SUCCESS)
322                 pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
323                         __func__, pe->phb->global_number, rc);
324 }
325
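/*
 * Retrieve the state of a PHB PE. Any PHB error reported by OPAL marks
 * the PE as isolated and triggers a one-time diag-data dump; otherwise
 * the PHB is reported with MMIO and DMA fully active.
 */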
326 static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
327 {
328         struct pnv_phb *phb = pe->phb->private_data;
329         u8 fstate;
330         __be16 pcierr;
331         s64 rc;
332         int result = 0;
333
334         rc = opal_pci_eeh_freeze_status(phb->opal_id,
335                                         pe->addr,
336                                         &fstate,
337                                         &pcierr,
338                                         NULL);
339         if (rc != OPAL_SUCCESS) {
340                 pr_warn("%s: Failure %lld getting PHB#%x state\n",
341                         __func__, rc, phb->hose->global_number);
342                 return EEH_STATE_NOT_SUPPORT;
343         }
344
345         /*
346          * Check the PHB state. If the PHB is frozen for the
347          * first time, dump the PHB diag-data.
348          */
349         if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
350                 result = (EEH_STATE_MMIO_ACTIVE  |
351                           EEH_STATE_DMA_ACTIVE   |
352                           EEH_STATE_MMIO_ENABLED |
353                           EEH_STATE_DMA_ENABLED);
354         } else if (!(pe->state & EEH_PE_ISOLATED)) {
355                 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
356                 ioda_eeh_phb_diag(pe);
357         }
358
359         return result;
360 }
361
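/*
 * Retrieve the state of a (non-PHB) PE by mapping the OPAL freeze state
 * onto the EEH_STATE_* flags. See the comments below for the special
 * handling during PE reset and for compound PEs.
 */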
362 static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
363 {
364         struct pnv_phb *phb = pe->phb->private_data;
365         u8 fstate;
366         __be16 pcierr;
367         s64 rc;
368         int result;
369
370         /*
371          * We don't clobber the hardware frozen state until the PE
372          * reset has completed. To keep the EEH core moving
373          * forward, we have to return an operational state while
374          * the PE reset is in progress.
375          */
376         if (pe->state & EEH_PE_CFG_BLOCKED) {
377                 result = (EEH_STATE_MMIO_ACTIVE  |
378                           EEH_STATE_DMA_ACTIVE   |
379                           EEH_STATE_MMIO_ENABLED |
380                           EEH_STATE_DMA_ENABLED);
381                 return result;
382         }
383
384         /*
385          * Fetch PE state from hardware. If the PHB
386          * supports compound PE, let it handle that.
387          */
388         if (phb->get_pe_state) {
389                 fstate = phb->get_pe_state(phb, pe->addr);
390         } else {
391                 rc = opal_pci_eeh_freeze_status(phb->opal_id,
392                                                 pe->addr,
393                                                 &fstate,
394                                                 &pcierr,
395                                                 NULL);
396                 if (rc != OPAL_SUCCESS) {
397                         pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
398                                 __func__, rc, phb->hose->global_number, pe->addr);
399                         return EEH_STATE_NOT_SUPPORT;
400                 }
401         }
402
403         /* Figure out state */
404         switch (fstate) {
405         case OPAL_EEH_STOPPED_NOT_FROZEN:
406                 result = (EEH_STATE_MMIO_ACTIVE  |
407                           EEH_STATE_DMA_ACTIVE   |
408                           EEH_STATE_MMIO_ENABLED |
409                           EEH_STATE_DMA_ENABLED);
410                 break;
411         case OPAL_EEH_STOPPED_MMIO_FREEZE:
412                 result = (EEH_STATE_DMA_ACTIVE |
413                           EEH_STATE_DMA_ENABLED);
414                 break;
415         case OPAL_EEH_STOPPED_DMA_FREEZE:
416                 result = (EEH_STATE_MMIO_ACTIVE |
417                           EEH_STATE_MMIO_ENABLED);
418                 break;
419         case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
420                 result = 0;
421                 break;
422         case OPAL_EEH_STOPPED_RESET:
423                 result = EEH_STATE_RESET_ACTIVE;
424                 break;
425         case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
426                 result = EEH_STATE_UNAVAILABLE;
427                 break;
428         case OPAL_EEH_STOPPED_PERM_UNAVAIL:
429                 result = EEH_STATE_NOT_SUPPORT;
430                 break;
431         default:
432                 result = EEH_STATE_NOT_SUPPORT;
433                 pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
434                         __func__, phb->hose->global_number,
435                         pe->addr, fstate);
436         }
437
438         /*
439          * If the PHB supports compound PEs, freeze all
440          * slave PEs for consistency.
441          *
442          * If the PE is entering the frozen state for the
443          * first time, dump the PHB diag-data.
444          */
445         if (!(result & EEH_STATE_NOT_SUPPORT) &&
446             !(result & EEH_STATE_UNAVAILABLE) &&
447             !(result & EEH_STATE_MMIO_ACTIVE) &&
448             !(result & EEH_STATE_DMA_ACTIVE)  &&
449             !(pe->state & EEH_PE_ISOLATED)) {
450                 if (phb->freeze_pe)
451                         phb->freeze_pe(phb, pe->addr);
452
453                 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
454                 ioda_eeh_phb_diag(pe);
455         }
456
457         return result;
458 }
459
460 /**
461  * ioda_eeh_get_state - Retrieve the state of PE
462  * @pe: EEH PE
463  *
464  * The PE's state should be retrieved from the PEEV and PEST
465  * IODA tables. Since OPAL exports a function to do exactly
466  * that, we simply use it.
467  */
468 static int ioda_eeh_get_state(struct eeh_pe *pe)
469 {
470         struct pnv_phb *phb = pe->phb->private_data;
471
472         /* Sanity check on the PE number. The PHB PE should have address 0 */
473         if (pe->addr < 0 ||
474             pe->addr >= phb->ioda.total_pe) {
475                 pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
476                         __func__, phb->hose->global_number,
477                         pe->addr, phb->ioda.total_pe);
478                 return EEH_STATE_NOT_SUPPORT;
479         }
480
481         if (pe->type & EEH_PE_PHB)
482                 return ioda_eeh_get_phb_state(pe);
483
484         return ioda_eeh_get_pe_state(pe);
485 }
486
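/*
 * Poll the PHB until the pending OPAL operation completes: a positive
 * return value from opal_pci_poll() is the number of milliseconds to
 * wait before polling again, while zero or a negative value ends the loop.
 */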
487 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
488 {
489         s64 rc = OPAL_HARDWARE;
490
491         while (1) {
492                 rc = opal_pci_poll(phb->opal_id);
493                 if (rc <= 0)
494                         break;
495
496                 if (system_state < SYSTEM_RUNNING)
497                         udelay(1000 * rc);
498                 else
499                         msleep(rc);
500         }
501
502         return rc;
503 }
504
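/*
 * Complete PHB reset: assert it for hot/fundamental resets, deassert it
 * for EEH_RESET_DEACTIVATE, then wait for OPAL to finish the operation,
 * including the PCI bus settlement delay after deassert.
 */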
505 int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
506 {
507         struct pnv_phb *phb = hose->private_data;
508         s64 rc = OPAL_HARDWARE;
509
510         pr_debug("%s: Reset PHB#%x, option=%d\n",
511                  __func__, hose->global_number, option);
512
513         /* Issue PHB complete reset request */
514         if (option == EEH_RESET_FUNDAMENTAL ||
515             option == EEH_RESET_HOT)
516                 rc = opal_pci_reset(phb->opal_id,
517                                 OPAL_RESET_PHB_COMPLETE,
518                                 OPAL_ASSERT_RESET);
519         else if (option == EEH_RESET_DEACTIVATE)
520                 rc = opal_pci_reset(phb->opal_id,
521                                 OPAL_RESET_PHB_COMPLETE,
522                                 OPAL_DEASSERT_RESET);
523         if (rc < 0)
524                 goto out;
525
526         /*
527          * Poll the state of the PHB until the request completes
528          * successfully. The PHB reset is usually a complete reset
529          * followed by a hot reset on the root bus, so we also
530          * need the PCI bus settlement delay.
531          */
532         rc = ioda_eeh_phb_poll(phb);
533         if (option == EEH_RESET_DEACTIVATE) {
534                 if (system_state < SYSTEM_RUNNING)
535                         udelay(1000 * EEH_PE_RST_SETTLE_TIME);
536                 else
537                         msleep(EEH_PE_RST_SETTLE_TIME);
538         }
539 out:
540         if (rc != OPAL_SUCCESS)
541                 return -EIO;
542
543         return 0;
544 }
545
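/*
 * Reset the root port of the PHB: assert a fundamental or hot reset, or
 * deassert it for EEH_RESET_DEACTIVATE, then poll OPAL until the
 * operation completes.
 */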
546 static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
547 {
548         struct pnv_phb *phb = hose->private_data;
549         s64 rc = OPAL_SUCCESS;
550
551         pr_debug("%s: Reset PHB#%x, option=%d\n",
552                  __func__, hose->global_number, option);
553
554         /*
555          * When deasserting the reset, we needn't care about the
556          * reset scope, because the firmware does nothing specific
557          * to fundamental or hot reset during the deassert phase.
558          */
559         if (option == EEH_RESET_FUNDAMENTAL)
560                 rc = opal_pci_reset(phb->opal_id,
561                                 OPAL_RESET_PCI_FUNDAMENTAL,
562                                 OPAL_ASSERT_RESET);
563         else if (option == EEH_RESET_HOT)
564                 rc = opal_pci_reset(phb->opal_id,
565                                 OPAL_RESET_PCI_HOT,
566                                 OPAL_ASSERT_RESET);
567         else if (option == EEH_RESET_DEACTIVATE)
568                 rc = opal_pci_reset(phb->opal_id,
569                                 OPAL_RESET_PCI_HOT,
570                                 OPAL_DEASSERT_RESET);
571         if (rc < 0)
572                 goto out;
573
574         /* Poll state of the PHB until the request is done */
575         rc = ioda_eeh_phb_poll(phb);
576         if (option == EEH_RESET_DEACTIVATE)
577                 msleep(EEH_PE_RST_SETTLE_TIME);
578 out:
579         if (rc != OPAL_SUCCESS)
580                 return -EIO;
581
582         return 0;
583 }
584
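/*
 * Reset the secondary bus below a PCI bridge by toggling the bus reset
 * bit in its bridge control register. While the reset is asserted, the
 * surprise-down error is masked in the AER uncorrectable error mask so
 * that no link-down events are reported.
 */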
585 static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
587 {
588         struct device_node *dn = pci_device_to_OF_node(dev);
589         struct eeh_dev *edev = of_node_to_eeh_dev(dn);
590         int aer = edev ? edev->aer_cap : 0;
591         u32 ctrl;
592
593         pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
594                  __func__, pci_domain_nr(dev->bus),
595                  dev->bus->number, option);
596
597         switch (option) {
598         case EEH_RESET_FUNDAMENTAL:
599         case EEH_RESET_HOT:
600                 /* Don't report linkDown event */
601                 if (aer) {
602                         eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
603                                              4, &ctrl);
604                         ctrl |= PCI_ERR_UNC_SURPDN;
605                         eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
606                                               4, ctrl);
607                 }
608
609                 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
610                 ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
611                 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
612                 msleep(EEH_PE_RST_HOLD_TIME);
613
614                 break;
615         case EEH_RESET_DEACTIVATE:
616                 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
617                 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
618                 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
619                 msleep(EEH_PE_RST_SETTLE_TIME);
620
621                 /* Continue reporting linkDown event */
622                 if (aer) {
623                         eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
624                                              4, &ctrl);
625                         ctrl &= ~PCI_ERR_UNC_SURPDN;
626                         eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
627                                               4, ctrl);
628                 }
629
630                 break;
631         }
632
633         return 0;
634 }
635
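/*
 * Reset the bus below @dev: use the root port reset when the device
 * sits on the root bus, otherwise toggle the bridge's secondary bus
 * reset.
 */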
636 void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
637 {
638         struct pci_controller *hose;
639
640         if (pci_is_root_bus(dev->bus)) {
641                 hose = pci_bus_to_host(dev->bus);
642                 ioda_eeh_root_reset(hose, EEH_RESET_HOT);
643                 ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
644         } else {
645                 ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
646                 ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
647         }
648 }
649
650 /**
651  * ioda_eeh_reset - Reset the indicated PE
652  * @pe: EEH PE
653  * @option: reset option
654  *
655  * Reset the indicated PE. For a PCI-bus-sensitive PE, we need
656  * to reset the parent p2p bridge, and the PHB has to be
657  * reinitialized if that bridge is the root bridge. For a
658  * PCI-device-sensitive PE, we try to reset the device through
659  * FLR. For now we don't have OPAL APIs to do a HARD reset, so
660  * all resets are SOFT (HOT) resets.
661  */
662 static int ioda_eeh_reset(struct eeh_pe *pe, int option)
663 {
664         struct pci_controller *hose = pe->phb;
665         struct pci_bus *bus;
666         int ret;
667
668         /*
669          * For a PHB reset, we always do a complete reset. For PEs whose
670          * primary bus derives from the root complex (root bus) or root
671          * port (usually bus#1), we apply a hot or fundamental reset on
672          * the root port. For other PEs, we do a hot reset on the PE's
673          * primary bus.
674          *
675          * This differs from pHyp, which always clears the frozen state
676          * during PE reset. Following benh's suggestion, we keep the frozen
677          * state until the PE reset is fully done (until BAR restore), so
678          * that HW drops illegal IO or MMIO accesses and we avoid recursive
679          * frozen PEs during the reset. The side effect is that the EEH
680          * core has to clear the frozen state explicitly after BAR restore.
681          */
682         if (pe->type & EEH_PE_PHB) {
683                 ret = ioda_eeh_phb_reset(hose, option);
684         } else {
685                 struct pnv_phb *phb;
686                 s64 rc;
687
688                 /*
689                  * The frozen PE might have been caused by the PAPR error
690                  * injection registers, which the hardware spec says should be
691                  * cleared once the PE is frozen. Unfortunately, that's not
692                  * true on P7IOC, so we have to clear them manually to avoid
693                  * recursive EEH errors during recovery.
694                  */
695                 phb = hose->private_data;
696                 if (phb->model == PNV_PHB_MODEL_P7IOC &&
697                     (option == EEH_RESET_HOT ||
698                     option == EEH_RESET_FUNDAMENTAL)) {
699                         rc = opal_pci_reset(phb->opal_id,
700                                             OPAL_RESET_PHB_ERROR,
701                                             OPAL_ASSERT_RESET);
702                         if (rc != OPAL_SUCCESS) {
703                                 pr_warn("%s: Failure %lld clearing "
704                                         "error injection registers\n",
705                                         __func__, rc);
706                                 return -EIO;
707                         }
708                 }
709
710                 bus = eeh_pe_bus_get(pe);
711                 if (pci_is_root_bus(bus) ||
712                     pci_is_root_bus(bus->parent))
713                         ret = ioda_eeh_root_reset(hose, option);
714                 else
715                         ret = ioda_eeh_bridge_reset(bus->self, option);
716         }
717
718         return ret;
719 }
720
721 /**
722  * ioda_eeh_get_log - Retrieve error log
723  * @pe: frozen PE
724  * @severity: permanent or temporary error
725  * @drv_log: device driver log
726  * @len: length of device driver log
727  *
728  * Retrieve error log, which contains log from device driver
729  * and firmware.
730  */
731 static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
732                             char *drv_log, unsigned long len)
733 {
734         pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
735
736         return 0;
737 }
738
739 /**
740  * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
741  * @pe: EEH PE
742  *
743  * A particular PE might include PCI bridges, and those bridges must be
744  * configured correctly for the PE to work properly. However, nothing
745  * needs to be done here on P7IOC, since the reset function already
746  * covers everything this function would otherwise have to do.
747  */
748 static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
749 {
750         return 0;
751 }
752
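/*
 * Inject a PCI error on the given PE via OPAL, after validating the
 * error type and function and checking that the firmware implements
 * the OPAL_PCI_ERR_INJECT call.
 */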
753 static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func,
754                                unsigned long addr, unsigned long mask)
755 {
756         struct pci_controller *hose = pe->phb;
757         struct pnv_phb *phb = hose->private_data;
758         s64 ret;
759
760         /* Sanity check on error type */
761         if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
762             type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
763                 pr_warn("%s: Invalid error type %d\n",
764                         __func__, type);
765                 return -ERANGE;
766         }
767
768         if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
769             func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
770                 pr_warn("%s: Invalid error function %d\n",
771                         __func__, func);
772                 return -ERANGE;
773         }
774
775         /* Does the firmware support error injection? */
776         if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
777                 pr_warn("%s: Firmware doesn't support error injection\n",
778                         __func__);
779                 return -ENXIO;
780         }
781
782         /* Do error injection */
783         ret = opal_pci_err_inject(phb->opal_id, pe->addr,
784                                   type, func, addr, mask);
785         if (ret != OPAL_SUCCESS) {
786                 pr_warn("%s: Failure %lld injecting error "
787                         "%d-%d to PHB#%x-PE#%x\n",
788                         __func__, ret, type, func,
789                         hose->global_number, pe->addr);
790                 return -EIO;
791         }
792
793         return 0;
794 }
795
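/*
 * Dump the GEM and LEM register sets that are common to all P7IOC hub
 * diag-data layouts, skipping register groups that are entirely zero.
 */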
796 static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
797 {
798         /* GEM */
799         if (data->gemXfir || data->gemRfir ||
800             data->gemRirqfir || data->gemMask || data->gemRwof)
801                 pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
802                         be64_to_cpu(data->gemXfir),
803                         be64_to_cpu(data->gemRfir),
804                         be64_to_cpu(data->gemRirqfir),
805                         be64_to_cpu(data->gemMask),
806                         be64_to_cpu(data->gemRwof));
807
808         /* LEM */
809         if (data->lemFir || data->lemErrMask ||
810             data->lemAction0 || data->lemAction1 || data->lemWof)
811                 pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
812                         be64_to_cpu(data->lemFir),
813                         be64_to_cpu(data->lemErrMask),
814                         be64_to_cpu(data->lemAction0),
815                         be64_to_cpu(data->lemAction1),
816                         be64_to_cpu(data->lemWof));
817 }
818
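/*
 * Fetch the P7IOC hub diag-data from OPAL and dump it according to its
 * type (RGC, BI, CI, MISC or I2C).
 */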
819 static void ioda_eeh_hub_diag(struct pci_controller *hose)
820 {
821         struct pnv_phb *phb = hose->private_data;
822         struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
823         long rc;
824
825         rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
826         if (rc != OPAL_SUCCESS) {
827                 pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
828                         __func__, phb->hub_id, rc);
829                 return;
830         }
831
832         switch (data->type) {
833         case OPAL_P7IOC_DIAG_TYPE_RGC:
834                 pr_info("P7IOC diag-data for RGC\n\n");
835                 ioda_eeh_hub_diag_common(data);
836                 if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
837                         pr_info("  RGC: %016llx %016llx\n",
838                                 be64_to_cpu(data->rgc.rgcStatus),
839                                 be64_to_cpu(data->rgc.rgcLdcp));
840                 break;
841         case OPAL_P7IOC_DIAG_TYPE_BI:
842                 pr_info("P7IOC diag-data for BI %s\n\n",
843                         data->bi.biDownbound ? "Downbound" : "Upbound");
844                 ioda_eeh_hub_diag_common(data);
845                 if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
846                     data->bi.biLdcp2 || data->bi.biFenceStatus)
847                         pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
848                                 be64_to_cpu(data->bi.biLdcp0),
849                                 be64_to_cpu(data->bi.biLdcp1),
850                                 be64_to_cpu(data->bi.biLdcp2),
851                                 be64_to_cpu(data->bi.biFenceStatus));
852                 break;
853         case OPAL_P7IOC_DIAG_TYPE_CI:
854                 pr_info("P7IOC diag-data for CI Port %d\n\n",
855                         data->ci.ciPort);
856                 ioda_eeh_hub_diag_common(data);
857                 if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
858                         pr_info("  CI:  %016llx %016llx\n",
859                                 be64_to_cpu(data->ci.ciPortStatus),
860                                 be64_to_cpu(data->ci.ciPortLdcp));
861                 break;
862         case OPAL_P7IOC_DIAG_TYPE_MISC:
863                 pr_info("P7IOC diag-data for MISC\n\n");
864                 ioda_eeh_hub_diag_common(data);
865                 break;
866         case OPAL_P7IOC_DIAG_TYPE_I2C:
867                 pr_info("P7IOC diag-data for I2C\n\n");
868                 ioda_eeh_hub_diag_common(data);
869                 break;
870         default:
871                 pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
872                         __func__, phb->hub_id, data->type);
873         }
874 }
875
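/*
 * Map a PE number reported by OPAL to the EEH PE that should be handled:
 * slave PEs are translated to their compound master, and ancestor PEs
 * that are no longer fully active are frozen and returned instead, so
 * that the topmost affected PE is recovered.
 */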
876 static int ioda_eeh_get_pe(struct pci_controller *hose,
877                            u16 pe_no, struct eeh_pe **pe)
878 {
879         struct pnv_phb *phb = hose->private_data;
880         struct pnv_ioda_pe *pnv_pe;
881         struct eeh_pe *dev_pe;
882         struct eeh_dev edev;
883
884         /*
885          * If the PHB supports compound PEs, fetch the
886          * master PE, because slave PEs are invisible to
887          * the EEH core.
888          */
889         pnv_pe = &phb->ioda.pe_array[pe_no];
890         if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
891                 pnv_pe = pnv_pe->master;
892                 WARN_ON(!pnv_pe ||
893                         !(pnv_pe->flags & PNV_IODA_PE_MASTER));
894                 pe_no = pnv_pe->pe_number;
895         }
896
897         /* Find the PE according to PE# */
898         memset(&edev, 0, sizeof(struct eeh_dev));
899         edev.phb = hose;
900         edev.pe_config_addr = pe_no;
901         dev_pe = eeh_pe_get(&edev);
902         if (!dev_pe)
903                 return -EEXIST;
904
905         /* Freeze the (compound) PE */
906         *pe = dev_pe;
907         if (!(dev_pe->state & EEH_PE_ISOLATED))
908                 phb->freeze_pe(phb, pe_no);
909
910         /*
911          * At this point the (compound) PE should have been
912          * frozen. However, we still need to walk up and check
913          * the parent PEs until we hit the topmost frozen PE.
914          */
915         dev_pe = dev_pe->parent;
916         while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
917                 int ret;
918                 int active_flags = (EEH_STATE_MMIO_ACTIVE |
919                                     EEH_STATE_DMA_ACTIVE);
920
921                 ret = eeh_ops->get_state(dev_pe, NULL);
922                 if (ret <= 0 || (ret & active_flags) == active_flags) {
923                         dev_pe = dev_pe->parent;
924                         continue;
925                 }
926
927                 /* Frozen parent PE */
928                 *pe = dev_pe;
929                 if (!(dev_pe->state & EEH_PE_ISOLATED))
930                         phb->freeze_pe(phb, dev_pe->addr);
931
932                 /* Next one */
933                 dev_pe = dev_pe->parent;
934         }
935
936         return 0;
937 }
938
939 /**
940  * ioda_eeh_next_error - Retrieve next error for EEH core to handle
941  * @pe: The affected PE
942  *
943  * The function is expected to be called by the EEH core when it gets
944  * a special EEH event (one without a bound PE). It queries the OPAL
945  * APIs for the next error to handle. Informational errors are handled
946  * internally by the platform, while dead IOC, dead PHB, fenced PHB
947  * and frozen PE errors should eventually be handled by the EEH core.
948  */
949 static int ioda_eeh_next_error(struct eeh_pe **pe)
950 {
951         struct pci_controller *hose;
952         struct pnv_phb *phb;
953         struct eeh_pe *phb_pe, *parent_pe;
954         __be64 frozen_pe_no;
955         __be16 err_type, severity;
956         int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
957         long rc;
958         int state, ret = EEH_NEXT_ERR_NONE;
959
960         /*
961          * While running here, it's safe to purge the event queue.
962          * We should also keep the cached OPAL notifier event synchronized
963          * between the kernel and the firmware.
964          */
965         eeh_remove_event(NULL, false);
966         opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
967
968         list_for_each_entry(hose, &hose_list, list_node) {
969                 /*
970                  * If the subordinate PCI buses of the PHB have been
971                  * removed or are already undergoing error recovery, we
972                  * needn't take care of them any more.
973                  */
974                 phb = hose->private_data;
975                 phb_pe = eeh_phb_pe_get(hose);
976                 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
977                         continue;
978
979                 rc = opal_pci_next_error(phb->opal_id,
980                                 &frozen_pe_no, &err_type, &severity);
981
982                 /* If the OPAL API returns an error, we needn't proceed */
983                 if (rc != OPAL_SUCCESS) {
984                         pr_devel("%s: Invalid return value on "
985                                  "PHB#%x (0x%lx) from opal_pci_next_error",
986                                  __func__, hose->global_number, rc);
987                         continue;
988                 }
989
990                 /* If the PHB doesn't have an error, skip it */
991                 if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
992                     be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
993                         pr_devel("%s: No error found on PHB#%x\n",
994                                  __func__, hose->global_number);
995                         continue;
996                 }
997
998                 /*
999                  * Process the error. When multiple errors are present on
1000                  * a given PHB, we expect the one with the highest priority
1001                  * to be reported.
1002                  */
1003                 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
1004                          __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
1005                          be64_to_cpu(frozen_pe_no), hose->global_number);
1006                 switch (be16_to_cpu(err_type)) {
1007                 case OPAL_EEH_IOC_ERROR:
1008                         if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
1009                                 pr_err("EEH: dead IOC detected\n");
1010                                 ret = EEH_NEXT_ERR_DEAD_IOC;
1011                         } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
1012                                 pr_info("EEH: IOC informative error "
1013                                         "detected\n");
1014                                 ioda_eeh_hub_diag(hose);
1015                                 ret = EEH_NEXT_ERR_NONE;
1016                         }
1017
1018                         break;
1019                 case OPAL_EEH_PHB_ERROR:
1020                         if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
1021                                 *pe = phb_pe;
1022                                 pr_err("EEH: dead PHB#%x detected, "
1023                                        "location: %s\n",
1024                                        hose->global_number,
1025                                        eeh_pe_loc_get(phb_pe));
1026                                 ret = EEH_NEXT_ERR_DEAD_PHB;
1027                         } else if (be16_to_cpu(severity) ==
1028                                                 OPAL_EEH_SEV_PHB_FENCED) {
1029                                 *pe = phb_pe;
1030                                 pr_err("EEH: Fenced PHB#%x detected, "
1031                                        "location: %s\n",
1032                                        hose->global_number,
1033                                        eeh_pe_loc_get(phb_pe));
1034                                 ret = EEH_NEXT_ERR_FENCED_PHB;
1035                         } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
1036                                 pr_info("EEH: PHB#%x informative error "
1037                                         "detected, location: %s\n",
1038                                         hose->global_number,
1039                                         eeh_pe_loc_get(phb_pe));
1040                                 ioda_eeh_phb_diag(phb_pe);
1041                                 pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
1042                                 ret = EEH_NEXT_ERR_NONE;
1043                         }
1044
1045                         break;
1046                 case OPAL_EEH_PE_ERROR:
1047                         /*
1048                          * If we can't find the corresponding PE, we
1049                          * just try to unfreeze.
1050                          */
1051                         if (ioda_eeh_get_pe(hose,
1052                                             be64_to_cpu(frozen_pe_no), pe)) {
1053                                 /* Try best to clear it */
1054                                 pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
1055                                         hose->global_number, be64_to_cpu(frozen_pe_no));
1056                                 pr_info("EEH: PHB location: %s\n",
1057                                         eeh_pe_loc_get(phb_pe));
1058                                 opal_pci_eeh_freeze_clear(phb->opal_id, be64_to_cpu(frozen_pe_no),
1059                                         OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
1060                                 ret = EEH_NEXT_ERR_NONE;
1061                         } else if ((*pe)->state & EEH_PE_ISOLATED ||
1062                                    eeh_pe_passed(*pe)) {
1063                                 ret = EEH_NEXT_ERR_NONE;
1064                         } else {
1065                                 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
1066                                         (*pe)->addr, (*pe)->phb->global_number);
1067                                 pr_err("EEH: PE location: %s, PHB location: %s\n",
1068                                         eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
1069                                 ret = EEH_NEXT_ERR_FROZEN_PE;
1070                         }
1071
1072                         break;
1073                 default:
1074                         pr_warn("%s: Unexpected error type %d\n",
1075                                 __func__, be16_to_cpu(err_type));
1076                 }
1077
1078                 /*
1079                  * The EEH core will try to recover from a fenced PHB or
1080                  * a frozen PE. In the frozen PE case, the EEH core enables
1081                  * the IO path before collecting logs, which disturbs the
1082                  * error site. So we have to dump the diag-data log in
1083                  * advance here.
1084                  */
1085                 if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
1086                     ret == EEH_NEXT_ERR_FENCED_PHB) &&
1087                     !((*pe)->state & EEH_PE_ISOLATED)) {
1088                         eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
1089                         ioda_eeh_phb_diag(*pe);
1090                 }
1091
1092                 /*
1093                  * There may be a frozen parent PE as well, and we
1094                  * have to handle that frozen parent PE first.
1095                  */
1096                 if (ret == EEH_NEXT_ERR_FROZEN_PE) {
1097                         parent_pe = (*pe)->parent;
1098                         while (parent_pe) {
1099                                 /* Hit the ceiling ? */
1100                                 if (parent_pe->type & EEH_PE_PHB)
1101                                         break;
1102
1103                                 /* Frozen parent PE ? */
1104                                 state = ioda_eeh_get_state(parent_pe);
1105                                 if (state > 0 &&
1106                                     (state & active_flags) != active_flags)
1107                                         *pe = parent_pe;
1108
1109                                 /* Next parent level */
1110                                 parent_pe = parent_pe->parent;
1111                         }
1112
1113                         /* We possibly migrate to another PE */
1114                         eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
1115                 }
1116
1117                 /*
1118                  * If there are no errors on this PHB, or only an
1119                  * informational error, we continue polling it.
1120                  * Otherwise, we need action to be taken by the
1121                  * upper layer.
1122                  */
1123                 if (ret > EEH_NEXT_ERR_INF)
1124                         break;
1125         }
1126
1127         return ret;
1128 }
1129
1130 struct pnv_eeh_ops ioda_eeh_ops = {
1131         .post_init              = ioda_eeh_post_init,
1132         .set_option             = ioda_eeh_set_option,
1133         .get_state              = ioda_eeh_get_state,
1134         .reset                  = ioda_eeh_reset,
1135         .get_log                = ioda_eeh_get_log,
1136         .configure_bridge       = ioda_eeh_configure_bridge,
1137         .err_inject             = ioda_eeh_err_inject,
1138         .next_error             = ioda_eeh_next_error
1139 };