/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
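
/*
 * Machine checks can nest (another MCE may be taken while one is
 * being handled), so each CPU keeps a small stack of MAX_MC_EVT
 * events: mce_nest_count indexes the per-CPU mce_event[] array
 * below. The queue counterparts hold events deferred until we are
 * back in a context where printk() and memory_failure() are safe.
 */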
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
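
/*
 * Two deferral mechanisms are used here: an irq_work, which can be
 * queued from the machine check path itself and runs on the way out
 * of the interrupt, and a regular work_struct, which runs later in
 * process context where memory_failure() may be called.
 */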
static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handle routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
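
/*
 * A sketch of the intended calling sequence. MCE_EVENT_DONTRELEASE
 * is assumed to be the bool alias defined next to MCE_EVENT_RELEASE
 * in asm/mce.h:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... consume evt ...
 *		release_mce_event();
 *	}
 */
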
/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
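
/*
 * Note that the event is copied out with MCE_EVENT_RELEASE so the
 * per-CPU nest slot is freed immediately; the printing itself is
 * deferred to machine_check_process_queued_event() because this path
 * may run in a context where printk() is not safe.
 */
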
/*
 * Process pending MCE UE events from the MCE UE event queue. This
 * function runs later, in process context, from a work queue.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh! well
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was triggered\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function
 * runs from irq_work, on the way out of the interrupt, once printk()
 * is safe again.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false, false);
		__this_cpu_dec(mce_queue_count);
	}
}
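
/*
 * Decode and print one machine check event. The printk level is
 * derived from evt->severity; user_mode and in_guest only affect how
 * the interrupted NIP is reported.
 */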
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest) {
		printk("%s Guest NIP: %016llx\n", level, evt->srr0);
	} else if (user_mode) {
		printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
		       evt->srr0, current->pid, current->comm);
	} else {
		printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
		       (void *)evt->srr0);
	}

	printk("%s Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s Physical address: %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contains srr0 and srr1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
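
/*
 * Work out at boot time what the HMER_DEBUG_TRIG bit means on this
 * CPU: prefer the device tree property, and fall back to decoding
 * the PVR for known POWER9 revisions.
 */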
static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function =
					DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}
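
/*
 * Real mode HMI handler. Debug-trigger HMIs are dealt with by
 * hmi_handle_debugtrig() above; anything else is passed to the
 * platform code, and we wait for the timebase to resync before
 * returning.
 */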
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}