/*
 * Machine check exception handling CPU-side for power7 and power8
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
#undef DEBUG
#define pr_fmt(fmt) "mce_power: " fmt

#include <linux/types.h>
#include <linux/ptrace.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>

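/*
 * tlbiel invalidates one congruence class ("set") of the TLB per
 * execution, so stepping rb through all num_sets set indexes flushes
 * the whole TLB. The ptesync before and after order the invalidations
 * against surrounding storage accesses.
 */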
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
	unsigned long rb;
	unsigned int i;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		rb = TLBIEL_INVAL_SET;
		break;
	case TLB_INVAL_SCOPE_LPID:
		rb = TLBIEL_INVAL_SET_LPID;
		break;
	default:
		BUG();
		break;
	}

	asm volatile("ptesync" : : : "memory");
	for (i = 0; i < num_sets; i++) {
		asm volatile("tlbiel %0" : : "r" (rb));
		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
	}
	asm volatile("ptesync" : : : "memory");
}

/*
 * Generic routines to flush TLB on POWER processors. These routines
 * are used as flush_tlb hook in the cpu_spec.
 *
 * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
 *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
 */
void __flush_tlb_power7(unsigned int action)
{
	flush_tlb_206(POWER7_TLB_SETS, action);
}

void __flush_tlb_power8(unsigned int action)
{
	flush_tlb_206(POWER8_TLB_SETS, action);
}

void __flush_tlb_power9(unsigned int action)
{
	if (radix_enabled())
		flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
	else
		flush_tlb_206(POWER9_TLB_SETS_HASH, action);
}

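/*
 * After an SLB multi-hit or parity error the hardware SLB contents
 * can no longer be trusted: the routine below invalidates every entry
 * and re-creates the bolted entries from the shadow SLB buffer.
 */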
/* flush SLBs and reload */
#ifdef CONFIG_PPC_STD_MMU_64
static void flush_and_reload_slb(void)
{
	struct slb_shadow *slb;
	unsigned long i, n;

	/* Invalidate all SLBs */
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));

#ifdef CONFIG_KVM_BOOK3S_HANDLER
	/*
	 * If machine check is hit when in guest or in transition, we will
	 * only flush the SLBs and continue.
	 */
	if (get_paca()->kvm_hstate.in_guest)
		return;
#endif

	/* For host kernel, reload the SLBs from shadow SLB buffer. */
	slb = get_slb_shadow();
	if (!slb)
		return;

	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);

	/* Load up the SLB entries from shadow SLB */
	for (i = 0; i < n; i++) {
		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);

		rb = (rb & ~0xFFFul) | i;
		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
	}
}
#endif

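/*
 * PPC_INVALIDATE_ERAT emits the instruction that invalidates the
 * whole ERAT (effective-to-real address translation cache).
 */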
static void flush_erat(void)
{
	asm volatile(PPC_INVALIDATE_ERAT : : : "memory");
}

#define MCE_FLUSH_SLB 1
#define MCE_FLUSH_TLB 2
#define MCE_FLUSH_ERAT 3

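/*
 * Flush the requested translation structure. Returns 1 if the flush
 * was performed (so the corresponding error can be treated as
 * handled), 0 if no flush method was available.
 */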
static int mce_flush(int what)
{
#ifdef CONFIG_PPC_STD_MMU_64
	if (what == MCE_FLUSH_SLB) {
		flush_and_reload_slb();
		return 1;
	}
#endif
	if (what == MCE_FLUSH_ERAT) {
		flush_erat();
		return 1;
	}
	if (what == MCE_FLUSH_TLB) {
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
			return 1;
		}
	}

	return 0;
}

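/*
 * Clear each DSISR error bit whose backing structure was successfully
 * flushed; the machine check counts as handled only if no error bits
 * remain set at the end.
 */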
static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
{
	if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
		dsisr &= ~slb;
	if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
		dsisr &= ~erat;
	if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
		dsisr &= ~tlb;

	/* Any other errors we don't understand? */
	if (dsisr)
		return 0;

	return 1;
}

static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
	long handled = 1;

	/*
	 * Flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
	 * Reset the error bits whenever we handle them so that at the end
	 * we can check whether we handled all of them or not.
	 */
#ifdef CONFIG_PPC_STD_MMU_64
	if (dsisr & slb_error_bits) {
		flush_and_reload_slb();
		/* reset error bits */
		dsisr &= ~(slb_error_bits);
	}
	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
		/* reset error bits */
		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
	}
#endif
	/* Any other errors we don't understand? */
	if (dsisr & 0xffffffffUL)
		handled = 0;

	return handled;
}

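/*
 * POWER7 and POWER8 share mce_handle_derror(); only the SLB error
 * bit masks passed in differ.
 */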
static long mce_handle_derror_p7(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
}

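/*
 * Instruction-side machine checks report their cause in SRR1 rather
 * than DSISR; this SRR1 decode is common to POWER7 and POWER8.
 */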
static long mce_handle_common_ierror(uint64_t srr1)
{
	long handled = 0;

	switch (P7_SRR1_MC_IFETCH(srr1)) {
	case 0:
		break;
#ifdef CONFIG_PPC_STD_MMU_64
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		/* flush and reload SLBs for SLB errors. */
		flush_and_reload_slb();
		handled = 1;
		break;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
			handled = 1;
		}
		break;
#endif
	default:
		break;
	}

	return handled;
}

static long mce_handle_ierror_p7(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}

static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
{
	switch (P7_SRR1_MC_IFETCH(srr1)) {
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
		break;
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
		break;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
		break;
	case P7_SRR1_MC_IFETCH_UE:
	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
		break;
	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type =
				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
		break;
	}
}

static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
	}
}

static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
{
	if (dsisr & P7_DSISR_MC_UE) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type =
				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
	}
}

static long mce_handle_ue_error(struct pt_regs *regs)
{
	long handled = 0;

	/*
	 * On specific SCOM read via MMIO we may get a machine check
	 * exception with SRR0 pointing inside opal. If that is the
	 * case OPAL may have recovery address to re-read SCOM data in
	 * different way and hence we can recover from this MC.
	 */
	if (ppc_md.mce_check_early_recovery) {
		if (ppc_md.mce_check_early_recovery(regs))
			handled = 1;
	}
	return handled;
}

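/*
 * Entry point, called in real mode from the early machine check
 * handler. Returns 1 if the error was fully recovered, 0 otherwise.
 */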
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
	mce_error_info.initiator = MCE_INITIATOR_CPU;

	srr1 = regs->msr;
	nip = regs->nip;

	/*
	 * Handle memory errors depending whether this was a load/store or
	 * ifetch exception. Also, populate the mce error_type and
	 * type-specific error_type from either SRR1 or DSISR, depending
	 * on whether this was a load/store or ifetch exception.
	 */
	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p7(regs->dsisr);
		mce_get_derror_p7(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p7(srr1);
		mce_get_ierror_p7(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}

static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}

static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
{
	mce_get_derror_p7(mce_err, dsisr);
	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}

static long mce_handle_ierror_p8(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}

static long mce_handle_derror_p8(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
}

long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	mce_error_info.severity = MCE_SEV_ERROR_SYNC;
	mce_error_info.initiator = MCE_INITIATOR_CPU;

	srr1 = regs->msr;
	nip = regs->nip;

	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p8(regs->dsisr);
		mce_get_derror_p8(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p8(srr1);
		mce_get_ierror_p8(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}

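/*
 * The POWER9 handlers below take the full pt_regs and route all
 * flushing through mce_flush()/mce_handle_flush_derrors() rather
 * than the POWER7/POWER8-specific paths above.
 */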
static int mce_handle_derror_p9(struct pt_regs *regs)
{
	uint64_t dsisr = regs->dsisr;

	return mce_handle_flush_derrors(dsisr,
			P9_DSISR_MC_SLB_PARITY_MFSLB |
			P9_DSISR_MC_SLB_MULTIHIT_MFSLB,

			P9_DSISR_MC_TLB_MULTIHIT_MFTLB,

			P9_DSISR_MC_ERAT_MULTIHIT);
}

static int mce_handle_ierror_p9(struct pt_regs *regs)
{
	uint64_t srr1 = regs->msr;

	switch (P9_SRR1_MC_IFETCH(srr1)) {
	case P9_SRR1_MC_IFETCH_SLB_PARITY:
	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
		return mce_flush(MCE_FLUSH_SLB);
	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
		return mce_flush(MCE_FLUSH_TLB);
	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
		return mce_flush(MCE_FLUSH_ERAT);
	default:
		return 0;
	}
}

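/*
 * The decode helpers below only classify the error for reporting via
 * save_mce_event(); any recovery flushing has already been attempted
 * by the handlers above.
 */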
static void mce_get_derror_p9(struct pt_regs *regs,
		struct mce_error_info *mce_err, uint64_t *addr)
{
	uint64_t dsisr = regs->dsisr;

	mce_err->severity = MCE_SEV_ERROR_SYNC;
	mce_err->initiator = MCE_INITIATOR_CPU;

	if (dsisr & P9_DSISR_MC_USER_TLBIE)
		*addr = regs->nip;
	else
		*addr = regs->dar;

	if (dsisr & P9_DSISR_MC_UE) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
	} else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
	} else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
		mce_err->error_type = MCE_ERROR_TYPE_LINK;
		mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
	} else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
		mce_err->error_type = MCE_ERROR_TYPE_LINK;
		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
	} else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	} else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
	} else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
		mce_err->error_type = MCE_ERROR_TYPE_USER;
		mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
	} else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
	} else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
	} else if (dsisr & P9_DSISR_MC_RA_LOAD) {
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
	} else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
	} else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
	}
}

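/*
 * Asynchronous store errors are not synchronous with the interrupted
 * instruction, so they are flagged MCE_SEV_FATAL rather than
 * MCE_SEV_ERROR_SYNC.
 */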
static void mce_get_ierror_p9(struct pt_regs *regs,
		struct mce_error_info *mce_err, uint64_t *addr)
{
	uint64_t srr1 = regs->msr;

	switch (P9_SRR1_MC_IFETCH(srr1)) {
	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
		mce_err->severity = MCE_SEV_FATAL;
		break;
	default:
		mce_err->severity = MCE_SEV_ERROR_SYNC;
		break;
	}

	mce_err->initiator = MCE_INITIATOR_CPU;

	*addr = regs->nip;

	switch (P9_SRR1_MC_IFETCH(srr1)) {
	case P9_SRR1_MC_IFETCH_UE:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
		break;
	case P9_SRR1_MC_IFETCH_SLB_PARITY:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
		break;
	case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
		break;
	case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
		break;
	case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
		mce_err->error_type = MCE_ERROR_TYPE_TLB;
		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
		break;
	case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
		mce_err->error_type = MCE_ERROR_TYPE_UE;
		mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
		break;
	case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
		mce_err->error_type = MCE_ERROR_TYPE_LINK;
		mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
		break;
	case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
		mce_err->error_type = MCE_ERROR_TYPE_LINK;
		mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
		break;
	case P9_SRR1_MC_IFETCH_RA:
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
		break;
	case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
		break;
	case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
		break;
	case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
		mce_err->error_type = MCE_ERROR_TYPE_LINK;
		mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
		break;
	case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
		mce_err->error_type = MCE_ERROR_TYPE_RA;
		mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
		break;
	default:
		break;
	}
}

long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
	uint64_t nip, addr;
	long handled;
	struct mce_error_info mce_error_info = { 0 };

	nip = regs->nip;

	if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
		handled = mce_handle_derror_p9(regs);
		mce_get_derror_p9(regs, &mce_error_info, &addr);
	} else {
		handled = mce_handle_ierror_p9(regs);
		mce_get_ierror_p9(regs, &mce_error_info, &addr);
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}