Merge tag 'audit-pr-20170907' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoor...
[sfrench/cifs-2.6.git] / arch / powerpc / kernel / mce.c
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/ptrace.h>
28 #include <linux/percpu.h>
29 #include <linux/export.h>
30 #include <linux/irq_work.h>
31
32 #include <asm/machdep.h>
33 #include <asm/mce.h>
34
35 static DEFINE_PER_CPU(int, mce_nest_count);
36 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37
38 /* Queue for delayed MCE events. */
39 static DEFINE_PER_CPU(int, mce_queue_count);
40 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41
42 static void machine_check_process_queued_event(struct irq_work *work);
43 static struct irq_work mce_event_process_work = {
44         .func = machine_check_process_queued_event,
45 };
46
47 static void mce_set_error_info(struct machine_check_event *mce,
48                                struct mce_error_info *mce_err)
49 {
50         mce->error_type = mce_err->error_type;
51         switch (mce_err->error_type) {
52         case MCE_ERROR_TYPE_UE:
53                 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
54                 break;
55         case MCE_ERROR_TYPE_SLB:
56                 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
57                 break;
58         case MCE_ERROR_TYPE_ERAT:
59                 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
60                 break;
61         case MCE_ERROR_TYPE_TLB:
62                 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
63                 break;
64         case MCE_ERROR_TYPE_USER:
65                 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
66                 break;
67         case MCE_ERROR_TYPE_RA:
68                 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
69                 break;
70         case MCE_ERROR_TYPE_LINK:
71                 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
72                 break;
73         case MCE_ERROR_TYPE_UNKNOWN:
74         default:
75                 break;
76         }
77 }
78
79 /*
80  * Decode and save high level MCE information into per cpu buffer which
81  * is an array of machine_check_event structure.
82  */
83 void save_mce_event(struct pt_regs *regs, long handled,
84                     struct mce_error_info *mce_err,
85                     uint64_t nip, uint64_t addr)
86 {
87         int index = __this_cpu_inc_return(mce_nest_count) - 1;
88         struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
89
90         /*
91          * Return if we don't have enough space to log mce event.
92          * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
93          * the check below will stop buffer overrun.
94          */
95         if (index >= MAX_MC_EVT)
96                 return;
97
98         /* Populate generic machine check info */
99         mce->version = MCE_V1;
100         mce->srr0 = nip;
101         mce->srr1 = regs->msr;
102         mce->gpr3 = regs->gpr[3];
103         mce->in_use = 1;
104
105         /* Mark it recovered if we have handled it and MSR(RI=1). */
106         if (handled && (regs->msr & MSR_RI))
107                 mce->disposition = MCE_DISPOSITION_RECOVERED;
108         else
109                 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
110
111         mce->initiator = mce_err->initiator;
112         mce->severity = mce_err->severity;
113
114         /*
115          * Populate the mce error_type and type-specific error_type.
116          */
117         mce_set_error_info(mce, mce_err);
118
119         if (!addr)
120                 return;
121
122         if (mce->error_type == MCE_ERROR_TYPE_TLB) {
123                 mce->u.tlb_error.effective_address_provided = true;
124                 mce->u.tlb_error.effective_address = addr;
125         } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
126                 mce->u.slb_error.effective_address_provided = true;
127                 mce->u.slb_error.effective_address = addr;
128         } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
129                 mce->u.erat_error.effective_address_provided = true;
130                 mce->u.erat_error.effective_address = addr;
131         } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
132                 mce->u.user_error.effective_address_provided = true;
133                 mce->u.user_error.effective_address = addr;
134         } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
135                 mce->u.ra_error.effective_address_provided = true;
136                 mce->u.ra_error.effective_address = addr;
137         } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
138                 mce->u.link_error.effective_address_provided = true;
139                 mce->u.link_error.effective_address = addr;
140         } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
141                 mce->u.ue_error.effective_address_provided = true;
142                 mce->u.ue_error.effective_address = addr;
143         }
144         return;
145 }
146
147 /*
148  * get_mce_event:
149  *      mce     Pointer to machine_check_event structure to be filled.
150  *      release Flag to indicate whether to free the event slot or not.
151  *              0 <= do not release the mce event. Caller will invoke
152  *                   release_mce_event() once event has been consumed.
153  *              1 <= release the slot.
154  *
155  *      return  1 = success
156  *              0 = failure
157  *
158  * get_mce_event() will be called by platform specific machine check
159  * handle routine and in KVM.
160  * When we call get_mce_event(), we are still in interrupt context and
161  * preemption will not be scheduled until ret_from_expect() routine
162  * is called.
163  */
164 int get_mce_event(struct machine_check_event *mce, bool release)
165 {
166         int index = __this_cpu_read(mce_nest_count) - 1;
167         struct machine_check_event *mc_evt;
168         int ret = 0;
169
170         /* Sanity check */
171         if (index < 0)
172                 return ret;
173
174         /* Check if we have MCE info to process. */
175         if (index < MAX_MC_EVT) {
176                 mc_evt = this_cpu_ptr(&mce_event[index]);
177                 /* Copy the event structure and release the original */
178                 if (mce)
179                         *mce = *mc_evt;
180                 if (release)
181                         mc_evt->in_use = 0;
182                 ret = 1;
183         }
184         /* Decrement the count to free the slot. */
185         if (release)
186                 __this_cpu_dec(mce_nest_count);
187
188         return ret;
189 }
190
/*
 * Release the most recent MCE event slot on this CPU without copying the
 * event out.  The result of get_mce_event() is deliberately ignored.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
195
196 /*
197  * Queue up the MCE event which then can be handled later.
198  */
199 void machine_check_queue_event(void)
200 {
201         int index;
202         struct machine_check_event evt;
203
204         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
205                 return;
206
207         index = __this_cpu_inc_return(mce_queue_count) - 1;
208         /* If queue is full, just return for now. */
209         if (index >= MAX_MC_EVT) {
210                 __this_cpu_dec(mce_queue_count);
211                 return;
212         }
213         memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
214
215         /* Queue irq work to process this event later. */
216         irq_work_queue(&mce_event_process_work);
217 }
218
219 /*
220  * process pending MCE event from the mce event queue. This function will be
221  * called during syscall exit.
222  */
223 static void machine_check_process_queued_event(struct irq_work *work)
224 {
225         int index;
226
227         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
228
229         /*
230          * For now just print it to console.
231          * TODO: log this error event to FSP or nvram.
232          */
233         while (__this_cpu_read(mce_queue_count) > 0) {
234                 index = __this_cpu_read(mce_queue_count) - 1;
235                 machine_check_print_event_info(
236                                 this_cpu_ptr(&mce_event_queue[index]), false);
237                 __this_cpu_dec(mce_queue_count);
238         }
239 }
240
241 void machine_check_print_event_info(struct machine_check_event *evt,
242                                     bool user_mode)
243 {
244         const char *level, *sevstr, *subtype;
245         static const char *mc_ue_types[] = {
246                 "Indeterminate",
247                 "Instruction fetch",
248                 "Page table walk ifetch",
249                 "Load/Store",
250                 "Page table walk Load/Store",
251         };
252         static const char *mc_slb_types[] = {
253                 "Indeterminate",
254                 "Parity",
255                 "Multihit",
256         };
257         static const char *mc_erat_types[] = {
258                 "Indeterminate",
259                 "Parity",
260                 "Multihit",
261         };
262         static const char *mc_tlb_types[] = {
263                 "Indeterminate",
264                 "Parity",
265                 "Multihit",
266         };
267         static const char *mc_user_types[] = {
268                 "Indeterminate",
269                 "tlbie(l) invalid",
270         };
271         static const char *mc_ra_types[] = {
272                 "Indeterminate",
273                 "Instruction fetch (bad)",
274                 "Instruction fetch (foreign)",
275                 "Page table walk ifetch (bad)",
276                 "Page table walk ifetch (foreign)",
277                 "Load (bad)",
278                 "Store (bad)",
279                 "Page table walk Load/Store (bad)",
280                 "Page table walk Load/Store (foreign)",
281                 "Load/Store (foreign)",
282         };
283         static const char *mc_link_types[] = {
284                 "Indeterminate",
285                 "Instruction fetch (timeout)",
286                 "Page table walk ifetch (timeout)",
287                 "Load (timeout)",
288                 "Store (timeout)",
289                 "Page table walk Load/Store (timeout)",
290         };
291
292         /* Print things out */
293         if (evt->version != MCE_V1) {
294                 pr_err("Machine Check Exception, Unknown event version %d !\n",
295                        evt->version);
296                 return;
297         }
298         switch (evt->severity) {
299         case MCE_SEV_NO_ERROR:
300                 level = KERN_INFO;
301                 sevstr = "Harmless";
302                 break;
303         case MCE_SEV_WARNING:
304                 level = KERN_WARNING;
305                 sevstr = "";
306                 break;
307         case MCE_SEV_ERROR_SYNC:
308                 level = KERN_ERR;
309                 sevstr = "Severe";
310                 break;
311         case MCE_SEV_FATAL:
312         default:
313                 level = KERN_ERR;
314                 sevstr = "Fatal";
315                 break;
316         }
317
318         printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
319                evt->disposition == MCE_DISPOSITION_RECOVERED ?
320                "Recovered" : "Not recovered");
321
322         if (user_mode) {
323                 printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
324                         evt->srr0, current->pid, current->comm);
325         } else {
326                 printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
327                        (void *)evt->srr0);
328         }
329
330         printk("%s  Initiator: %s\n", level,
331                evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
332         switch (evt->error_type) {
333         case MCE_ERROR_TYPE_UE:
334                 subtype = evt->u.ue_error.ue_error_type <
335                         ARRAY_SIZE(mc_ue_types) ?
336                         mc_ue_types[evt->u.ue_error.ue_error_type]
337                         : "Unknown";
338                 printk("%s  Error type: UE [%s]\n", level, subtype);
339                 if (evt->u.ue_error.effective_address_provided)
340                         printk("%s    Effective address: %016llx\n",
341                                level, evt->u.ue_error.effective_address);
342                 if (evt->u.ue_error.physical_address_provided)
343                         printk("%s      Physical address: %016llx\n",
344                                level, evt->u.ue_error.physical_address);
345                 break;
346         case MCE_ERROR_TYPE_SLB:
347                 subtype = evt->u.slb_error.slb_error_type <
348                         ARRAY_SIZE(mc_slb_types) ?
349                         mc_slb_types[evt->u.slb_error.slb_error_type]
350                         : "Unknown";
351                 printk("%s  Error type: SLB [%s]\n", level, subtype);
352                 if (evt->u.slb_error.effective_address_provided)
353                         printk("%s    Effective address: %016llx\n",
354                                level, evt->u.slb_error.effective_address);
355                 break;
356         case MCE_ERROR_TYPE_ERAT:
357                 subtype = evt->u.erat_error.erat_error_type <
358                         ARRAY_SIZE(mc_erat_types) ?
359                         mc_erat_types[evt->u.erat_error.erat_error_type]
360                         : "Unknown";
361                 printk("%s  Error type: ERAT [%s]\n", level, subtype);
362                 if (evt->u.erat_error.effective_address_provided)
363                         printk("%s    Effective address: %016llx\n",
364                                level, evt->u.erat_error.effective_address);
365                 break;
366         case MCE_ERROR_TYPE_TLB:
367                 subtype = evt->u.tlb_error.tlb_error_type <
368                         ARRAY_SIZE(mc_tlb_types) ?
369                         mc_tlb_types[evt->u.tlb_error.tlb_error_type]
370                         : "Unknown";
371                 printk("%s  Error type: TLB [%s]\n", level, subtype);
372                 if (evt->u.tlb_error.effective_address_provided)
373                         printk("%s    Effective address: %016llx\n",
374                                level, evt->u.tlb_error.effective_address);
375                 break;
376         case MCE_ERROR_TYPE_USER:
377                 subtype = evt->u.user_error.user_error_type <
378                         ARRAY_SIZE(mc_user_types) ?
379                         mc_user_types[evt->u.user_error.user_error_type]
380                         : "Unknown";
381                 printk("%s  Error type: User [%s]\n", level, subtype);
382                 if (evt->u.user_error.effective_address_provided)
383                         printk("%s    Effective address: %016llx\n",
384                                level, evt->u.user_error.effective_address);
385                 break;
386         case MCE_ERROR_TYPE_RA:
387                 subtype = evt->u.ra_error.ra_error_type <
388                         ARRAY_SIZE(mc_ra_types) ?
389                         mc_ra_types[evt->u.ra_error.ra_error_type]
390                         : "Unknown";
391                 printk("%s  Error type: Real address [%s]\n", level, subtype);
392                 if (evt->u.ra_error.effective_address_provided)
393                         printk("%s    Effective address: %016llx\n",
394                                level, evt->u.ra_error.effective_address);
395                 break;
396         case MCE_ERROR_TYPE_LINK:
397                 subtype = evt->u.link_error.link_error_type <
398                         ARRAY_SIZE(mc_link_types) ?
399                         mc_link_types[evt->u.link_error.link_error_type]
400                         : "Unknown";
401                 printk("%s  Error type: Link [%s]\n", level, subtype);
402                 if (evt->u.link_error.effective_address_provided)
403                         printk("%s    Effective address: %016llx\n",
404                                level, evt->u.link_error.effective_address);
405                 break;
406         default:
407         case MCE_ERROR_TYPE_UNKNOWN:
408                 printk("%s  Error type: Unknown\n", level);
409                 break;
410         }
411 }
412 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
413
414 uint64_t get_mce_fault_addr(struct machine_check_event *evt)
415 {
416         switch (evt->error_type) {
417         case MCE_ERROR_TYPE_UE:
418                 if (evt->u.ue_error.effective_address_provided)
419                         return evt->u.ue_error.effective_address;
420                 break;
421         case MCE_ERROR_TYPE_SLB:
422                 if (evt->u.slb_error.effective_address_provided)
423                         return evt->u.slb_error.effective_address;
424                 break;
425         case MCE_ERROR_TYPE_ERAT:
426                 if (evt->u.erat_error.effective_address_provided)
427                         return evt->u.erat_error.effective_address;
428                 break;
429         case MCE_ERROR_TYPE_TLB:
430                 if (evt->u.tlb_error.effective_address_provided)
431                         return evt->u.tlb_error.effective_address;
432                 break;
433         case MCE_ERROR_TYPE_USER:
434                 if (evt->u.user_error.effective_address_provided)
435                         return evt->u.user_error.effective_address;
436                 break;
437         case MCE_ERROR_TYPE_RA:
438                 if (evt->u.ra_error.effective_address_provided)
439                         return evt->u.ra_error.effective_address;
440                 break;
441         case MCE_ERROR_TYPE_LINK:
442                 if (evt->u.link_error.effective_address_provided)
443                         return evt->u.link_error.effective_address;
444                 break;
445         default:
446         case MCE_ERROR_TYPE_UNKNOWN:
447                 break;
448         }
449         return 0;
450 }
451 EXPORT_SYMBOL(get_mce_fault_addr);
452
453 /*
454  * This function is called in real mode. Strictly no printk's please.
455  *
456  * regs->nip and regs->msr contains srr0 and ssr1.
457  */
458 long machine_check_early(struct pt_regs *regs)
459 {
460         long handled = 0;
461
462         __this_cpu_inc(irq_stat.mce_exceptions);
463
464         if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
465                 handled = cur_cpu_spec->machine_check_early(regs);
466         return handled;
467 }
468
469 long hmi_exception_realmode(struct pt_regs *regs)
470 {
471         __this_cpu_inc(irq_stat.hmi_exceptions);
472
473         wait_for_subcore_guest_exit();
474
475         if (ppc_md.hmi_exception_early)
476                 ppc_md.hmi_exception_early(regs);
477
478         wait_for_tb_resync();
479
480         return 0;
481 }