Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / arch / powerpc / kernel / mce.c
1 /*
2  * Machine check exception handling.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <linux/percpu.h>
28 #include <linux/export.h>
29 #include <linux/irq_work.h>
30 #include <asm/mce.h>
31
32 static DEFINE_PER_CPU(int, mce_nest_count);
33 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
34
35 /* Queue for delayed MCE events. */
36 static DEFINE_PER_CPU(int, mce_queue_count);
37 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
38
39 static void machine_check_process_queued_event(struct irq_work *work);
40 static struct irq_work mce_event_process_work = {
41         .func = machine_check_process_queued_event,
42 };
43
44 static void mce_set_error_info(struct machine_check_event *mce,
45                                struct mce_error_info *mce_err)
46 {
47         mce->error_type = mce_err->error_type;
48         switch (mce_err->error_type) {
49         case MCE_ERROR_TYPE_UE:
50                 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
51                 break;
52         case MCE_ERROR_TYPE_SLB:
53                 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
54                 break;
55         case MCE_ERROR_TYPE_ERAT:
56                 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
57                 break;
58         case MCE_ERROR_TYPE_TLB:
59                 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
60                 break;
61         case MCE_ERROR_TYPE_USER:
62                 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
63                 break;
64         case MCE_ERROR_TYPE_RA:
65                 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
66                 break;
67         case MCE_ERROR_TYPE_LINK:
68                 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
69                 break;
70         case MCE_ERROR_TYPE_UNKNOWN:
71         default:
72                 break;
73         }
74 }
75
76 /*
77  * Decode and save high level MCE information into per cpu buffer which
78  * is an array of machine_check_event structure.
79  */
80 void save_mce_event(struct pt_regs *regs, long handled,
81                     struct mce_error_info *mce_err,
82                     uint64_t nip, uint64_t addr)
83 {
84         int index = __this_cpu_inc_return(mce_nest_count) - 1;
85         struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
86
87         /*
88          * Return if we don't have enough space to log mce event.
89          * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
90          * the check below will stop buffer overrun.
91          */
92         if (index >= MAX_MC_EVT)
93                 return;
94
95         /* Populate generic machine check info */
96         mce->version = MCE_V1;
97         mce->srr0 = nip;
98         mce->srr1 = regs->msr;
99         mce->gpr3 = regs->gpr[3];
100         mce->in_use = 1;
101
102         /* Mark it recovered if we have handled it and MSR(RI=1). */
103         if (handled && (regs->msr & MSR_RI))
104                 mce->disposition = MCE_DISPOSITION_RECOVERED;
105         else
106                 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
107
108         mce->initiator = mce_err->initiator;
109         mce->severity = mce_err->severity;
110
111         /*
112          * Populate the mce error_type and type-specific error_type.
113          */
114         mce_set_error_info(mce, mce_err);
115
116         if (!addr)
117                 return;
118
119         if (mce->error_type == MCE_ERROR_TYPE_TLB) {
120                 mce->u.tlb_error.effective_address_provided = true;
121                 mce->u.tlb_error.effective_address = addr;
122         } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
123                 mce->u.slb_error.effective_address_provided = true;
124                 mce->u.slb_error.effective_address = addr;
125         } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
126                 mce->u.erat_error.effective_address_provided = true;
127                 mce->u.erat_error.effective_address = addr;
128         } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
129                 mce->u.user_error.effective_address_provided = true;
130                 mce->u.user_error.effective_address = addr;
131         } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
132                 mce->u.ra_error.effective_address_provided = true;
133                 mce->u.ra_error.effective_address = addr;
134         } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
135                 mce->u.link_error.effective_address_provided = true;
136                 mce->u.link_error.effective_address = addr;
137         } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
138                 mce->u.ue_error.effective_address_provided = true;
139                 mce->u.ue_error.effective_address = addr;
140         }
141         return;
142 }
143
144 /*
145  * get_mce_event:
146  *      mce     Pointer to machine_check_event structure to be filled.
147  *      release Flag to indicate whether to free the event slot or not.
148  *              0 <= do not release the mce event. Caller will invoke
149  *                   release_mce_event() once event has been consumed.
150  *              1 <= release the slot.
151  *
152  *      return  1 = success
153  *              0 = failure
154  *
155  * get_mce_event() will be called by platform specific machine check
156  * handle routine and in KVM.
157  * When we call get_mce_event(), we are still in interrupt context and
158  * preemption will not be scheduled until ret_from_expect() routine
159  * is called.
160  */
161 int get_mce_event(struct machine_check_event *mce, bool release)
162 {
163         int index = __this_cpu_read(mce_nest_count) - 1;
164         struct machine_check_event *mc_evt;
165         int ret = 0;
166
167         /* Sanity check */
168         if (index < 0)
169                 return ret;
170
171         /* Check if we have MCE info to process. */
172         if (index < MAX_MC_EVT) {
173                 mc_evt = this_cpu_ptr(&mce_event[index]);
174                 /* Copy the event structure and release the original */
175                 if (mce)
176                         *mce = *mc_evt;
177                 if (release)
178                         mc_evt->in_use = 0;
179                 ret = 1;
180         }
181         /* Decrement the count to free the slot. */
182         if (release)
183                 __this_cpu_dec(mce_nest_count);
184
185         return ret;
186 }
187
188 void release_mce_event(void)
189 {
190         get_mce_event(NULL, true);
191 }
192
193 /*
194  * Queue up the MCE event which then can be handled later.
195  */
196 void machine_check_queue_event(void)
197 {
198         int index;
199         struct machine_check_event evt;
200
201         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
202                 return;
203
204         index = __this_cpu_inc_return(mce_queue_count) - 1;
205         /* If queue is full, just return for now. */
206         if (index >= MAX_MC_EVT) {
207                 __this_cpu_dec(mce_queue_count);
208                 return;
209         }
210         memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
211
212         /* Queue irq work to process this event later. */
213         irq_work_queue(&mce_event_process_work);
214 }
215
216 /*
217  * process pending MCE event from the mce event queue. This function will be
218  * called during syscall exit.
219  */
220 static void machine_check_process_queued_event(struct irq_work *work)
221 {
222         int index;
223
224         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
225
226         /*
227          * For now just print it to console.
228          * TODO: log this error event to FSP or nvram.
229          */
230         while (__this_cpu_read(mce_queue_count) > 0) {
231                 index = __this_cpu_read(mce_queue_count) - 1;
232                 machine_check_print_event_info(
233                                 this_cpu_ptr(&mce_event_queue[index]), false);
234                 __this_cpu_dec(mce_queue_count);
235         }
236 }
237
238 void machine_check_print_event_info(struct machine_check_event *evt,
239                                     bool user_mode)
240 {
241         const char *level, *sevstr, *subtype;
242         static const char *mc_ue_types[] = {
243                 "Indeterminate",
244                 "Instruction fetch",
245                 "Page table walk ifetch",
246                 "Load/Store",
247                 "Page table walk Load/Store",
248         };
249         static const char *mc_slb_types[] = {
250                 "Indeterminate",
251                 "Parity",
252                 "Multihit",
253         };
254         static const char *mc_erat_types[] = {
255                 "Indeterminate",
256                 "Parity",
257                 "Multihit",
258         };
259         static const char *mc_tlb_types[] = {
260                 "Indeterminate",
261                 "Parity",
262                 "Multihit",
263         };
264         static const char *mc_user_types[] = {
265                 "Indeterminate",
266                 "tlbie(l) invalid",
267         };
268         static const char *mc_ra_types[] = {
269                 "Indeterminate",
270                 "Instruction fetch (bad)",
271                 "Instruction fetch (foreign)",
272                 "Page table walk ifetch (bad)",
273                 "Page table walk ifetch (foreign)",
274                 "Load (bad)",
275                 "Store (bad)",
276                 "Page table walk Load/Store (bad)",
277                 "Page table walk Load/Store (foreign)",
278                 "Load/Store (foreign)",
279         };
280         static const char *mc_link_types[] = {
281                 "Indeterminate",
282                 "Instruction fetch (timeout)",
283                 "Page table walk ifetch (timeout)",
284                 "Load (timeout)",
285                 "Store (timeout)",
286                 "Page table walk Load/Store (timeout)",
287         };
288
289         /* Print things out */
290         if (evt->version != MCE_V1) {
291                 pr_err("Machine Check Exception, Unknown event version %d !\n",
292                        evt->version);
293                 return;
294         }
295         switch (evt->severity) {
296         case MCE_SEV_NO_ERROR:
297                 level = KERN_INFO;
298                 sevstr = "Harmless";
299                 break;
300         case MCE_SEV_WARNING:
301                 level = KERN_WARNING;
302                 sevstr = "";
303                 break;
304         case MCE_SEV_ERROR_SYNC:
305                 level = KERN_ERR;
306                 sevstr = "Severe";
307                 break;
308         case MCE_SEV_FATAL:
309         default:
310                 level = KERN_ERR;
311                 sevstr = "Fatal";
312                 break;
313         }
314
315         printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
316                evt->disposition == MCE_DISPOSITION_RECOVERED ?
317                "Recovered" : "Not recovered");
318
319         if (user_mode) {
320                 printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
321                         evt->srr0, current->pid, current->comm);
322         } else {
323                 printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
324                        (void *)evt->srr0);
325         }
326
327         printk("%s  Initiator: %s\n", level,
328                evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
329         switch (evt->error_type) {
330         case MCE_ERROR_TYPE_UE:
331                 subtype = evt->u.ue_error.ue_error_type <
332                         ARRAY_SIZE(mc_ue_types) ?
333                         mc_ue_types[evt->u.ue_error.ue_error_type]
334                         : "Unknown";
335                 printk("%s  Error type: UE [%s]\n", level, subtype);
336                 if (evt->u.ue_error.effective_address_provided)
337                         printk("%s    Effective address: %016llx\n",
338                                level, evt->u.ue_error.effective_address);
339                 if (evt->u.ue_error.physical_address_provided)
340                         printk("%s      Physical address: %016llx\n",
341                                level, evt->u.ue_error.physical_address);
342                 break;
343         case MCE_ERROR_TYPE_SLB:
344                 subtype = evt->u.slb_error.slb_error_type <
345                         ARRAY_SIZE(mc_slb_types) ?
346                         mc_slb_types[evt->u.slb_error.slb_error_type]
347                         : "Unknown";
348                 printk("%s  Error type: SLB [%s]\n", level, subtype);
349                 if (evt->u.slb_error.effective_address_provided)
350                         printk("%s    Effective address: %016llx\n",
351                                level, evt->u.slb_error.effective_address);
352                 break;
353         case MCE_ERROR_TYPE_ERAT:
354                 subtype = evt->u.erat_error.erat_error_type <
355                         ARRAY_SIZE(mc_erat_types) ?
356                         mc_erat_types[evt->u.erat_error.erat_error_type]
357                         : "Unknown";
358                 printk("%s  Error type: ERAT [%s]\n", level, subtype);
359                 if (evt->u.erat_error.effective_address_provided)
360                         printk("%s    Effective address: %016llx\n",
361                                level, evt->u.erat_error.effective_address);
362                 break;
363         case MCE_ERROR_TYPE_TLB:
364                 subtype = evt->u.tlb_error.tlb_error_type <
365                         ARRAY_SIZE(mc_tlb_types) ?
366                         mc_tlb_types[evt->u.tlb_error.tlb_error_type]
367                         : "Unknown";
368                 printk("%s  Error type: TLB [%s]\n", level, subtype);
369                 if (evt->u.tlb_error.effective_address_provided)
370                         printk("%s    Effective address: %016llx\n",
371                                level, evt->u.tlb_error.effective_address);
372                 break;
373         case MCE_ERROR_TYPE_USER:
374                 subtype = evt->u.user_error.user_error_type <
375                         ARRAY_SIZE(mc_user_types) ?
376                         mc_user_types[evt->u.user_error.user_error_type]
377                         : "Unknown";
378                 printk("%s  Error type: User [%s]\n", level, subtype);
379                 if (evt->u.user_error.effective_address_provided)
380                         printk("%s    Effective address: %016llx\n",
381                                level, evt->u.user_error.effective_address);
382                 break;
383         case MCE_ERROR_TYPE_RA:
384                 subtype = evt->u.ra_error.ra_error_type <
385                         ARRAY_SIZE(mc_ra_types) ?
386                         mc_ra_types[evt->u.ra_error.ra_error_type]
387                         : "Unknown";
388                 printk("%s  Error type: Real address [%s]\n", level, subtype);
389                 if (evt->u.ra_error.effective_address_provided)
390                         printk("%s    Effective address: %016llx\n",
391                                level, evt->u.ra_error.effective_address);
392                 break;
393         case MCE_ERROR_TYPE_LINK:
394                 subtype = evt->u.link_error.link_error_type <
395                         ARRAY_SIZE(mc_link_types) ?
396                         mc_link_types[evt->u.link_error.link_error_type]
397                         : "Unknown";
398                 printk("%s  Error type: Link [%s]\n", level, subtype);
399                 if (evt->u.link_error.effective_address_provided)
400                         printk("%s    Effective address: %016llx\n",
401                                level, evt->u.link_error.effective_address);
402                 break;
403         default:
404         case MCE_ERROR_TYPE_UNKNOWN:
405                 printk("%s  Error type: Unknown\n", level);
406                 break;
407         }
408 }
409 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
410
411 uint64_t get_mce_fault_addr(struct machine_check_event *evt)
412 {
413         switch (evt->error_type) {
414         case MCE_ERROR_TYPE_UE:
415                 if (evt->u.ue_error.effective_address_provided)
416                         return evt->u.ue_error.effective_address;
417                 break;
418         case MCE_ERROR_TYPE_SLB:
419                 if (evt->u.slb_error.effective_address_provided)
420                         return evt->u.slb_error.effective_address;
421                 break;
422         case MCE_ERROR_TYPE_ERAT:
423                 if (evt->u.erat_error.effective_address_provided)
424                         return evt->u.erat_error.effective_address;
425                 break;
426         case MCE_ERROR_TYPE_TLB:
427                 if (evt->u.tlb_error.effective_address_provided)
428                         return evt->u.tlb_error.effective_address;
429                 break;
430         case MCE_ERROR_TYPE_USER:
431                 if (evt->u.user_error.effective_address_provided)
432                         return evt->u.user_error.effective_address;
433                 break;
434         case MCE_ERROR_TYPE_RA:
435                 if (evt->u.ra_error.effective_address_provided)
436                         return evt->u.ra_error.effective_address;
437                 break;
438         case MCE_ERROR_TYPE_LINK:
439                 if (evt->u.link_error.effective_address_provided)
440                         return evt->u.link_error.effective_address;
441                 break;
442         default:
443         case MCE_ERROR_TYPE_UNKNOWN:
444                 break;
445         }
446         return 0;
447 }
448 EXPORT_SYMBOL(get_mce_fault_addr);