Merge tag 'v4.19-rc2' into next-general
[sfrench/cifs-2.6.git] / drivers / firmware / efi / cper.c
1 /*
2  * UEFI Common Platform Error Record (CPER) support
3  *
4  * Copyright (C) 2010, Intel Corp.
5  *      Author: Huang Ying <ying.huang@intel.com>
6  *
7  * CPER is the format used to describe platform hardware error by
8  * various tables, such as ERST, BERT and HEST etc.
9  *
10  * For more information about CPER, please refer to Appendix N of UEFI
11  * Specification version 2.4.
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25  */
26
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 #include <linux/printk.h>
36 #include <linux/bcd.h>
37 #include <acpi/ghes.h>
38 #include <ras/ras_event.h>
39
/*
 * Scratch buffer of CPER_REC_LEN bytes that the memory-error location
 * formatters in this file write their text into before it is printed or
 * appended to a trace sequence.
 * NOTE(review): the buffer is file-scope and no locking is visible here;
 * confirm that callers serialise access.
 */
40 static char rcd_decode_str[CPER_REC_LEN];
41
42 /*
43  * CPER record ID need to be unique even after reboot, because record
44  * ID is used as index for ERST storage, while CPER records from
45  * multiple boot may co-exist in ERST.
46  */
47 u64 cper_next_record_id(void)
48 {
49         static atomic64_t seq;
50
51         if (!atomic64_read(&seq)) {
52                 time64_t time = ktime_get_real_seconds();
53
54                 /*
55                  * This code is unlikely to still be needed in year 2106,
56                  * but just in case, let's use a few more bits for timestamps
57                  * after y2038 to be sure they keep increasing monotonically
58                  * for the next few hundred years...
59                  */
60                 if (time < 0x80000000)
61                         atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
62                 else
63                         atomic64_set(&seq, 0x8000000000000000ull |
64                                            ktime_get_real_seconds() << 24);
65         }
66
67         return atomic64_inc_return(&seq);
68 }
69 EXPORT_SYMBOL_GPL(cper_next_record_id);
70
/* Severity names, indexed by the numeric severity value. */
static const char * const severity_strs[] = {
        [0] = "recoverable",
        [1] = "fatal",
        [2] = "corrected",
        [3] = "info",
};
77
78 const char *cper_severity_str(unsigned int severity)
79 {
80         return severity < ARRAY_SIZE(severity_strs) ?
81                 severity_strs[severity] : "unknown";
82 }
83 EXPORT_SYMBOL_GPL(cper_severity_str);
84
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs;
 * NULL entries in @strs are skipped.  If the output would grow longer than
 * 80 characters, multiple lines are printed, with @pfx printed at the
 * beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
                     const char * const strs[], unsigned int strs_size)
{
        unsigned int i;
        int len = 0;
        const char *str;
        char buf[84];

        /*
         * @bits cannot have a bit set beyond its own width, and shifting
         * 1U by that many positions or more is undefined, so never look
         * past the width of @bits.
         */
        if (strs_size > 8 * sizeof(bits))
                strs_size = 8 * sizeof(bits);

        for (i = 0; i < strs_size; i++) {
                if (!(bits & (1U << i)))
                        continue;
                str = strs[i];
                if (!str)
                        continue;
                /* Flush the pending line if ", <str>" would not fit. */
                if (len && len + strlen(str) + 2 > 80) {
                        printk("%s\n", buf);
                        len = 0;
                }
                /*
                 * scnprintf() returns the number of characters actually
                 * stored, so len can never exceed sizeof(buf) and
                 * "sizeof(buf) - len" cannot wrap around.
                 */
                if (!len)
                        len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
                else
                        len += scnprintf(buf + len, sizeof(buf) - len,
                                         ", %s", str);
        }
        if (len)
                printk("%s\n", buf);
}
121
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
        [0] = "IA32/X64",
        [1] = "IA64",
        [2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
        [0] = "IA32",
        [1] = "IA64",
        [2] = "X64",
        [3] = "ARM A32/T32",
        [4] = "ARM A64",
};

/* Processor error type names, indexed by bit position. */
const char * const cper_proc_error_type_strs[] = {
        [0] = "cache error",
        [1] = "TLB error",
        [2] = "bus error",
        [3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
        [0] = "unknown or generic",
        [1] = "data read",
        [2] = "data write",
        [3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
        [0] = "restartable",
        [1] = "precise IP",
        [2] = "overflow",
        [3] = "corrected",
};
156
157 static void cper_print_proc_generic(const char *pfx,
158                                     const struct cper_sec_proc_generic *proc)
159 {
160         if (proc->validation_bits & CPER_PROC_VALID_TYPE)
161                 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
162                        proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
163                        proc_type_strs[proc->proc_type] : "unknown");
164         if (proc->validation_bits & CPER_PROC_VALID_ISA)
165                 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
166                        proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
167                        proc_isa_strs[proc->proc_isa] : "unknown");
168         if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
169                 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
170                 cper_print_bits(pfx, proc->proc_error_type,
171                                 cper_proc_error_type_strs,
172                                 ARRAY_SIZE(cper_proc_error_type_strs));
173         }
174         if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
175                 printk("%s""operation: %d, %s\n", pfx, proc->operation,
176                        proc->operation < ARRAY_SIZE(proc_op_strs) ?
177                        proc_op_strs[proc->operation] : "unknown");
178         if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
179                 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
180                 cper_print_bits(pfx, proc->flags, proc_flag_strs,
181                                 ARRAY_SIZE(proc_flag_strs));
182         }
183         if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
184                 printk("%s""level: %d\n", pfx, proc->level);
185         if (proc->validation_bits & CPER_PROC_VALID_VERSION)
186                 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
187         if (proc->validation_bits & CPER_PROC_VALID_ID)
188                 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
189         if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
190                 printk("%s""target_address: 0x%016llx\n",
191                        pfx, proc->target_addr);
192         if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
193                 printk("%s""requestor_id: 0x%016llx\n",
194                        pfx, proc->requestor_id);
195         if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
196                 printk("%s""responder_id: 0x%016llx\n",
197                        pfx, proc->responder_id);
198         if (proc->validation_bits & CPER_PROC_VALID_IP)
199                 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
200 }
201
/* Memory error type names, indexed by the error type value. */
static const char * const mem_err_type_strs[] = {
        [0]  = "unknown",
        [1]  = "no error",
        [2]  = "single-bit ECC",
        [3]  = "multi-bit ECC",
        [4]  = "single-symbol chipkill ECC",
        [5]  = "multi-symbol chipkill ECC",
        [6]  = "master abort",
        [7]  = "target abort",
        [8]  = "parity error",
        [9]  = "watchdog timeout",
        [10] = "invalid address",
        [11] = "mirror Broken",
        [12] = "memory sparing",
        [13] = "scrub corrected error",
        [14] = "scrub uncorrected error",
        [15] = "physical memory map-out event",
};
220
221 const char *cper_mem_err_type_str(unsigned int etype)
222 {
223         return etype < ARRAY_SIZE(mem_err_type_strs) ?
224                 mem_err_type_strs[etype] : "unknown";
225 }
226 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
227
228 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
229 {
230         u32 len, n;
231
232         if (!msg)
233                 return 0;
234
235         n = 0;
236         len = CPER_REC_LEN - 1;
237         if (mem->validation_bits & CPER_MEM_VALID_NODE)
238                 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
239         if (mem->validation_bits & CPER_MEM_VALID_CARD)
240                 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
241         if (mem->validation_bits & CPER_MEM_VALID_MODULE)
242                 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
243         if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
244                 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
245         if (mem->validation_bits & CPER_MEM_VALID_BANK)
246                 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
247         if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
248                 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
249         if (mem->validation_bits & CPER_MEM_VALID_ROW)
250                 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
251         if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
252                 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
253         if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
254                 n += scnprintf(msg + n, len - n, "bit_position: %d ",
255                                mem->bit_pos);
256         if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
257                 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
258                                mem->requestor_id);
259         if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
260                 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
261                                mem->responder_id);
262         if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
263                 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
264                           mem->target_id);
265
266         msg[n] = '\0';
267         return n;
268 }
269
270 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
271 {
272         u32 len, n;
273         const char *bank = NULL, *device = NULL;
274
275         if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
276                 return 0;
277
278         n = 0;
279         len = CPER_REC_LEN - 1;
280         dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
281         if (bank && device)
282                 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
283         else
284                 n = snprintf(msg, len,
285                              "DIMM location: not present. DMI handle: 0x%.4x ",
286                              mem->mem_dev_handle);
287
288         msg[n] = '\0';
289         return n;
290 }
291
292 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
293                        struct cper_mem_err_compact *cmem)
294 {
295         cmem->validation_bits = mem->validation_bits;
296         cmem->node = mem->node;
297         cmem->card = mem->card;
298         cmem->module = mem->module;
299         cmem->bank = mem->bank;
300         cmem->device = mem->device;
301         cmem->row = mem->row;
302         cmem->column = mem->column;
303         cmem->bit_pos = mem->bit_pos;
304         cmem->requestor_id = mem->requestor_id;
305         cmem->responder_id = mem->responder_id;
306         cmem->target_id = mem->target_id;
307         cmem->rank = mem->rank;
308         cmem->mem_array_handle = mem->mem_array_handle;
309         cmem->mem_dev_handle = mem->mem_dev_handle;
310 }
311
312 const char *cper_mem_err_unpack(struct trace_seq *p,
313                                 struct cper_mem_err_compact *cmem)
314 {
315         const char *ret = trace_seq_buffer_ptr(p);
316
317         if (cper_mem_err_location(cmem, rcd_decode_str))
318                 trace_seq_printf(p, "%s", rcd_decode_str);
319         if (cper_dimm_err_location(cmem, rcd_decode_str))
320                 trace_seq_printf(p, "%s", rcd_decode_str);
321         trace_seq_putc(p, '\0');
322
323         return ret;
324 }
325
326 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
327         int len)
328 {
329         struct cper_mem_err_compact cmem;
330
331         /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
332         if (len == sizeof(struct cper_sec_mem_err_old) &&
333             (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
334                 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
335                 return;
336         }
337         if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
338                 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
339         if (mem->validation_bits & CPER_MEM_VALID_PA)
340                 printk("%s""physical_address: 0x%016llx\n",
341                        pfx, mem->physical_addr);
342         if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
343                 printk("%s""physical_address_mask: 0x%016llx\n",
344                        pfx, mem->physical_addr_mask);
345         cper_mem_err_pack(mem, &cmem);
346         if (cper_mem_err_location(&cmem, rcd_decode_str))
347                 printk("%s%s\n", pfx, rcd_decode_str);
348         if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
349                 u8 etype = mem->error_type;
350                 printk("%s""error_type: %d, %s\n", pfx, etype,
351                        cper_mem_err_type_str(etype));
352         }
353         if (cper_dimm_err_location(&cmem, rcd_decode_str))
354                 printk("%s%s\n", pfx, rcd_decode_str);
355 }
356
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
        [0]  = "PCIe end point",
        [1]  = "legacy PCI end point",
        [2]  = "unknown",
        [3]  = "unknown",
        [4]  = "root port",
        [5]  = "upstream switch port",
        [6]  = "downstream switch port",
        [7]  = "PCIe to PCI/PCI-X bridge",
        [8]  = "PCI/PCI-X to PCIe bridge",
        [9]  = "root complex integrated endpoint device",
        [10] = "root complex event collector",
};
370
371 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
372                             const struct acpi_hest_generic_data *gdata)
373 {
374         if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
375                 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
376                        pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
377                        pcie_port_type_strs[pcie->port_type] : "unknown");
378         if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
379                 printk("%s""version: %d.%d\n", pfx,
380                        pcie->version.major, pcie->version.minor);
381         if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
382                 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
383                        pcie->command, pcie->status);
384         if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
385                 const __u8 *p;
386                 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
387                        pcie->device_id.segment, pcie->device_id.bus,
388                        pcie->device_id.device, pcie->device_id.function);
389                 printk("%s""slot: %d\n", pfx,
390                        pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
391                 printk("%s""secondary_bus: 0x%02x\n", pfx,
392                        pcie->device_id.secondary_bus);
393                 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
394                        pcie->device_id.vendor_id, pcie->device_id.device_id);
395                 p = pcie->device_id.class_code;
396                 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
397         }
398         if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
399                 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
400                        pcie->serial_number.lower, pcie->serial_number.upper);
401         if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
402                 printk(
403         "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
404         pfx, pcie->bridge.secondary_status, pcie->bridge.control);
405 }
406
407 static void cper_print_tstamp(const char *pfx,
408                                    struct acpi_hest_generic_data_v300 *gdata)
409 {
410         __u8 hour, min, sec, day, mon, year, century, *timestamp;
411
412         if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
413                 timestamp = (__u8 *)&(gdata->time_stamp);
414                 sec       = bcd2bin(timestamp[0]);
415                 min       = bcd2bin(timestamp[1]);
416                 hour      = bcd2bin(timestamp[2]);
417                 day       = bcd2bin(timestamp[4]);
418                 mon       = bcd2bin(timestamp[5]);
419                 year      = bcd2bin(timestamp[6]);
420                 century   = bcd2bin(timestamp[7]);
421
422                 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
423                        (timestamp[3] & 0x1 ? "precise " : "imprecise "),
424                        century, year, mon, day, hour, min, sec);
425         }
426 }
427
428 static void
429 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
430                            int sec_no)
431 {
432         guid_t *sec_type = (guid_t *)gdata->section_type;
433         __u16 severity;
434         char newpfx[64];
435
436         if (acpi_hest_get_version(gdata) >= 3)
437                 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
438
439         severity = gdata->error_severity;
440         printk("%s""Error %d, type: %s\n", pfx, sec_no,
441                cper_severity_str(severity));
442         if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
443                 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
444         if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
445                 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
446
447         snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
448         if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
449                 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
450
451                 printk("%s""section_type: general processor error\n", newpfx);
452                 if (gdata->error_data_length >= sizeof(*proc_err))
453                         cper_print_proc_generic(newpfx, proc_err);
454                 else
455                         goto err_section_too_small;
456         } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
457                 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
458
459                 printk("%s""section_type: memory error\n", newpfx);
460                 if (gdata->error_data_length >=
461                     sizeof(struct cper_sec_mem_err_old))
462                         cper_print_mem(newpfx, mem_err,
463                                        gdata->error_data_length);
464                 else
465                         goto err_section_too_small;
466         } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
467                 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
468
469                 printk("%s""section_type: PCIe error\n", newpfx);
470                 if (gdata->error_data_length >= sizeof(*pcie))
471                         cper_print_pcie(newpfx, pcie, gdata);
472                 else
473                         goto err_section_too_small;
474 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
475         } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
476                 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
477
478                 printk("%ssection_type: ARM processor error\n", newpfx);
479                 if (gdata->error_data_length >= sizeof(*arm_err))
480                         cper_print_proc_arm(newpfx, arm_err);
481                 else
482                         goto err_section_too_small;
483 #endif
484 #if defined(CONFIG_UEFI_CPER_X86)
485         } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
486                 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
487
488                 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
489                 if (gdata->error_data_length >= sizeof(*ia_err))
490                         cper_print_proc_ia(newpfx, ia_err);
491                 else
492                         goto err_section_too_small;
493 #endif
494         } else {
495                 const void *err = acpi_hest_get_payload(gdata);
496
497                 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
498                 printk("%ssection length: %#x\n", newpfx,
499                        gdata->error_data_length);
500                 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
501                                gdata->error_data_length, true);
502         }
503
504         return;
505
506 err_section_too_small:
507         pr_err(FW_WARN "error section length is too small\n");
508 }
509
510 void cper_estatus_print(const char *pfx,
511                         const struct acpi_hest_generic_status *estatus)
512 {
513         struct acpi_hest_generic_data *gdata;
514         int sec_no = 0;
515         char newpfx[64];
516         __u16 severity;
517
518         severity = estatus->error_severity;
519         if (severity == CPER_SEV_CORRECTED)
520                 printk("%s%s\n", pfx,
521                        "It has been corrected by h/w "
522                        "and requires no further action");
523         printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
524         snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
525
526         apei_estatus_for_each_section(estatus, gdata) {
527                 cper_estatus_print_section(newpfx, gdata, sec_no);
528                 sec_no++;
529         }
530 }
531 EXPORT_SYMBOL_GPL(cper_estatus_print);
532
533 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
534 {
535         if (estatus->data_length &&
536             estatus->data_length < sizeof(struct acpi_hest_generic_data))
537                 return -EINVAL;
538         if (estatus->raw_data_length &&
539             estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
540                 return -EINVAL;
541
542         return 0;
543 }
544 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
545
546 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
547 {
548         struct acpi_hest_generic_data *gdata;
549         unsigned int data_len, gedata_len;
550         int rc;
551
552         rc = cper_estatus_check_header(estatus);
553         if (rc)
554                 return rc;
555         data_len = estatus->data_length;
556
557         apei_estatus_for_each_section(estatus, gdata) {
558                 gedata_len = acpi_hest_get_error_length(gdata);
559                 if (gedata_len > data_len - acpi_hest_get_size(gdata))
560                         return -EINVAL;
561                 data_len -= acpi_hest_get_record_size(gdata);
562         }
563         if (data_len)
564                 return -EINVAL;
565
566         return 0;
567 }
568 EXPORT_SYMBOL_GPL(cper_estatus_check);