Merge tag 'acpi-5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
[sfrench/cifs-2.6.git] / drivers / firmware / efi / cper.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * UEFI Common Platform Error Record (CPER) support
4  *
5  * Copyright (C) 2010, Intel Corp.
6  *      Author: Huang Ying <ying.huang@intel.com>
7  *
8  * CPER is the format used to describe platform hardware error by
9  * various tables, such as ERST, BERT and HEST etc.
10  *
11  * For more information about CPER, please refer to Appendix N of UEFI
12  * Specification version 2.4.
13  */
14
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <linux/time.h>
18 #include <linux/cper.h>
19 #include <linux/dmi.h>
20 #include <linux/acpi.h>
21 #include <linux/pci.h>
22 #include <linux/aer.h>
23 #include <linux/printk.h>
24 #include <linux/bcd.h>
25 #include <acpi/ghes.h>
26 #include <ras/ras_event.h>
27
/*
 * Scratch buffer that cper_mem_err_unpack() and cper_print_mem() pass to
 * cper_mem_err_location() and cper_dimm_err_location() to format memory
 * error location strings.
 * NOTE(review): the buffer is shared and no locking is visible in this
 * file; presumably callers serialize access -- confirm.
 */
static char rcd_decode_str[CPER_REC_LEN];
29
30 /*
31  * CPER record ID need to be unique even after reboot, because record
32  * ID is used as index for ERST storage, while CPER records from
33  * multiple boot may co-exist in ERST.
34  */
35 u64 cper_next_record_id(void)
36 {
37         static atomic64_t seq;
38
39         if (!atomic64_read(&seq)) {
40                 time64_t time = ktime_get_real_seconds();
41
42                 /*
43                  * This code is unlikely to still be needed in year 2106,
44                  * but just in case, let's use a few more bits for timestamps
45                  * after y2038 to be sure they keep increasing monotonically
46                  * for the next few hundred years...
47                  */
48                 if (time < 0x80000000)
49                         atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
50                 else
51                         atomic64_set(&seq, 0x8000000000000000ull |
52                                            ktime_get_real_seconds() << 24);
53         }
54
55         return atomic64_inc_return(&seq);
56 }
57 EXPORT_SYMBOL_GPL(cper_next_record_id);
58
/* Names of the CPER severity codes, indexed by severity value. */
static const char * const severity_strs[] = {
        [0] = "recoverable",
        [1] = "fatal",
        [2] = "corrected",
        [3] = "info",
};

/*
 * cper_severity_str - look up the name of a CPER severity code
 * @severity: severity code
 *
 * Return: the severity_strs[] entry for @severity, or "unknown" when
 * @severity lies outside the table.
 */
const char *cper_severity_str(unsigned int severity)
{
        if (severity >= ARRAY_SIZE(severity_strs))
                return "unknown";

        return severity_strs[severity];
}
EXPORT_SYMBOL_GPL(cper_severity_str);
72
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * NULL entries in @strs are skipped.  If the output would become longer
 * than 80 characters, multiple lines are printed, with @pfx printed at
 * the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
                     const char * const strs[], unsigned int strs_size)
{
        unsigned int i, nbits = 8 * sizeof(bits);
        int len = 0;
        const char *str;
        char buf[84];

        /*
         * @bits cannot have bits set beyond its own width, and shifting
         * 1U by that width or more is undefined, so never look further.
         */
        if (strs_size < nbits)
                nbits = strs_size;

        for (i = 0; i < nbits; i++) {
                if (!(bits & (1U << i)))
                        continue;
                str = strs[i];
                if (!str)
                        continue;
                /* Flush the current line if this entry would not fit. */
                if (len && len + strlen(str) + 2 > 80) {
                        printk("%s\n", buf);
                        len = 0;
                }
                /*
                 * scnprintf() returns the number of characters actually
                 * stored, so len can never run past the end of buf even
                 * when @pfx or an entry of @strs is unexpectedly long.
                 */
                if (!len)
                        len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
                else
                        len += scnprintf(buf + len, sizeof(buf) - len,
                                         ", %s", str);
        }
        if (len)
                printk("%s\n", buf);
}
109
/* Processor type names, indexed by the proc_type field value. */
static const char * const proc_type_strs[] = {
        [0] = "IA32/X64",
        [1] = "IA64",
        [2] = "ARM",
};

/* Processor ISA names, indexed by the proc_isa field value. */
static const char * const proc_isa_strs[] = {
        [0] = "IA32",
        [1] = "IA64",
        [2] = "X64",
        [3] = "ARM A32/T32",
        [4] = "ARM A64",
};

/*
 * Processor error type names, indexed by bit position in the
 * proc_error_type mask.  Not static, so other files can reference it.
 */
const char * const cper_proc_error_type_strs[] = {
        [0] = "cache error",
        [1] = "TLB error",
        [2] = "bus error",
        [3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field value. */
static const char * const proc_op_strs[] = {
        [0] = "unknown or generic",
        [1] = "data read",
        [2] = "data write",
        [3] = "instruction execution",
};

/* Flag names, indexed by bit position in the flags mask. */
static const char * const proc_flag_strs[] = {
        [0] = "restartable",
        [1] = "precise IP",
        [2] = "overflow",
        [3] = "corrected",
};
144
145 static void cper_print_proc_generic(const char *pfx,
146                                     const struct cper_sec_proc_generic *proc)
147 {
148         if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149                 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150                        proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151                        proc_type_strs[proc->proc_type] : "unknown");
152         if (proc->validation_bits & CPER_PROC_VALID_ISA)
153                 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154                        proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155                        proc_isa_strs[proc->proc_isa] : "unknown");
156         if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157                 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158                 cper_print_bits(pfx, proc->proc_error_type,
159                                 cper_proc_error_type_strs,
160                                 ARRAY_SIZE(cper_proc_error_type_strs));
161         }
162         if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163                 printk("%s""operation: %d, %s\n", pfx, proc->operation,
164                        proc->operation < ARRAY_SIZE(proc_op_strs) ?
165                        proc_op_strs[proc->operation] : "unknown");
166         if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167                 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168                 cper_print_bits(pfx, proc->flags, proc_flag_strs,
169                                 ARRAY_SIZE(proc_flag_strs));
170         }
171         if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172                 printk("%s""level: %d\n", pfx, proc->level);
173         if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174                 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175         if (proc->validation_bits & CPER_PROC_VALID_ID)
176                 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177         if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178                 printk("%s""target_address: 0x%016llx\n",
179                        pfx, proc->target_addr);
180         if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181                 printk("%s""requestor_id: 0x%016llx\n",
182                        pfx, proc->requestor_id);
183         if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184                 printk("%s""responder_id: 0x%016llx\n",
185                        pfx, proc->responder_id);
186         if (proc->validation_bits & CPER_PROC_VALID_IP)
187                 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
188 }
189
/* Memory error type names, indexed by the error type value. */
static const char * const mem_err_type_strs[] = {
        [0]  = "unknown",
        [1]  = "no error",
        [2]  = "single-bit ECC",
        [3]  = "multi-bit ECC",
        [4]  = "single-symbol chipkill ECC",
        [5]  = "multi-symbol chipkill ECC",
        [6]  = "master abort",
        [7]  = "target abort",
        [8]  = "parity error",
        [9]  = "watchdog timeout",
        [10] = "invalid address",
        [11] = "mirror Broken",
        [12] = "memory sparing",
        [13] = "scrub corrected error",
        [14] = "scrub uncorrected error",
        [15] = "physical memory map-out event",
};

/*
 * cper_mem_err_type_str - look up the name of a memory error type
 * @etype: memory error type value
 *
 * Return: the mem_err_type_strs[] entry for @etype, or "unknown" when
 * @etype lies outside the table.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
        if (etype >= ARRAY_SIZE(mem_err_type_strs))
                return "unknown";

        return mem_err_type_strs[etype];
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215
216 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
217 {
218         u32 len, n;
219
220         if (!msg)
221                 return 0;
222
223         n = 0;
224         len = CPER_REC_LEN - 1;
225         if (mem->validation_bits & CPER_MEM_VALID_NODE)
226                 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227         if (mem->validation_bits & CPER_MEM_VALID_CARD)
228                 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229         if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230                 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231         if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232                 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233         if (mem->validation_bits & CPER_MEM_VALID_BANK)
234                 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235         if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
236                 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
237         if (mem->validation_bits & CPER_MEM_VALID_ROW)
238                 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
239         if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
240                 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
241         if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
242                 n += scnprintf(msg + n, len - n, "bit_position: %d ",
243                                mem->bit_pos);
244         if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
245                 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246                                mem->requestor_id);
247         if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
248                 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249                                mem->responder_id);
250         if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
251                 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
252                           mem->target_id);
253
254         msg[n] = '\0';
255         return n;
256 }
257
258 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
259 {
260         u32 len, n;
261         const char *bank = NULL, *device = NULL;
262
263         if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
264                 return 0;
265
266         n = 0;
267         len = CPER_REC_LEN - 1;
268         dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269         if (bank && device)
270                 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271         else
272                 n = snprintf(msg, len,
273                              "DIMM location: not present. DMI handle: 0x%.4x ",
274                              mem->mem_dev_handle);
275
276         msg[n] = '\0';
277         return n;
278 }
279
280 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
281                        struct cper_mem_err_compact *cmem)
282 {
283         cmem->validation_bits = mem->validation_bits;
284         cmem->node = mem->node;
285         cmem->card = mem->card;
286         cmem->module = mem->module;
287         cmem->bank = mem->bank;
288         cmem->device = mem->device;
289         cmem->row = mem->row;
290         cmem->column = mem->column;
291         cmem->bit_pos = mem->bit_pos;
292         cmem->requestor_id = mem->requestor_id;
293         cmem->responder_id = mem->responder_id;
294         cmem->target_id = mem->target_id;
295         cmem->rank = mem->rank;
296         cmem->mem_array_handle = mem->mem_array_handle;
297         cmem->mem_dev_handle = mem->mem_dev_handle;
298 }
299
300 const char *cper_mem_err_unpack(struct trace_seq *p,
301                                 struct cper_mem_err_compact *cmem)
302 {
303         const char *ret = trace_seq_buffer_ptr(p);
304
305         if (cper_mem_err_location(cmem, rcd_decode_str))
306                 trace_seq_printf(p, "%s", rcd_decode_str);
307         if (cper_dimm_err_location(cmem, rcd_decode_str))
308                 trace_seq_printf(p, "%s", rcd_decode_str);
309         trace_seq_putc(p, '\0');
310
311         return ret;
312 }
313
314 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
315         int len)
316 {
317         struct cper_mem_err_compact cmem;
318
319         /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320         if (len == sizeof(struct cper_sec_mem_err_old) &&
321             (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
322                 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
323                 return;
324         }
325         if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
326                 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
327         if (mem->validation_bits & CPER_MEM_VALID_PA)
328                 printk("%s""physical_address: 0x%016llx\n",
329                        pfx, mem->physical_addr);
330         if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
331                 printk("%s""physical_address_mask: 0x%016llx\n",
332                        pfx, mem->physical_addr_mask);
333         cper_mem_err_pack(mem, &cmem);
334         if (cper_mem_err_location(&cmem, rcd_decode_str))
335                 printk("%s%s\n", pfx, rcd_decode_str);
336         if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
337                 u8 etype = mem->error_type;
338                 printk("%s""error_type: %d, %s\n", pfx, etype,
339                        cper_mem_err_type_str(etype));
340         }
341         if (cper_dimm_err_location(&cmem, rcd_decode_str))
342                 printk("%s%s\n", pfx, rcd_decode_str);
343 }
344
/* PCIe port type names, indexed by the port_type field value. */
static const char * const pcie_port_type_strs[] = {
        [0]  = "PCIe end point",
        [1]  = "legacy PCI end point",
        [2]  = "unknown",
        [3]  = "unknown",
        [4]  = "root port",
        [5]  = "upstream switch port",
        [6]  = "downstream switch port",
        [7]  = "PCIe to PCI/PCI-X bridge",
        [8]  = "PCI/PCI-X to PCIe bridge",
        [9]  = "root complex integrated endpoint device",
        [10] = "root complex event collector",
};
358
359 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
360                             const struct acpi_hest_generic_data *gdata)
361 {
362         if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
363                 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
364                        pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
365                        pcie_port_type_strs[pcie->port_type] : "unknown");
366         if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
367                 printk("%s""version: %d.%d\n", pfx,
368                        pcie->version.major, pcie->version.minor);
369         if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
370                 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
371                        pcie->command, pcie->status);
372         if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373                 const __u8 *p;
374                 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
375                        pcie->device_id.segment, pcie->device_id.bus,
376                        pcie->device_id.device, pcie->device_id.function);
377                 printk("%s""slot: %d\n", pfx,
378                        pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
379                 printk("%s""secondary_bus: 0x%02x\n", pfx,
380                        pcie->device_id.secondary_bus);
381                 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
382                        pcie->device_id.vendor_id, pcie->device_id.device_id);
383                 p = pcie->device_id.class_code;
384                 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
385         }
386         if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
387                 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
388                        pcie->serial_number.lower, pcie->serial_number.upper);
389         if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390                 printk(
391         "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392         pfx, pcie->bridge.secondary_status, pcie->bridge.control);
393 }
394
395 static void cper_print_tstamp(const char *pfx,
396                                    struct acpi_hest_generic_data_v300 *gdata)
397 {
398         __u8 hour, min, sec, day, mon, year, century, *timestamp;
399
400         if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
401                 timestamp = (__u8 *)&(gdata->time_stamp);
402                 sec       = bcd2bin(timestamp[0]);
403                 min       = bcd2bin(timestamp[1]);
404                 hour      = bcd2bin(timestamp[2]);
405                 day       = bcd2bin(timestamp[4]);
406                 mon       = bcd2bin(timestamp[5]);
407                 year      = bcd2bin(timestamp[6]);
408                 century   = bcd2bin(timestamp[7]);
409
410                 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
411                        (timestamp[3] & 0x1 ? "precise " : "imprecise "),
412                        century, year, mon, day, hour, min, sec);
413         }
414 }
415
416 static void
417 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
418                            int sec_no)
419 {
420         guid_t *sec_type = (guid_t *)gdata->section_type;
421         __u16 severity;
422         char newpfx[64];
423
424         if (acpi_hest_get_version(gdata) >= 3)
425                 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
426
427         severity = gdata->error_severity;
428         printk("%s""Error %d, type: %s\n", pfx, sec_no,
429                cper_severity_str(severity));
430         if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
431                 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
432         if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
433                 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
434
435         snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
436         if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
437                 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
438
439                 printk("%s""section_type: general processor error\n", newpfx);
440                 if (gdata->error_data_length >= sizeof(*proc_err))
441                         cper_print_proc_generic(newpfx, proc_err);
442                 else
443                         goto err_section_too_small;
444         } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
445                 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
446
447                 printk("%s""section_type: memory error\n", newpfx);
448                 if (gdata->error_data_length >=
449                     sizeof(struct cper_sec_mem_err_old))
450                         cper_print_mem(newpfx, mem_err,
451                                        gdata->error_data_length);
452                 else
453                         goto err_section_too_small;
454         } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
455                 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
456
457                 printk("%s""section_type: PCIe error\n", newpfx);
458                 if (gdata->error_data_length >= sizeof(*pcie))
459                         cper_print_pcie(newpfx, pcie, gdata);
460                 else
461                         goto err_section_too_small;
462 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
463         } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
464                 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
465
466                 printk("%ssection_type: ARM processor error\n", newpfx);
467                 if (gdata->error_data_length >= sizeof(*arm_err))
468                         cper_print_proc_arm(newpfx, arm_err);
469                 else
470                         goto err_section_too_small;
471 #endif
472 #if defined(CONFIG_UEFI_CPER_X86)
473         } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
474                 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
475
476                 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
477                 if (gdata->error_data_length >= sizeof(*ia_err))
478                         cper_print_proc_ia(newpfx, ia_err);
479                 else
480                         goto err_section_too_small;
481 #endif
482         } else {
483                 const void *err = acpi_hest_get_payload(gdata);
484
485                 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
486                 printk("%ssection length: %#x\n", newpfx,
487                        gdata->error_data_length);
488                 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
489                                gdata->error_data_length, true);
490         }
491
492         return;
493
494 err_section_too_small:
495         pr_err(FW_WARN "error section length is too small\n");
496 }
497
498 void cper_estatus_print(const char *pfx,
499                         const struct acpi_hest_generic_status *estatus)
500 {
501         struct acpi_hest_generic_data *gdata;
502         int sec_no = 0;
503         char newpfx[64];
504         __u16 severity;
505
506         severity = estatus->error_severity;
507         if (severity == CPER_SEV_CORRECTED)
508                 printk("%s%s\n", pfx,
509                        "It has been corrected by h/w "
510                        "and requires no further action");
511         printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
512         snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
513
514         apei_estatus_for_each_section(estatus, gdata) {
515                 cper_estatus_print_section(newpfx, gdata, sec_no);
516                 sec_no++;
517         }
518 }
519 EXPORT_SYMBOL_GPL(cper_estatus_print);
520
521 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
522 {
523         if (estatus->data_length &&
524             estatus->data_length < sizeof(struct acpi_hest_generic_data))
525                 return -EINVAL;
526         if (estatus->raw_data_length &&
527             estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
528                 return -EINVAL;
529
530         return 0;
531 }
532 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
533
534 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
535 {
536         struct acpi_hest_generic_data *gdata;
537         unsigned int data_len, record_size;
538         int rc;
539
540         rc = cper_estatus_check_header(estatus);
541         if (rc)
542                 return rc;
543
544         data_len = estatus->data_length;
545
546         apei_estatus_for_each_section(estatus, gdata) {
547                 if (sizeof(struct acpi_hest_generic_data) > data_len)
548                         return -EINVAL;
549
550                 record_size = acpi_hest_get_record_size(gdata);
551                 if (record_size > data_len)
552                         return -EINVAL;
553
554                 data_len -= record_size;
555         }
556         if (data_len)
557                 return -EINVAL;
558
559         return 0;
560 }
561 EXPORT_SYMBOL_GPL(cper_estatus_check);