Merge tag 'devdax-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm...
[sfrench/cifs-2.6.git] / drivers / acpi / numa.c
1 /*
2  *  acpi_numa.c - ACPI NUMA support
3  *
4  *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
5  *
6  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7  *
8  *  This program is free software; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19  *
20  */
21
22 #define pr_fmt(fmt) "ACPI: " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/kernel.h>
27 #include <linux/types.h>
28 #include <linux/errno.h>
29 #include <linux/acpi.h>
30 #include <linux/memblock.h>
31 #include <linux/numa.h>
32 #include <linux/nodemask.h>
33 #include <linux/topology.h>
34
35 static nodemask_t nodes_found_map = NODE_MASK_NONE;
36
37 /* maps to convert between proximity domain and logical node ID */
38 static int pxm_to_node_map[MAX_PXM_DOMAINS]
39                         = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
40 static int node_to_pxm_map[MAX_NUMNODES]
41                         = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
42
43 unsigned char acpi_srat_revision __initdata;
44 int acpi_numa __initdata;
45
46 int pxm_to_node(int pxm)
47 {
48         if (pxm < 0)
49                 return NUMA_NO_NODE;
50         return pxm_to_node_map[pxm];
51 }
52
53 int node_to_pxm(int node)
54 {
55         if (node < 0)
56                 return PXM_INVAL;
57         return node_to_pxm_map[node];
58 }
59
60 static void __acpi_map_pxm_to_node(int pxm, int node)
61 {
62         if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm])
63                 pxm_to_node_map[pxm] = node;
64         if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node])
65                 node_to_pxm_map[node] = pxm;
66 }
67
68 int acpi_map_pxm_to_node(int pxm)
69 {
70         int node;
71
72         if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off)
73                 return NUMA_NO_NODE;
74
75         node = pxm_to_node_map[pxm];
76
77         if (node == NUMA_NO_NODE) {
78                 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
79                         return NUMA_NO_NODE;
80                 node = first_unset_node(nodes_found_map);
81                 __acpi_map_pxm_to_node(pxm, node);
82                 node_set(node, nodes_found_map);
83         }
84
85         return node;
86 }
87 EXPORT_SYMBOL(acpi_map_pxm_to_node);
88
89 /**
90  * acpi_map_pxm_to_online_node - Map proximity ID to online node
91  * @pxm: ACPI proximity ID
92  *
93  * This is similar to acpi_map_pxm_to_node(), but always returns an online
94  * node.  When the mapped node from a given proximity ID is offline, it
95  * looks up the node distance table and returns the nearest online node.
96  *
97  * ACPI device drivers, which are called after the NUMA initialization has
98  * completed in the kernel, can call this interface to obtain their device
99  * NUMA topology from ACPI tables.  Such drivers do not have to deal with
100  * offline nodes.  A node may be offline when a device proximity ID is
101  * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
102  * "numa=off" on x86.
103  */
104 int acpi_map_pxm_to_online_node(int pxm)
105 {
106         int node, min_node;
107
108         node = acpi_map_pxm_to_node(pxm);
109
110         if (node == NUMA_NO_NODE)
111                 node = 0;
112
113         min_node = node;
114         if (!node_online(node)) {
115                 int min_dist = INT_MAX, dist, n;
116
117                 for_each_online_node(n) {
118                         dist = node_distance(node, n);
119                         if (dist < min_dist) {
120                                 min_dist = dist;
121                                 min_node = n;
122                         }
123                 }
124         }
125
126         return min_node;
127 }
128 EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
129
130 static void __init
131 acpi_table_print_srat_entry(struct acpi_subtable_header *header)
132 {
133         switch (header->type) {
134         case ACPI_SRAT_TYPE_CPU_AFFINITY:
135                 {
136                         struct acpi_srat_cpu_affinity *p =
137                             (struct acpi_srat_cpu_affinity *)header;
138                         pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
139                                  p->apic_id, p->local_sapic_eid,
140                                  p->proximity_domain_lo,
141                                  (p->flags & ACPI_SRAT_CPU_ENABLED) ?
142                                  "enabled" : "disabled");
143                 }
144                 break;
145
146         case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
147                 {
148                         struct acpi_srat_mem_affinity *p =
149                             (struct acpi_srat_mem_affinity *)header;
150                         pr_debug("SRAT Memory (0x%llx length 0x%llx) in proximity domain %d %s%s%s\n",
151                                  (unsigned long long)p->base_address,
152                                  (unsigned long long)p->length,
153                                  p->proximity_domain,
154                                  (p->flags & ACPI_SRAT_MEM_ENABLED) ?
155                                  "enabled" : "disabled",
156                                  (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
157                                  " hot-pluggable" : "",
158                                  (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ?
159                                  " non-volatile" : "");
160                 }
161                 break;
162
163         case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
164                 {
165                         struct acpi_srat_x2apic_cpu_affinity *p =
166                             (struct acpi_srat_x2apic_cpu_affinity *)header;
167                         pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
168                                  p->apic_id,
169                                  p->proximity_domain,
170                                  (p->flags & ACPI_SRAT_CPU_ENABLED) ?
171                                  "enabled" : "disabled");
172                 }
173                 break;
174
175         case ACPI_SRAT_TYPE_GICC_AFFINITY:
176                 {
177                         struct acpi_srat_gicc_affinity *p =
178                             (struct acpi_srat_gicc_affinity *)header;
179                         pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
180                                  p->acpi_processor_uid,
181                                  p->proximity_domain,
182                                  (p->flags & ACPI_SRAT_GICC_ENABLED) ?
183                                  "enabled" : "disabled");
184                 }
185                 break;
186
187         default:
188                 pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
189                         header->type);
190                 break;
191         }
192 }
193
194 /*
195  * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
196  * up the NUMA heuristics which wants the local node to have a smaller
197  * distance than the others.
198  * Do some quick checks here and only use the SLIT if it passes.
199  */
200 static int __init slit_valid(struct acpi_table_slit *slit)
201 {
202         int i, j;
203         int d = slit->locality_count;
204         for (i = 0; i < d; i++) {
205                 for (j = 0; j < d; j++)  {
206                         u8 val = slit->entry[d*i + j];
207                         if (i == j) {
208                                 if (val != LOCAL_DISTANCE)
209                                         return 0;
210                         } else if (val <= LOCAL_DISTANCE)
211                                 return 0;
212                 }
213         }
214         return 1;
215 }
216
217 void __init bad_srat(void)
218 {
219         pr_err("SRAT: SRAT not used.\n");
220         acpi_numa = -1;
221 }
222
223 int __init srat_disabled(void)
224 {
225         return acpi_numa < 0;
226 }
227
228 #if defined(CONFIG_X86) || defined(CONFIG_ARM64)
229 /*
230  * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
231  * I/O localities since SRAT does not list them.  I/O localities are
232  * not supported at this point.
233  */
234 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
235 {
236         int i, j;
237
238         for (i = 0; i < slit->locality_count; i++) {
239                 const int from_node = pxm_to_node(i);
240
241                 if (from_node == NUMA_NO_NODE)
242                         continue;
243
244                 for (j = 0; j < slit->locality_count; j++) {
245                         const int to_node = pxm_to_node(j);
246
247                         if (to_node == NUMA_NO_NODE)
248                                 continue;
249
250                         numa_set_distance(from_node, to_node,
251                                 slit->entry[slit->locality_count * i + j]);
252                 }
253         }
254 }
255
256 /*
257  * Default callback for parsing of the Proximity Domain <-> Memory
258  * Area mappings
259  */
260 int __init
261 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
262 {
263         u64 start, end;
264         u32 hotpluggable;
265         int node, pxm;
266
267         if (srat_disabled())
268                 goto out_err;
269         if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
270                 pr_err("SRAT: Unexpected header length: %d\n",
271                        ma->header.length);
272                 goto out_err_bad_srat;
273         }
274         if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
275                 goto out_err;
276         hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
277         if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
278                 goto out_err;
279
280         start = ma->base_address;
281         end = start + ma->length;
282         pxm = ma->proximity_domain;
283         if (acpi_srat_revision <= 1)
284                 pxm &= 0xff;
285
286         node = acpi_map_pxm_to_node(pxm);
287         if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
288                 pr_err("SRAT: Too many proximity domains.\n");
289                 goto out_err_bad_srat;
290         }
291
292         if (numa_add_memblk(node, start, end) < 0) {
293                 pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
294                        node, (unsigned long long) start,
295                        (unsigned long long) end - 1);
296                 goto out_err_bad_srat;
297         }
298
299         node_set(node, numa_nodes_parsed);
300
301         pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
302                 node, pxm,
303                 (unsigned long long) start, (unsigned long long) end - 1,
304                 hotpluggable ? " hotplug" : "",
305                 ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
306
307         /* Mark hotplug range in memblock. */
308         if (hotpluggable && memblock_mark_hotplug(start, ma->length))
309                 pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
310                         (unsigned long long)start, (unsigned long long)end - 1);
311
312         max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
313
314         return 0;
315 out_err_bad_srat:
316         bad_srat();
317 out_err:
318         return -EINVAL;
319 }
320 #endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
321
322 static int __init acpi_parse_slit(struct acpi_table_header *table)
323 {
324         struct acpi_table_slit *slit = (struct acpi_table_slit *)table;
325
326         if (!slit_valid(slit)) {
327                 pr_info("SLIT table looks invalid. Not used.\n");
328                 return -EINVAL;
329         }
330         acpi_numa_slit_init(slit);
331
332         return 0;
333 }
334
335 void __init __weak
336 acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
337 {
338         pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
339 }
340
341 static int __init
342 acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
343                            const unsigned long end)
344 {
345         struct acpi_srat_x2apic_cpu_affinity *processor_affinity;
346
347         processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header;
348         if (!processor_affinity)
349                 return -EINVAL;
350
351         acpi_table_print_srat_entry(header);
352
353         /* let architecture-dependent part to do it */
354         acpi_numa_x2apic_affinity_init(processor_affinity);
355
356         return 0;
357 }
358
359 static int __init
360 acpi_parse_processor_affinity(struct acpi_subtable_header *header,
361                               const unsigned long end)
362 {
363         struct acpi_srat_cpu_affinity *processor_affinity;
364
365         processor_affinity = (struct acpi_srat_cpu_affinity *)header;
366         if (!processor_affinity)
367                 return -EINVAL;
368
369         acpi_table_print_srat_entry(header);
370
371         /* let architecture-dependent part to do it */
372         acpi_numa_processor_affinity_init(processor_affinity);
373
374         return 0;
375 }
376
377 static int __init
378 acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
379                          const unsigned long end)
380 {
381         struct acpi_srat_gicc_affinity *processor_affinity;
382
383         processor_affinity = (struct acpi_srat_gicc_affinity *)header;
384         if (!processor_affinity)
385                 return -EINVAL;
386
387         acpi_table_print_srat_entry(header);
388
389         /* let architecture-dependent part to do it */
390         acpi_numa_gicc_affinity_init(processor_affinity);
391
392         return 0;
393 }
394
395 static int __initdata parsed_numa_memblks;
396
397 static int __init
398 acpi_parse_memory_affinity(struct acpi_subtable_header * header,
399                            const unsigned long end)
400 {
401         struct acpi_srat_mem_affinity *memory_affinity;
402
403         memory_affinity = (struct acpi_srat_mem_affinity *)header;
404         if (!memory_affinity)
405                 return -EINVAL;
406
407         acpi_table_print_srat_entry(header);
408
409         /* let architecture-dependent part to do it */
410         if (!acpi_numa_memory_affinity_init(memory_affinity))
411                 parsed_numa_memblks++;
412         return 0;
413 }
414
415 static int __init acpi_parse_srat(struct acpi_table_header *table)
416 {
417         struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
418
419         acpi_srat_revision = srat->header.revision;
420
421         /* Real work done in acpi_table_parse_srat below. */
422
423         return 0;
424 }
425
426 static int __init
427 acpi_table_parse_srat(enum acpi_srat_type id,
428                       acpi_tbl_entry_handler handler, unsigned int max_entries)
429 {
430         return acpi_table_parse_entries(ACPI_SIG_SRAT,
431                                             sizeof(struct acpi_table_srat), id,
432                                             handler, max_entries);
433 }
434
435 int __init acpi_numa_init(void)
436 {
437         int cnt = 0;
438
439         if (acpi_disabled)
440                 return -EINVAL;
441
442         /*
443          * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
444          * SRAT cpu entries could have different order with that in MADT.
445          * So go over all cpu entries in SRAT to get apicid to node mapping.
446          */
447
448         /* SRAT: System Resource Affinity Table */
449         if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
450                 struct acpi_subtable_proc srat_proc[3];
451
452                 memset(srat_proc, 0, sizeof(srat_proc));
453                 srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
454                 srat_proc[0].handler = acpi_parse_processor_affinity;
455                 srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
456                 srat_proc[1].handler = acpi_parse_x2apic_affinity;
457                 srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
458                 srat_proc[2].handler = acpi_parse_gicc_affinity;
459
460                 acpi_table_parse_entries_array(ACPI_SIG_SRAT,
461                                         sizeof(struct acpi_table_srat),
462                                         srat_proc, ARRAY_SIZE(srat_proc), 0);
463
464                 cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
465                                             acpi_parse_memory_affinity, 0);
466         }
467
468         /* SLIT: System Locality Information Table */
469         acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
470
471         if (cnt < 0)
472                 return cnt;
473         else if (!parsed_numa_memblks)
474                 return -ENOENT;
475         return 0;
476 }
477
478 static int acpi_get_pxm(acpi_handle h)
479 {
480         unsigned long long pxm;
481         acpi_status status;
482         acpi_handle handle;
483         acpi_handle phandle = h;
484
485         do {
486                 handle = phandle;
487                 status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
488                 if (ACPI_SUCCESS(status))
489                         return pxm;
490                 status = acpi_get_parent(handle, &phandle);
491         } while (ACPI_SUCCESS(status));
492         return -1;
493 }
494
495 int acpi_get_node(acpi_handle handle)
496 {
497         int pxm;
498
499         pxm = acpi_get_pxm(handle);
500
501         return acpi_map_pxm_to_node(pxm);
502 }
503 EXPORT_SYMBOL(acpi_get_node);