1 // SPDX-License-Identifier: GPL-2.0-only
/*
 * The APIC IDs describe the system topology in multiple domain levels.
 * The CPUID topology parser provides the information which part of the
 * APIC ID is associated to the individual levels:
 *
 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
 *
 * The root space contains the package (socket) IDs.
 *
 * Levels which are not enumerated consume 0 bits of APIC ID space, but
 * conceptually they are always represented. If e.g. only the CORE and
 * THREAD levels are enumerated, then the DIE, MODULE and TILE have the
 * same physical ID as the PACKAGE.
 *
 * If SMT is not supported, then the THREAD domain is still used. It then
 * has the same physical ID as the CORE domain and is the only child of
 * the CORE domain.
 *
 * This allows a unified view on the system independent of the enumerated
 * domain levels without requiring any conditionals in the code.
 */
#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>

#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
38 * Map cpu index to physical APIC ID
40 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
41 DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
42 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
43 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
45 /* Bitmap of physically present CPUs. */
46 DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
48 /* Used for CPU number allocation and parallel CPU bringup */
49 u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
51 /* Bitmaps to mark registered APICs at each topology domain */
52 static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
55 * Keep track of assigned, disabled and rejected CPUs. Present assigned
56 * with 1 as CPU #0 is reserved for the boot CPU.
59 unsigned int nr_assigned_cpus;
60 unsigned int nr_disabled_cpus;
61 unsigned int nr_rejected_cpus;
64 } topo_info __ro_after_init = {
65 .nr_assigned_cpus = 1,
66 .boot_cpu_apic_id = BAD_APICID,
67 .real_bsp_apic_id = BAD_APICID,
70 #define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)
72 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
74 return phys_id == (u64)cpuid_to_apicid[cpu];
#ifdef CONFIG_SMP
/*
 * Mark @cpu as primary thread when the thread bits of @apicid, i.e. the
 * bits covered by (__max_threads_per_core - 1), are all zero.
 */
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	if (!(apicid & (__max_threads_per_core - 1)))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
88 * Convert the APIC ID to a domain level ID by masking out the low bits
89 * below the domain level @dom.
91 static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
93 if (dom == TOPO_SMT_DOMAIN)
95 return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
98 static int topo_lookup_cpuid(u32 apic_id)
102 /* CPU# to APICID mapping is persistent once it is established */
103 for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
104 if (cpuid_to_apicid[i] == apic_id)
110 static __init int topo_get_cpunr(u32 apic_id)
112 int cpu = topo_lookup_cpuid(apic_id);
117 return topo_info.nr_assigned_cpus++;
120 static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
122 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
123 early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
124 early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
126 set_cpu_possible(cpu, true);
127 set_cpu_present(cpu, true);
130 static __init bool check_for_real_bsp(u32 apic_id)
133 * There is no real good way to detect whether this a kdump()
134 * kernel, but except on the Voyager SMP monstrosity which is not
135 * longer supported, the real BSP APIC ID is the first one which is
136 * enumerated by firmware. That allows to detect whether the boot
137 * CPU is the real BSP. If it is not, then do not register the APIC
138 * because sending INIT to the real BSP would reset the whole
141 * The first APIC ID which is enumerated by firmware is detectable
142 * because the boot CPU APIC ID is registered before that without
143 * invoking this code.
145 if (topo_info.real_bsp_apic_id != BAD_APICID)
148 if (apic_id == topo_info.boot_cpu_apic_id) {
149 topo_info.real_bsp_apic_id = apic_id;
153 pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
154 topo_info.boot_cpu_apic_id, apic_id);
155 pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
157 topo_info.real_bsp_apic_id = apic_id;
161 static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
164 unsigned int id, end, cnt = 0;
166 /* Calculate the exclusive end */
167 end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);
169 /* Unfortunately there is no bitmap_weight_range() */
170 for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
175 static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
180 set_bit(apic_id, phys_cpu_present_map);
183 * Double registration is valid in case of the boot CPU
184 * APIC because that is registered before the enumeration
185 * of the APICs via firmware parsers or VM guest
188 if (apic_id == topo_info.boot_cpu_apic_id)
191 cpu = topo_get_cpunr(apic_id);
193 cpuid_to_apicid[cpu] = apic_id;
194 topo_set_cpuids(cpu, apic_id, acpi_id);
196 u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
199 * Check for present APICs in the same package when running
200 * on bare metal. Allow the bogosity in a guest.
202 if (hypervisor_is_type(X86_HYPER_NATIVE) &&
203 topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
204 pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
206 topo_info.nr_rejected_cpus++;
210 topo_info.nr_disabled_cpus++;
213 /* Register present and possible CPUs in the domain maps */
214 for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
215 set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
219 * topology_register_apic - Register an APIC in early topology maps
220 * @apic_id: The APIC ID to set up
221 * @acpi_id: The ACPI ID associated to the APIC
222 * @present: True if the corresponding CPU is present
224 void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
226 if (apic_id >= MAX_LOCAL_APIC) {
227 pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
228 topo_info.nr_rejected_cpus++;
232 if (check_for_real_bsp(apic_id)) {
233 topo_info.nr_rejected_cpus++;
237 /* CPU numbers exhausted? */
238 if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
239 pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
240 topo_info.nr_rejected_cpus++;
244 topo_register_apic(apic_id, acpi_id, present);
248 * topology_register_boot_apic - Register the boot CPU APIC
249 * @apic_id: The APIC ID to set up
251 * Separate so CPU #0 can be assigned
253 void __init topology_register_boot_apic(u32 apic_id)
255 WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);
257 topo_info.boot_cpu_apic_id = apic_id;
258 topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
262 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
263 * @apicid: The APIC ID for which to lookup the logical ID
264 * @at_level: The topology domain level to use
266 * @apicid must be a full APIC ID, not the normalized variant. It's valid to have
267 * all bits below the domain level specified by @at_level to be clear. So both
268 * real APIC IDs and backshifted normalized APIC IDs work correctly.
271 * - >= 0: The requested logical ID
272 * - -ERANGE: @apicid is out of range
273 * - -ENODEV: @apicid is not registered
275 int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
277 /* Remove the bits below @at_level to get the proper level ID of @apicid */
278 unsigned int lvlid = topo_apicid(apicid, at_level);
280 if (lvlid >= MAX_LOCAL_APIC)
282 if (!test_bit(lvlid, apic_maps[at_level].map))
284 /* Get the number of set bits before @lvlid. */
285 return bitmap_weight(apic_maps[at_level].map, lvlid);
287 EXPORT_SYMBOL_GPL(topology_get_logical_id);
290 * topology_unit_count - Retrieve the count of specified units at a given topology domain level
291 * @apicid: The APIC ID which specifies the search range
292 * @which_units: The domain level specifying the units to count
293 * @at_level: The domain level at which @which_units have to be counted
295 * This returns the number of possible units according to the enumerated
298 * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
299 * counts the number of possible cores in the package to which @apicid
302 * @at_level must obviously be greater than @which_level to produce useful
303 * results. If @at_level is equal to @which_units the result is
304 * unsurprisingly 1. If @at_level is less than @which_units the results
305 * is by definition undefined and the function returns 0.
307 unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
308 enum x86_topology_domains at_level)
310 /* Remove the bits below @at_level to get the proper level ID of @apicid */
311 unsigned int lvlid = topo_apicid(apicid, at_level);
313 if (lvlid >= MAX_LOCAL_APIC)
315 if (!test_bit(lvlid, apic_maps[at_level].map))
317 if (which_units > at_level)
319 if (which_units == at_level)
321 return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/**
 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
 * @apic_id:	The APIC ID to set up
 * @acpi_id:	The ACPI ID associated to the APIC
 *
 * Returns:
 *  - >= 0:	The CPU number which is assigned to @apic_id
 *  - -EINVAL:	@apic_id exceeds the kernel limit
 *  - -ENODEV:	@apic_id was not registered during enumeration
 *  - -ENOSPC:	No CPU number is assigned to @apic_id
 */
int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
{
	int cpu;

	if (apic_id >= MAX_LOCAL_APIC)
		return -EINVAL;

	/* Reject if the APIC ID was not registered during enumeration. */
	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
		return -ENODEV;

	cpu = topo_lookup_cpuid(apic_id);
	if (cpu < 0)
		return -ENOSPC;

	set_bit(apic_id, phys_cpu_present_map);
	topo_set_cpuids(cpu, apic_id, acpi_id);
	cpu_mark_primary_thread(cpu, apic_id);
	return cpu;
}

/**
 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
 * @cpu:	The CPU number for which the APIC ID is removed
 *
 * The CPU number to APIC ID assignment in cpuid_to_apicid[] is kept, only
 * the per CPU APIC ID, the present bit in phys_cpu_present_map and the
 * CPU present state are cleared.
 */
void topology_hotunplug_apic(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	/* Nothing to undo when no APIC ID was ever assigned to @cpu */
	if (apic_id == BAD_APICID)
		return;

	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
	clear_bit(apic_id, phys_cpu_present_map);
	set_cpu_present(cpu, false);
}
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
368 #ifdef CONFIG_X86_LOCAL_APIC
369 static unsigned int max_possible_cpus __initdata = NR_CPUS;
372 * topology_apply_cmdline_limits_early - Apply topology command line limits early
374 * Ensure that command line limits are in effect before firmware parsing
377 void __init topology_apply_cmdline_limits_early(void)
379 unsigned int possible = nr_cpu_ids;
381 /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
382 if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
385 /* 'possible_cpus=N' */
386 possible = min_t(unsigned int, max_possible_cpus, possible);
388 if (possible < nr_cpu_ids) {
389 pr_info("Limiting to %u possible CPUs\n", possible);
390 set_nr_cpu_ids(possible);
394 static __init bool restrict_to_up(void)
396 if (!smp_found_config || ioapic_is_disabled)
399 * XEN PV is special as it does not advertise the local APIC
400 * properly, but provides a fake topology for it so that the
401 * infrastructure works. So don't apply the restrictions vs. APIC
407 return apic_is_disabled;
410 void __init topology_init_possible_cpus(void)
412 unsigned int assigned = topo_info.nr_assigned_cpus;
413 unsigned int disabled = topo_info.nr_disabled_cpus;
414 unsigned int cnta, cntb, cpu, allowed = 1;
415 unsigned int total = assigned + disabled;
419 * If there was no APIC registered, then fake one so that the
420 * topology bitmap is populated. That ensures that the code below
421 * is valid and the various query interfaces can be used
422 * unconditionally. This does not affect the actual APIC code in
423 * any way because either the local APIC address has not been
424 * registered or the local APIC was disabled on the command line.
426 if (topo_info.boot_cpu_apic_id == BAD_APICID)
427 topology_register_boot_apic(0);
429 if (!restrict_to_up()) {
430 if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
431 disabled += assigned - nr_cpu_ids;
432 assigned = nr_cpu_ids;
434 allowed = min_t(unsigned int, total, nr_cpu_ids);
438 pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
440 assigned = min_t(unsigned int, allowed, assigned);
441 disabled = allowed - assigned;
443 topo_info.nr_assigned_cpus = assigned;
444 topo_info.nr_disabled_cpus = disabled;
446 total_cpus = allowed;
447 set_nr_cpu_ids(allowed);
449 cnta = domain_weight(TOPO_PKG_DOMAIN);
450 cntb = domain_weight(TOPO_DIE_DOMAIN);
451 __max_logical_packages = cnta;
452 __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
454 pr_info("Max. logical packages: %3u\n", cnta);
455 pr_info("Max. logical dies: %3u\n", cntb);
456 pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
458 cnta = domain_weight(TOPO_CORE_DOMAIN);
459 cntb = domain_weight(TOPO_SMT_DOMAIN);
461 * Can't use order delta here as order(cnta) can be equal
462 * order(cntb) even if cnta != cntb.
464 __max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
465 pr_info("Max. threads per core: %3u\n", __max_threads_per_core);
467 firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
468 __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
469 pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
470 __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
471 pr_info("Num. threads per package: %3u\n", __num_threads_per_package);
473 pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
474 if (topo_info.nr_rejected_cpus)
475 pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
477 init_cpu_present(cpumask_of(0));
478 init_cpu_possible(cpumask_of(0));
480 /* Assign CPU numbers to non-present CPUs */
481 for (apicid = 0; disabled; disabled--, apicid++) {
482 apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
483 MAX_LOCAL_APIC, apicid);
484 if (apicid >= MAX_LOCAL_APIC)
486 cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
489 for (cpu = 0; cpu < allowed; cpu++) {
490 apicid = cpuid_to_apicid[cpu];
492 set_cpu_possible(cpu, true);
494 if (apicid == BAD_APICID)
497 cpu_mark_primary_thread(cpu, apicid);
498 set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
503 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
505 void __init topology_reset_possible_cpus_up(void)
507 init_cpu_present(cpumask_of(0));
508 init_cpu_possible(cpumask_of(0));
510 bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
511 if (topo_info.boot_cpu_apic_id != BAD_APICID)
512 set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
515 static int __init setup_possible_cpus(char *str)
517 get_option(&str, &max_possible_cpus);
520 early_param("possible_cpus", setup_possible_cpus);