Merge tag 'for-linus-2022052401' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/notifier.h>
51 #include <linux/cpu.h>
52 #include <linux/moduleparam.h>
53 #include <asm/cpu_device_id.h>
54 #include <asm/intel-family.h>
55 #include <asm/mwait.h>
56 #include <asm/msr.h>
57
58 #define INTEL_IDLE_VERSION "0.5.1"
59
60 static struct cpuidle_driver intel_idle_driver = {
61         .name = "intel_idle",
62         .owner = THIS_MODULE,
63 };
64 /* intel_idle.max_cstate=0 disables driver */
65 static int max_cstate = CPUIDLE_STATE_MAX - 1;
66 static unsigned int disabled_states_mask;
67 static unsigned int preferred_states_mask;
68
69 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
70
71 static unsigned long auto_demotion_disable_flags;
72
/*
 * Three-way C1E promotion setting (preserve / enable / disable).
 * The initial value is "preserve".
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE,
} c1e_promotion = C1E_PROMOTION_PRESERVE;
78
/*
 * Bundles a C-state table with a handful of option flags.
 * NOTE(review): the code consuming these fields is outside this excerpt.
 */
struct idle_cpu {
	/* Table of idle states to use. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;

	/* Boolean options. */
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	bool use_acpi;
};
91
92 static const struct idle_cpu *icpu __initdata;
93 static struct cpuidle_state *cpuidle_state_table __initdata;
94
95 static unsigned int mwait_substates __initdata;
96
/*
 * State flag: turn interrupts on before entering the C-state.  On some
 * platforms and for some C-states, this may measurably decrease interrupt
 * latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * State flag: enable this state by default even if the ACPI _CST does not
 * list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
107
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state,
 * i.e. in bits 31:24 of the flags value.
 *
 * flg2MWAIT(flags) - extract the 8-bit hint from a flags value.
 * MWAIT2flg(eax)   - place an 8-bit hint into bits 31:24.
 *
 * Both macros parenthesize their argument, so an expression argument
 * (for example "a | b") is masked as a whole rather than only partially.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
117
118 /**
119  * intel_idle - Ask the processor to enter the given idle state.
120  * @dev: cpuidle device of the target CPU.
121  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
122  * @index: Target idle state index.
123  *
124  * Use the MWAIT instruction to notify the processor that the CPU represented by
125  * @dev is idle and it can try to enter the idle state corresponding to @index.
126  *
127  * If the local APIC timer is not known to be reliable in the target idle state,
128  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
129  *
130  * Must be called under local_irq_disable().
131  */
132 static __cpuidle int intel_idle(struct cpuidle_device *dev,
133                                 struct cpuidle_driver *drv, int index)
134 {
135         struct cpuidle_state *state = &drv->states[index];
136         unsigned long eax = flg2MWAIT(state->flags);
137         unsigned long ecx = 1; /* break on interrupt flag */
138
139         if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)
140                 local_irq_enable();
141
142         mwait_idle_with_hints(eax, ecx);
143
144         return index;
145 }
146
147 /**
148  * intel_idle_s2idle - Ask the processor to enter the given idle state.
149  * @dev: cpuidle device of the target CPU.
150  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
151  * @index: Target idle state index.
152  *
153  * Use the MWAIT instruction to notify the processor that the CPU represented by
154  * @dev is idle and it can try to enter the idle state corresponding to @index.
155  *
156  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
157  * scheduler tick and suspended scheduler clock on the target CPU.
158  */
159 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
160                                        struct cpuidle_driver *drv, int index)
161 {
162         unsigned long eax = flg2MWAIT(drv->states[index].flags);
163         unsigned long ecx = 1; /* break on interrupt flag */
164
165         mwait_idle_with_hints(eax, ecx);
166
167         return 0;
168 }
169
170 /*
171  * States are indexed by the cstate number,
172  * which is also the index into the MWAIT hint array.
173  * Thus C0 is a dummy.
174  */
175 static struct cpuidle_state nehalem_cstates[] __initdata = {
176         {
177                 .name = "C1",
178                 .desc = "MWAIT 0x00",
179                 .flags = MWAIT2flg(0x00),
180                 .exit_latency = 3,
181                 .target_residency = 6,
182                 .enter = &intel_idle,
183                 .enter_s2idle = intel_idle_s2idle, },
184         {
185                 .name = "C1E",
186                 .desc = "MWAIT 0x01",
187                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
188                 .exit_latency = 10,
189                 .target_residency = 20,
190                 .enter = &intel_idle,
191                 .enter_s2idle = intel_idle_s2idle, },
192         {
193                 .name = "C3",
194                 .desc = "MWAIT 0x10",
195                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
196                 .exit_latency = 20,
197                 .target_residency = 80,
198                 .enter = &intel_idle,
199                 .enter_s2idle = intel_idle_s2idle, },
200         {
201                 .name = "C6",
202                 .desc = "MWAIT 0x20",
203                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
204                 .exit_latency = 200,
205                 .target_residency = 800,
206                 .enter = &intel_idle,
207                 .enter_s2idle = intel_idle_s2idle, },
208         {
209                 .enter = NULL }
210 };
211
212 static struct cpuidle_state snb_cstates[] __initdata = {
213         {
214                 .name = "C1",
215                 .desc = "MWAIT 0x00",
216                 .flags = MWAIT2flg(0x00),
217                 .exit_latency = 2,
218                 .target_residency = 2,
219                 .enter = &intel_idle,
220                 .enter_s2idle = intel_idle_s2idle, },
221         {
222                 .name = "C1E",
223                 .desc = "MWAIT 0x01",
224                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
225                 .exit_latency = 10,
226                 .target_residency = 20,
227                 .enter = &intel_idle,
228                 .enter_s2idle = intel_idle_s2idle, },
229         {
230                 .name = "C3",
231                 .desc = "MWAIT 0x10",
232                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
233                 .exit_latency = 80,
234                 .target_residency = 211,
235                 .enter = &intel_idle,
236                 .enter_s2idle = intel_idle_s2idle, },
237         {
238                 .name = "C6",
239                 .desc = "MWAIT 0x20",
240                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
241                 .exit_latency = 104,
242                 .target_residency = 345,
243                 .enter = &intel_idle,
244                 .enter_s2idle = intel_idle_s2idle, },
245         {
246                 .name = "C7",
247                 .desc = "MWAIT 0x30",
248                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
249                 .exit_latency = 109,
250                 .target_residency = 345,
251                 .enter = &intel_idle,
252                 .enter_s2idle = intel_idle_s2idle, },
253         {
254                 .enter = NULL }
255 };
256
257 static struct cpuidle_state byt_cstates[] __initdata = {
258         {
259                 .name = "C1",
260                 .desc = "MWAIT 0x00",
261                 .flags = MWAIT2flg(0x00),
262                 .exit_latency = 1,
263                 .target_residency = 1,
264                 .enter = &intel_idle,
265                 .enter_s2idle = intel_idle_s2idle, },
266         {
267                 .name = "C6N",
268                 .desc = "MWAIT 0x58",
269                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
270                 .exit_latency = 300,
271                 .target_residency = 275,
272                 .enter = &intel_idle,
273                 .enter_s2idle = intel_idle_s2idle, },
274         {
275                 .name = "C6S",
276                 .desc = "MWAIT 0x52",
277                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
278                 .exit_latency = 500,
279                 .target_residency = 560,
280                 .enter = &intel_idle,
281                 .enter_s2idle = intel_idle_s2idle, },
282         {
283                 .name = "C7",
284                 .desc = "MWAIT 0x60",
285                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
286                 .exit_latency = 1200,
287                 .target_residency = 4000,
288                 .enter = &intel_idle,
289                 .enter_s2idle = intel_idle_s2idle, },
290         {
291                 .name = "C7S",
292                 .desc = "MWAIT 0x64",
293                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
294                 .exit_latency = 10000,
295                 .target_residency = 20000,
296                 .enter = &intel_idle,
297                 .enter_s2idle = intel_idle_s2idle, },
298         {
299                 .enter = NULL }
300 };
301
302 static struct cpuidle_state cht_cstates[] __initdata = {
303         {
304                 .name = "C1",
305                 .desc = "MWAIT 0x00",
306                 .flags = MWAIT2flg(0x00),
307                 .exit_latency = 1,
308                 .target_residency = 1,
309                 .enter = &intel_idle,
310                 .enter_s2idle = intel_idle_s2idle, },
311         {
312                 .name = "C6N",
313                 .desc = "MWAIT 0x58",
314                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
315                 .exit_latency = 80,
316                 .target_residency = 275,
317                 .enter = &intel_idle,
318                 .enter_s2idle = intel_idle_s2idle, },
319         {
320                 .name = "C6S",
321                 .desc = "MWAIT 0x52",
322                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
323                 .exit_latency = 200,
324                 .target_residency = 560,
325                 .enter = &intel_idle,
326                 .enter_s2idle = intel_idle_s2idle, },
327         {
328                 .name = "C7",
329                 .desc = "MWAIT 0x60",
330                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
331                 .exit_latency = 1200,
332                 .target_residency = 4000,
333                 .enter = &intel_idle,
334                 .enter_s2idle = intel_idle_s2idle, },
335         {
336                 .name = "C7S",
337                 .desc = "MWAIT 0x64",
338                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
339                 .exit_latency = 10000,
340                 .target_residency = 20000,
341                 .enter = &intel_idle,
342                 .enter_s2idle = intel_idle_s2idle, },
343         {
344                 .enter = NULL }
345 };
346
347 static struct cpuidle_state ivb_cstates[] __initdata = {
348         {
349                 .name = "C1",
350                 .desc = "MWAIT 0x00",
351                 .flags = MWAIT2flg(0x00),
352                 .exit_latency = 1,
353                 .target_residency = 1,
354                 .enter = &intel_idle,
355                 .enter_s2idle = intel_idle_s2idle, },
356         {
357                 .name = "C1E",
358                 .desc = "MWAIT 0x01",
359                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
360                 .exit_latency = 10,
361                 .target_residency = 20,
362                 .enter = &intel_idle,
363                 .enter_s2idle = intel_idle_s2idle, },
364         {
365                 .name = "C3",
366                 .desc = "MWAIT 0x10",
367                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
368                 .exit_latency = 59,
369                 .target_residency = 156,
370                 .enter = &intel_idle,
371                 .enter_s2idle = intel_idle_s2idle, },
372         {
373                 .name = "C6",
374                 .desc = "MWAIT 0x20",
375                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
376                 .exit_latency = 80,
377                 .target_residency = 300,
378                 .enter = &intel_idle,
379                 .enter_s2idle = intel_idle_s2idle, },
380         {
381                 .name = "C7",
382                 .desc = "MWAIT 0x30",
383                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
384                 .exit_latency = 87,
385                 .target_residency = 300,
386                 .enter = &intel_idle,
387                 .enter_s2idle = intel_idle_s2idle, },
388         {
389                 .enter = NULL }
390 };
391
392 static struct cpuidle_state ivt_cstates[] __initdata = {
393         {
394                 .name = "C1",
395                 .desc = "MWAIT 0x00",
396                 .flags = MWAIT2flg(0x00),
397                 .exit_latency = 1,
398                 .target_residency = 1,
399                 .enter = &intel_idle,
400                 .enter_s2idle = intel_idle_s2idle, },
401         {
402                 .name = "C1E",
403                 .desc = "MWAIT 0x01",
404                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
405                 .exit_latency = 10,
406                 .target_residency = 80,
407                 .enter = &intel_idle,
408                 .enter_s2idle = intel_idle_s2idle, },
409         {
410                 .name = "C3",
411                 .desc = "MWAIT 0x10",
412                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
413                 .exit_latency = 59,
414                 .target_residency = 156,
415                 .enter = &intel_idle,
416                 .enter_s2idle = intel_idle_s2idle, },
417         {
418                 .name = "C6",
419                 .desc = "MWAIT 0x20",
420                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
421                 .exit_latency = 82,
422                 .target_residency = 300,
423                 .enter = &intel_idle,
424                 .enter_s2idle = intel_idle_s2idle, },
425         {
426                 .enter = NULL }
427 };
428
429 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
430         {
431                 .name = "C1",
432                 .desc = "MWAIT 0x00",
433                 .flags = MWAIT2flg(0x00),
434                 .exit_latency = 1,
435                 .target_residency = 1,
436                 .enter = &intel_idle,
437                 .enter_s2idle = intel_idle_s2idle, },
438         {
439                 .name = "C1E",
440                 .desc = "MWAIT 0x01",
441                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
442                 .exit_latency = 10,
443                 .target_residency = 250,
444                 .enter = &intel_idle,
445                 .enter_s2idle = intel_idle_s2idle, },
446         {
447                 .name = "C3",
448                 .desc = "MWAIT 0x10",
449                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
450                 .exit_latency = 59,
451                 .target_residency = 300,
452                 .enter = &intel_idle,
453                 .enter_s2idle = intel_idle_s2idle, },
454         {
455                 .name = "C6",
456                 .desc = "MWAIT 0x20",
457                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
458                 .exit_latency = 84,
459                 .target_residency = 400,
460                 .enter = &intel_idle,
461                 .enter_s2idle = intel_idle_s2idle, },
462         {
463                 .enter = NULL }
464 };
465
466 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
467         {
468                 .name = "C1",
469                 .desc = "MWAIT 0x00",
470                 .flags = MWAIT2flg(0x00),
471                 .exit_latency = 1,
472                 .target_residency = 1,
473                 .enter = &intel_idle,
474                 .enter_s2idle = intel_idle_s2idle, },
475         {
476                 .name = "C1E",
477                 .desc = "MWAIT 0x01",
478                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
479                 .exit_latency = 10,
480                 .target_residency = 500,
481                 .enter = &intel_idle,
482                 .enter_s2idle = intel_idle_s2idle, },
483         {
484                 .name = "C3",
485                 .desc = "MWAIT 0x10",
486                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
487                 .exit_latency = 59,
488                 .target_residency = 600,
489                 .enter = &intel_idle,
490                 .enter_s2idle = intel_idle_s2idle, },
491         {
492                 .name = "C6",
493                 .desc = "MWAIT 0x20",
494                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
495                 .exit_latency = 88,
496                 .target_residency = 700,
497                 .enter = &intel_idle,
498                 .enter_s2idle = intel_idle_s2idle, },
499         {
500                 .enter = NULL }
501 };
502
503 static struct cpuidle_state hsw_cstates[] __initdata = {
504         {
505                 .name = "C1",
506                 .desc = "MWAIT 0x00",
507                 .flags = MWAIT2flg(0x00),
508                 .exit_latency = 2,
509                 .target_residency = 2,
510                 .enter = &intel_idle,
511                 .enter_s2idle = intel_idle_s2idle, },
512         {
513                 .name = "C1E",
514                 .desc = "MWAIT 0x01",
515                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
516                 .exit_latency = 10,
517                 .target_residency = 20,
518                 .enter = &intel_idle,
519                 .enter_s2idle = intel_idle_s2idle, },
520         {
521                 .name = "C3",
522                 .desc = "MWAIT 0x10",
523                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
524                 .exit_latency = 33,
525                 .target_residency = 100,
526                 .enter = &intel_idle,
527                 .enter_s2idle = intel_idle_s2idle, },
528         {
529                 .name = "C6",
530                 .desc = "MWAIT 0x20",
531                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
532                 .exit_latency = 133,
533                 .target_residency = 400,
534                 .enter = &intel_idle,
535                 .enter_s2idle = intel_idle_s2idle, },
536         {
537                 .name = "C7s",
538                 .desc = "MWAIT 0x32",
539                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
540                 .exit_latency = 166,
541                 .target_residency = 500,
542                 .enter = &intel_idle,
543                 .enter_s2idle = intel_idle_s2idle, },
544         {
545                 .name = "C8",
546                 .desc = "MWAIT 0x40",
547                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
548                 .exit_latency = 300,
549                 .target_residency = 900,
550                 .enter = &intel_idle,
551                 .enter_s2idle = intel_idle_s2idle, },
552         {
553                 .name = "C9",
554                 .desc = "MWAIT 0x50",
555                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
556                 .exit_latency = 600,
557                 .target_residency = 1800,
558                 .enter = &intel_idle,
559                 .enter_s2idle = intel_idle_s2idle, },
560         {
561                 .name = "C10",
562                 .desc = "MWAIT 0x60",
563                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
564                 .exit_latency = 2600,
565                 .target_residency = 7700,
566                 .enter = &intel_idle,
567                 .enter_s2idle = intel_idle_s2idle, },
568         {
569                 .enter = NULL }
570 };
571 static struct cpuidle_state bdw_cstates[] __initdata = {
572         {
573                 .name = "C1",
574                 .desc = "MWAIT 0x00",
575                 .flags = MWAIT2flg(0x00),
576                 .exit_latency = 2,
577                 .target_residency = 2,
578                 .enter = &intel_idle,
579                 .enter_s2idle = intel_idle_s2idle, },
580         {
581                 .name = "C1E",
582                 .desc = "MWAIT 0x01",
583                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
584                 .exit_latency = 10,
585                 .target_residency = 20,
586                 .enter = &intel_idle,
587                 .enter_s2idle = intel_idle_s2idle, },
588         {
589                 .name = "C3",
590                 .desc = "MWAIT 0x10",
591                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
592                 .exit_latency = 40,
593                 .target_residency = 100,
594                 .enter = &intel_idle,
595                 .enter_s2idle = intel_idle_s2idle, },
596         {
597                 .name = "C6",
598                 .desc = "MWAIT 0x20",
599                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
600                 .exit_latency = 133,
601                 .target_residency = 400,
602                 .enter = &intel_idle,
603                 .enter_s2idle = intel_idle_s2idle, },
604         {
605                 .name = "C7s",
606                 .desc = "MWAIT 0x32",
607                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
608                 .exit_latency = 166,
609                 .target_residency = 500,
610                 .enter = &intel_idle,
611                 .enter_s2idle = intel_idle_s2idle, },
612         {
613                 .name = "C8",
614                 .desc = "MWAIT 0x40",
615                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
616                 .exit_latency = 300,
617                 .target_residency = 900,
618                 .enter = &intel_idle,
619                 .enter_s2idle = intel_idle_s2idle, },
620         {
621                 .name = "C9",
622                 .desc = "MWAIT 0x50",
623                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
624                 .exit_latency = 600,
625                 .target_residency = 1800,
626                 .enter = &intel_idle,
627                 .enter_s2idle = intel_idle_s2idle, },
628         {
629                 .name = "C10",
630                 .desc = "MWAIT 0x60",
631                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
632                 .exit_latency = 2600,
633                 .target_residency = 7700,
634                 .enter = &intel_idle,
635                 .enter_s2idle = intel_idle_s2idle, },
636         {
637                 .enter = NULL }
638 };
639
640 static struct cpuidle_state skl_cstates[] __initdata = {
641         {
642                 .name = "C1",
643                 .desc = "MWAIT 0x00",
644                 .flags = MWAIT2flg(0x00),
645                 .exit_latency = 2,
646                 .target_residency = 2,
647                 .enter = &intel_idle,
648                 .enter_s2idle = intel_idle_s2idle, },
649         {
650                 .name = "C1E",
651                 .desc = "MWAIT 0x01",
652                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
653                 .exit_latency = 10,
654                 .target_residency = 20,
655                 .enter = &intel_idle,
656                 .enter_s2idle = intel_idle_s2idle, },
657         {
658                 .name = "C3",
659                 .desc = "MWAIT 0x10",
660                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
661                 .exit_latency = 70,
662                 .target_residency = 100,
663                 .enter = &intel_idle,
664                 .enter_s2idle = intel_idle_s2idle, },
665         {
666                 .name = "C6",
667                 .desc = "MWAIT 0x20",
668                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
669                 .exit_latency = 85,
670                 .target_residency = 200,
671                 .enter = &intel_idle,
672                 .enter_s2idle = intel_idle_s2idle, },
673         {
674                 .name = "C7s",
675                 .desc = "MWAIT 0x33",
676                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
677                 .exit_latency = 124,
678                 .target_residency = 800,
679                 .enter = &intel_idle,
680                 .enter_s2idle = intel_idle_s2idle, },
681         {
682                 .name = "C8",
683                 .desc = "MWAIT 0x40",
684                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
685                 .exit_latency = 200,
686                 .target_residency = 800,
687                 .enter = &intel_idle,
688                 .enter_s2idle = intel_idle_s2idle, },
689         {
690                 .name = "C9",
691                 .desc = "MWAIT 0x50",
692                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
693                 .exit_latency = 480,
694                 .target_residency = 5000,
695                 .enter = &intel_idle,
696                 .enter_s2idle = intel_idle_s2idle, },
697         {
698                 .name = "C10",
699                 .desc = "MWAIT 0x60",
700                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
701                 .exit_latency = 890,
702                 .target_residency = 5000,
703                 .enter = &intel_idle,
704                 .enter_s2idle = intel_idle_s2idle, },
705         {
706                 .enter = NULL }
707 };
708
709 static struct cpuidle_state skx_cstates[] __initdata = {
710         {
711                 .name = "C1",
712                 .desc = "MWAIT 0x00",
713                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
714                 .exit_latency = 2,
715                 .target_residency = 2,
716                 .enter = &intel_idle,
717                 .enter_s2idle = intel_idle_s2idle, },
718         {
719                 .name = "C1E",
720                 .desc = "MWAIT 0x01",
721                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
722                 .exit_latency = 10,
723                 .target_residency = 20,
724                 .enter = &intel_idle,
725                 .enter_s2idle = intel_idle_s2idle, },
726         {
727                 .name = "C6",
728                 .desc = "MWAIT 0x20",
729                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
730                 .exit_latency = 133,
731                 .target_residency = 600,
732                 .enter = &intel_idle,
733                 .enter_s2idle = intel_idle_s2idle, },
734         {
735                 .enter = NULL }
736 };
737
738 static struct cpuidle_state icx_cstates[] __initdata = {
739         {
740                 .name = "C1",
741                 .desc = "MWAIT 0x00",
742                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
743                 .exit_latency = 1,
744                 .target_residency = 1,
745                 .enter = &intel_idle,
746                 .enter_s2idle = intel_idle_s2idle, },
747         {
748                 .name = "C1E",
749                 .desc = "MWAIT 0x01",
750                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
751                 .exit_latency = 4,
752                 .target_residency = 4,
753                 .enter = &intel_idle,
754                 .enter_s2idle = intel_idle_s2idle, },
755         {
756                 .name = "C6",
757                 .desc = "MWAIT 0x20",
758                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
759                 .exit_latency = 170,
760                 .target_residency = 600,
761                 .enter = &intel_idle,
762                 .enter_s2idle = intel_idle_s2idle, },
763         {
764                 .enter = NULL }
765 };
766
767 /*
768  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
769  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
770  * MSR_IA32_POWER_CTL. But in this case there is effectively no C1, because C1
771  * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
772  * both C1 and C1E requests end up with C1, so there is effectively no C1E.
773  *
774  * By default we enable C1 and disable C1E by marking it with
775  * 'CPUIDLE_FLAG_UNUSABLE'.
776  */
777 static struct cpuidle_state spr_cstates[] __initdata = {
778         {
779                 .name = "C1",
780                 .desc = "MWAIT 0x00",
781                 .flags = MWAIT2flg(0x00),
782                 .exit_latency = 1,
783                 .target_residency = 1,
784                 .enter = &intel_idle,
785                 .enter_s2idle = intel_idle_s2idle, },
786         {
787                 .name = "C1E",
788                 .desc = "MWAIT 0x01",
789                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
790                                            CPUIDLE_FLAG_UNUSABLE,
791                 .exit_latency = 2,
792                 .target_residency = 4,
793                 .enter = &intel_idle,
794                 .enter_s2idle = intel_idle_s2idle, },
795         {
796                 .name = "C6",
797                 .desc = "MWAIT 0x20",
798                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
799                 .exit_latency = 290,
800                 .target_residency = 800,
801                 .enter = &intel_idle,
802                 .enter_s2idle = intel_idle_s2idle, },
803         {
804                 .enter = NULL }
805 };
806
807 static struct cpuidle_state atom_cstates[] __initdata = {
808         {
809                 .name = "C1E",
810                 .desc = "MWAIT 0x00",
811                 .flags = MWAIT2flg(0x00),
812                 .exit_latency = 10,
813                 .target_residency = 20,
814                 .enter = &intel_idle,
815                 .enter_s2idle = intel_idle_s2idle, },
816         {
817                 .name = "C2",
818                 .desc = "MWAIT 0x10",
819                 .flags = MWAIT2flg(0x10),
820                 .exit_latency = 20,
821                 .target_residency = 80,
822                 .enter = &intel_idle,
823                 .enter_s2idle = intel_idle_s2idle, },
824         {
825                 .name = "C4",
826                 .desc = "MWAIT 0x30",
827                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
828                 .exit_latency = 100,
829                 .target_residency = 400,
830                 .enter = &intel_idle,
831                 .enter_s2idle = intel_idle_s2idle, },
832         {
833                 .name = "C6",
834                 .desc = "MWAIT 0x52",
835                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
836                 .exit_latency = 140,
837                 .target_residency = 560,
838                 .enter = &intel_idle,
839                 .enter_s2idle = intel_idle_s2idle, },
840         {
841                 .enter = NULL }
842 };
843 static struct cpuidle_state tangier_cstates[] __initdata = {
844         {
845                 .name = "C1",
846                 .desc = "MWAIT 0x00",
847                 .flags = MWAIT2flg(0x00),
848                 .exit_latency = 1,
849                 .target_residency = 4,
850                 .enter = &intel_idle,
851                 .enter_s2idle = intel_idle_s2idle, },
852         {
853                 .name = "C4",
854                 .desc = "MWAIT 0x30",
855                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
856                 .exit_latency = 100,
857                 .target_residency = 400,
858                 .enter = &intel_idle,
859                 .enter_s2idle = intel_idle_s2idle, },
860         {
861                 .name = "C6",
862                 .desc = "MWAIT 0x52",
863                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
864                 .exit_latency = 140,
865                 .target_residency = 560,
866                 .enter = &intel_idle,
867                 .enter_s2idle = intel_idle_s2idle, },
868         {
869                 .name = "C7",
870                 .desc = "MWAIT 0x60",
871                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
872                 .exit_latency = 1200,
873                 .target_residency = 4000,
874                 .enter = &intel_idle,
875                 .enter_s2idle = intel_idle_s2idle, },
876         {
877                 .name = "C9",
878                 .desc = "MWAIT 0x64",
879                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
880                 .exit_latency = 10000,
881                 .target_residency = 20000,
882                 .enter = &intel_idle,
883                 .enter_s2idle = intel_idle_s2idle, },
884         {
885                 .enter = NULL }
886 };
887 static struct cpuidle_state avn_cstates[] __initdata = {
888         {
889                 .name = "C1",
890                 .desc = "MWAIT 0x00",
891                 .flags = MWAIT2flg(0x00),
892                 .exit_latency = 2,
893                 .target_residency = 2,
894                 .enter = &intel_idle,
895                 .enter_s2idle = intel_idle_s2idle, },
896         {
897                 .name = "C6",
898                 .desc = "MWAIT 0x51",
899                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
900                 .exit_latency = 15,
901                 .target_residency = 45,
902                 .enter = &intel_idle,
903                 .enter_s2idle = intel_idle_s2idle, },
904         {
905                 .enter = NULL }
906 };
907 static struct cpuidle_state knl_cstates[] __initdata = {
908         {
909                 .name = "C1",
910                 .desc = "MWAIT 0x00",
911                 .flags = MWAIT2flg(0x00),
912                 .exit_latency = 1,
913                 .target_residency = 2,
914                 .enter = &intel_idle,
915                 .enter_s2idle = intel_idle_s2idle },
916         {
917                 .name = "C6",
918                 .desc = "MWAIT 0x10",
919                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
920                 .exit_latency = 120,
921                 .target_residency = 500,
922                 .enter = &intel_idle,
923                 .enter_s2idle = intel_idle_s2idle },
924         {
925                 .enter = NULL }
926 };
927
928 static struct cpuidle_state bxt_cstates[] __initdata = {
929         {
930                 .name = "C1",
931                 .desc = "MWAIT 0x00",
932                 .flags = MWAIT2flg(0x00),
933                 .exit_latency = 2,
934                 .target_residency = 2,
935                 .enter = &intel_idle,
936                 .enter_s2idle = intel_idle_s2idle, },
937         {
938                 .name = "C1E",
939                 .desc = "MWAIT 0x01",
940                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
941                 .exit_latency = 10,
942                 .target_residency = 20,
943                 .enter = &intel_idle,
944                 .enter_s2idle = intel_idle_s2idle, },
945         {
946                 .name = "C6",
947                 .desc = "MWAIT 0x20",
948                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
949                 .exit_latency = 133,
950                 .target_residency = 133,
951                 .enter = &intel_idle,
952                 .enter_s2idle = intel_idle_s2idle, },
953         {
954                 .name = "C7s",
955                 .desc = "MWAIT 0x31",
956                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
957                 .exit_latency = 155,
958                 .target_residency = 155,
959                 .enter = &intel_idle,
960                 .enter_s2idle = intel_idle_s2idle, },
961         {
962                 .name = "C8",
963                 .desc = "MWAIT 0x40",
964                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
965                 .exit_latency = 1000,
966                 .target_residency = 1000,
967                 .enter = &intel_idle,
968                 .enter_s2idle = intel_idle_s2idle, },
969         {
970                 .name = "C9",
971                 .desc = "MWAIT 0x50",
972                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
973                 .exit_latency = 2000,
974                 .target_residency = 2000,
975                 .enter = &intel_idle,
976                 .enter_s2idle = intel_idle_s2idle, },
977         {
978                 .name = "C10",
979                 .desc = "MWAIT 0x60",
980                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
981                 .exit_latency = 10000,
982                 .target_residency = 10000,
983                 .enter = &intel_idle,
984                 .enter_s2idle = intel_idle_s2idle, },
985         {
986                 .enter = NULL }
987 };
988
989 static struct cpuidle_state dnv_cstates[] __initdata = {
990         {
991                 .name = "C1",
992                 .desc = "MWAIT 0x00",
993                 .flags = MWAIT2flg(0x00),
994                 .exit_latency = 2,
995                 .target_residency = 2,
996                 .enter = &intel_idle,
997                 .enter_s2idle = intel_idle_s2idle, },
998         {
999                 .name = "C1E",
1000                 .desc = "MWAIT 0x01",
1001                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1002                 .exit_latency = 10,
1003                 .target_residency = 20,
1004                 .enter = &intel_idle,
1005                 .enter_s2idle = intel_idle_s2idle, },
1006         {
1007                 .name = "C6",
1008                 .desc = "MWAIT 0x20",
1009                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1010                 .exit_latency = 50,
1011                 .target_residency = 500,
1012                 .enter = &intel_idle,
1013                 .enter_s2idle = intel_idle_s2idle, },
1014         {
1015                 .enter = NULL }
1016 };
1017
1018 /*
1019  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1020  * C6, and this is indicated in the CPUID mwait leaf.
1021  */
1022 static struct cpuidle_state snr_cstates[] __initdata = {
1023         {
1024                 .name = "C1",
1025                 .desc = "MWAIT 0x00",
1026                 .flags = MWAIT2flg(0x00),
1027                 .exit_latency = 2,
1028                 .target_residency = 2,
1029                 .enter = &intel_idle,
1030                 .enter_s2idle = intel_idle_s2idle, },
1031         {
1032                 .name = "C1E",
1033                 .desc = "MWAIT 0x01",
1034                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1035                 .exit_latency = 15,
1036                 .target_residency = 25,
1037                 .enter = &intel_idle,
1038                 .enter_s2idle = intel_idle_s2idle, },
1039         {
1040                 .name = "C6",
1041                 .desc = "MWAIT 0x20",
1042                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1043                 .exit_latency = 130,
1044                 .target_residency = 500,
1045                 .enter = &intel_idle,
1046                 .enter_s2idle = intel_idle_s2idle, },
1047         {
1048                 .enter = NULL }
1049 };
1050
1051 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1052         .state_table = nehalem_cstates,
1053         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1054         .disable_promotion_to_c1e = true,
1055 };
1056
1057 static const struct idle_cpu idle_cpu_nhx __initconst = {
1058         .state_table = nehalem_cstates,
1059         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1060         .disable_promotion_to_c1e = true,
1061         .use_acpi = true,
1062 };
1063
1064 static const struct idle_cpu idle_cpu_atom __initconst = {
1065         .state_table = atom_cstates,
1066 };
1067
1068 static const struct idle_cpu idle_cpu_tangier __initconst = {
1069         .state_table = tangier_cstates,
1070 };
1071
1072 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1073         .state_table = atom_cstates,
1074         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1075 };
1076
1077 static const struct idle_cpu idle_cpu_snb __initconst = {
1078         .state_table = snb_cstates,
1079         .disable_promotion_to_c1e = true,
1080 };
1081
1082 static const struct idle_cpu idle_cpu_snx __initconst = {
1083         .state_table = snb_cstates,
1084         .disable_promotion_to_c1e = true,
1085         .use_acpi = true,
1086 };
1087
1088 static const struct idle_cpu idle_cpu_byt __initconst = {
1089         .state_table = byt_cstates,
1090         .disable_promotion_to_c1e = true,
1091         .byt_auto_demotion_disable_flag = true,
1092 };
1093
1094 static const struct idle_cpu idle_cpu_cht __initconst = {
1095         .state_table = cht_cstates,
1096         .disable_promotion_to_c1e = true,
1097         .byt_auto_demotion_disable_flag = true,
1098 };
1099
1100 static const struct idle_cpu idle_cpu_ivb __initconst = {
1101         .state_table = ivb_cstates,
1102         .disable_promotion_to_c1e = true,
1103 };
1104
1105 static const struct idle_cpu idle_cpu_ivt __initconst = {
1106         .state_table = ivt_cstates,
1107         .disable_promotion_to_c1e = true,
1108         .use_acpi = true,
1109 };
1110
1111 static const struct idle_cpu idle_cpu_hsw __initconst = {
1112         .state_table = hsw_cstates,
1113         .disable_promotion_to_c1e = true,
1114 };
1115
1116 static const struct idle_cpu idle_cpu_hsx __initconst = {
1117         .state_table = hsw_cstates,
1118         .disable_promotion_to_c1e = true,
1119         .use_acpi = true,
1120 };
1121
1122 static const struct idle_cpu idle_cpu_bdw __initconst = {
1123         .state_table = bdw_cstates,
1124         .disable_promotion_to_c1e = true,
1125 };
1126
1127 static const struct idle_cpu idle_cpu_bdx __initconst = {
1128         .state_table = bdw_cstates,
1129         .disable_promotion_to_c1e = true,
1130         .use_acpi = true,
1131 };
1132
1133 static const struct idle_cpu idle_cpu_skl __initconst = {
1134         .state_table = skl_cstates,
1135         .disable_promotion_to_c1e = true,
1136 };
1137
1138 static const struct idle_cpu idle_cpu_skx __initconst = {
1139         .state_table = skx_cstates,
1140         .disable_promotion_to_c1e = true,
1141         .use_acpi = true,
1142 };
1143
1144 static const struct idle_cpu idle_cpu_icx __initconst = {
1145         .state_table = icx_cstates,
1146         .disable_promotion_to_c1e = true,
1147         .use_acpi = true,
1148 };
1149
1150 static const struct idle_cpu idle_cpu_spr __initconst = {
1151         .state_table = spr_cstates,
1152         .disable_promotion_to_c1e = true,
1153         .use_acpi = true,
1154 };
1155
1156 static const struct idle_cpu idle_cpu_avn __initconst = {
1157         .state_table = avn_cstates,
1158         .disable_promotion_to_c1e = true,
1159         .use_acpi = true,
1160 };
1161
1162 static const struct idle_cpu idle_cpu_knl __initconst = {
1163         .state_table = knl_cstates,
1164         .use_acpi = true,
1165 };
1166
1167 static const struct idle_cpu idle_cpu_bxt __initconst = {
1168         .state_table = bxt_cstates,
1169         .disable_promotion_to_c1e = true,
1170 };
1171
1172 static const struct idle_cpu idle_cpu_dnv __initconst = {
1173         .state_table = dnv_cstates,
1174         .disable_promotion_to_c1e = true,
1175         .use_acpi = true,
1176 };
1177
1178 static const struct idle_cpu idle_cpu_snr __initconst = {
1179         .state_table = snr_cstates,
1180         .disable_promotion_to_c1e = true,
1181         .use_acpi = true,
1182 };
1183
1184 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1185         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
1186         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
1187         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
1188         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
1189         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
1190         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
1191         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
1192         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
1193         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
1194         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
1195         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
1196         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
1197         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
1198         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1199         X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
1200         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
1201         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
1202         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
1203         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
1204         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
1205         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
1206         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
1207         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
1208         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
1209         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
1210         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
1211         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
1212         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
1213         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
1214         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
1215         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
1216         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
1217         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
1218         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
1219         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
1220         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
1221         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
1222         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
1223         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
1224         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
1225         {}
1226 };
1227
1228 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1229         X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1230         {}
1231 };
1232
1233 static bool __init intel_idle_max_cstate_reached(int cstate)
1234 {
1235         if (cstate + 1 > max_cstate) {
1236                 pr_info("max_cstate %d reached\n", max_cstate);
1237                 return true;
1238         }
1239         return false;
1240 }
1241
1242 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1243 {
1244         unsigned long eax = flg2MWAIT(state->flags);
1245
1246         if (boot_cpu_has(X86_FEATURE_ARAT))
1247                 return false;
1248
1249         /*
1250          * Switch over to one-shot tick broadcast if the target C-state
1251          * is deeper than C1.
1252          */
1253         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1254 }
1255
1256 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1257 #include <acpi/processor.h>
1258
1259 static bool no_acpi __read_mostly;
1260 module_param(no_acpi, bool, 0444);
1261 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1262
1263 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1264 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1265 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1266
1267 static struct acpi_processor_power acpi_state_table __initdata;
1268
1269 /**
1270  * intel_idle_cst_usable - Check if the _CST information can be used.
1271  *
1272  * Check if all of the C-states listed by _CST in the max_cstate range are
1273  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1274  */
1275 static bool __init intel_idle_cst_usable(void)
1276 {
1277         int cstate, limit;
1278
1279         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1280                       acpi_state_table.count);
1281
1282         for (cstate = 1; cstate < limit; cstate++) {
1283                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1284
1285                 if (cx->entry_method != ACPI_CSTATE_FFH)
1286                         return false;
1287         }
1288
1289         return true;
1290 }
1291
1292 static bool __init intel_idle_acpi_cst_extract(void)
1293 {
1294         unsigned int cpu;
1295
1296         if (no_acpi) {
1297                 pr_debug("Not allowed to use ACPI _CST\n");
1298                 return false;
1299         }
1300
1301         for_each_possible_cpu(cpu) {
1302                 struct acpi_processor *pr = per_cpu(processors, cpu);
1303
1304                 if (!pr)
1305                         continue;
1306
1307                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1308                         continue;
1309
1310                 acpi_state_table.count++;
1311
1312                 if (!intel_idle_cst_usable())
1313                         continue;
1314
1315                 if (!acpi_processor_claim_cst_control())
1316                         break;
1317
1318                 return true;
1319         }
1320
1321         acpi_state_table.count = 0;
1322         pr_debug("ACPI _CST not found or not usable\n");
1323         return false;
1324 }
1325
1326 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1327 {
1328         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1329
1330         /*
1331          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1332          * the interesting states are ACPI_CSTATE_FFH.
1333          */
1334         for (cstate = 1; cstate < limit; cstate++) {
1335                 struct acpi_processor_cx *cx;
1336                 struct cpuidle_state *state;
1337
1338                 if (intel_idle_max_cstate_reached(cstate - 1))
1339                         break;
1340
1341                 cx = &acpi_state_table.states[cstate];
1342
1343                 state = &drv->states[drv->state_count++];
1344
1345                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1346                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1347                 state->exit_latency = cx->latency;
1348                 /*
1349                  * For C1-type C-states use the same number for both the exit
1350                  * latency and target residency, because that is the case for
1351                  * C1 in the majority of the static C-states tables above.
1352                  * For the other types of C-states, however, set the target
1353                  * residency to 3 times the exit latency which should lead to
1354                  * a reasonable balance between energy-efficiency and
1355                  * performance in the majority of interesting cases.
1356                  */
1357                 state->target_residency = cx->latency;
1358                 if (cx->type > ACPI_STATE_C1)
1359                         state->target_residency *= 3;
1360
1361                 state->flags = MWAIT2flg(cx->address);
1362                 if (cx->type > ACPI_STATE_C2)
1363                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1364
1365                 if (disabled_states_mask & BIT(cstate))
1366                         state->flags |= CPUIDLE_FLAG_OFF;
1367
1368                 if (intel_idle_state_needs_timer_stop(state))
1369                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1370
1371                 state->enter = intel_idle;
1372                 state->enter_s2idle = intel_idle_s2idle;
1373         }
1374 }
1375
1376 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1377 {
1378         int cstate, limit;
1379
1380         /*
1381          * If there are no _CST C-states, do not disable any C-states by
1382          * default.
1383          */
1384         if (!acpi_state_table.count)
1385                 return false;
1386
1387         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1388         /*
1389          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1390          * the interesting states are ACPI_CSTATE_FFH.
1391          */
1392         for (cstate = 1; cstate < limit; cstate++) {
1393                 if (acpi_state_table.states[cstate].address == mwait_hint)
1394                         return false;
1395         }
1396         return true;
1397 }
1398 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1399 #define force_use_acpi  (false)
1400
1401 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1402 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1403 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1404 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1405
1406 /**
1407  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1408  *
1409  * Tune IVT multi-socket targets.
1410  * Assumption: num_sockets == (max_package_num + 1).
1411  */
1412 static void __init ivt_idle_state_table_update(void)
1413 {
1414         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1415         int cpu, package_num, num_sockets = 1;
1416
1417         for_each_online_cpu(cpu) {
1418                 package_num = topology_physical_package_id(cpu);
1419                 if (package_num + 1 > num_sockets) {
1420                         num_sockets = package_num + 1;
1421
1422                         if (num_sockets > 4) {
1423                                 cpuidle_state_table = ivt_cstates_8s;
1424                                 return;
1425                         }
1426                 }
1427         }
1428
1429         if (num_sockets > 2)
1430                 cpuidle_state_table = ivt_cstates_4s;
1431
1432         /* else, 1 and 2 socket systems use default ivt_cstates */
1433 }
1434
1435 /**
1436  * irtl_2_usec - IRTL to microseconds conversion.
1437  * @irtl: IRTL MSR value.
1438  *
1439  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1440  */
1441 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1442 {
1443         static const unsigned int irtl_ns_units[] __initconst = {
1444                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1445         };
1446         unsigned long long ns;
1447
1448         if (!irtl)
1449                 return 0;
1450
1451         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1452
1453         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1454 }
1455
1456 /**
1457  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1458  *
1459  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1460  * definitive maximum latency and use the same value for target_residency.
1461  */
1462 static void __init bxt_idle_state_table_update(void)
1463 {
1464         unsigned long long msr;
1465         unsigned int usec;
1466
1467         rdmsrl(MSR_PKGC6_IRTL, msr);
1468         usec = irtl_2_usec(msr);
1469         if (usec) {
1470                 bxt_cstates[2].exit_latency = usec;
1471                 bxt_cstates[2].target_residency = usec;
1472         }
1473
1474         rdmsrl(MSR_PKGC7_IRTL, msr);
1475         usec = irtl_2_usec(msr);
1476         if (usec) {
1477                 bxt_cstates[3].exit_latency = usec;
1478                 bxt_cstates[3].target_residency = usec;
1479         }
1480
1481         rdmsrl(MSR_PKGC8_IRTL, msr);
1482         usec = irtl_2_usec(msr);
1483         if (usec) {
1484                 bxt_cstates[4].exit_latency = usec;
1485                 bxt_cstates[4].target_residency = usec;
1486         }
1487
1488         rdmsrl(MSR_PKGC9_IRTL, msr);
1489         usec = irtl_2_usec(msr);
1490         if (usec) {
1491                 bxt_cstates[5].exit_latency = usec;
1492                 bxt_cstates[5].target_residency = usec;
1493         }
1494
1495         rdmsrl(MSR_PKGC10_IRTL, msr);
1496         usec = irtl_2_usec(msr);
1497         if (usec) {
1498                 bxt_cstates[6].exit_latency = usec;
1499                 bxt_cstates[6].target_residency = usec;
1500         }
1501
1502 }
1503
1504 /**
1505  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1506  *
1507  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1508  */
1509 static void __init sklh_idle_state_table_update(void)
1510 {
1511         unsigned long long msr;
1512         unsigned int eax, ebx, ecx, edx;
1513
1514
1515         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1516         if (max_cstate <= 7)
1517                 return;
1518
1519         /* if PC10 not present in CPUID.MWAIT.EDX */
1520         if ((mwait_substates & (0xF << 28)) == 0)
1521                 return;
1522
1523         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1524
1525         /* PC10 is not enabled in PKG C-state limit */
1526         if ((msr & 0xF) != 8)
1527                 return;
1528
1529         ecx = 0;
1530         cpuid(7, &eax, &ebx, &ecx, &edx);
1531
1532         /* if SGX is present */
1533         if (ebx & (1 << 2)) {
1534
1535                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1536
1537                 /* if SGX is enabled */
1538                 if (msr & (1 << 18))
1539                         return;
1540         }
1541
1542         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1543         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1544 }
1545
1546 /**
1547  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1548  * idle states table.
1549  */
1550 static void __init skx_idle_state_table_update(void)
1551 {
1552         unsigned long long msr;
1553
1554         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1555
1556         /*
1557          * 000b: C0/C1 (no package C-state support)
1558          * 001b: C2
1559          * 010b: C6 (non-retention)
1560          * 011b: C6 (retention)
1561          * 111b: No Package C state limits.
1562          */
1563         if ((msr & 0x7) < 2) {
1564                 /*
1565                  * Uses the CC6 + PC0 latency and 3 times of
1566                  * latency for target_residency if the PC6
1567                  * is disabled in BIOS. This is consistent
1568                  * with how intel_idle driver uses _CST
1569                  * to set the target_residency.
1570                  */
1571                 skx_cstates[2].exit_latency = 92;
1572                 skx_cstates[2].target_residency = 276;
1573         }
1574 }
1575
1576 /**
1577  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1578  */
1579 static void __init spr_idle_state_table_update(void)
1580 {
1581         unsigned long long msr;
1582
1583         /* Check if user prefers C1E over C1. */
1584         if ((preferred_states_mask & BIT(2)) &&
1585             !(preferred_states_mask & BIT(1))) {
1586                 /* Disable C1 and enable C1E. */
1587                 spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
1588                 spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1589
1590                 /* Enable C1E using the "C1E promotion" bit. */
1591                 c1e_promotion = C1E_PROMOTION_ENABLE;
1592         }
1593
1594         /*
1595          * By default, the C6 state assumes the worst-case scenario of package
1596          * C6. However, if PC6 is disabled, we update the numbers to match
1597          * core C6.
1598          */
1599         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1600
1601         /* Limit value 2 and above allow for PC6. */
1602         if ((msr & 0x7) < 2) {
1603                 spr_cstates[2].exit_latency = 190;
1604                 spr_cstates[2].target_residency = 600;
1605         }
1606 }
1607
1608 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1609 {
1610         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1611         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1612                                         MWAIT_SUBSTATE_MASK;
1613
1614         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1615         if (num_substates == 0)
1616                 return false;
1617
1618         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1619                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1620
1621         return true;
1622 }
1623
1624 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1625 {
1626         int cstate;
1627
1628         switch (boot_cpu_data.x86_model) {
1629         case INTEL_FAM6_IVYBRIDGE_X:
1630                 ivt_idle_state_table_update();
1631                 break;
1632         case INTEL_FAM6_ATOM_GOLDMONT:
1633         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1634                 bxt_idle_state_table_update();
1635                 break;
1636         case INTEL_FAM6_SKYLAKE:
1637                 sklh_idle_state_table_update();
1638                 break;
1639         case INTEL_FAM6_SKYLAKE_X:
1640                 skx_idle_state_table_update();
1641                 break;
1642         case INTEL_FAM6_SAPPHIRERAPIDS_X:
1643                 spr_idle_state_table_update();
1644                 break;
1645         }
1646
1647         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1648                 unsigned int mwait_hint;
1649
1650                 if (intel_idle_max_cstate_reached(cstate))
1651                         break;
1652
1653                 if (!cpuidle_state_table[cstate].enter &&
1654                     !cpuidle_state_table[cstate].enter_s2idle)
1655                         break;
1656
1657                 /* If marked as unusable, skip this state. */
1658                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1659                         pr_debug("state %s is disabled\n",
1660                                  cpuidle_state_table[cstate].name);
1661                         continue;
1662                 }
1663
1664                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1665                 if (!intel_idle_verify_cstate(mwait_hint))
1666                         continue;
1667
1668                 /* Structure copy. */
1669                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1670
1671                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1672                     ((icpu->use_acpi || force_use_acpi) &&
1673                      intel_idle_off_by_default(mwait_hint) &&
1674                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1675                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1676
1677                 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1678                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1679
1680                 drv->state_count++;
1681         }
1682
1683         if (icpu->byt_auto_demotion_disable_flag) {
1684                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1685                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1686         }
1687 }
1688
1689 /**
1690  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1691  * @drv: cpuidle driver structure to initialize.
1692  */
1693 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1694 {
1695         cpuidle_poll_state_init(drv);
1696
1697         if (disabled_states_mask & BIT(0))
1698                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1699
1700         drv->state_count = 1;
1701
1702         if (icpu)
1703                 intel_idle_init_cstates_icpu(drv);
1704         else
1705                 intel_idle_init_cstates_acpi(drv);
1706 }
1707
1708 static void auto_demotion_disable(void)
1709 {
1710         unsigned long long msr_bits;
1711
1712         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1713         msr_bits &= ~auto_demotion_disable_flags;
1714         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1715 }
1716
1717 static void c1e_promotion_enable(void)
1718 {
1719         unsigned long long msr_bits;
1720
1721         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1722         msr_bits |= 0x2;
1723         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1724 }
1725
1726 static void c1e_promotion_disable(void)
1727 {
1728         unsigned long long msr_bits;
1729
1730         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1731         msr_bits &= ~0x2;
1732         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1733 }
1734
1735 /**
1736  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1737  * @cpu: CPU to initialize.
1738  *
1739  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1740  * with the processor model flags.
1741  */
1742 static int intel_idle_cpu_init(unsigned int cpu)
1743 {
1744         struct cpuidle_device *dev;
1745
1746         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1747         dev->cpu = cpu;
1748
1749         if (cpuidle_register_device(dev)) {
1750                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1751                 return -EIO;
1752         }
1753
1754         if (auto_demotion_disable_flags)
1755                 auto_demotion_disable();
1756
1757         if (c1e_promotion == C1E_PROMOTION_ENABLE)
1758                 c1e_promotion_enable();
1759         else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1760                 c1e_promotion_disable();
1761
1762         return 0;
1763 }
1764
1765 static int intel_idle_cpu_online(unsigned int cpu)
1766 {
1767         struct cpuidle_device *dev;
1768
1769         if (!boot_cpu_has(X86_FEATURE_ARAT))
1770                 tick_broadcast_enable();
1771
1772         /*
1773          * Some systems can hotplug a cpu at runtime after
1774          * the kernel has booted, we have to initialize the
1775          * driver in this case
1776          */
1777         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1778         if (!dev->registered)
1779                 return intel_idle_cpu_init(cpu);
1780
1781         return 0;
1782 }
1783
1784 /**
1785  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1786  */
1787 static void __init intel_idle_cpuidle_devices_uninit(void)
1788 {
1789         int i;
1790
1791         for_each_online_cpu(i)
1792                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1793 }
1794
1795 static int __init intel_idle_init(void)
1796 {
1797         const struct x86_cpu_id *id;
1798         unsigned int eax, ebx, ecx;
1799         int retval;
1800
1801         /* Do not load intel_idle at all for now if idle= is passed */
1802         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1803                 return -ENODEV;
1804
1805         if (max_cstate == 0) {
1806                 pr_debug("disabled\n");
1807                 return -EPERM;
1808         }
1809
1810         id = x86_match_cpu(intel_idle_ids);
1811         if (id) {
1812                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1813                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
1814                         return -ENODEV;
1815                 }
1816         } else {
1817                 id = x86_match_cpu(intel_mwait_ids);
1818                 if (!id)
1819                         return -ENODEV;
1820         }
1821
1822         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1823                 return -ENODEV;
1824
1825         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1826
1827         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1828             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1829             !mwait_substates)
1830                         return -ENODEV;
1831
1832         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1833
1834         icpu = (const struct idle_cpu *)id->driver_data;
1835         if (icpu) {
1836                 cpuidle_state_table = icpu->state_table;
1837                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
1838                 if (icpu->disable_promotion_to_c1e)
1839                         c1e_promotion = C1E_PROMOTION_DISABLE;
1840                 if (icpu->use_acpi || force_use_acpi)
1841                         intel_idle_acpi_cst_extract();
1842         } else if (!intel_idle_acpi_cst_extract()) {
1843                 return -ENODEV;
1844         }
1845
1846         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1847                  boot_cpu_data.x86_model);
1848
1849         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1850         if (!intel_idle_cpuidle_devices)
1851                 return -ENOMEM;
1852
1853         intel_idle_cpuidle_driver_init(&intel_idle_driver);
1854
1855         retval = cpuidle_register_driver(&intel_idle_driver);
1856         if (retval) {
1857                 struct cpuidle_driver *drv = cpuidle_get_driver();
1858                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1859                        drv ? drv->name : "none");
1860                 goto init_driver_fail;
1861         }
1862
1863         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1864                                    intel_idle_cpu_online, NULL);
1865         if (retval < 0)
1866                 goto hp_setup_fail;
1867
1868         pr_debug("Local APIC timer is reliable in %s\n",
1869                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
1870
1871         return 0;
1872
1873 hp_setup_fail:
1874         intel_idle_cpuidle_devices_uninit();
1875         cpuidle_unregister_driver(&intel_idle_driver);
1876 init_driver_fail:
1877         free_percpu(intel_idle_cpuidle_devices);
1878         return retval;
1879
1880 }
1881 device_initcall(intel_idle_init);
1882
/*
 * We are not really modular, but we used to support that.  This means that
 * we also support "intel_idle.max_cstate=..." at boot, as well as a read-only
 * export of it at /sys/module/intel_idle/parameters/max_cstate, so using
 * module_param is (currently) the easiest way to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on the
 * Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states: the
 * selected C-states will be registered, while the other C-states from the
 * mutually exclusive group will not be registered. If the platform has no
 * mutually exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");