Merge tag 'reset-for-v5.3' of git://git.pengutronix.de/git/pza/linux into arm/drivers
[sfrench/cifs-2.6.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  */
8
9 /*
10  * intel_idle is a cpuidle driver that loads on specific Intel processors
11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
12  * make Linux more efficient on these processors, as intel_idle knows
13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14  */
15
16 /*
17  * Design Assumptions
18  *
19  * All CPUs have same idle states as boot CPU
20  *
21  * Chipset BM_STS (bus master status) bit is a NOP
22  *      for preventing entry into deep C-states
23  */
24
25 /*
26  * Known limitations
27  *
28  * The driver currently initializes for_each_online_cpu() upon modprobe.
29  * It is unaware of subsequent processors hot-added to the system.
30  * This means that if you boot with maxcpus=n and later online
31  * processors above n, those processors will use C1 only.
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* DEBUG is defined, so pr_debug() statements are enabled; comment it out to silence them */
40 #define DEBUG
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/kernel.h>
45 #include <linux/cpuidle.h>
46 #include <linux/tick.h>
47 #include <trace/events/power.h>
48 #include <linux/sched.h>
49 #include <linux/notifier.h>
50 #include <linux/cpu.h>
51 #include <linux/moduleparam.h>
52 #include <asm/cpu_device_id.h>
53 #include <asm/intel-family.h>
54 #include <asm/mwait.h>
55 #include <asm/msr.h>
56
57 #define INTEL_IDLE_VERSION "0.4.1"
58
59 static struct cpuidle_driver intel_idle_driver = {
60         .name = "intel_idle",
61         .owner = THIS_MODULE,
62 };
63 /* intel_idle.max_cstate=0 disables driver */
64 static int max_cstate = CPUIDLE_STATE_MAX - 1;
65
66 static unsigned int mwait_substates;
67
68 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
69 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
70 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
71
/*
 * Bundles an idle-state table with a few hardware tuning switches.
 * NOTE(review): presumably one instance exists per supported CPU model;
 * confirm against the users of this struct.
 */
struct idle_cpu {
        /* Idle-state table associated with this description. */
        struct cpuidle_state *state_table;

        /*
         * Hardware C-state auto-demotion may not always be optimal.
         * Indicate which enable bits to clear here.
         */
        unsigned long auto_demotion_disable_flags;
        /* NOTE(review): the two switches below are consumed outside this chunk. */
        bool byt_auto_demotion_disable_flag;
        bool disable_promotion_to_c1e;
};
83
84 static const struct idle_cpu *icpu;
85 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
86 static int intel_idle(struct cpuidle_device *dev,
87                         struct cpuidle_driver *drv, int index);
88 static void intel_idle_s2idle(struct cpuidle_device *dev,
89                               struct cpuidle_driver *drv, int index);
90 static struct cpuidle_state *cpuidle_state_table;
91
92 /*
93  * Set this flag for states where the HW flushes the TLB for us
94  * and so we don't need cross-calls to keep it consistent.
95  * If this flag is set, SW flushes the TLB, so even if the
96  * HW doesn't do the flushing, this flag is safe to use.
97  */
#define CPUIDLE_FLAG_TLB_FLUSHED        0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */

/* Extract the 8-bit MWAIT hint kept in bits 31:24 of a state's flags. */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)

/*
 * Pack an 8-bit MWAIT hint into bits 31:24 of a flags word.
 *
 * The argument is parenthesized so that a compound expression is masked as a
 * whole, and the arithmetic is done in unsigned int so that a hint with its
 * top bit set (>= 0x80) does not overflow a signed int when shifted, and so
 * that a wider argument cannot leak bits above bit 31.
 */
#define MWAIT2flg(eax) (((unsigned int)(eax) & 0xFF) << 24)
109
110 /*
111  * Each table lists its states from shallowest to deepest, starting at
112  * index 0; the MWAIT hint of a state is packed into its .flags.
113  * An entry with a NULL .enter closes each table.
114  */
115 static struct cpuidle_state nehalem_cstates[] = {
116         {
117                 .name = "C1",
118                 .desc = "MWAIT 0x00",
119                 .flags = MWAIT2flg(0x00),
120                 .exit_latency = 3,
121                 .target_residency = 6,
122                 .enter = &intel_idle,
123                 .enter_s2idle = intel_idle_s2idle, },
124         {
125                 .name = "C1E",
126                 .desc = "MWAIT 0x01",
127                 .flags = MWAIT2flg(0x01),
128                 .exit_latency = 10,
129                 .target_residency = 20,
130                 .enter = &intel_idle,
131                 .enter_s2idle = intel_idle_s2idle, },
132         {
133                 .name = "C3",
134                 .desc = "MWAIT 0x10",
135                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
136                 .exit_latency = 20,
137                 .target_residency = 80,
138                 .enter = &intel_idle,
139                 .enter_s2idle = intel_idle_s2idle, },
140         {
141                 .name = "C6",
142                 .desc = "MWAIT 0x20",
143                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
144                 .exit_latency = 200,
145                 .target_residency = 800,
146                 .enter = &intel_idle,
147                 .enter_s2idle = intel_idle_s2idle, },
148         {
149                 .enter = NULL }
150 };
151
152 static struct cpuidle_state snb_cstates[] = {
153         {
154                 .name = "C1",
155                 .desc = "MWAIT 0x00",
156                 .flags = MWAIT2flg(0x00),
157                 .exit_latency = 2,
158                 .target_residency = 2,
159                 .enter = &intel_idle,
160                 .enter_s2idle = intel_idle_s2idle, },
161         {
162                 .name = "C1E",
163                 .desc = "MWAIT 0x01",
164                 .flags = MWAIT2flg(0x01),
165                 .exit_latency = 10,
166                 .target_residency = 20,
167                 .enter = &intel_idle,
168                 .enter_s2idle = intel_idle_s2idle, },
169         {
170                 .name = "C3",
171                 .desc = "MWAIT 0x10",
172                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
173                 .exit_latency = 80,
174                 .target_residency = 211,
175                 .enter = &intel_idle,
176                 .enter_s2idle = intel_idle_s2idle, },
177         {
178                 .name = "C6",
179                 .desc = "MWAIT 0x20",
180                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
181                 .exit_latency = 104,
182                 .target_residency = 345,
183                 .enter = &intel_idle,
184                 .enter_s2idle = intel_idle_s2idle, },
185         {
186                 .name = "C7",
187                 .desc = "MWAIT 0x30",
188                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
189                 .exit_latency = 109,
190                 .target_residency = 345,
191                 .enter = &intel_idle,
192                 .enter_s2idle = intel_idle_s2idle, },
193         {
194                 .enter = NULL }
195 };
196
197 static struct cpuidle_state byt_cstates[] = {
198         {
199                 .name = "C1",
200                 .desc = "MWAIT 0x00",
201                 .flags = MWAIT2flg(0x00),
202                 .exit_latency = 1,
203                 .target_residency = 1,
204                 .enter = &intel_idle,
205                 .enter_s2idle = intel_idle_s2idle, },
206         {
207                 .name = "C6N",
208                 .desc = "MWAIT 0x58",
209                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
210                 .exit_latency = 300,
211                 .target_residency = 275,
212                 .enter = &intel_idle,
213                 .enter_s2idle = intel_idle_s2idle, },
214         {
215                 .name = "C6S",
216                 .desc = "MWAIT 0x52",
217                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
218                 .exit_latency = 500,
219                 .target_residency = 560,
220                 .enter = &intel_idle,
221                 .enter_s2idle = intel_idle_s2idle, },
222         {
223                 .name = "C7",
224                 .desc = "MWAIT 0x60",
225                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
226                 .exit_latency = 1200,
227                 .target_residency = 4000,
228                 .enter = &intel_idle,
229                 .enter_s2idle = intel_idle_s2idle, },
230         {
231                 .name = "C7S",
232                 .desc = "MWAIT 0x64",
233                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
234                 .exit_latency = 10000,
235                 .target_residency = 20000,
236                 .enter = &intel_idle,
237                 .enter_s2idle = intel_idle_s2idle, },
238         {
239                 .enter = NULL }
240 };
241
242 static struct cpuidle_state cht_cstates[] = {
243         {
244                 .name = "C1",
245                 .desc = "MWAIT 0x00",
246                 .flags = MWAIT2flg(0x00),
247                 .exit_latency = 1,
248                 .target_residency = 1,
249                 .enter = &intel_idle,
250                 .enter_s2idle = intel_idle_s2idle, },
251         {
252                 .name = "C6N",
253                 .desc = "MWAIT 0x58",
254                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
255                 .exit_latency = 80,
256                 .target_residency = 275,
257                 .enter = &intel_idle,
258                 .enter_s2idle = intel_idle_s2idle, },
259         {
260                 .name = "C6S",
261                 .desc = "MWAIT 0x52",
262                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
263                 .exit_latency = 200,
264                 .target_residency = 560,
265                 .enter = &intel_idle,
266                 .enter_s2idle = intel_idle_s2idle, },
267         {
268                 .name = "C7",
269                 .desc = "MWAIT 0x60",
270                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
271                 .exit_latency = 1200,
272                 .target_residency = 4000,
273                 .enter = &intel_idle,
274                 .enter_s2idle = intel_idle_s2idle, },
275         {
276                 .name = "C7S",
277                 .desc = "MWAIT 0x64",
278                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
279                 .exit_latency = 10000,
280                 .target_residency = 20000,
281                 .enter = &intel_idle,
282                 .enter_s2idle = intel_idle_s2idle, },
283         {
284                 .enter = NULL }
285 };
286
287 static struct cpuidle_state ivb_cstates[] = {
288         {
289                 .name = "C1",
290                 .desc = "MWAIT 0x00",
291                 .flags = MWAIT2flg(0x00),
292                 .exit_latency = 1,
293                 .target_residency = 1,
294                 .enter = &intel_idle,
295                 .enter_s2idle = intel_idle_s2idle, },
296         {
297                 .name = "C1E",
298                 .desc = "MWAIT 0x01",
299                 .flags = MWAIT2flg(0x01),
300                 .exit_latency = 10,
301                 .target_residency = 20,
302                 .enter = &intel_idle,
303                 .enter_s2idle = intel_idle_s2idle, },
304         {
305                 .name = "C3",
306                 .desc = "MWAIT 0x10",
307                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
308                 .exit_latency = 59,
309                 .target_residency = 156,
310                 .enter = &intel_idle,
311                 .enter_s2idle = intel_idle_s2idle, },
312         {
313                 .name = "C6",
314                 .desc = "MWAIT 0x20",
315                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
316                 .exit_latency = 80,
317                 .target_residency = 300,
318                 .enter = &intel_idle,
319                 .enter_s2idle = intel_idle_s2idle, },
320         {
321                 .name = "C7",
322                 .desc = "MWAIT 0x30",
323                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
324                 .exit_latency = 87,
325                 .target_residency = 300,
326                 .enter = &intel_idle,
327                 .enter_s2idle = intel_idle_s2idle, },
328         {
329                 .enter = NULL }
330 };
331
332 static struct cpuidle_state ivt_cstates[] = {
333         {
334                 .name = "C1",
335                 .desc = "MWAIT 0x00",
336                 .flags = MWAIT2flg(0x00),
337                 .exit_latency = 1,
338                 .target_residency = 1,
339                 .enter = &intel_idle,
340                 .enter_s2idle = intel_idle_s2idle, },
341         {
342                 .name = "C1E",
343                 .desc = "MWAIT 0x01",
344                 .flags = MWAIT2flg(0x01),
345                 .exit_latency = 10,
346                 .target_residency = 80,
347                 .enter = &intel_idle,
348                 .enter_s2idle = intel_idle_s2idle, },
349         {
350                 .name = "C3",
351                 .desc = "MWAIT 0x10",
352                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
353                 .exit_latency = 59,
354                 .target_residency = 156,
355                 .enter = &intel_idle,
356                 .enter_s2idle = intel_idle_s2idle, },
357         {
358                 .name = "C6",
359                 .desc = "MWAIT 0x20",
360                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
361                 .exit_latency = 82,
362                 .target_residency = 300,
363                 .enter = &intel_idle,
364                 .enter_s2idle = intel_idle_s2idle, },
365         {
366                 .enter = NULL }
367 };
368
369 static struct cpuidle_state ivt_cstates_4s[] = {
370         {
371                 .name = "C1",
372                 .desc = "MWAIT 0x00",
373                 .flags = MWAIT2flg(0x00),
374                 .exit_latency = 1,
375                 .target_residency = 1,
376                 .enter = &intel_idle,
377                 .enter_s2idle = intel_idle_s2idle, },
378         {
379                 .name = "C1E",
380                 .desc = "MWAIT 0x01",
381                 .flags = MWAIT2flg(0x01),
382                 .exit_latency = 10,
383                 .target_residency = 250,
384                 .enter = &intel_idle,
385                 .enter_s2idle = intel_idle_s2idle, },
386         {
387                 .name = "C3",
388                 .desc = "MWAIT 0x10",
389                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
390                 .exit_latency = 59,
391                 .target_residency = 300,
392                 .enter = &intel_idle,
393                 .enter_s2idle = intel_idle_s2idle, },
394         {
395                 .name = "C6",
396                 .desc = "MWAIT 0x20",
397                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
398                 .exit_latency = 84,
399                 .target_residency = 400,
400                 .enter = &intel_idle,
401                 .enter_s2idle = intel_idle_s2idle, },
402         {
403                 .enter = NULL }
404 };
405
406 static struct cpuidle_state ivt_cstates_8s[] = {
407         {
408                 .name = "C1",
409                 .desc = "MWAIT 0x00",
410                 .flags = MWAIT2flg(0x00),
411                 .exit_latency = 1,
412                 .target_residency = 1,
413                 .enter = &intel_idle,
414                 .enter_s2idle = intel_idle_s2idle, },
415         {
416                 .name = "C1E",
417                 .desc = "MWAIT 0x01",
418                 .flags = MWAIT2flg(0x01),
419                 .exit_latency = 10,
420                 .target_residency = 500,
421                 .enter = &intel_idle,
422                 .enter_s2idle = intel_idle_s2idle, },
423         {
424                 .name = "C3",
425                 .desc = "MWAIT 0x10",
426                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
427                 .exit_latency = 59,
428                 .target_residency = 600,
429                 .enter = &intel_idle,
430                 .enter_s2idle = intel_idle_s2idle, },
431         {
432                 .name = "C6",
433                 .desc = "MWAIT 0x20",
434                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
435                 .exit_latency = 88,
436                 .target_residency = 700,
437                 .enter = &intel_idle,
438                 .enter_s2idle = intel_idle_s2idle, },
439         {
440                 .enter = NULL }
441 };
442
443 static struct cpuidle_state hsw_cstates[] = {
444         {
445                 .name = "C1",
446                 .desc = "MWAIT 0x00",
447                 .flags = MWAIT2flg(0x00),
448                 .exit_latency = 2,
449                 .target_residency = 2,
450                 .enter = &intel_idle,
451                 .enter_s2idle = intel_idle_s2idle, },
452         {
453                 .name = "C1E",
454                 .desc = "MWAIT 0x01",
455                 .flags = MWAIT2flg(0x01),
456                 .exit_latency = 10,
457                 .target_residency = 20,
458                 .enter = &intel_idle,
459                 .enter_s2idle = intel_idle_s2idle, },
460         {
461                 .name = "C3",
462                 .desc = "MWAIT 0x10",
463                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
464                 .exit_latency = 33,
465                 .target_residency = 100,
466                 .enter = &intel_idle,
467                 .enter_s2idle = intel_idle_s2idle, },
468         {
469                 .name = "C6",
470                 .desc = "MWAIT 0x20",
471                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
472                 .exit_latency = 133,
473                 .target_residency = 400,
474                 .enter = &intel_idle,
475                 .enter_s2idle = intel_idle_s2idle, },
476         {
477                 .name = "C7s",
478                 .desc = "MWAIT 0x32",
479                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
480                 .exit_latency = 166,
481                 .target_residency = 500,
482                 .enter = &intel_idle,
483                 .enter_s2idle = intel_idle_s2idle, },
484         {
485                 .name = "C8",
486                 .desc = "MWAIT 0x40",
487                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
488                 .exit_latency = 300,
489                 .target_residency = 900,
490                 .enter = &intel_idle,
491                 .enter_s2idle = intel_idle_s2idle, },
492         {
493                 .name = "C9",
494                 .desc = "MWAIT 0x50",
495                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
496                 .exit_latency = 600,
497                 .target_residency = 1800,
498                 .enter = &intel_idle,
499                 .enter_s2idle = intel_idle_s2idle, },
500         {
501                 .name = "C10",
502                 .desc = "MWAIT 0x60",
503                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
504                 .exit_latency = 2600,
505                 .target_residency = 7700,
506                 .enter = &intel_idle,
507                 .enter_s2idle = intel_idle_s2idle, },
508         {
509                 .enter = NULL }
510 };
511 static struct cpuidle_state bdw_cstates[] = {
512         {
513                 .name = "C1",
514                 .desc = "MWAIT 0x00",
515                 .flags = MWAIT2flg(0x00),
516                 .exit_latency = 2,
517                 .target_residency = 2,
518                 .enter = &intel_idle,
519                 .enter_s2idle = intel_idle_s2idle, },
520         {
521                 .name = "C1E",
522                 .desc = "MWAIT 0x01",
523                 .flags = MWAIT2flg(0x01),
524                 .exit_latency = 10,
525                 .target_residency = 20,
526                 .enter = &intel_idle,
527                 .enter_s2idle = intel_idle_s2idle, },
528         {
529                 .name = "C3",
530                 .desc = "MWAIT 0x10",
531                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
532                 .exit_latency = 40,
533                 .target_residency = 100,
534                 .enter = &intel_idle,
535                 .enter_s2idle = intel_idle_s2idle, },
536         {
537                 .name = "C6",
538                 .desc = "MWAIT 0x20",
539                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
540                 .exit_latency = 133,
541                 .target_residency = 400,
542                 .enter = &intel_idle,
543                 .enter_s2idle = intel_idle_s2idle, },
544         {
545                 .name = "C7s",
546                 .desc = "MWAIT 0x32",
547                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
548                 .exit_latency = 166,
549                 .target_residency = 500,
550                 .enter = &intel_idle,
551                 .enter_s2idle = intel_idle_s2idle, },
552         {
553                 .name = "C8",
554                 .desc = "MWAIT 0x40",
555                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
556                 .exit_latency = 300,
557                 .target_residency = 900,
558                 .enter = &intel_idle,
559                 .enter_s2idle = intel_idle_s2idle, },
560         {
561                 .name = "C9",
562                 .desc = "MWAIT 0x50",
563                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
564                 .exit_latency = 600,
565                 .target_residency = 1800,
566                 .enter = &intel_idle,
567                 .enter_s2idle = intel_idle_s2idle, },
568         {
569                 .name = "C10",
570                 .desc = "MWAIT 0x60",
571                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
572                 .exit_latency = 2600,
573                 .target_residency = 7700,
574                 .enter = &intel_idle,
575                 .enter_s2idle = intel_idle_s2idle, },
576         {
577                 .enter = NULL }
578 };
579
580 static struct cpuidle_state skl_cstates[] = {
581         {
582                 .name = "C1",
583                 .desc = "MWAIT 0x00",
584                 .flags = MWAIT2flg(0x00),
585                 .exit_latency = 2,
586                 .target_residency = 2,
587                 .enter = &intel_idle,
588                 .enter_s2idle = intel_idle_s2idle, },
589         {
590                 .name = "C1E",
591                 .desc = "MWAIT 0x01",
592                 .flags = MWAIT2flg(0x01),
593                 .exit_latency = 10,
594                 .target_residency = 20,
595                 .enter = &intel_idle,
596                 .enter_s2idle = intel_idle_s2idle, },
597         {
598                 .name = "C3",
599                 .desc = "MWAIT 0x10",
600                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
601                 .exit_latency = 70,
602                 .target_residency = 100,
603                 .enter = &intel_idle,
604                 .enter_s2idle = intel_idle_s2idle, },
605         {
606                 .name = "C6",
607                 .desc = "MWAIT 0x20",
608                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
609                 .exit_latency = 85,
610                 .target_residency = 200,
611                 .enter = &intel_idle,
612                 .enter_s2idle = intel_idle_s2idle, },
613         {
614                 .name = "C7s",
615                 .desc = "MWAIT 0x33",
616                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
617                 .exit_latency = 124,
618                 .target_residency = 800,
619                 .enter = &intel_idle,
620                 .enter_s2idle = intel_idle_s2idle, },
621         {
622                 .name = "C8",
623                 .desc = "MWAIT 0x40",
624                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
625                 .exit_latency = 200,
626                 .target_residency = 800,
627                 .enter = &intel_idle,
628                 .enter_s2idle = intel_idle_s2idle, },
629         {
630                 .name = "C9",
631                 .desc = "MWAIT 0x50",
632                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
633                 .exit_latency = 480,
634                 .target_residency = 5000,
635                 .enter = &intel_idle,
636                 .enter_s2idle = intel_idle_s2idle, },
637         {
638                 .name = "C10",
639                 .desc = "MWAIT 0x60",
640                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
641                 .exit_latency = 890,
642                 .target_residency = 5000,
643                 .enter = &intel_idle,
644                 .enter_s2idle = intel_idle_s2idle, },
645         {
646                 .enter = NULL }
647 };
648
649 static struct cpuidle_state skx_cstates[] = {
650         {
651                 .name = "C1",
652                 .desc = "MWAIT 0x00",
653                 .flags = MWAIT2flg(0x00),
654                 .exit_latency = 2,
655                 .target_residency = 2,
656                 .enter = &intel_idle,
657                 .enter_s2idle = intel_idle_s2idle, },
658         {
659                 .name = "C1E",
660                 .desc = "MWAIT 0x01",
661                 .flags = MWAIT2flg(0x01),
662                 .exit_latency = 10,
663                 .target_residency = 20,
664                 .enter = &intel_idle,
665                 .enter_s2idle = intel_idle_s2idle, },
666         {
667                 .name = "C6",
668                 .desc = "MWAIT 0x20",
669                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
670                 .exit_latency = 133,
671                 .target_residency = 600,
672                 .enter = &intel_idle,
673                 .enter_s2idle = intel_idle_s2idle, },
674         {
675                 .enter = NULL }
676 };
677
678 static struct cpuidle_state atom_cstates[] = {
679         {
680                 .name = "C1E",
681                 .desc = "MWAIT 0x00",
682                 .flags = MWAIT2flg(0x00),
683                 .exit_latency = 10,
684                 .target_residency = 20,
685                 .enter = &intel_idle,
686                 .enter_s2idle = intel_idle_s2idle, },
687         {
688                 .name = "C2",
689                 .desc = "MWAIT 0x10",
690                 .flags = MWAIT2flg(0x10),
691                 .exit_latency = 20,
692                 .target_residency = 80,
693                 .enter = &intel_idle,
694                 .enter_s2idle = intel_idle_s2idle, },
695         {
696                 .name = "C4",
697                 .desc = "MWAIT 0x30",
698                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
699                 .exit_latency = 100,
700                 .target_residency = 400,
701                 .enter = &intel_idle,
702                 .enter_s2idle = intel_idle_s2idle, },
703         {
704                 .name = "C6",
705                 .desc = "MWAIT 0x52",
706                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
707                 .exit_latency = 140,
708                 .target_residency = 560,
709                 .enter = &intel_idle,
710                 .enter_s2idle = intel_idle_s2idle, },
711         {
712                 .enter = NULL }
713 };
714 static struct cpuidle_state tangier_cstates[] = {
715         {
716                 .name = "C1",
717                 .desc = "MWAIT 0x00",
718                 .flags = MWAIT2flg(0x00),
719                 .exit_latency = 1,
720                 .target_residency = 4,
721                 .enter = &intel_idle,
722                 .enter_s2idle = intel_idle_s2idle, },
723         {
724                 .name = "C4",
725                 .desc = "MWAIT 0x30",
726                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
727                 .exit_latency = 100,
728                 .target_residency = 400,
729                 .enter = &intel_idle,
730                 .enter_s2idle = intel_idle_s2idle, },
731         {
732                 .name = "C6",
733                 .desc = "MWAIT 0x52",
734                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
735                 .exit_latency = 140,
736                 .target_residency = 560,
737                 .enter = &intel_idle,
738                 .enter_s2idle = intel_idle_s2idle, },
739         {
740                 .name = "C7",
741                 .desc = "MWAIT 0x60",
742                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
743                 .exit_latency = 1200,
744                 .target_residency = 4000,
745                 .enter = &intel_idle,
746                 .enter_s2idle = intel_idle_s2idle, },
747         {
748                 .name = "C9",
749                 .desc = "MWAIT 0x64",
750                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
751                 .exit_latency = 10000,
752                 .target_residency = 20000,
753                 .enter = &intel_idle,
754                 .enter_s2idle = intel_idle_s2idle, },
755         {
756                 .enter = NULL }
757 };
758 static struct cpuidle_state avn_cstates[] = {
759         {
760                 .name = "C1",
761                 .desc = "MWAIT 0x00",
762                 .flags = MWAIT2flg(0x00),
763                 .exit_latency = 2,
764                 .target_residency = 2,
765                 .enter = &intel_idle,
766                 .enter_s2idle = intel_idle_s2idle, },
767         {
768                 .name = "C6",
769                 .desc = "MWAIT 0x51",
770                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
771                 .exit_latency = 15,
772                 .target_residency = 45,
773                 .enter = &intel_idle,
774                 .enter_s2idle = intel_idle_s2idle, },
775         {
776                 .enter = NULL }
777 };
778 static struct cpuidle_state knl_cstates[] = {
779         {
780                 .name = "C1",
781                 .desc = "MWAIT 0x00",
782                 .flags = MWAIT2flg(0x00),
783                 .exit_latency = 1,
784                 .target_residency = 2,
785                 .enter = &intel_idle,
786                 .enter_s2idle = intel_idle_s2idle },
787         {
788                 .name = "C6",
789                 .desc = "MWAIT 0x10",
790                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
791                 .exit_latency = 120,
792                 .target_residency = 500,
793                 .enter = &intel_idle,
794                 .enter_s2idle = intel_idle_s2idle },
795         {
796                 .enter = NULL }
797 };
798
799 static struct cpuidle_state bxt_cstates[] = {
800         {
801                 .name = "C1",
802                 .desc = "MWAIT 0x00",
803                 .flags = MWAIT2flg(0x00),
804                 .exit_latency = 2,
805                 .target_residency = 2,
806                 .enter = &intel_idle,
807                 .enter_s2idle = intel_idle_s2idle, },
808         {
809                 .name = "C1E",
810                 .desc = "MWAIT 0x01",
811                 .flags = MWAIT2flg(0x01),
812                 .exit_latency = 10,
813                 .target_residency = 20,
814                 .enter = &intel_idle,
815                 .enter_s2idle = intel_idle_s2idle, },
816         {
817                 .name = "C6",
818                 .desc = "MWAIT 0x20",
819                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
820                 .exit_latency = 133,
821                 .target_residency = 133,
822                 .enter = &intel_idle,
823                 .enter_s2idle = intel_idle_s2idle, },
824         {
825                 .name = "C7s",
826                 .desc = "MWAIT 0x31",
827                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
828                 .exit_latency = 155,
829                 .target_residency = 155,
830                 .enter = &intel_idle,
831                 .enter_s2idle = intel_idle_s2idle, },
832         {
833                 .name = "C8",
834                 .desc = "MWAIT 0x40",
835                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
836                 .exit_latency = 1000,
837                 .target_residency = 1000,
838                 .enter = &intel_idle,
839                 .enter_s2idle = intel_idle_s2idle, },
840         {
841                 .name = "C9",
842                 .desc = "MWAIT 0x50",
843                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
844                 .exit_latency = 2000,
845                 .target_residency = 2000,
846                 .enter = &intel_idle,
847                 .enter_s2idle = intel_idle_s2idle, },
848         {
849                 .name = "C10",
850                 .desc = "MWAIT 0x60",
851                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
852                 .exit_latency = 10000,
853                 .target_residency = 10000,
854                 .enter = &intel_idle,
855                 .enter_s2idle = intel_idle_s2idle, },
856         {
857                 .enter = NULL }
858 };
859
860 static struct cpuidle_state dnv_cstates[] = {
861         {
862                 .name = "C1",
863                 .desc = "MWAIT 0x00",
864                 .flags = MWAIT2flg(0x00),
865                 .exit_latency = 2,
866                 .target_residency = 2,
867                 .enter = &intel_idle,
868                 .enter_s2idle = intel_idle_s2idle, },
869         {
870                 .name = "C1E",
871                 .desc = "MWAIT 0x01",
872                 .flags = MWAIT2flg(0x01),
873                 .exit_latency = 10,
874                 .target_residency = 20,
875                 .enter = &intel_idle,
876                 .enter_s2idle = intel_idle_s2idle, },
877         {
878                 .name = "C6",
879                 .desc = "MWAIT 0x20",
880                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
881                 .exit_latency = 50,
882                 .target_residency = 500,
883                 .enter = &intel_idle,
884                 .enter_s2idle = intel_idle_s2idle, },
885         {
886                 .enter = NULL }
887 };
888
889 /**
890  * intel_idle
891  * @dev: cpuidle_device
892  * @drv: cpuidle driver
893  * @index: index of cpuidle state
894  *
895  * Must be called under local_irq_disable().
896  */
897 static __cpuidle int intel_idle(struct cpuidle_device *dev,
898                                 struct cpuidle_driver *drv, int index)
899 {
900         unsigned long ecx = 1; /* break on interrupt flag */
901         struct cpuidle_state *state = &drv->states[index];
902         unsigned long eax = flg2MWAIT(state->flags);
903         unsigned int cstate;
904         bool uninitialized_var(tick);
905         int cpu = smp_processor_id();
906
907         /*
908          * leave_mm() to avoid costly and often unnecessary wakeups
909          * for flushing the user TLB's associated with the active mm.
910          */
911         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
912                 leave_mm(cpu);
913
914         if (!static_cpu_has(X86_FEATURE_ARAT)) {
915                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
916                                 MWAIT_CSTATE_MASK) + 1;
917                 tick = false;
918                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
919                         tick = true;
920                         tick_broadcast_enter();
921                 }
922         }
923
924         mwait_idle_with_hints(eax, ecx);
925
926         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
927                 tick_broadcast_exit();
928
929         return index;
930 }
931
932 /**
933  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
934  * @dev: cpuidle_device
935  * @drv: cpuidle driver
936  * @index: state index
937  */
938 static void intel_idle_s2idle(struct cpuidle_device *dev,
939                              struct cpuidle_driver *drv, int index)
940 {
941         unsigned long ecx = 1; /* break on interrupt flag */
942         unsigned long eax = flg2MWAIT(drv->states[index].flags);
943
944         mwait_idle_with_hints(eax, ecx);
945 }
946
947 static void __setup_broadcast_timer(bool on)
948 {
949         if (on)
950                 tick_broadcast_enable();
951         else
952                 tick_broadcast_disable();
953 }
954
955 static void auto_demotion_disable(void)
956 {
957         unsigned long long msr_bits;
958
959         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
960         msr_bits &= ~(icpu->auto_demotion_disable_flags);
961         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
962 }
963 static void c1e_promotion_disable(void)
964 {
965         unsigned long long msr_bits;
966
967         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
968         msr_bits &= ~0x2;
969         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
970 }
971
/*
 * Per-CPU-model descriptors.  intel_idle_ids[] points at one of these
 * through its driver_data, and intel_idle_probe() stores the match in icpu.
 *
 * .state_table                     default C-state table for the model
 * .auto_demotion_disable_flags     bits that auto_demotion_disable() clears
 *                                  in MSR_PKG_CST_CONFIG_CONTROL
 * .disable_promotion_to_c1e        when set, intel_idle_cpu_init() calls
 *                                  c1e_promotion_disable()
 * .byt_auto_demotion_disable_flag  when set, driver init writes 0 to
 *                                  MSR_CC6_DEMOTION_POLICY_CONFIG and
 *                                  MSR_MC6_DEMOTION_POLICY_CONFIG
 */
static const struct idle_cpu idle_cpu_nehalem = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_atom = {
        .state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier = {
        .state_table = tangier_cstates,
};

/* shares atom_cstates with idle_cpu_atom but also disables C6 auto-demotion */
static const struct idle_cpu idle_cpu_lincroft = {
        .state_table = atom_cstates,
        .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
        .state_table = snb_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_byt = {
        .state_table = byt_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
        .state_table = cht_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
        .state_table = ivb_cstates,
        .disable_promotion_to_c1e = true,
};

/* default table only; ivt_idle_state_table_update() may pick another one */
static const struct idle_cpu idle_cpu_ivt = {
        .state_table = ivt_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsw = {
        .state_table = hsw_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdw = {
        .state_table = bdw_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skl = {
        .state_table = skl_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
        .state_table = skx_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_avn = {
        .state_table = avn_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_knl = {
        .state_table = knl_cstates,
};

static const struct idle_cpu idle_cpu_bxt = {
        .state_table = bxt_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
        .state_table = dnv_cstates,
        .disable_promotion_to_c1e = true,
};
1056
/*
 * CPU models handled by this driver.  intel_idle_probe() hands this table
 * to x86_match_cpu() and uses the driver_data of the returned entry as the
 * struct idle_cpu for the system.  The empty entry terminates the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
        INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
        INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nehalem),
        INTEL_CPU_FAM6(ATOM_BONNELL,            idle_cpu_atom),
        INTEL_CPU_FAM6(ATOM_BONNELL_MID,        idle_cpu_lincroft),
        INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nehalem),
        INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
        INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snb),
        INTEL_CPU_FAM6(ATOM_SALTWELL,           idle_cpu_atom),
        INTEL_CPU_FAM6(ATOM_SILVERMONT,         idle_cpu_byt),
        INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     idle_cpu_tangier),
        INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
        INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
        INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
        INTEL_CPU_FAM6(HASWELL_CORE,            idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_ULT,             idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_GT3E,            idle_cpu_hsw),
        INTEL_CPU_FAM6(ATOM_SILVERMONT_X,       idle_cpu_avn),
        INTEL_CPU_FAM6(BROADWELL_CORE,          idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_GT3E,          idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_XEON_D,        idle_cpu_bdw),
        INTEL_CPU_FAM6(SKYLAKE_MOBILE,          idle_cpu_skl),
        INTEL_CPU_FAM6(SKYLAKE_DESKTOP,         idle_cpu_skl),
        INTEL_CPU_FAM6(KABYLAKE_MOBILE,         idle_cpu_skl),
        INTEL_CPU_FAM6(KABYLAKE_DESKTOP,        idle_cpu_skl),
        INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
        INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
        INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
        INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
        INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,      idle_cpu_bxt),
        INTEL_CPU_FAM6(ATOM_GOLDMONT_X,         idle_cpu_dnv),
        INTEL_CPU_FAM6(ATOM_TREMONT_X,          idle_cpu_dnv),
        {}
};
1097
1098 /*
1099  * intel_idle_probe()
1100  */
1101 static int __init intel_idle_probe(void)
1102 {
1103         unsigned int eax, ebx, ecx;
1104         const struct x86_cpu_id *id;
1105
1106         if (max_cstate == 0) {
1107                 pr_debug("disabled\n");
1108                 return -EPERM;
1109         }
1110
1111         id = x86_match_cpu(intel_idle_ids);
1112         if (!id) {
1113                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1114                     boot_cpu_data.x86 == 6)
1115                         pr_debug("does not run on family %d model %d\n",
1116                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1117                 return -ENODEV;
1118         }
1119
1120         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1121                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1122                 return -ENODEV;
1123         }
1124
1125         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1126                 return -ENODEV;
1127
1128         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1129
1130         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1131             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1132             !mwait_substates)
1133                         return -ENODEV;
1134
1135         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1136
1137         icpu = (const struct idle_cpu *)id->driver_data;
1138         cpuidle_state_table = icpu->state_table;
1139
1140         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1141                  boot_cpu_data.x86_model);
1142
1143         return 0;
1144 }
1145
1146 /*
1147  * intel_idle_cpuidle_devices_uninit()
1148  * Unregisters the cpuidle devices.
1149  */
1150 static void intel_idle_cpuidle_devices_uninit(void)
1151 {
1152         int i;
1153         struct cpuidle_device *dev;
1154
1155         for_each_online_cpu(i) {
1156                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1157                 cpuidle_unregister_device(dev);
1158         }
1159 }
1160
1161 /*
1162  * ivt_idle_state_table_update(void)
1163  *
1164  * Tune IVT multi-socket targets
1165  * Assumption: num_sockets == (max_package_num + 1)
1166  */
1167 static void ivt_idle_state_table_update(void)
1168 {
1169         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1170         int cpu, package_num, num_sockets = 1;
1171
1172         for_each_online_cpu(cpu) {
1173                 package_num = topology_physical_package_id(cpu);
1174                 if (package_num + 1 > num_sockets) {
1175                         num_sockets = package_num + 1;
1176
1177                         if (num_sockets > 4) {
1178                                 cpuidle_state_table = ivt_cstates_8s;
1179                                 return;
1180                         }
1181                 }
1182         }
1183
1184         if (num_sockets > 2)
1185                 cpuidle_state_table = ivt_cstates_4s;
1186
1187         /* else, 1 and 2 socket systems use default ivt_cstates */
1188 }
1189
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

/*
 * Nanoseconds per count, indexed by the 3-bit unit field taken from
 * bits 12:10 of the IRTL value.  Read-only, hence const.
 */
static const unsigned int irtl_ns_units[] = {
        1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec()
 * @irtl: raw IRTL MSR value
 *
 * Multiplies the 10-bit count in bits 9:0 by the unit selected through
 * bits 12:10 and converts the result from nanoseconds to microseconds.
 * Returns 0 when @irtl is 0 or when the selected unit is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
        unsigned long long ns;

        if (!irtl)
                return 0;

        ns = irtl_ns_units[(irtl >> 10) & 0x7];

        return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1208 /*
1209  * bxt_idle_state_table_update(void)
1210  *
1211  * On BXT, we trust the IRTL to show the definitive maximum latency
1212  * We use the same value for target_residency.
1213  */
1214 static void bxt_idle_state_table_update(void)
1215 {
1216         unsigned long long msr;
1217         unsigned int usec;
1218
1219         rdmsrl(MSR_PKGC6_IRTL, msr);
1220         usec = irtl_2_usec(msr);
1221         if (usec) {
1222                 bxt_cstates[2].exit_latency = usec;
1223                 bxt_cstates[2].target_residency = usec;
1224         }
1225
1226         rdmsrl(MSR_PKGC7_IRTL, msr);
1227         usec = irtl_2_usec(msr);
1228         if (usec) {
1229                 bxt_cstates[3].exit_latency = usec;
1230                 bxt_cstates[3].target_residency = usec;
1231         }
1232
1233         rdmsrl(MSR_PKGC8_IRTL, msr);
1234         usec = irtl_2_usec(msr);
1235         if (usec) {
1236                 bxt_cstates[4].exit_latency = usec;
1237                 bxt_cstates[4].target_residency = usec;
1238         }
1239
1240         rdmsrl(MSR_PKGC9_IRTL, msr);
1241         usec = irtl_2_usec(msr);
1242         if (usec) {
1243                 bxt_cstates[5].exit_latency = usec;
1244                 bxt_cstates[5].target_residency = usec;
1245         }
1246
1247         rdmsrl(MSR_PKGC10_IRTL, msr);
1248         usec = irtl_2_usec(msr);
1249         if (usec) {
1250                 bxt_cstates[6].exit_latency = usec;
1251                 bxt_cstates[6].target_residency = usec;
1252         }
1253
1254 }
1255 /*
1256  * sklh_idle_state_table_update(void)
1257  *
1258  * On SKL-H (model 0x5e) disable C8 and C9 if:
1259  * C10 is enabled and SGX disabled
1260  */
1261 static void sklh_idle_state_table_update(void)
1262 {
1263         unsigned long long msr;
1264         unsigned int eax, ebx, ecx, edx;
1265
1266
1267         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1268         if (max_cstate <= 7)
1269                 return;
1270
1271         /* if PC10 not present in CPUID.MWAIT.EDX */
1272         if ((mwait_substates & (0xF << 28)) == 0)
1273                 return;
1274
1275         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1276
1277         /* PC10 is not enabled in PKG C-state limit */
1278         if ((msr & 0xF) != 8)
1279                 return;
1280
1281         ecx = 0;
1282         cpuid(7, &eax, &ebx, &ecx, &edx);
1283
1284         /* if SGX is present */
1285         if (ebx & (1 << 2)) {
1286
1287                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1288
1289                 /* if SGX is enabled */
1290                 if (msr & (1 << 18))
1291                         return;
1292         }
1293
1294         skl_cstates[5].disabled = 1;    /* C8-SKL */
1295         skl_cstates[6].disabled = 1;    /* C9-SKL */
1296 }
1297 /*
1298  * intel_idle_state_table_update()
1299  *
1300  * Update the default state_table for this CPU-id
1301  */
1302
1303 static void intel_idle_state_table_update(void)
1304 {
1305         switch (boot_cpu_data.x86_model) {
1306
1307         case INTEL_FAM6_IVYBRIDGE_X:
1308                 ivt_idle_state_table_update();
1309                 break;
1310         case INTEL_FAM6_ATOM_GOLDMONT:
1311         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1312                 bxt_idle_state_table_update();
1313                 break;
1314         case INTEL_FAM6_SKYLAKE_DESKTOP:
1315                 sklh_idle_state_table_update();
1316                 break;
1317         }
1318 }
1319
1320 /*
1321  * intel_idle_cpuidle_driver_init()
1322  * allocate, initialize cpuidle_states
1323  */
1324 static void __init intel_idle_cpuidle_driver_init(void)
1325 {
1326         int cstate;
1327         struct cpuidle_driver *drv = &intel_idle_driver;
1328
1329         intel_idle_state_table_update();
1330
1331         cpuidle_poll_state_init(drv);
1332         drv->state_count = 1;
1333
1334         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1335                 int num_substates, mwait_hint, mwait_cstate;
1336
1337                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1338                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1339                         break;
1340
1341                 if (cstate + 1 > max_cstate) {
1342                         pr_info("max_cstate %d reached\n", max_cstate);
1343                         break;
1344                 }
1345
1346                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1347                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1348
1349                 /* number of sub-states for this state in CPUID.MWAIT */
1350                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1351                                         & MWAIT_SUBSTATE_MASK;
1352
1353                 /* if NO sub-states for this state in CPUID, skip it */
1354                 if (num_substates == 0)
1355                         continue;
1356
1357                 /* if state marked as disabled, skip it */
1358                 if (cpuidle_state_table[cstate].disabled != 0) {
1359                         pr_debug("state %s is disabled\n",
1360                                  cpuidle_state_table[cstate].name);
1361                         continue;
1362                 }
1363
1364
1365                 if (((mwait_cstate + 1) > 2) &&
1366                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1367                         mark_tsc_unstable("TSC halts in idle"
1368                                         " states deeper than C2");
1369
1370                 drv->states[drv->state_count] = /* structure copy */
1371                         cpuidle_state_table[cstate];
1372
1373                 drv->state_count += 1;
1374         }
1375
1376         if (icpu->byt_auto_demotion_disable_flag) {
1377                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1378                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1379         }
1380 }
1381
1382
1383 /*
1384  * intel_idle_cpu_init()
1385  * allocate, initialize, register cpuidle_devices
1386  * @cpu: cpu/core to initialize
1387  */
1388 static int intel_idle_cpu_init(unsigned int cpu)
1389 {
1390         struct cpuidle_device *dev;
1391
1392         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1393         dev->cpu = cpu;
1394
1395         if (cpuidle_register_device(dev)) {
1396                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1397                 return -EIO;
1398         }
1399
1400         if (icpu->auto_demotion_disable_flags)
1401                 auto_demotion_disable();
1402
1403         if (icpu->disable_promotion_to_c1e)
1404                 c1e_promotion_disable();
1405
1406         return 0;
1407 }
1408
1409 static int intel_idle_cpu_online(unsigned int cpu)
1410 {
1411         struct cpuidle_device *dev;
1412
1413         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1414                 __setup_broadcast_timer(true);
1415
1416         /*
1417          * Some systems can hotplug a cpu at runtime after
1418          * the kernel has booted, we have to initialize the
1419          * driver in this case
1420          */
1421         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1422         if (!dev->registered)
1423                 return intel_idle_cpu_init(cpu);
1424
1425         return 0;
1426 }
1427
1428 static int __init intel_idle_init(void)
1429 {
1430         int retval;
1431
1432         /* Do not load intel_idle at all for now if idle= is passed */
1433         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1434                 return -ENODEV;
1435
1436         retval = intel_idle_probe();
1437         if (retval)
1438                 return retval;
1439
1440         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1441         if (intel_idle_cpuidle_devices == NULL)
1442                 return -ENOMEM;
1443
1444         intel_idle_cpuidle_driver_init();
1445         retval = cpuidle_register_driver(&intel_idle_driver);
1446         if (retval) {
1447                 struct cpuidle_driver *drv = cpuidle_get_driver();
1448                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1449                        drv ? drv->name : "none");
1450                 goto init_driver_fail;
1451         }
1452
1453         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1454                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1455
1456         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1457                                    intel_idle_cpu_online, NULL);
1458         if (retval < 0)
1459                 goto hp_setup_fail;
1460
1461         pr_debug("lapic_timer_reliable_states 0x%x\n",
1462                  lapic_timer_reliable_states);
1463
1464         return 0;
1465
1466 hp_setup_fail:
1467         intel_idle_cpuidle_devices_uninit();
1468         cpuidle_unregister_driver(&intel_idle_driver);
1469 init_driver_fail:
1470         free_percpu(intel_idle_cpuidle_devices);
1471         return retval;
1472
1473 }
1474 device_initcall(intel_idle_init);
1475
/*
 * This driver is no longer modular, although it used to be.  Two things
 * from that time are still supported: "intel_idle.max_cstate=..." on the
 * kernel command line, and a read-only view of the value at
 * /sys/module/intel_idle/parameters/max_cstate.  Keeping module_param()
 * is (currently) the easiest way to provide both.
 */
module_param(max_cstate, int, 0444);