Merge branch 'pm-cpuidle'
[sfrench/cifs-2.6.git] / drivers / idle / intel_idle.c
1 /*
2  * intel_idle.c - native hardware idle loop for modern Intel processors
3  *
4  * Copyright (c) 2013, Intel Corporation.
5  * Len Brown <len.brown@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms and conditions of the GNU General Public License,
9  * version 2, as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20
21 /*
22  * intel_idle is a cpuidle driver that loads on specific Intel processors
23  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24  * make Linux more efficient on these processors, as intel_idle knows
25  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26  */
27
28 /*
29  * Design Assumptions
30  *
31  * All CPUs have same idle states as boot CPU
32  *
33  * Chipset BM_STS (bus master status) bit is a NOP
34  *      for preventing entry into deep C-states
35  */
36
37 /*
38  * Known limitations
39  *
40  * The driver currently initializes for_each_online_cpu() upon modprobe.
41  * It is unaware of subsequent processors hot-added to the system.
42  * This means that if you boot with maxcpus=n and later online
43  * processors above n, those processors will use C1 only.
44  *
45  * ACPI has a .suspend hack to turn off deep c-states during suspend
46  * to avoid complications with the lapic timer workaround.
47  * Have not seen issues with suspend, but may need same workaround here.
48  *
49  */
50
51 /* DEBUG enables pr_debug() statements; comment it out to disable them */
52 #define DEBUG
53
54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56 #include <linux/kernel.h>
57 #include <linux/cpuidle.h>
58 #include <linux/tick.h>
59 #include <trace/events/power.h>
60 #include <linux/sched.h>
61 #include <linux/notifier.h>
62 #include <linux/cpu.h>
63 #include <linux/moduleparam.h>
64 #include <asm/cpu_device_id.h>
65 #include <asm/intel-family.h>
66 #include <asm/mwait.h>
67 #include <asm/msr.h>
68
69 #define INTEL_IDLE_VERSION "0.4.1"
70
71 static struct cpuidle_driver intel_idle_driver = {
72         .name = "intel_idle",
73         .owner = THIS_MODULE,
74 };
75 /* intel_idle.max_cstate=0 disables driver */
76 static int max_cstate = CPUIDLE_STATE_MAX - 1;
77
78 static unsigned int mwait_substates;
79
80 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
81 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
82 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
83
/*
 * Driver-private description of a CPU: the C-state table it uses plus
 * the auto-demotion / C1E-promotion settings that go with it.
 */
struct idle_cpu {
	/* C-state table associated with this description */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * These are the enable bits that should be cleared.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
};
95
96 static const struct idle_cpu *icpu;
97 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
98 static int intel_idle(struct cpuidle_device *dev,
99                         struct cpuidle_driver *drv, int index);
100 static void intel_idle_s2idle(struct cpuidle_device *dev,
101                               struct cpuidle_driver *drv, int index);
102 static struct cpuidle_state *cpuidle_state_table;
103
/*
 * Flag for states in which the HW flushes the TLB for us, so no
 * cross-calls are needed to keep it consistent.
 * When the flag is set, SW flushes the TLB as well, which makes the
 * flag safe to use even if the HW does not do the flushing.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	(1 << 16)
111
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 * MWAIT2flg() packs a hint into bits 31:24 of a flags word and
 * flg2MWAIT() extracts it again.  Both macros parenthesize their
 * argument so that a compound expression is masked as a whole.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
121
122 /*
123  * Each table lists its states shallowest first, starting with C1
124  * (or C1E) at index 0; there is no C0 placeholder entry.  The MWAIT
125  * hint for a state is carried in the top byte of its .flags.
126  */
127 static struct cpuidle_state nehalem_cstates[] = {
128         {
129                 .name = "C1",
130                 .desc = "MWAIT 0x00",
131                 .flags = MWAIT2flg(0x00),
132                 .exit_latency = 3,
133                 .target_residency = 6,
134                 .enter = &intel_idle,
135                 .enter_s2idle = intel_idle_s2idle, },
136         {
137                 .name = "C1E",
138                 .desc = "MWAIT 0x01",
139                 .flags = MWAIT2flg(0x01),
140                 .exit_latency = 10,
141                 .target_residency = 20,
142                 .enter = &intel_idle,
143                 .enter_s2idle = intel_idle_s2idle, },
144         {
145                 .name = "C3",
146                 .desc = "MWAIT 0x10",
147                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
148                 .exit_latency = 20,
149                 .target_residency = 80,
150                 .enter = &intel_idle,
151                 .enter_s2idle = intel_idle_s2idle, },
152         {
153                 .name = "C6",
154                 .desc = "MWAIT 0x20",
155                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
156                 .exit_latency = 200,
157                 .target_residency = 800,
158                 .enter = &intel_idle,
159                 .enter_s2idle = intel_idle_s2idle, },
160         {
161                 .enter = NULL }
162 };
163
164 static struct cpuidle_state snb_cstates[] = {
165         {
166                 .name = "C1",
167                 .desc = "MWAIT 0x00",
168                 .flags = MWAIT2flg(0x00),
169                 .exit_latency = 2,
170                 .target_residency = 2,
171                 .enter = &intel_idle,
172                 .enter_s2idle = intel_idle_s2idle, },
173         {
174                 .name = "C1E",
175                 .desc = "MWAIT 0x01",
176                 .flags = MWAIT2flg(0x01),
177                 .exit_latency = 10,
178                 .target_residency = 20,
179                 .enter = &intel_idle,
180                 .enter_s2idle = intel_idle_s2idle, },
181         {
182                 .name = "C3",
183                 .desc = "MWAIT 0x10",
184                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
185                 .exit_latency = 80,
186                 .target_residency = 211,
187                 .enter = &intel_idle,
188                 .enter_s2idle = intel_idle_s2idle, },
189         {
190                 .name = "C6",
191                 .desc = "MWAIT 0x20",
192                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
193                 .exit_latency = 104,
194                 .target_residency = 345,
195                 .enter = &intel_idle,
196                 .enter_s2idle = intel_idle_s2idle, },
197         {
198                 .name = "C7",
199                 .desc = "MWAIT 0x30",
200                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
201                 .exit_latency = 109,
202                 .target_residency = 345,
203                 .enter = &intel_idle,
204                 .enter_s2idle = intel_idle_s2idle, },
205         {
206                 .enter = NULL }
207 };
208
209 static struct cpuidle_state byt_cstates[] = {
210         {
211                 .name = "C1",
212                 .desc = "MWAIT 0x00",
213                 .flags = MWAIT2flg(0x00),
214                 .exit_latency = 1,
215                 .target_residency = 1,
216                 .enter = &intel_idle,
217                 .enter_s2idle = intel_idle_s2idle, },
218         {
219                 .name = "C6N",
220                 .desc = "MWAIT 0x58",
221                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
222                 .exit_latency = 300,
223                 .target_residency = 275,
224                 .enter = &intel_idle,
225                 .enter_s2idle = intel_idle_s2idle, },
226         {
227                 .name = "C6S",
228                 .desc = "MWAIT 0x52",
229                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
230                 .exit_latency = 500,
231                 .target_residency = 560,
232                 .enter = &intel_idle,
233                 .enter_s2idle = intel_idle_s2idle, },
234         {
235                 .name = "C7",
236                 .desc = "MWAIT 0x60",
237                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
238                 .exit_latency = 1200,
239                 .target_residency = 4000,
240                 .enter = &intel_idle,
241                 .enter_s2idle = intel_idle_s2idle, },
242         {
243                 .name = "C7S",
244                 .desc = "MWAIT 0x64",
245                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
246                 .exit_latency = 10000,
247                 .target_residency = 20000,
248                 .enter = &intel_idle,
249                 .enter_s2idle = intel_idle_s2idle, },
250         {
251                 .enter = NULL }
252 };
253
254 static struct cpuidle_state cht_cstates[] = {
255         {
256                 .name = "C1",
257                 .desc = "MWAIT 0x00",
258                 .flags = MWAIT2flg(0x00),
259                 .exit_latency = 1,
260                 .target_residency = 1,
261                 .enter = &intel_idle,
262                 .enter_s2idle = intel_idle_s2idle, },
263         {
264                 .name = "C6N",
265                 .desc = "MWAIT 0x58",
266                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
267                 .exit_latency = 80,
268                 .target_residency = 275,
269                 .enter = &intel_idle,
270                 .enter_s2idle = intel_idle_s2idle, },
271         {
272                 .name = "C6S",
273                 .desc = "MWAIT 0x52",
274                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
275                 .exit_latency = 200,
276                 .target_residency = 560,
277                 .enter = &intel_idle,
278                 .enter_s2idle = intel_idle_s2idle, },
279         {
280                 .name = "C7",
281                 .desc = "MWAIT 0x60",
282                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
283                 .exit_latency = 1200,
284                 .target_residency = 4000,
285                 .enter = &intel_idle,
286                 .enter_s2idle = intel_idle_s2idle, },
287         {
288                 .name = "C7S",
289                 .desc = "MWAIT 0x64",
290                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
291                 .exit_latency = 10000,
292                 .target_residency = 20000,
293                 .enter = &intel_idle,
294                 .enter_s2idle = intel_idle_s2idle, },
295         {
296                 .enter = NULL }
297 };
298
299 static struct cpuidle_state ivb_cstates[] = {
300         {
301                 .name = "C1",
302                 .desc = "MWAIT 0x00",
303                 .flags = MWAIT2flg(0x00),
304                 .exit_latency = 1,
305                 .target_residency = 1,
306                 .enter = &intel_idle,
307                 .enter_s2idle = intel_idle_s2idle, },
308         {
309                 .name = "C1E",
310                 .desc = "MWAIT 0x01",
311                 .flags = MWAIT2flg(0x01),
312                 .exit_latency = 10,
313                 .target_residency = 20,
314                 .enter = &intel_idle,
315                 .enter_s2idle = intel_idle_s2idle, },
316         {
317                 .name = "C3",
318                 .desc = "MWAIT 0x10",
319                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
320                 .exit_latency = 59,
321                 .target_residency = 156,
322                 .enter = &intel_idle,
323                 .enter_s2idle = intel_idle_s2idle, },
324         {
325                 .name = "C6",
326                 .desc = "MWAIT 0x20",
327                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
328                 .exit_latency = 80,
329                 .target_residency = 300,
330                 .enter = &intel_idle,
331                 .enter_s2idle = intel_idle_s2idle, },
332         {
333                 .name = "C7",
334                 .desc = "MWAIT 0x30",
335                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
336                 .exit_latency = 87,
337                 .target_residency = 300,
338                 .enter = &intel_idle,
339                 .enter_s2idle = intel_idle_s2idle, },
340         {
341                 .enter = NULL }
342 };
343
344 static struct cpuidle_state ivt_cstates[] = {
345         {
346                 .name = "C1",
347                 .desc = "MWAIT 0x00",
348                 .flags = MWAIT2flg(0x00),
349                 .exit_latency = 1,
350                 .target_residency = 1,
351                 .enter = &intel_idle,
352                 .enter_s2idle = intel_idle_s2idle, },
353         {
354                 .name = "C1E",
355                 .desc = "MWAIT 0x01",
356                 .flags = MWAIT2flg(0x01),
357                 .exit_latency = 10,
358                 .target_residency = 80,
359                 .enter = &intel_idle,
360                 .enter_s2idle = intel_idle_s2idle, },
361         {
362                 .name = "C3",
363                 .desc = "MWAIT 0x10",
364                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
365                 .exit_latency = 59,
366                 .target_residency = 156,
367                 .enter = &intel_idle,
368                 .enter_s2idle = intel_idle_s2idle, },
369         {
370                 .name = "C6",
371                 .desc = "MWAIT 0x20",
372                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
373                 .exit_latency = 82,
374                 .target_residency = 300,
375                 .enter = &intel_idle,
376                 .enter_s2idle = intel_idle_s2idle, },
377         {
378                 .enter = NULL }
379 };
380
381 static struct cpuidle_state ivt_cstates_4s[] = {
382         {
383                 .name = "C1",
384                 .desc = "MWAIT 0x00",
385                 .flags = MWAIT2flg(0x00),
386                 .exit_latency = 1,
387                 .target_residency = 1,
388                 .enter = &intel_idle,
389                 .enter_s2idle = intel_idle_s2idle, },
390         {
391                 .name = "C1E",
392                 .desc = "MWAIT 0x01",
393                 .flags = MWAIT2flg(0x01),
394                 .exit_latency = 10,
395                 .target_residency = 250,
396                 .enter = &intel_idle,
397                 .enter_s2idle = intel_idle_s2idle, },
398         {
399                 .name = "C3",
400                 .desc = "MWAIT 0x10",
401                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
402                 .exit_latency = 59,
403                 .target_residency = 300,
404                 .enter = &intel_idle,
405                 .enter_s2idle = intel_idle_s2idle, },
406         {
407                 .name = "C6",
408                 .desc = "MWAIT 0x20",
409                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
410                 .exit_latency = 84,
411                 .target_residency = 400,
412                 .enter = &intel_idle,
413                 .enter_s2idle = intel_idle_s2idle, },
414         {
415                 .enter = NULL }
416 };
417
418 static struct cpuidle_state ivt_cstates_8s[] = {
419         {
420                 .name = "C1",
421                 .desc = "MWAIT 0x00",
422                 .flags = MWAIT2flg(0x00),
423                 .exit_latency = 1,
424                 .target_residency = 1,
425                 .enter = &intel_idle,
426                 .enter_s2idle = intel_idle_s2idle, },
427         {
428                 .name = "C1E",
429                 .desc = "MWAIT 0x01",
430                 .flags = MWAIT2flg(0x01),
431                 .exit_latency = 10,
432                 .target_residency = 500,
433                 .enter = &intel_idle,
434                 .enter_s2idle = intel_idle_s2idle, },
435         {
436                 .name = "C3",
437                 .desc = "MWAIT 0x10",
438                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
439                 .exit_latency = 59,
440                 .target_residency = 600,
441                 .enter = &intel_idle,
442                 .enter_s2idle = intel_idle_s2idle, },
443         {
444                 .name = "C6",
445                 .desc = "MWAIT 0x20",
446                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
447                 .exit_latency = 88,
448                 .target_residency = 700,
449                 .enter = &intel_idle,
450                 .enter_s2idle = intel_idle_s2idle, },
451         {
452                 .enter = NULL }
453 };
454
455 static struct cpuidle_state hsw_cstates[] = {
456         {
457                 .name = "C1",
458                 .desc = "MWAIT 0x00",
459                 .flags = MWAIT2flg(0x00),
460                 .exit_latency = 2,
461                 .target_residency = 2,
462                 .enter = &intel_idle,
463                 .enter_s2idle = intel_idle_s2idle, },
464         {
465                 .name = "C1E",
466                 .desc = "MWAIT 0x01",
467                 .flags = MWAIT2flg(0x01),
468                 .exit_latency = 10,
469                 .target_residency = 20,
470                 .enter = &intel_idle,
471                 .enter_s2idle = intel_idle_s2idle, },
472         {
473                 .name = "C3",
474                 .desc = "MWAIT 0x10",
475                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
476                 .exit_latency = 33,
477                 .target_residency = 100,
478                 .enter = &intel_idle,
479                 .enter_s2idle = intel_idle_s2idle, },
480         {
481                 .name = "C6",
482                 .desc = "MWAIT 0x20",
483                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
484                 .exit_latency = 133,
485                 .target_residency = 400,
486                 .enter = &intel_idle,
487                 .enter_s2idle = intel_idle_s2idle, },
488         {
489                 .name = "C7s",
490                 .desc = "MWAIT 0x32",
491                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
492                 .exit_latency = 166,
493                 .target_residency = 500,
494                 .enter = &intel_idle,
495                 .enter_s2idle = intel_idle_s2idle, },
496         {
497                 .name = "C8",
498                 .desc = "MWAIT 0x40",
499                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
500                 .exit_latency = 300,
501                 .target_residency = 900,
502                 .enter = &intel_idle,
503                 .enter_s2idle = intel_idle_s2idle, },
504         {
505                 .name = "C9",
506                 .desc = "MWAIT 0x50",
507                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
508                 .exit_latency = 600,
509                 .target_residency = 1800,
510                 .enter = &intel_idle,
511                 .enter_s2idle = intel_idle_s2idle, },
512         {
513                 .name = "C10",
514                 .desc = "MWAIT 0x60",
515                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
516                 .exit_latency = 2600,
517                 .target_residency = 7700,
518                 .enter = &intel_idle,
519                 .enter_s2idle = intel_idle_s2idle, },
520         {
521                 .enter = NULL }
522 };
523 static struct cpuidle_state bdw_cstates[] = {
524         {
525                 .name = "C1",
526                 .desc = "MWAIT 0x00",
527                 .flags = MWAIT2flg(0x00),
528                 .exit_latency = 2,
529                 .target_residency = 2,
530                 .enter = &intel_idle,
531                 .enter_s2idle = intel_idle_s2idle, },
532         {
533                 .name = "C1E",
534                 .desc = "MWAIT 0x01",
535                 .flags = MWAIT2flg(0x01),
536                 .exit_latency = 10,
537                 .target_residency = 20,
538                 .enter = &intel_idle,
539                 .enter_s2idle = intel_idle_s2idle, },
540         {
541                 .name = "C3",
542                 .desc = "MWAIT 0x10",
543                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
544                 .exit_latency = 40,
545                 .target_residency = 100,
546                 .enter = &intel_idle,
547                 .enter_s2idle = intel_idle_s2idle, },
548         {
549                 .name = "C6",
550                 .desc = "MWAIT 0x20",
551                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
552                 .exit_latency = 133,
553                 .target_residency = 400,
554                 .enter = &intel_idle,
555                 .enter_s2idle = intel_idle_s2idle, },
556         {
557                 .name = "C7s",
558                 .desc = "MWAIT 0x32",
559                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
560                 .exit_latency = 166,
561                 .target_residency = 500,
562                 .enter = &intel_idle,
563                 .enter_s2idle = intel_idle_s2idle, },
564         {
565                 .name = "C8",
566                 .desc = "MWAIT 0x40",
567                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
568                 .exit_latency = 300,
569                 .target_residency = 900,
570                 .enter = &intel_idle,
571                 .enter_s2idle = intel_idle_s2idle, },
572         {
573                 .name = "C9",
574                 .desc = "MWAIT 0x50",
575                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
576                 .exit_latency = 600,
577                 .target_residency = 1800,
578                 .enter = &intel_idle,
579                 .enter_s2idle = intel_idle_s2idle, },
580         {
581                 .name = "C10",
582                 .desc = "MWAIT 0x60",
583                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
584                 .exit_latency = 2600,
585                 .target_residency = 7700,
586                 .enter = &intel_idle,
587                 .enter_s2idle = intel_idle_s2idle, },
588         {
589                 .enter = NULL }
590 };
591
592 static struct cpuidle_state skl_cstates[] = {
593         {
594                 .name = "C1",
595                 .desc = "MWAIT 0x00",
596                 .flags = MWAIT2flg(0x00),
597                 .exit_latency = 2,
598                 .target_residency = 2,
599                 .enter = &intel_idle,
600                 .enter_s2idle = intel_idle_s2idle, },
601         {
602                 .name = "C1E",
603                 .desc = "MWAIT 0x01",
604                 .flags = MWAIT2flg(0x01),
605                 .exit_latency = 10,
606                 .target_residency = 20,
607                 .enter = &intel_idle,
608                 .enter_s2idle = intel_idle_s2idle, },
609         {
610                 .name = "C3",
611                 .desc = "MWAIT 0x10",
612                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
613                 .exit_latency = 70,
614                 .target_residency = 100,
615                 .enter = &intel_idle,
616                 .enter_s2idle = intel_idle_s2idle, },
617         {
618                 .name = "C6",
619                 .desc = "MWAIT 0x20",
620                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
621                 .exit_latency = 85,
622                 .target_residency = 200,
623                 .enter = &intel_idle,
624                 .enter_s2idle = intel_idle_s2idle, },
625         {
626                 .name = "C7s",
627                 .desc = "MWAIT 0x33",
628                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
629                 .exit_latency = 124,
630                 .target_residency = 800,
631                 .enter = &intel_idle,
632                 .enter_s2idle = intel_idle_s2idle, },
633         {
634                 .name = "C8",
635                 .desc = "MWAIT 0x40",
636                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
637                 .exit_latency = 200,
638                 .target_residency = 800,
639                 .enter = &intel_idle,
640                 .enter_s2idle = intel_idle_s2idle, },
641         {
642                 .name = "C9",
643                 .desc = "MWAIT 0x50",
644                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
645                 .exit_latency = 480,
646                 .target_residency = 5000,
647                 .enter = &intel_idle,
648                 .enter_s2idle = intel_idle_s2idle, },
649         {
650                 .name = "C10",
651                 .desc = "MWAIT 0x60",
652                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
653                 .exit_latency = 890,
654                 .target_residency = 5000,
655                 .enter = &intel_idle,
656                 .enter_s2idle = intel_idle_s2idle, },
657         {
658                 .enter = NULL }
659 };
660
661 static struct cpuidle_state skx_cstates[] = {
662         {
663                 .name = "C1",
664                 .desc = "MWAIT 0x00",
665                 .flags = MWAIT2flg(0x00),
666                 .exit_latency = 2,
667                 .target_residency = 2,
668                 .enter = &intel_idle,
669                 .enter_s2idle = intel_idle_s2idle, },
670         {
671                 .name = "C1E",
672                 .desc = "MWAIT 0x01",
673                 .flags = MWAIT2flg(0x01),
674                 .exit_latency = 10,
675                 .target_residency = 20,
676                 .enter = &intel_idle,
677                 .enter_s2idle = intel_idle_s2idle, },
678         {
679                 .name = "C6",
680                 .desc = "MWAIT 0x20",
681                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
682                 .exit_latency = 133,
683                 .target_residency = 600,
684                 .enter = &intel_idle,
685                 .enter_s2idle = intel_idle_s2idle, },
686         {
687                 .enter = NULL }
688 };
689
690 static struct cpuidle_state atom_cstates[] = {
691         {
692                 .name = "C1E",
693                 .desc = "MWAIT 0x00",
694                 .flags = MWAIT2flg(0x00),
695                 .exit_latency = 10,
696                 .target_residency = 20,
697                 .enter = &intel_idle,
698                 .enter_s2idle = intel_idle_s2idle, },
699         {
700                 .name = "C2",
701                 .desc = "MWAIT 0x10",
702                 .flags = MWAIT2flg(0x10),
703                 .exit_latency = 20,
704                 .target_residency = 80,
705                 .enter = &intel_idle,
706                 .enter_s2idle = intel_idle_s2idle, },
707         {
708                 .name = "C4",
709                 .desc = "MWAIT 0x30",
710                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
711                 .exit_latency = 100,
712                 .target_residency = 400,
713                 .enter = &intel_idle,
714                 .enter_s2idle = intel_idle_s2idle, },
715         {
716                 .name = "C6",
717                 .desc = "MWAIT 0x52",
718                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
719                 .exit_latency = 140,
720                 .target_residency = 560,
721                 .enter = &intel_idle,
722                 .enter_s2idle = intel_idle_s2idle, },
723         {
724                 .enter = NULL }
725 };
726 static struct cpuidle_state tangier_cstates[] = {
727         {
728                 .name = "C1",
729                 .desc = "MWAIT 0x00",
730                 .flags = MWAIT2flg(0x00),
731                 .exit_latency = 1,
732                 .target_residency = 4,
733                 .enter = &intel_idle,
734                 .enter_s2idle = intel_idle_s2idle, },
735         {
736                 .name = "C4",
737                 .desc = "MWAIT 0x30",
738                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
739                 .exit_latency = 100,
740                 .target_residency = 400,
741                 .enter = &intel_idle,
742                 .enter_s2idle = intel_idle_s2idle, },
743         {
744                 .name = "C6",
745                 .desc = "MWAIT 0x52",
746                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
747                 .exit_latency = 140,
748                 .target_residency = 560,
749                 .enter = &intel_idle,
750                 .enter_s2idle = intel_idle_s2idle, },
751         {
752                 .name = "C7",
753                 .desc = "MWAIT 0x60",
754                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
755                 .exit_latency = 1200,
756                 .target_residency = 4000,
757                 .enter = &intel_idle,
758                 .enter_s2idle = intel_idle_s2idle, },
759         {
760                 .name = "C9",
761                 .desc = "MWAIT 0x64",
762                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
763                 .exit_latency = 10000,
764                 .target_residency = 20000,
765                 .enter = &intel_idle,
766                 .enter_s2idle = intel_idle_s2idle, },
767         {
768                 .enter = NULL }
769 };
770 static struct cpuidle_state avn_cstates[] = {
771         {
772                 .name = "C1",
773                 .desc = "MWAIT 0x00",
774                 .flags = MWAIT2flg(0x00),
775                 .exit_latency = 2,
776                 .target_residency = 2,
777                 .enter = &intel_idle,
778                 .enter_s2idle = intel_idle_s2idle, },
779         {
780                 .name = "C6",
781                 .desc = "MWAIT 0x51",
782                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
783                 .exit_latency = 15,
784                 .target_residency = 45,
785                 .enter = &intel_idle,
786                 .enter_s2idle = intel_idle_s2idle, },
787         {
788                 .enter = NULL }
789 };
790 static struct cpuidle_state knl_cstates[] = {
791         {
792                 .name = "C1",
793                 .desc = "MWAIT 0x00",
794                 .flags = MWAIT2flg(0x00),
795                 .exit_latency = 1,
796                 .target_residency = 2,
797                 .enter = &intel_idle,
798                 .enter_s2idle = intel_idle_s2idle },
799         {
800                 .name = "C6",
801                 .desc = "MWAIT 0x10",
802                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
803                 .exit_latency = 120,
804                 .target_residency = 500,
805                 .enter = &intel_idle,
806                 .enter_s2idle = intel_idle_s2idle },
807         {
808                 .enter = NULL }
809 };
810
811 static struct cpuidle_state bxt_cstates[] = {
812         {
813                 .name = "C1",
814                 .desc = "MWAIT 0x00",
815                 .flags = MWAIT2flg(0x00),
816                 .exit_latency = 2,
817                 .target_residency = 2,
818                 .enter = &intel_idle,
819                 .enter_s2idle = intel_idle_s2idle, },
820         {
821                 .name = "C1E",
822                 .desc = "MWAIT 0x01",
823                 .flags = MWAIT2flg(0x01),
824                 .exit_latency = 10,
825                 .target_residency = 20,
826                 .enter = &intel_idle,
827                 .enter_s2idle = intel_idle_s2idle, },
828         {
829                 .name = "C6",
830                 .desc = "MWAIT 0x20",
831                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
832                 .exit_latency = 133,
833                 .target_residency = 133,
834                 .enter = &intel_idle,
835                 .enter_s2idle = intel_idle_s2idle, },
836         {
837                 .name = "C7s",
838                 .desc = "MWAIT 0x31",
839                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
840                 .exit_latency = 155,
841                 .target_residency = 155,
842                 .enter = &intel_idle,
843                 .enter_s2idle = intel_idle_s2idle, },
844         {
845                 .name = "C8",
846                 .desc = "MWAIT 0x40",
847                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
848                 .exit_latency = 1000,
849                 .target_residency = 1000,
850                 .enter = &intel_idle,
851                 .enter_s2idle = intel_idle_s2idle, },
852         {
853                 .name = "C9",
854                 .desc = "MWAIT 0x50",
855                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
856                 .exit_latency = 2000,
857                 .target_residency = 2000,
858                 .enter = &intel_idle,
859                 .enter_s2idle = intel_idle_s2idle, },
860         {
861                 .name = "C10",
862                 .desc = "MWAIT 0x60",
863                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
864                 .exit_latency = 10000,
865                 .target_residency = 10000,
866                 .enter = &intel_idle,
867                 .enter_s2idle = intel_idle_s2idle, },
868         {
869                 .enter = NULL }
870 };
871
872 static struct cpuidle_state dnv_cstates[] = {
873         {
874                 .name = "C1",
875                 .desc = "MWAIT 0x00",
876                 .flags = MWAIT2flg(0x00),
877                 .exit_latency = 2,
878                 .target_residency = 2,
879                 .enter = &intel_idle,
880                 .enter_s2idle = intel_idle_s2idle, },
881         {
882                 .name = "C1E",
883                 .desc = "MWAIT 0x01",
884                 .flags = MWAIT2flg(0x01),
885                 .exit_latency = 10,
886                 .target_residency = 20,
887                 .enter = &intel_idle,
888                 .enter_s2idle = intel_idle_s2idle, },
889         {
890                 .name = "C6",
891                 .desc = "MWAIT 0x20",
892                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
893                 .exit_latency = 50,
894                 .target_residency = 500,
895                 .enter = &intel_idle,
896                 .enter_s2idle = intel_idle_s2idle, },
897         {
898                 .enter = NULL }
899 };
900
901 /**
902  * intel_idle
903  * @dev: cpuidle_device
904  * @drv: cpuidle driver
905  * @index: index of cpuidle state
906  *
907  * Must be called under local_irq_disable().
908  */
909 static __cpuidle int intel_idle(struct cpuidle_device *dev,
910                                 struct cpuidle_driver *drv, int index)
911 {
912         unsigned long ecx = 1; /* break on interrupt flag */
913         struct cpuidle_state *state = &drv->states[index];
914         unsigned long eax = flg2MWAIT(state->flags);
915         unsigned int cstate;
916         bool uninitialized_var(tick);
917         int cpu = smp_processor_id();
918
919         /*
920          * leave_mm() to avoid costly and often unnecessary wakeups
921          * for flushing the user TLB's associated with the active mm.
922          */
923         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
924                 leave_mm(cpu);
925
926         if (!static_cpu_has(X86_FEATURE_ARAT)) {
927                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
928                                 MWAIT_CSTATE_MASK) + 1;
929                 tick = false;
930                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
931                         tick = true;
932                         tick_broadcast_enter();
933                 }
934         }
935
936         mwait_idle_with_hints(eax, ecx);
937
938         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
939                 tick_broadcast_exit();
940
941         return index;
942 }
943
944 /**
945  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
946  * @dev: cpuidle_device
947  * @drv: cpuidle driver
948  * @index: state index
949  */
950 static void intel_idle_s2idle(struct cpuidle_device *dev,
951                              struct cpuidle_driver *drv, int index)
952 {
953         unsigned long ecx = 1; /* break on interrupt flag */
954         unsigned long eax = flg2MWAIT(drv->states[index].flags);
955
956         mwait_idle_with_hints(eax, ecx);
957 }
958
959 static void __setup_broadcast_timer(bool on)
960 {
961         if (on)
962                 tick_broadcast_enable();
963         else
964                 tick_broadcast_disable();
965 }
966
967 static void auto_demotion_disable(void)
968 {
969         unsigned long long msr_bits;
970
971         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
972         msr_bits &= ~(icpu->auto_demotion_disable_flags);
973         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
974 }
975 static void c1e_promotion_disable(void)
976 {
977         unsigned long long msr_bits;
978
979         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
980         msr_bits &= ~0x2;
981         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
982 }
983
/*
 * Per-model driver data, attached to the entries of intel_idle_ids[].
 *
 * .state_table
 *      C-state table that intel_idle_probe() installs as
 *      cpuidle_state_table.
 * .auto_demotion_disable_flags
 *      When non-zero, intel_idle_cpu_init() calls auto_demotion_disable(),
 *      which clears these bits in MSR_PKG_CST_CONFIG_CONTROL.
 * .disable_promotion_to_c1e
 *      When true, intel_idle_cpu_init() calls c1e_promotion_disable().
 * .byt_auto_demotion_disable_flag
 *      When true, intel_idle_cpuidle_driver_init() writes 0 to
 *      MSR_CC6_DEMOTION_POLICY_CONFIG and MSR_MC6_DEMOTION_POLICY_CONFIG.
 */
static const struct idle_cpu idle_cpu_nehalem = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_atom = {
        .state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier = {
        .state_table = tangier_cstates,
};

/* Shares atom_cstates with idle_cpu_atom, but also disables C6 auto-demotion */
static const struct idle_cpu idle_cpu_lincroft = {
        .state_table = atom_cstates,
        .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
        .state_table = snb_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_byt = {
        .state_table = byt_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
        .state_table = cht_cstates,
        .disable_promotion_to_c1e = true,
        .byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
        .state_table = ivb_cstates,
        .disable_promotion_to_c1e = true,
};

/* Default table only; ivt_idle_state_table_update() may pick the 4s/8s one */
static const struct idle_cpu idle_cpu_ivt = {
        .state_table = ivt_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsw = {
        .state_table = hsw_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdw = {
        .state_table = bdw_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skl = {
        .state_table = skl_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
        .state_table = skx_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_avn = {
        .state_table = avn_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_knl = {
        .state_table = knl_cstates,
};

static const struct idle_cpu idle_cpu_bxt = {
        .state_table = bxt_cstates,
        .disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
        .state_table = dnv_cstates,
        .disable_promotion_to_c1e = true,
};
1068
/*
 * CPU models this driver handles, each paired with its idle_cpu descriptor.
 * intel_idle_probe() looks the boot CPU up here with x86_match_cpu() and
 * returns -ENODEV when nothing matches; on a match the descriptor is taken
 * from the entry's driver_data.  The trailing empty entry ends the list.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        INTEL_CPU_FAM6(NEHALEM_EP,              idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM,                 idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM_G,               idle_cpu_nehalem),
        INTEL_CPU_FAM6(WESTMERE,                idle_cpu_nehalem),
        INTEL_CPU_FAM6(WESTMERE_EP,             idle_cpu_nehalem),
        INTEL_CPU_FAM6(NEHALEM_EX,              idle_cpu_nehalem),
        INTEL_CPU_FAM6(ATOM_PINEVIEW,           idle_cpu_atom),
        INTEL_CPU_FAM6(ATOM_LINCROFT,           idle_cpu_lincroft),
        INTEL_CPU_FAM6(WESTMERE_EX,             idle_cpu_nehalem),
        INTEL_CPU_FAM6(SANDYBRIDGE,             idle_cpu_snb),
        INTEL_CPU_FAM6(SANDYBRIDGE_X,           idle_cpu_snb),
        INTEL_CPU_FAM6(ATOM_CEDARVIEW,          idle_cpu_atom),
        INTEL_CPU_FAM6(ATOM_SILVERMONT1,        idle_cpu_byt),
        INTEL_CPU_FAM6(ATOM_MERRIFIELD,         idle_cpu_tangier),
        INTEL_CPU_FAM6(ATOM_AIRMONT,            idle_cpu_cht),
        INTEL_CPU_FAM6(IVYBRIDGE,               idle_cpu_ivb),
        INTEL_CPU_FAM6(IVYBRIDGE_X,             idle_cpu_ivt),
        INTEL_CPU_FAM6(HASWELL_CORE,            idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_X,               idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_ULT,             idle_cpu_hsw),
        INTEL_CPU_FAM6(HASWELL_GT3E,            idle_cpu_hsw),
        INTEL_CPU_FAM6(ATOM_SILVERMONT2,        idle_cpu_avn),
        INTEL_CPU_FAM6(BROADWELL_CORE,          idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_GT3E,          idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_X,             idle_cpu_bdw),
        INTEL_CPU_FAM6(BROADWELL_XEON_D,        idle_cpu_bdw),
        INTEL_CPU_FAM6(SKYLAKE_MOBILE,          idle_cpu_skl),
        INTEL_CPU_FAM6(SKYLAKE_DESKTOP,         idle_cpu_skl),
        INTEL_CPU_FAM6(KABYLAKE_MOBILE,         idle_cpu_skl),
        INTEL_CPU_FAM6(KABYLAKE_DESKTOP,        idle_cpu_skl),
        INTEL_CPU_FAM6(SKYLAKE_X,               idle_cpu_skx),
        INTEL_CPU_FAM6(XEON_PHI_KNL,            idle_cpu_knl),
        INTEL_CPU_FAM6(XEON_PHI_KNM,            idle_cpu_knl),
        INTEL_CPU_FAM6(ATOM_GOLDMONT,           idle_cpu_bxt),
        INTEL_CPU_FAM6(ATOM_GEMINI_LAKE,        idle_cpu_bxt),
        INTEL_CPU_FAM6(ATOM_DENVERTON,          idle_cpu_dnv),
        {}
};
1108
1109 /*
1110  * intel_idle_probe()
1111  */
1112 static int __init intel_idle_probe(void)
1113 {
1114         unsigned int eax, ebx, ecx;
1115         const struct x86_cpu_id *id;
1116
1117         if (max_cstate == 0) {
1118                 pr_debug("disabled\n");
1119                 return -EPERM;
1120         }
1121
1122         id = x86_match_cpu(intel_idle_ids);
1123         if (!id) {
1124                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1125                     boot_cpu_data.x86 == 6)
1126                         pr_debug("does not run on family %d model %d\n",
1127                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1128                 return -ENODEV;
1129         }
1130
1131         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1132                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1133                 return -ENODEV;
1134         }
1135
1136         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1137                 return -ENODEV;
1138
1139         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1140
1141         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1142             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1143             !mwait_substates)
1144                         return -ENODEV;
1145
1146         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1147
1148         icpu = (const struct idle_cpu *)id->driver_data;
1149         cpuidle_state_table = icpu->state_table;
1150
1151         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1152                  boot_cpu_data.x86_model);
1153
1154         return 0;
1155 }
1156
1157 /*
1158  * intel_idle_cpuidle_devices_uninit()
1159  * Unregisters the cpuidle devices.
1160  */
1161 static void intel_idle_cpuidle_devices_uninit(void)
1162 {
1163         int i;
1164         struct cpuidle_device *dev;
1165
1166         for_each_online_cpu(i) {
1167                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1168                 cpuidle_unregister_device(dev);
1169         }
1170 }
1171
1172 /*
1173  * ivt_idle_state_table_update(void)
1174  *
1175  * Tune IVT multi-socket targets
1176  * Assumption: num_sockets == (max_package_num + 1)
1177  */
1178 static void ivt_idle_state_table_update(void)
1179 {
1180         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1181         int cpu, package_num, num_sockets = 1;
1182
1183         for_each_online_cpu(cpu) {
1184                 package_num = topology_physical_package_id(cpu);
1185                 if (package_num + 1 > num_sockets) {
1186                         num_sockets = package_num + 1;
1187
1188                         if (num_sockets > 4) {
1189                                 cpuidle_state_table = ivt_cstates_8s;
1190                                 return;
1191                         }
1192                 }
1193         }
1194
1195         if (num_sockets > 2)
1196                 cpuidle_state_table = ivt_cstates_4s;
1197
1198         /* else, 1 and 2 socket systems use default ivt_cstates */
1199 }
1200
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 *
 * Bits 12:10 of the MSR value index irtl_ns_units[] to give the time unit
 * in nanoseconds; bits 9:0 hold the number of such units.
 */

/* Read-only lookup table: ns per unit; indices 6 and 7 map to 0. */
static const unsigned int irtl_ns_units[] = {
        1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * Returns the encoded time limit in microseconds (rounded down).  The
 * result is 0 when @irtl is 0 or selects one of the zero-valued units.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
        unsigned long long ns;

        if (!irtl)
                return 0;

        ns = irtl_ns_units[(irtl >> 10) & 0x7];

        return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1219 /*
1220  * bxt_idle_state_table_update(void)
1221  *
1222  * On BXT, we trust the IRTL to show the definitive maximum latency
1223  * We use the same value for target_residency.
1224  */
1225 static void bxt_idle_state_table_update(void)
1226 {
1227         unsigned long long msr;
1228         unsigned int usec;
1229
1230         rdmsrl(MSR_PKGC6_IRTL, msr);
1231         usec = irtl_2_usec(msr);
1232         if (usec) {
1233                 bxt_cstates[2].exit_latency = usec;
1234                 bxt_cstates[2].target_residency = usec;
1235         }
1236
1237         rdmsrl(MSR_PKGC7_IRTL, msr);
1238         usec = irtl_2_usec(msr);
1239         if (usec) {
1240                 bxt_cstates[3].exit_latency = usec;
1241                 bxt_cstates[3].target_residency = usec;
1242         }
1243
1244         rdmsrl(MSR_PKGC8_IRTL, msr);
1245         usec = irtl_2_usec(msr);
1246         if (usec) {
1247                 bxt_cstates[4].exit_latency = usec;
1248                 bxt_cstates[4].target_residency = usec;
1249         }
1250
1251         rdmsrl(MSR_PKGC9_IRTL, msr);
1252         usec = irtl_2_usec(msr);
1253         if (usec) {
1254                 bxt_cstates[5].exit_latency = usec;
1255                 bxt_cstates[5].target_residency = usec;
1256         }
1257
1258         rdmsrl(MSR_PKGC10_IRTL, msr);
1259         usec = irtl_2_usec(msr);
1260         if (usec) {
1261                 bxt_cstates[6].exit_latency = usec;
1262                 bxt_cstates[6].target_residency = usec;
1263         }
1264
1265 }
1266 /*
1267  * sklh_idle_state_table_update(void)
1268  *
1269  * On SKL-H (model 0x5e) disable C8 and C9 if:
1270  * C10 is enabled and SGX disabled
1271  */
1272 static void sklh_idle_state_table_update(void)
1273 {
1274         unsigned long long msr;
1275         unsigned int eax, ebx, ecx, edx;
1276
1277
1278         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1279         if (max_cstate <= 7)
1280                 return;
1281
1282         /* if PC10 not present in CPUID.MWAIT.EDX */
1283         if ((mwait_substates & (0xF << 28)) == 0)
1284                 return;
1285
1286         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1287
1288         /* PC10 is not enabled in PKG C-state limit */
1289         if ((msr & 0xF) != 8)
1290                 return;
1291
1292         ecx = 0;
1293         cpuid(7, &eax, &ebx, &ecx, &edx);
1294
1295         /* if SGX is present */
1296         if (ebx & (1 << 2)) {
1297
1298                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1299
1300                 /* if SGX is enabled */
1301                 if (msr & (1 << 18))
1302                         return;
1303         }
1304
1305         skl_cstates[5].disabled = 1;    /* C8-SKL */
1306         skl_cstates[6].disabled = 1;    /* C9-SKL */
1307 }
1308 /*
1309  * intel_idle_state_table_update()
1310  *
1311  * Update the default state_table for this CPU-id
1312  */
1313
1314 static void intel_idle_state_table_update(void)
1315 {
1316         switch (boot_cpu_data.x86_model) {
1317
1318         case INTEL_FAM6_IVYBRIDGE_X:
1319                 ivt_idle_state_table_update();
1320                 break;
1321         case INTEL_FAM6_ATOM_GOLDMONT:
1322         case INTEL_FAM6_ATOM_GEMINI_LAKE:
1323                 bxt_idle_state_table_update();
1324                 break;
1325         case INTEL_FAM6_SKYLAKE_DESKTOP:
1326                 sklh_idle_state_table_update();
1327                 break;
1328         }
1329 }
1330
1331 /*
1332  * intel_idle_cpuidle_driver_init()
1333  * allocate, initialize cpuidle_states
1334  */
1335 static void __init intel_idle_cpuidle_driver_init(void)
1336 {
1337         int cstate;
1338         struct cpuidle_driver *drv = &intel_idle_driver;
1339
1340         intel_idle_state_table_update();
1341
1342         cpuidle_poll_state_init(drv);
1343         drv->state_count = 1;
1344
1345         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1346                 int num_substates, mwait_hint, mwait_cstate;
1347
1348                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1349                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1350                         break;
1351
1352                 if (cstate + 1 > max_cstate) {
1353                         pr_info("max_cstate %d reached\n", max_cstate);
1354                         break;
1355                 }
1356
1357                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1358                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1359
1360                 /* number of sub-states for this state in CPUID.MWAIT */
1361                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1362                                         & MWAIT_SUBSTATE_MASK;
1363
1364                 /* if NO sub-states for this state in CPUID, skip it */
1365                 if (num_substates == 0)
1366                         continue;
1367
1368                 /* if state marked as disabled, skip it */
1369                 if (cpuidle_state_table[cstate].disabled != 0) {
1370                         pr_debug("state %s is disabled\n",
1371                                  cpuidle_state_table[cstate].name);
1372                         continue;
1373                 }
1374
1375
1376                 if (((mwait_cstate + 1) > 2) &&
1377                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1378                         mark_tsc_unstable("TSC halts in idle"
1379                                         " states deeper than C2");
1380
1381                 drv->states[drv->state_count] = /* structure copy */
1382                         cpuidle_state_table[cstate];
1383
1384                 drv->state_count += 1;
1385         }
1386
1387         if (icpu->byt_auto_demotion_disable_flag) {
1388                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1389                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1390         }
1391 }
1392
1393
1394 /*
1395  * intel_idle_cpu_init()
1396  * allocate, initialize, register cpuidle_devices
1397  * @cpu: cpu/core to initialize
1398  */
1399 static int intel_idle_cpu_init(unsigned int cpu)
1400 {
1401         struct cpuidle_device *dev;
1402
1403         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1404         dev->cpu = cpu;
1405
1406         if (cpuidle_register_device(dev)) {
1407                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1408                 return -EIO;
1409         }
1410
1411         if (icpu->auto_demotion_disable_flags)
1412                 auto_demotion_disable();
1413
1414         if (icpu->disable_promotion_to_c1e)
1415                 c1e_promotion_disable();
1416
1417         return 0;
1418 }
1419
1420 static int intel_idle_cpu_online(unsigned int cpu)
1421 {
1422         struct cpuidle_device *dev;
1423
1424         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1425                 __setup_broadcast_timer(true);
1426
1427         /*
1428          * Some systems can hotplug a cpu at runtime after
1429          * the kernel has booted, we have to initialize the
1430          * driver in this case
1431          */
1432         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1433         if (!dev->registered)
1434                 return intel_idle_cpu_init(cpu);
1435
1436         return 0;
1437 }
1438
1439 static int __init intel_idle_init(void)
1440 {
1441         int retval;
1442
1443         /* Do not load intel_idle at all for now if idle= is passed */
1444         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1445                 return -ENODEV;
1446
1447         retval = intel_idle_probe();
1448         if (retval)
1449                 return retval;
1450
1451         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1452         if (intel_idle_cpuidle_devices == NULL)
1453                 return -ENOMEM;
1454
1455         intel_idle_cpuidle_driver_init();
1456         retval = cpuidle_register_driver(&intel_idle_driver);
1457         if (retval) {
1458                 struct cpuidle_driver *drv = cpuidle_get_driver();
1459                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1460                        drv ? drv->name : "none");
1461                 goto init_driver_fail;
1462         }
1463
1464         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1465                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1466
1467         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1468                                    intel_idle_cpu_online, NULL);
1469         if (retval < 0)
1470                 goto hp_setup_fail;
1471
1472         pr_debug("lapic_timer_reliable_states 0x%x\n",
1473                  lapic_timer_reliable_states);
1474
1475         return 0;
1476
1477 hp_setup_fail:
1478         intel_idle_cpuidle_devices_uninit();
1479         cpuidle_unregister_driver(&intel_idle_driver);
1480 init_driver_fail:
1481         free_percpu(intel_idle_cpuidle_devices);
1482         return retval;
1483
1484 }
1485 device_initcall(intel_idle_init);
1486
/*
 * We are not really modular, but we used to support that.  As a result we
 * still support "intel_idle.max_cstate=..." at boot, as well as a read-only
 * export of it at /sys/module/intel_idle/parameters/max_cstate, and using
 * module_param is (currently) the easiest way to keep doing both.
 */
1493 module_param(max_cstate, int, 0444);