Merge branch 'x86/cache' into perf/core, to pick up fixes
[sfrench/cifs-2.6.git] / drivers / idle / intel_idle.c
1 /*
2  * intel_idle.c - native hardware idle loop for modern Intel processors
3  *
4  * Copyright (c) 2013, Intel Corporation.
5  * Len Brown <len.brown@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms and conditions of the GNU General Public License,
9  * version 2, as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20
21 /*
22  * intel_idle is a cpuidle driver that loads on specific Intel processors
23  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24  * make Linux more efficient on these processors, as intel_idle knows
25  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26  */
27
28 /*
29  * Design Assumptions
30  *
31  * All CPUs have same idle states as boot CPU
32  *
33  * Chipset BM_STS (bus master status) bit is a NOP
34  *      for preventing entry into deep C-states
35  */
36
37 /*
38  * Known limitations
39  *
40  * The driver currently initializes for_each_online_cpu() upon modprobe.
41  * It is unaware of subsequent processors hot-added to the system.
42  * This means that if you boot with maxcpus=n and later online
43  * processors above n, those processors will use C1 only.
44  *
45  * ACPI has a .suspend hack to turn off deep C-states during suspend
46  * to avoid complications with the lapic timer workaround.
47  * Have not seen issues with suspend, but may need same workaround here.
48  *
49  */
50
51 /* DEBUG is defined below to enable pr_debug() statements; comment it out to disable them */
52 #define DEBUG
53
54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
55
56 #include <linux/kernel.h>
57 #include <linux/cpuidle.h>
58 #include <linux/tick.h>
59 #include <trace/events/power.h>
60 #include <linux/sched.h>
61 #include <linux/notifier.h>
62 #include <linux/cpu.h>
63 #include <linux/moduleparam.h>
64 #include <asm/cpu_device_id.h>
65 #include <asm/intel-family.h>
66 #include <asm/mwait.h>
67 #include <asm/msr.h>
68
69 #define INTEL_IDLE_VERSION "0.4.1"
70
/*
 * cpuidle driver object for this module.  Only .name and .owner are
 * initialized statically here; the remaining fields are left zeroed
 * by this initializer.
 */
71 static struct cpuidle_driver intel_idle_driver = {
72         .name = "intel_idle",
73         .owner = THIS_MODULE,
74 };
75 /* intel_idle.max_cstate=0 disables driver */
/* Defaults to the largest value allowed by CPUIDLE_STATE_MAX. */
76 static int max_cstate = CPUIDLE_STATE_MAX - 1;
77
/* NOTE(review): presumably filled in from CPUID MWAIT data; the assignment is not visible in this chunk. */
78 static unsigned int mwait_substates;
79
/* All bits set; presumably used to mark every C-state as LAPIC-timer reliable -- confirm at the use site. */
80 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
81 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
82 static unsigned int lapic_timer_reliable_states = (1 << 1);      /* Default to only C1 */
83
/*
 * Per-CPU-model description: a pointer to a cpuidle_state table plus
 * model-specific tuning flags.
 * NOTE(review): how each flag is consumed is not visible in this chunk.
 */
84 struct idle_cpu {
        /* Table of C-states for this model (presumably one of the *_cstates arrays in this file). */
85         struct cpuidle_state *state_table;
86
87         /*
88          * Hardware C-state auto-demotion may not always be optimal.
89          * Indicate which enable bits to clear here.
90          */
91         unsigned long auto_demotion_disable_flags;
        /* NOTE(review): presumably a separate auto-demotion switch for the "byt" parts -- confirm. */
92         bool byt_auto_demotion_disable_flag;
        /* NOTE(review): presumably requests that hardware promotion to C1E be turned off -- confirm. */
93         bool disable_promotion_to_c1e;
94 };
95
/* Currently selected idle_cpu description (assigned outside this chunk). */
96 static const struct idle_cpu *icpu;
/* Per-CPU cpuidle_device storage (note the __percpu annotation). */
97 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
/*
 * Forward declarations of the two callbacks that the state tables below
 * reference through .enter and .enter_s2idle.
 */
98 static int intel_idle(struct cpuidle_device *dev,
99                         struct cpuidle_driver *drv, int index);
100 static void intel_idle_s2idle(struct cpuidle_device *dev,
101                               struct cpuidle_driver *drv, int index);
/* Active cpuidle_state table (assigned outside this chunk). */
102 static struct cpuidle_state *cpuidle_state_table;
103
104 /*
105  * Set this flag for states where the HW flushes the TLB for us
106  * and so we don't need cross-calls to keep it consistent.
107  * If this flag is set, SW flushes the TLB, so even if the
108  * HW doesn't do the flushing, this flag is safe to use.
 *
 * The value is bit 16 of the per-state "flags" word, which does not
 * overlap bits 31:24 where the MWAIT hint is stored (see MWAIT2flg).
109  */
110 #define CPUIDLE_FLAG_TLB_FLUSHED        0x10000
111
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT(flags): extract the 8-bit hint from bits 31:24 of flags.
 * MWAIT2flg(eax):   place the low 8 bits of eax into bits 31:24.
 *
 * Both macros parenthesize their argument so that a compound
 * expression (e.g. "a ^ b") is masked as a whole rather than being
 * split by operator precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
121
122 /*
123  * States are indexed by the cstate number,
124  * which is also the index into the MWAIT hint array.
125  * Thus C0 is a dummy.
 *
 * NOTE(review): the tables visible in this file start directly at C1
 * (or C1E) and contain no C0 placeholder entry, so the statement above
 * looks stale -- confirm.
 *
 * nehalem_cstates: C1, C1E, C3, C6.  Each entry stores its MWAIT hint
 * in the top byte of .flags via MWAIT2flg(); the table ends with an
 * entry whose .enter is NULL.  exit_latency/target_residency are in
 * microseconds per the cpuidle API (not shown in this file).
126  */
127 static struct cpuidle_state nehalem_cstates[] = {
128         {
129                 .name = "C1",
130                 .desc = "MWAIT 0x00",
131                 .flags = MWAIT2flg(0x00),
132                 .exit_latency = 3,
133                 .target_residency = 6,
134                 .enter = &intel_idle,
135                 .enter_s2idle = intel_idle_s2idle, },
136         {
137                 .name = "C1E",
138                 .desc = "MWAIT 0x01",
139                 .flags = MWAIT2flg(0x01),
140                 .exit_latency = 10,
141                 .target_residency = 20,
142                 .enter = &intel_idle,
143                 .enter_s2idle = intel_idle_s2idle, },
144         {
145                 .name = "C3",
146                 .desc = "MWAIT 0x10",
147                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
148                 .exit_latency = 20,
149                 .target_residency = 80,
150                 .enter = &intel_idle,
151                 .enter_s2idle = intel_idle_s2idle, },
152         {
153                 .name = "C6",
154                 .desc = "MWAIT 0x20",
155                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
156                 .exit_latency = 200,
157                 .target_residency = 800,
158                 .enter = &intel_idle,
159                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: an entry with a NULL .enter marks the end of the table. */
160         {
161                 .enter = NULL }
162 };
163
/*
 * snb_cstates: C1, C1E, C3, C6, C7 (presumably Sandy Bridge, judging by
 * the name).  States from C3 down carry CPUIDLE_FLAG_TLB_FLUSHED.
 * The table ends with an entry whose .enter is NULL.
 */
164 static struct cpuidle_state snb_cstates[] = {
165         {
166                 .name = "C1",
167                 .desc = "MWAIT 0x00",
168                 .flags = MWAIT2flg(0x00),
169                 .exit_latency = 2,
170                 .target_residency = 2,
171                 .enter = &intel_idle,
172                 .enter_s2idle = intel_idle_s2idle, },
173         {
174                 .name = "C1E",
175                 .desc = "MWAIT 0x01",
176                 .flags = MWAIT2flg(0x01),
177                 .exit_latency = 10,
178                 .target_residency = 20,
179                 .enter = &intel_idle,
180                 .enter_s2idle = intel_idle_s2idle, },
181         {
182                 .name = "C3",
183                 .desc = "MWAIT 0x10",
184                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
185                 .exit_latency = 80,
186                 .target_residency = 211,
187                 .enter = &intel_idle,
188                 .enter_s2idle = intel_idle_s2idle, },
189         {
190                 .name = "C6",
191                 .desc = "MWAIT 0x20",
192                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
193                 .exit_latency = 104,
194                 .target_residency = 345,
195                 .enter = &intel_idle,
196                 .enter_s2idle = intel_idle_s2idle, },
197         {
198                 .name = "C7",
199                 .desc = "MWAIT 0x30",
200                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
201                 .exit_latency = 109,
202                 .target_residency = 345,
203                 .enter = &intel_idle,
204                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
205         {
206                 .enter = NULL }
207 };
208
/*
 * byt_cstates: C1, C6N, C6S, C7, C7S (presumably Bay Trail, judging by
 * the name).  Hints here use non-zero sub-state nibbles (0x58, 0x52,
 * 0x64).  The table ends with an entry whose .enter is NULL.
 */
209 static struct cpuidle_state byt_cstates[] = {
210         {
211                 .name = "C1",
212                 .desc = "MWAIT 0x00",
213                 .flags = MWAIT2flg(0x00),
214                 .exit_latency = 1,
215                 .target_residency = 1,
216                 .enter = &intel_idle,
217                 .enter_s2idle = intel_idle_s2idle, },
        /*
         * NOTE(review): exit_latency (300) exceeds target_residency (275)
         * for this entry, unlike the other entries in this table -- confirm
         * this is intended.
         */
218         {
219                 .name = "C6N",
220                 .desc = "MWAIT 0x58",
221                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
222                 .exit_latency = 300,
223                 .target_residency = 275,
224                 .enter = &intel_idle,
225                 .enter_s2idle = intel_idle_s2idle, },
226         {
227                 .name = "C6S",
228                 .desc = "MWAIT 0x52",
229                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
230                 .exit_latency = 500,
231                 .target_residency = 560,
232                 .enter = &intel_idle,
233                 .enter_s2idle = intel_idle_s2idle, },
234         {
235                 .name = "C7",
236                 .desc = "MWAIT 0x60",
237                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
238                 .exit_latency = 1200,
239                 .target_residency = 4000,
240                 .enter = &intel_idle,
241                 .enter_s2idle = intel_idle_s2idle, },
242         {
243                 .name = "C7S",
244                 .desc = "MWAIT 0x64",
245                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
246                 .exit_latency = 10000,
247                 .target_residency = 20000,
248                 .enter = &intel_idle,
249                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
250         {
251                 .enter = NULL }
252 };
253
/*
 * cht_cstates: C1, C6N, C6S, C7, C7S (presumably Cherry Trail, judging
 * by the name).  Same state names and MWAIT hints as byt_cstates, with
 * lower exit_latency for C6N (80) and C6S (200).
 * The table ends with an entry whose .enter is NULL.
 */
254 static struct cpuidle_state cht_cstates[] = {
255         {
256                 .name = "C1",
257                 .desc = "MWAIT 0x00",
258                 .flags = MWAIT2flg(0x00),
259                 .exit_latency = 1,
260                 .target_residency = 1,
261                 .enter = &intel_idle,
262                 .enter_s2idle = intel_idle_s2idle, },
263         {
264                 .name = "C6N",
265                 .desc = "MWAIT 0x58",
266                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
267                 .exit_latency = 80,
268                 .target_residency = 275,
269                 .enter = &intel_idle,
270                 .enter_s2idle = intel_idle_s2idle, },
271         {
272                 .name = "C6S",
273                 .desc = "MWAIT 0x52",
274                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
275                 .exit_latency = 200,
276                 .target_residency = 560,
277                 .enter = &intel_idle,
278                 .enter_s2idle = intel_idle_s2idle, },
279         {
280                 .name = "C7",
281                 .desc = "MWAIT 0x60",
282                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
283                 .exit_latency = 1200,
284                 .target_residency = 4000,
285                 .enter = &intel_idle,
286                 .enter_s2idle = intel_idle_s2idle, },
287         {
288                 .name = "C7S",
289                 .desc = "MWAIT 0x64",
290                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
291                 .exit_latency = 10000,
292                 .target_residency = 20000,
293                 .enter = &intel_idle,
294                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
295         {
296                 .enter = NULL }
297 };
298
/*
 * ivb_cstates: C1, C1E, C3, C6, C7 (presumably Ivy Bridge, judging by
 * the name).  States from C3 down carry CPUIDLE_FLAG_TLB_FLUSHED.
 * The table ends with an entry whose .enter is NULL.
 */
299 static struct cpuidle_state ivb_cstates[] = {
300         {
301                 .name = "C1",
302                 .desc = "MWAIT 0x00",
303                 .flags = MWAIT2flg(0x00),
304                 .exit_latency = 1,
305                 .target_residency = 1,
306                 .enter = &intel_idle,
307                 .enter_s2idle = intel_idle_s2idle, },
308         {
309                 .name = "C1E",
310                 .desc = "MWAIT 0x01",
311                 .flags = MWAIT2flg(0x01),
312                 .exit_latency = 10,
313                 .target_residency = 20,
314                 .enter = &intel_idle,
315                 .enter_s2idle = intel_idle_s2idle, },
316         {
317                 .name = "C3",
318                 .desc = "MWAIT 0x10",
319                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
320                 .exit_latency = 59,
321                 .target_residency = 156,
322                 .enter = &intel_idle,
323                 .enter_s2idle = intel_idle_s2idle, },
324         {
325                 .name = "C6",
326                 .desc = "MWAIT 0x20",
327                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
328                 .exit_latency = 80,
329                 .target_residency = 300,
330                 .enter = &intel_idle,
331                 .enter_s2idle = intel_idle_s2idle, },
332         {
333                 .name = "C7",
334                 .desc = "MWAIT 0x30",
335                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
336                 .exit_latency = 87,
337                 .target_residency = 300,
338                 .enter = &intel_idle,
339                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
340         {
341                 .enter = NULL }
342 };
343
/*
 * ivt_cstates: C1, C1E, C3, C6.  Base variant of the three ivt tables
 * in this file; the _4s and _8s variants use the same hints with larger
 * target_residency values.  The table ends with an entry whose .enter
 * is NULL.
 */
344 static struct cpuidle_state ivt_cstates[] = {
345         {
346                 .name = "C1",
347                 .desc = "MWAIT 0x00",
348                 .flags = MWAIT2flg(0x00),
349                 .exit_latency = 1,
350                 .target_residency = 1,
351                 .enter = &intel_idle,
352                 .enter_s2idle = intel_idle_s2idle, },
353         {
354                 .name = "C1E",
355                 .desc = "MWAIT 0x01",
356                 .flags = MWAIT2flg(0x01),
357                 .exit_latency = 10,
358                 .target_residency = 80,
359                 .enter = &intel_idle,
360                 .enter_s2idle = intel_idle_s2idle, },
361         {
362                 .name = "C3",
363                 .desc = "MWAIT 0x10",
364                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
365                 .exit_latency = 59,
366                 .target_residency = 156,
367                 .enter = &intel_idle,
368                 .enter_s2idle = intel_idle_s2idle, },
369         {
370                 .name = "C6",
371                 .desc = "MWAIT 0x20",
372                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
373                 .exit_latency = 82,
374                 .target_residency = 300,
375                 .enter = &intel_idle,
376                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
377         {
378                 .enter = NULL }
379 };
380
/*
 * ivt_cstates_4s: C1, C1E, C3, C6.  Same hints as ivt_cstates with
 * larger target_residency values (250/300/400) and C6 exit_latency 84.
 * NOTE(review): "_4s" presumably means 4-socket systems -- confirm at
 * the selection site, which is not visible in this chunk.
 */
381 static struct cpuidle_state ivt_cstates_4s[] = {
382         {
383                 .name = "C1",
384                 .desc = "MWAIT 0x00",
385                 .flags = MWAIT2flg(0x00),
386                 .exit_latency = 1,
387                 .target_residency = 1,
388                 .enter = &intel_idle,
389                 .enter_s2idle = intel_idle_s2idle, },
390         {
391                 .name = "C1E",
392                 .desc = "MWAIT 0x01",
393                 .flags = MWAIT2flg(0x01),
394                 .exit_latency = 10,
395                 .target_residency = 250,
396                 .enter = &intel_idle,
397                 .enter_s2idle = intel_idle_s2idle, },
398         {
399                 .name = "C3",
400                 .desc = "MWAIT 0x10",
401                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
402                 .exit_latency = 59,
403                 .target_residency = 300,
404                 .enter = &intel_idle,
405                 .enter_s2idle = intel_idle_s2idle, },
406         {
407                 .name = "C6",
408                 .desc = "MWAIT 0x20",
409                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
410                 .exit_latency = 84,
411                 .target_residency = 400,
412                 .enter = &intel_idle,
413                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
414         {
415                 .enter = NULL }
416 };
417
/*
 * ivt_cstates_8s: C1, C1E, C3, C6.  Same hints as ivt_cstates with the
 * largest target_residency values of the three ivt tables (500/600/700)
 * and C6 exit_latency 88.
 * NOTE(review): "_8s" presumably means 8-socket systems -- confirm at
 * the selection site, which is not visible in this chunk.
 */
418 static struct cpuidle_state ivt_cstates_8s[] = {
419         {
420                 .name = "C1",
421                 .desc = "MWAIT 0x00",
422                 .flags = MWAIT2flg(0x00),
423                 .exit_latency = 1,
424                 .target_residency = 1,
425                 .enter = &intel_idle,
426                 .enter_s2idle = intel_idle_s2idle, },
427         {
428                 .name = "C1E",
429                 .desc = "MWAIT 0x01",
430                 .flags = MWAIT2flg(0x01),
431                 .exit_latency = 10,
432                 .target_residency = 500,
433                 .enter = &intel_idle,
434                 .enter_s2idle = intel_idle_s2idle, },
435         {
436                 .name = "C3",
437                 .desc = "MWAIT 0x10",
438                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
439                 .exit_latency = 59,
440                 .target_residency = 600,
441                 .enter = &intel_idle,
442                 .enter_s2idle = intel_idle_s2idle, },
443         {
444                 .name = "C6",
445                 .desc = "MWAIT 0x20",
446                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
447                 .exit_latency = 88,
448                 .target_residency = 700,
449                 .enter = &intel_idle,
450                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
451         {
452                 .enter = NULL }
453 };
454
/*
 * hsw_cstates: C1, C1E, C3, C6, C7s, C8, C9, C10 (presumably Haswell,
 * judging by the name).  C7s uses sub-state hint 0x32.
 * The table ends with an entry whose .enter is NULL.
 */
455 static struct cpuidle_state hsw_cstates[] = {
456         {
457                 .name = "C1",
458                 .desc = "MWAIT 0x00",
459                 .flags = MWAIT2flg(0x00),
460                 .exit_latency = 2,
461                 .target_residency = 2,
462                 .enter = &intel_idle,
463                 .enter_s2idle = intel_idle_s2idle, },
464         {
465                 .name = "C1E",
466                 .desc = "MWAIT 0x01",
467                 .flags = MWAIT2flg(0x01),
468                 .exit_latency = 10,
469                 .target_residency = 20,
470                 .enter = &intel_idle,
471                 .enter_s2idle = intel_idle_s2idle, },
472         {
473                 .name = "C3",
474                 .desc = "MWAIT 0x10",
475                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
476                 .exit_latency = 33,
477                 .target_residency = 100,
478                 .enter = &intel_idle,
479                 .enter_s2idle = intel_idle_s2idle, },
480         {
481                 .name = "C6",
482                 .desc = "MWAIT 0x20",
483                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
484                 .exit_latency = 133,
485                 .target_residency = 400,
486                 .enter = &intel_idle,
487                 .enter_s2idle = intel_idle_s2idle, },
488         {
489                 .name = "C7s",
490                 .desc = "MWAIT 0x32",
491                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
492                 .exit_latency = 166,
493                 .target_residency = 500,
494                 .enter = &intel_idle,
495                 .enter_s2idle = intel_idle_s2idle, },
496         {
497                 .name = "C8",
498                 .desc = "MWAIT 0x40",
499                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
500                 .exit_latency = 300,
501                 .target_residency = 900,
502                 .enter = &intel_idle,
503                 .enter_s2idle = intel_idle_s2idle, },
504         {
505                 .name = "C9",
506                 .desc = "MWAIT 0x50",
507                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
508                 .exit_latency = 600,
509                 .target_residency = 1800,
510                 .enter = &intel_idle,
511                 .enter_s2idle = intel_idle_s2idle, },
512         {
513                 .name = "C10",
514                 .desc = "MWAIT 0x60",
515                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
516                 .exit_latency = 2600,
517                 .target_residency = 7700,
518                 .enter = &intel_idle,
519                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
520         {
521                 .enter = NULL }
522 };
/*
 * bdw_cstates: C1, C1E, C3, C6, C7s, C8, C9, C10 (presumably Broadwell,
 * judging by the name).  Identical to hsw_cstates except for the C3
 * exit_latency (40 here, 33 there).
 * The table ends with an entry whose .enter is NULL.
 */
523 static struct cpuidle_state bdw_cstates[] = {
524         {
525                 .name = "C1",
526                 .desc = "MWAIT 0x00",
527                 .flags = MWAIT2flg(0x00),
528                 .exit_latency = 2,
529                 .target_residency = 2,
530                 .enter = &intel_idle,
531                 .enter_s2idle = intel_idle_s2idle, },
532         {
533                 .name = "C1E",
534                 .desc = "MWAIT 0x01",
535                 .flags = MWAIT2flg(0x01),
536                 .exit_latency = 10,
537                 .target_residency = 20,
538                 .enter = &intel_idle,
539                 .enter_s2idle = intel_idle_s2idle, },
540         {
541                 .name = "C3",
542                 .desc = "MWAIT 0x10",
543                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
544                 .exit_latency = 40,
545                 .target_residency = 100,
546                 .enter = &intel_idle,
547                 .enter_s2idle = intel_idle_s2idle, },
548         {
549                 .name = "C6",
550                 .desc = "MWAIT 0x20",
551                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
552                 .exit_latency = 133,
553                 .target_residency = 400,
554                 .enter = &intel_idle,
555                 .enter_s2idle = intel_idle_s2idle, },
556         {
557                 .name = "C7s",
558                 .desc = "MWAIT 0x32",
559                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
560                 .exit_latency = 166,
561                 .target_residency = 500,
562                 .enter = &intel_idle,
563                 .enter_s2idle = intel_idle_s2idle, },
564         {
565                 .name = "C8",
566                 .desc = "MWAIT 0x40",
567                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
568                 .exit_latency = 300,
569                 .target_residency = 900,
570                 .enter = &intel_idle,
571                 .enter_s2idle = intel_idle_s2idle, },
572         {
573                 .name = "C9",
574                 .desc = "MWAIT 0x50",
575                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
576                 .exit_latency = 600,
577                 .target_residency = 1800,
578                 .enter = &intel_idle,
579                 .enter_s2idle = intel_idle_s2idle, },
580         {
581                 .name = "C10",
582                 .desc = "MWAIT 0x60",
583                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
584                 .exit_latency = 2600,
585                 .target_residency = 7700,
586                 .enter = &intel_idle,
587                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
588         {
589                 .enter = NULL }
590 };
591
/*
 * skl_cstates: C1, C1E, C3, C6, C7s, C8, C9, C10 (presumably Skylake,
 * judging by the name).  C7s uses sub-state hint 0x33.
 * The table ends with an entry whose .enter is NULL.
 */
592 static struct cpuidle_state skl_cstates[] = {
593         {
594                 .name = "C1",
595                 .desc = "MWAIT 0x00",
596                 .flags = MWAIT2flg(0x00),
597                 .exit_latency = 2,
598                 .target_residency = 2,
599                 .enter = &intel_idle,
600                 .enter_s2idle = intel_idle_s2idle, },
601         {
602                 .name = "C1E",
603                 .desc = "MWAIT 0x01",
604                 .flags = MWAIT2flg(0x01),
605                 .exit_latency = 10,
606                 .target_residency = 20,
607                 .enter = &intel_idle,
608                 .enter_s2idle = intel_idle_s2idle, },
609         {
610                 .name = "C3",
611                 .desc = "MWAIT 0x10",
612                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
613                 .exit_latency = 70,
614                 .target_residency = 100,
615                 .enter = &intel_idle,
616                 .enter_s2idle = intel_idle_s2idle, },
617         {
618                 .name = "C6",
619                 .desc = "MWAIT 0x20",
620                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
621                 .exit_latency = 85,
622                 .target_residency = 200,
623                 .enter = &intel_idle,
624                 .enter_s2idle = intel_idle_s2idle, },
625         {
626                 .name = "C7s",
627                 .desc = "MWAIT 0x33",
628                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
629                 .exit_latency = 124,
630                 .target_residency = 800,
631                 .enter = &intel_idle,
632                 .enter_s2idle = intel_idle_s2idle, },
633         {
634                 .name = "C8",
635                 .desc = "MWAIT 0x40",
636                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
637                 .exit_latency = 200,
638                 .target_residency = 800,
639                 .enter = &intel_idle,
640                 .enter_s2idle = intel_idle_s2idle, },
641         {
642                 .name = "C9",
643                 .desc = "MWAIT 0x50",
644                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
645                 .exit_latency = 480,
646                 .target_residency = 5000,
647                 .enter = &intel_idle,
648                 .enter_s2idle = intel_idle_s2idle, },
649         {
650                 .name = "C10",
651                 .desc = "MWAIT 0x60",
652                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
653                 .exit_latency = 890,
654                 .target_residency = 5000,
655                 .enter = &intel_idle,
656                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
657         {
658                 .enter = NULL }
659 };
660
/*
 * skx_cstates: C1, C1E, C6 only; there is no C3 entry in this table.
 * The table ends with an entry whose .enter is NULL.
 */
661 static struct cpuidle_state skx_cstates[] = {
662         {
663                 .name = "C1",
664                 .desc = "MWAIT 0x00",
665                 .flags = MWAIT2flg(0x00),
666                 .exit_latency = 2,
667                 .target_residency = 2,
668                 .enter = &intel_idle,
669                 .enter_s2idle = intel_idle_s2idle, },
670         {
671                 .name = "C1E",
672                 .desc = "MWAIT 0x01",
673                 .flags = MWAIT2flg(0x01),
674                 .exit_latency = 10,
675                 .target_residency = 20,
676                 .enter = &intel_idle,
677                 .enter_s2idle = intel_idle_s2idle, },
678         {
679                 .name = "C6",
680                 .desc = "MWAIT 0x20",
681                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
682                 .exit_latency = 133,
683                 .target_residency = 600,
684                 .enter = &intel_idle,
685                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
686         {
687                 .enter = NULL }
688 };
689
/*
 * atom_cstates: C1E, C2, C4, C6.  The first entry is named "C1E" yet
 * uses hint 0x00, and there is no plain "C1" entry.  C2 does not set
 * CPUIDLE_FLAG_TLB_FLUSHED; C4 and C6 do.
 * The table ends with an entry whose .enter is NULL.
 */
690 static struct cpuidle_state atom_cstates[] = {
691         {
692                 .name = "C1E",
693                 .desc = "MWAIT 0x00",
694                 .flags = MWAIT2flg(0x00),
695                 .exit_latency = 10,
696                 .target_residency = 20,
697                 .enter = &intel_idle,
698                 .enter_s2idle = intel_idle_s2idle, },
699         {
700                 .name = "C2",
701                 .desc = "MWAIT 0x10",
702                 .flags = MWAIT2flg(0x10),
703                 .exit_latency = 20,
704                 .target_residency = 80,
705                 .enter = &intel_idle,
706                 .enter_s2idle = intel_idle_s2idle, },
707         {
708                 .name = "C4",
709                 .desc = "MWAIT 0x30",
710                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
711                 .exit_latency = 100,
712                 .target_residency = 400,
713                 .enter = &intel_idle,
714                 .enter_s2idle = intel_idle_s2idle, },
715         {
716                 .name = "C6",
717                 .desc = "MWAIT 0x52",
718                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
719                 .exit_latency = 140,
720                 .target_residency = 560,
721                 .enter = &intel_idle,
722                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
723         {
724                 .enter = NULL }
725 };
/*
 * tangier_cstates: C1, C4, C6, C7, C9.  The deepest state is named "C9"
 * and uses hint 0x64.  The table ends with an entry whose .enter is
 * NULL.
 */
726 static struct cpuidle_state tangier_cstates[] = {
727         {
728                 .name = "C1",
729                 .desc = "MWAIT 0x00",
730                 .flags = MWAIT2flg(0x00),
731                 .exit_latency = 1,
732                 .target_residency = 4,
733                 .enter = &intel_idle,
734                 .enter_s2idle = intel_idle_s2idle, },
735         {
736                 .name = "C4",
737                 .desc = "MWAIT 0x30",
738                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
739                 .exit_latency = 100,
740                 .target_residency = 400,
741                 .enter = &intel_idle,
742                 .enter_s2idle = intel_idle_s2idle, },
743         {
744                 .name = "C6",
745                 .desc = "MWAIT 0x52",
746                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
747                 .exit_latency = 140,
748                 .target_residency = 560,
749                 .enter = &intel_idle,
750                 .enter_s2idle = intel_idle_s2idle, },
751         {
752                 .name = "C7",
753                 .desc = "MWAIT 0x60",
754                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
755                 .exit_latency = 1200,
756                 .target_residency = 4000,
757                 .enter = &intel_idle,
758                 .enter_s2idle = intel_idle_s2idle, },
759         {
760                 .name = "C9",
761                 .desc = "MWAIT 0x64",
762                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
763                 .exit_latency = 10000,
764                 .target_residency = 20000,
765                 .enter = &intel_idle,
766                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
767         {
768                 .enter = NULL }
769 };
/*
 * avn_cstates: C1 and C6 only; C6 uses hint 0x51.
 * The table ends with an entry whose .enter is NULL.
 */
770 static struct cpuidle_state avn_cstates[] = {
771         {
772                 .name = "C1",
773                 .desc = "MWAIT 0x00",
774                 .flags = MWAIT2flg(0x00),
775                 .exit_latency = 2,
776                 .target_residency = 2,
777                 .enter = &intel_idle,
778                 .enter_s2idle = intel_idle_s2idle, },
779         {
780                 .name = "C6",
781                 .desc = "MWAIT 0x51",
782                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
783                 .exit_latency = 15,
784                 .target_residency = 45,
785                 .enter = &intel_idle,
786                 .enter_s2idle = intel_idle_s2idle, },
        /* Terminator: NULL .enter ends the table. */
787         {
788                 .enter = NULL }
789 };
/*
 * knl_cstates: C1 and C6 only.  Here the state named "C6" uses hint
 * 0x10, the hint other tables in this file attach to "C3"/"C2".
 * The table ends with an entry whose .enter is NULL.
 */
790 static struct cpuidle_state knl_cstates[] = {
791         {
792                 .name = "C1",
793                 .desc = "MWAIT 0x00",
794                 .flags = MWAIT2flg(0x00),
795                 .exit_latency = 1,
796                 .target_residency = 2,
797                 .enter = &intel_idle,
798                 .enter_s2idle = intel_idle_s2idle },
799         {
800                 .name = "C6",
801                 .desc = "MWAIT 0x10",
802                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
803                 .exit_latency = 120,
804                 .target_residency = 500,
805                 .enter = &intel_idle,
806                 .enter_s2idle = intel_idle_s2idle },
        /* Terminator: NULL .enter ends the table. */
807         {
808                 .enter = NULL }
809 };
810
811 static struct cpuidle_state bxt_cstates[] = {
812         {
813                 .name = "C1",
814                 .desc = "MWAIT 0x00",
815                 .flags = MWAIT2flg(0x00),
816                 .exit_latency = 2,
817                 .target_residency = 2,
818                 .enter = &intel_idle,
819                 .enter_s2idle = intel_idle_s2idle, },
820         {
821                 .name = "C1E",
822                 .desc = "MWAIT 0x01",
823                 .flags = MWAIT2flg(0x01),
824                 .exit_latency = 10,
825                 .target_residency = 20,
826                 .enter = &intel_idle,
827                 .enter_s2idle = intel_idle_s2idle, },
828         {
829                 .name = "C6",
830                 .desc = "MWAIT 0x20",
831                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
832                 .exit_latency = 133,
833                 .target_residency = 133,
834                 .enter = &intel_idle,
835                 .enter_s2idle = intel_idle_s2idle, },
836         {
837                 .name = "C7s",
838                 .desc = "MWAIT 0x31",
839                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
840                 .exit_latency = 155,
841                 .target_residency = 155,
842                 .enter = &intel_idle,
843                 .enter_s2idle = intel_idle_s2idle, },
844         {
845                 .name = "C8",
846                 .desc = "MWAIT 0x40",
847                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
848                 .exit_latency = 1000,
849                 .target_residency = 1000,
850                 .enter = &intel_idle,
851                 .enter_s2idle = intel_idle_s2idle, },
852         {
853                 .name = "C9",
854                 .desc = "MWAIT 0x50",
855                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
856                 .exit_latency = 2000,
857                 .target_residency = 2000,
858                 .enter = &intel_idle,
859                 .enter_s2idle = intel_idle_s2idle, },
860         {
861                 .name = "C10",
862                 .desc = "MWAIT 0x60",
863                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
864                 .exit_latency = 10000,
865                 .target_residency = 10000,
866                 .enter = &intel_idle,
867                 .enter_s2idle = intel_idle_s2idle, },
868         {
869                 .enter = NULL }
870 };
871
872 static struct cpuidle_state dnv_cstates[] = {
873         {
874                 .name = "C1",
875                 .desc = "MWAIT 0x00",
876                 .flags = MWAIT2flg(0x00),
877                 .exit_latency = 2,
878                 .target_residency = 2,
879                 .enter = &intel_idle,
880                 .enter_s2idle = intel_idle_s2idle, },
881         {
882                 .name = "C1E",
883                 .desc = "MWAIT 0x01",
884                 .flags = MWAIT2flg(0x01),
885                 .exit_latency = 10,
886                 .target_residency = 20,
887                 .enter = &intel_idle,
888                 .enter_s2idle = intel_idle_s2idle, },
889         {
890                 .name = "C6",
891                 .desc = "MWAIT 0x20",
892                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
893                 .exit_latency = 50,
894                 .target_residency = 500,
895                 .enter = &intel_idle,
896                 .enter_s2idle = intel_idle_s2idle, },
897         {
898                 .enter = NULL }
899 };
900
901 /**
902  * intel_idle
903  * @dev: cpuidle_device
904  * @drv: cpuidle driver
905  * @index: index of cpuidle state
906  *
907  * Must be called under local_irq_disable().
908  */
909 static __cpuidle int intel_idle(struct cpuidle_device *dev,
910                                 struct cpuidle_driver *drv, int index)
911 {
912         unsigned long ecx = 1; /* break on interrupt flag */
913         struct cpuidle_state *state = &drv->states[index];
914         unsigned long eax = flg2MWAIT(state->flags);
915         unsigned int cstate;
916         bool uninitialized_var(tick);
917         int cpu = smp_processor_id();
918
919         /*
920          * leave_mm() to avoid costly and often unnecessary wakeups
921          * for flushing the user TLB's associated with the active mm.
922          */
923         if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
924                 leave_mm(cpu);
925
926         if (!static_cpu_has(X86_FEATURE_ARAT)) {
927                 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
928                                 MWAIT_CSTATE_MASK) + 1;
929                 tick = false;
930                 if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
931                         tick = true;
932                         tick_broadcast_enter();
933                 }
934         }
935
936         mwait_idle_with_hints(eax, ecx);
937
938         if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
939                 tick_broadcast_exit();
940
941         return index;
942 }
943
944 /**
945  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
946  * @dev: cpuidle_device
947  * @drv: cpuidle driver
948  * @index: state index
949  */
950 static void intel_idle_s2idle(struct cpuidle_device *dev,
951                              struct cpuidle_driver *drv, int index)
952 {
953         unsigned long ecx = 1; /* break on interrupt flag */
954         unsigned long eax = flg2MWAIT(drv->states[index].flags);
955
956         mwait_idle_with_hints(eax, ecx);
957 }
958
959 static void __setup_broadcast_timer(bool on)
960 {
961         if (on)
962                 tick_broadcast_enable();
963         else
964                 tick_broadcast_disable();
965 }
966
967 static void auto_demotion_disable(void)
968 {
969         unsigned long long msr_bits;
970
971         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
972         msr_bits &= ~(icpu->auto_demotion_disable_flags);
973         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
974 }
975 static void c1e_promotion_disable(void)
976 {
977         unsigned long long msr_bits;
978
979         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
980         msr_bits &= ~0x2;
981         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
982 }
983
984 static const struct idle_cpu idle_cpu_nehalem = {
985         .state_table = nehalem_cstates,
986         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
987         .disable_promotion_to_c1e = true,
988 };
989
990 static const struct idle_cpu idle_cpu_atom = {
991         .state_table = atom_cstates,
992 };
993
994 static const struct idle_cpu idle_cpu_tangier = {
995         .state_table = tangier_cstates,
996 };
997
998 static const struct idle_cpu idle_cpu_lincroft = {
999         .state_table = atom_cstates,
1000         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1001 };
1002
1003 static const struct idle_cpu idle_cpu_snb = {
1004         .state_table = snb_cstates,
1005         .disable_promotion_to_c1e = true,
1006 };
1007
1008 static const struct idle_cpu idle_cpu_byt = {
1009         .state_table = byt_cstates,
1010         .disable_promotion_to_c1e = true,
1011         .byt_auto_demotion_disable_flag = true,
1012 };
1013
1014 static const struct idle_cpu idle_cpu_cht = {
1015         .state_table = cht_cstates,
1016         .disable_promotion_to_c1e = true,
1017         .byt_auto_demotion_disable_flag = true,
1018 };
1019
1020 static const struct idle_cpu idle_cpu_ivb = {
1021         .state_table = ivb_cstates,
1022         .disable_promotion_to_c1e = true,
1023 };
1024
1025 static const struct idle_cpu idle_cpu_ivt = {
1026         .state_table = ivt_cstates,
1027         .disable_promotion_to_c1e = true,
1028 };
1029
1030 static const struct idle_cpu idle_cpu_hsw = {
1031         .state_table = hsw_cstates,
1032         .disable_promotion_to_c1e = true,
1033 };
1034
1035 static const struct idle_cpu idle_cpu_bdw = {
1036         .state_table = bdw_cstates,
1037         .disable_promotion_to_c1e = true,
1038 };
1039
1040 static const struct idle_cpu idle_cpu_skl = {
1041         .state_table = skl_cstates,
1042         .disable_promotion_to_c1e = true,
1043 };
1044
1045 static const struct idle_cpu idle_cpu_skx = {
1046         .state_table = skx_cstates,
1047         .disable_promotion_to_c1e = true,
1048 };
1049
1050 static const struct idle_cpu idle_cpu_avn = {
1051         .state_table = avn_cstates,
1052         .disable_promotion_to_c1e = true,
1053 };
1054
1055 static const struct idle_cpu idle_cpu_knl = {
1056         .state_table = knl_cstates,
1057 };
1058
1059 static const struct idle_cpu idle_cpu_bxt = {
1060         .state_table = bxt_cstates,
1061         .disable_promotion_to_c1e = true,
1062 };
1063
1064 static const struct idle_cpu idle_cpu_dnv = {
1065         .state_table = dnv_cstates,
1066         .disable_promotion_to_c1e = true,
1067 };
1068
1069 #define ICPU(model, cpu) \
1070         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
1071
1072 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1073         ICPU(INTEL_FAM6_NEHALEM_EP,             idle_cpu_nehalem),
1074         ICPU(INTEL_FAM6_NEHALEM,                idle_cpu_nehalem),
1075         ICPU(INTEL_FAM6_NEHALEM_G,              idle_cpu_nehalem),
1076         ICPU(INTEL_FAM6_WESTMERE,               idle_cpu_nehalem),
1077         ICPU(INTEL_FAM6_WESTMERE_EP,            idle_cpu_nehalem),
1078         ICPU(INTEL_FAM6_NEHALEM_EX,             idle_cpu_nehalem),
1079         ICPU(INTEL_FAM6_ATOM_BONNELL,           idle_cpu_atom),
1080         ICPU(INTEL_FAM6_ATOM_BONNELL_MID,               idle_cpu_lincroft),
1081         ICPU(INTEL_FAM6_WESTMERE_EX,            idle_cpu_nehalem),
1082         ICPU(INTEL_FAM6_SANDYBRIDGE,            idle_cpu_snb),
1083         ICPU(INTEL_FAM6_SANDYBRIDGE_X,          idle_cpu_snb),
1084         ICPU(INTEL_FAM6_ATOM_SALTWELL,          idle_cpu_atom),
1085         ICPU(INTEL_FAM6_ATOM_SILVERMONT,        idle_cpu_byt),
1086         ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID,    idle_cpu_tangier),
1087         ICPU(INTEL_FAM6_ATOM_AIRMONT,           idle_cpu_cht),
1088         ICPU(INTEL_FAM6_IVYBRIDGE,              idle_cpu_ivb),
1089         ICPU(INTEL_FAM6_IVYBRIDGE_X,            idle_cpu_ivt),
1090         ICPU(INTEL_FAM6_HASWELL_CORE,           idle_cpu_hsw),
1091         ICPU(INTEL_FAM6_HASWELL_X,              idle_cpu_hsw),
1092         ICPU(INTEL_FAM6_HASWELL_ULT,            idle_cpu_hsw),
1093         ICPU(INTEL_FAM6_HASWELL_GT3E,           idle_cpu_hsw),
1094         ICPU(INTEL_FAM6_ATOM_SILVERMONT_X,      idle_cpu_avn),
1095         ICPU(INTEL_FAM6_BROADWELL_CORE,         idle_cpu_bdw),
1096         ICPU(INTEL_FAM6_BROADWELL_GT3E,         idle_cpu_bdw),
1097         ICPU(INTEL_FAM6_BROADWELL_X,            idle_cpu_bdw),
1098         ICPU(INTEL_FAM6_BROADWELL_XEON_D,       idle_cpu_bdw),
1099         ICPU(INTEL_FAM6_SKYLAKE_MOBILE,         idle_cpu_skl),
1100         ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        idle_cpu_skl),
1101         ICPU(INTEL_FAM6_KABYLAKE_MOBILE,        idle_cpu_skl),
1102         ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,       idle_cpu_skl),
1103         ICPU(INTEL_FAM6_SKYLAKE_X,              idle_cpu_skx),
1104         ICPU(INTEL_FAM6_XEON_PHI_KNL,           idle_cpu_knl),
1105         ICPU(INTEL_FAM6_XEON_PHI_KNM,           idle_cpu_knl),
1106         ICPU(INTEL_FAM6_ATOM_GOLDMONT,          idle_cpu_bxt),
1107         ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS,     idle_cpu_bxt),
1108         ICPU(INTEL_FAM6_ATOM_GOLDMONT_X,        idle_cpu_dnv),
1109         {}
1110 };
1111
1112 /*
1113  * intel_idle_probe()
1114  */
1115 static int __init intel_idle_probe(void)
1116 {
1117         unsigned int eax, ebx, ecx;
1118         const struct x86_cpu_id *id;
1119
1120         if (max_cstate == 0) {
1121                 pr_debug("disabled\n");
1122                 return -EPERM;
1123         }
1124
1125         id = x86_match_cpu(intel_idle_ids);
1126         if (!id) {
1127                 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1128                     boot_cpu_data.x86 == 6)
1129                         pr_debug("does not run on family %d model %d\n",
1130                                  boot_cpu_data.x86, boot_cpu_data.x86_model);
1131                 return -ENODEV;
1132         }
1133
1134         if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1135                 pr_debug("Please enable MWAIT in BIOS SETUP\n");
1136                 return -ENODEV;
1137         }
1138
1139         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1140                 return -ENODEV;
1141
1142         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1143
1144         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1145             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1146             !mwait_substates)
1147                         return -ENODEV;
1148
1149         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1150
1151         icpu = (const struct idle_cpu *)id->driver_data;
1152         cpuidle_state_table = icpu->state_table;
1153
1154         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1155                  boot_cpu_data.x86_model);
1156
1157         return 0;
1158 }
1159
1160 /*
1161  * intel_idle_cpuidle_devices_uninit()
1162  * Unregisters the cpuidle devices.
1163  */
1164 static void intel_idle_cpuidle_devices_uninit(void)
1165 {
1166         int i;
1167         struct cpuidle_device *dev;
1168
1169         for_each_online_cpu(i) {
1170                 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1171                 cpuidle_unregister_device(dev);
1172         }
1173 }
1174
1175 /*
1176  * ivt_idle_state_table_update(void)
1177  *
1178  * Tune IVT multi-socket targets
1179  * Assumption: num_sockets == (max_package_num + 1)
1180  */
1181 static void ivt_idle_state_table_update(void)
1182 {
1183         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1184         int cpu, package_num, num_sockets = 1;
1185
1186         for_each_online_cpu(cpu) {
1187                 package_num = topology_physical_package_id(cpu);
1188                 if (package_num + 1 > num_sockets) {
1189                         num_sockets = package_num + 1;
1190
1191                         if (num_sockets > 4) {
1192                                 cpuidle_state_table = ivt_cstates_8s;
1193                                 return;
1194                         }
1195                 }
1196         }
1197
1198         if (num_sockets > 2)
1199                 cpuidle_state_table = ivt_cstates_4s;
1200
1201         /* else, 1 and 2 socket systems use default ivt_cstates */
1202 }
1203
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

/*
 * Nanoseconds per count for each value of the 3-bit unit field taken from
 * bits 12:10 of the IRTL value.  The last two encodings map to 0, so a
 * value using them translates to 0 usec.  Read-only lookup table.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec() - convert a raw IRTL MSR value to microseconds
 * @irtl: raw MSR value; bits 9:0 are the count, bits 12:10 select the unit
 *
 * Returns count * unit converted from nanoseconds to microseconds
 * (rounded down), or 0 when @irtl is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1222 /*
1223  * bxt_idle_state_table_update(void)
1224  *
1225  * On BXT, we trust the IRTL to show the definitive maximum latency
1226  * We use the same value for target_residency.
1227  */
1228 static void bxt_idle_state_table_update(void)
1229 {
1230         unsigned long long msr;
1231         unsigned int usec;
1232
1233         rdmsrl(MSR_PKGC6_IRTL, msr);
1234         usec = irtl_2_usec(msr);
1235         if (usec) {
1236                 bxt_cstates[2].exit_latency = usec;
1237                 bxt_cstates[2].target_residency = usec;
1238         }
1239
1240         rdmsrl(MSR_PKGC7_IRTL, msr);
1241         usec = irtl_2_usec(msr);
1242         if (usec) {
1243                 bxt_cstates[3].exit_latency = usec;
1244                 bxt_cstates[3].target_residency = usec;
1245         }
1246
1247         rdmsrl(MSR_PKGC8_IRTL, msr);
1248         usec = irtl_2_usec(msr);
1249         if (usec) {
1250                 bxt_cstates[4].exit_latency = usec;
1251                 bxt_cstates[4].target_residency = usec;
1252         }
1253
1254         rdmsrl(MSR_PKGC9_IRTL, msr);
1255         usec = irtl_2_usec(msr);
1256         if (usec) {
1257                 bxt_cstates[5].exit_latency = usec;
1258                 bxt_cstates[5].target_residency = usec;
1259         }
1260
1261         rdmsrl(MSR_PKGC10_IRTL, msr);
1262         usec = irtl_2_usec(msr);
1263         if (usec) {
1264                 bxt_cstates[6].exit_latency = usec;
1265                 bxt_cstates[6].target_residency = usec;
1266         }
1267
1268 }
1269 /*
1270  * sklh_idle_state_table_update(void)
1271  *
1272  * On SKL-H (model 0x5e) disable C8 and C9 if:
1273  * C10 is enabled and SGX disabled
1274  */
1275 static void sklh_idle_state_table_update(void)
1276 {
1277         unsigned long long msr;
1278         unsigned int eax, ebx, ecx, edx;
1279
1280
1281         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1282         if (max_cstate <= 7)
1283                 return;
1284
1285         /* if PC10 not present in CPUID.MWAIT.EDX */
1286         if ((mwait_substates & (0xF << 28)) == 0)
1287                 return;
1288
1289         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1290
1291         /* PC10 is not enabled in PKG C-state limit */
1292         if ((msr & 0xF) != 8)
1293                 return;
1294
1295         ecx = 0;
1296         cpuid(7, &eax, &ebx, &ecx, &edx);
1297
1298         /* if SGX is present */
1299         if (ebx & (1 << 2)) {
1300
1301                 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1302
1303                 /* if SGX is enabled */
1304                 if (msr & (1 << 18))
1305                         return;
1306         }
1307
1308         skl_cstates[5].disabled = 1;    /* C8-SKL */
1309         skl_cstates[6].disabled = 1;    /* C9-SKL */
1310 }
1311 /*
1312  * intel_idle_state_table_update()
1313  *
1314  * Update the default state_table for this CPU-id
1315  */
1316
1317 static void intel_idle_state_table_update(void)
1318 {
1319         switch (boot_cpu_data.x86_model) {
1320
1321         case INTEL_FAM6_IVYBRIDGE_X:
1322                 ivt_idle_state_table_update();
1323                 break;
1324         case INTEL_FAM6_ATOM_GOLDMONT:
1325         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1326                 bxt_idle_state_table_update();
1327                 break;
1328         case INTEL_FAM6_SKYLAKE_DESKTOP:
1329                 sklh_idle_state_table_update();
1330                 break;
1331         }
1332 }
1333
1334 /*
1335  * intel_idle_cpuidle_driver_init()
1336  * allocate, initialize cpuidle_states
1337  */
1338 static void __init intel_idle_cpuidle_driver_init(void)
1339 {
1340         int cstate;
1341         struct cpuidle_driver *drv = &intel_idle_driver;
1342
1343         intel_idle_state_table_update();
1344
1345         cpuidle_poll_state_init(drv);
1346         drv->state_count = 1;
1347
1348         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1349                 int num_substates, mwait_hint, mwait_cstate;
1350
1351                 if ((cpuidle_state_table[cstate].enter == NULL) &&
1352                     (cpuidle_state_table[cstate].enter_s2idle == NULL))
1353                         break;
1354
1355                 if (cstate + 1 > max_cstate) {
1356                         pr_info("max_cstate %d reached\n", max_cstate);
1357                         break;
1358                 }
1359
1360                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1361                 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1362
1363                 /* number of sub-states for this state in CPUID.MWAIT */
1364                 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1365                                         & MWAIT_SUBSTATE_MASK;
1366
1367                 /* if NO sub-states for this state in CPUID, skip it */
1368                 if (num_substates == 0)
1369                         continue;
1370
1371                 /* if state marked as disabled, skip it */
1372                 if (cpuidle_state_table[cstate].disabled != 0) {
1373                         pr_debug("state %s is disabled\n",
1374                                  cpuidle_state_table[cstate].name);
1375                         continue;
1376                 }
1377
1378
1379                 if (((mwait_cstate + 1) > 2) &&
1380                         !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1381                         mark_tsc_unstable("TSC halts in idle"
1382                                         " states deeper than C2");
1383
1384                 drv->states[drv->state_count] = /* structure copy */
1385                         cpuidle_state_table[cstate];
1386
1387                 drv->state_count += 1;
1388         }
1389
1390         if (icpu->byt_auto_demotion_disable_flag) {
1391                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1392                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1393         }
1394 }
1395
1396
1397 /*
1398  * intel_idle_cpu_init()
1399  * allocate, initialize, register cpuidle_devices
1400  * @cpu: cpu/core to initialize
1401  */
1402 static int intel_idle_cpu_init(unsigned int cpu)
1403 {
1404         struct cpuidle_device *dev;
1405
1406         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1407         dev->cpu = cpu;
1408
1409         if (cpuidle_register_device(dev)) {
1410                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1411                 return -EIO;
1412         }
1413
1414         if (icpu->auto_demotion_disable_flags)
1415                 auto_demotion_disable();
1416
1417         if (icpu->disable_promotion_to_c1e)
1418                 c1e_promotion_disable();
1419
1420         return 0;
1421 }
1422
1423 static int intel_idle_cpu_online(unsigned int cpu)
1424 {
1425         struct cpuidle_device *dev;
1426
1427         if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1428                 __setup_broadcast_timer(true);
1429
1430         /*
1431          * Some systems can hotplug a cpu at runtime after
1432          * the kernel has booted, we have to initialize the
1433          * driver in this case
1434          */
1435         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1436         if (!dev->registered)
1437                 return intel_idle_cpu_init(cpu);
1438
1439         return 0;
1440 }
1441
1442 static int __init intel_idle_init(void)
1443 {
1444         int retval;
1445
1446         /* Do not load intel_idle at all for now if idle= is passed */
1447         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1448                 return -ENODEV;
1449
1450         retval = intel_idle_probe();
1451         if (retval)
1452                 return retval;
1453
1454         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1455         if (intel_idle_cpuidle_devices == NULL)
1456                 return -ENOMEM;
1457
1458         intel_idle_cpuidle_driver_init();
1459         retval = cpuidle_register_driver(&intel_idle_driver);
1460         if (retval) {
1461                 struct cpuidle_driver *drv = cpuidle_get_driver();
1462                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1463                        drv ? drv->name : "none");
1464                 goto init_driver_fail;
1465         }
1466
1467         if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
1468                 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1469
1470         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1471                                    intel_idle_cpu_online, NULL);
1472         if (retval < 0)
1473                 goto hp_setup_fail;
1474
1475         pr_debug("lapic_timer_reliable_states 0x%x\n",
1476                  lapic_timer_reliable_states);
1477
1478         return 0;
1479
1480 hp_setup_fail:
1481         intel_idle_cpuidle_devices_uninit();
1482         cpuidle_unregister_driver(&intel_idle_driver);
1483 init_driver_fail:
1484         free_percpu(intel_idle_cpuidle_devices);
1485         return retval;
1486
1487 }
1488 device_initcall(intel_idle_init);
1489
1490 /*
1491  * We are not really modular, but we used to support that.  Meaning we also
1492  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1493  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1494  * is the easiest way (currently) to continue doing that.
1495  */
1496 module_param(max_cstate, int, 0444);