1 /*
2  * intel_pstate.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/kernel.h>
16 #include <linux/kernel_stat.h>
17 #include <linux/module.h>
18 #include <linux/ktime.h>
19 #include <linux/hrtimer.h>
20 #include <linux/tick.h>
21 #include <linux/slab.h>
22 #include <linux/sched/cpufreq.h>
23 #include <linux/list.h>
24 #include <linux/cpu.h>
25 #include <linux/cpufreq.h>
26 #include <linux/sysfs.h>
27 #include <linux/types.h>
28 #include <linux/fs.h>
29 #include <linux/acpi.h>
30 #include <linux/vmalloc.h>
31 #include <trace/events/power.h>
32
33 #include <asm/div64.h>
34 #include <asm/msr.h>
35 #include <asm/cpu_device_id.h>
36 #include <asm/cpufeature.h>
37 #include <asm/intel-family.h>
38
39 #define INTEL_PSTATE_SAMPLING_INTERVAL  (10 * NSEC_PER_MSEC)
40
41 #define INTEL_CPUFREQ_TRANSITION_LATENCY        20000
42 #define INTEL_CPUFREQ_TRANSITION_DELAY          500
43
44 #ifdef CONFIG_ACPI
45 #include <acpi/processor.h>
46 #include <acpi/cppc_acpi.h>
47 #endif
48
49 #define FRAC_BITS 8
50 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
51 #define fp_toint(X) ((X) >> FRAC_BITS)
52
53 #define EXT_BITS 6
54 #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
55 #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
56 #define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
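/*
 * Values handled by these helpers are signed fixed-point numbers with
 * FRAC_BITS fractional bits, so int_tofp(1) == 256 and div_fp(1, 4) == 64
 * (i.e. 0.25).  The "ext" variants carry EXT_FRAC_BITS == 14 fractional
 * bits for extra precision, e.g. for the APERF/MPERF ratio of a sample.
 */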
57
58 static inline int32_t mul_fp(int32_t x, int32_t y)
59 {
60         return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
61 }
62
63 static inline int32_t div_fp(s64 x, s64 y)
64 {
65         return div64_s64((int64_t)x << FRAC_BITS, y);
66 }
67
68 static inline int ceiling_fp(int32_t x)
69 {
70         int mask, ret;
71
72         ret = fp_toint(x);
73         mask = (1 << FRAC_BITS) - 1;
74         if (x & mask)
75                 ret += 1;
76         return ret;
77 }
78
79 static inline int32_t percent_fp(int percent)
80 {
81         return div_fp(percent, 100);
82 }
83
84 static inline u64 mul_ext_fp(u64 x, u64 y)
85 {
86         return (x * y) >> EXT_FRAC_BITS;
87 }
88
89 static inline u64 div_ext_fp(u64 x, u64 y)
90 {
91         return div64_u64(x << EXT_FRAC_BITS, y);
92 }
93
94 static inline int32_t percent_ext_fp(int percent)
95 {
96         return div_ext_fp(percent, 100);
97 }
98
99 /**
100  * struct sample -      Store performance sample
101  * @core_avg_perf:      Ratio of APERF/MPERF which is the actual average
102  *                      performance during last sample period
103  * @busy_scaled:        Scaled busy value which is used to calculate next
104  *                      P state. This can be different than core_avg_perf
105  *                      to account for cpu idle period
106  * @aperf:              Difference of actual performance frequency clock count
107  *                      read from APERF MSR between last and current sample
108  * @mperf:              Difference of maximum performance frequency clock count
109  *                      read from MPERF MSR between last and current sample
110  * @tsc:                Difference of time stamp counter between last and
111  *                      current sample
112  * @time:               Current time from scheduler
113  *
114  * This structure is used in the cpudata structure to store performance sample
115  * data for choosing next P State.
116  */
117 struct sample {
118         int32_t core_avg_perf;
119         int32_t busy_scaled;
120         u64 aperf;
121         u64 mperf;
122         u64 tsc;
123         u64 time;
124 };
125
126 /**
127  * struct pstate_data - Store P state data
128  * @current_pstate:     Current requested P state
129  * @min_pstate:         Min P state possible for this platform
130  * @max_pstate:         Max P state possible for this platform
131  * @max_pstate_physical:The physical max P state for a processor;
132  *                      this can be higher than max_pstate, which can
133  *                      be limited by platform thermal design power limits
134  * @scaling:            Scaling factor to convert a P state to frequency
135  *                      in cpufreq frequency units (kHz)
136  * @turbo_pstate:       Max Turbo P state possible for this platform
137  * @max_freq:           @max_pstate frequency in cpufreq units
138  * @turbo_freq:         @turbo_pstate frequency in cpufreq units
139  *
140  * Stores the per cpu model P state limits and current P state.
141  */
142 struct pstate_data {
143         int     current_pstate;
144         int     min_pstate;
145         int     max_pstate;
146         int     max_pstate_physical;
147         int     scaling;
148         int     turbo_pstate;
149         unsigned int max_freq;
150         unsigned int turbo_freq;
151 };
152
153 /**
154  * struct vid_data -    Stores voltage information data
155  * @min:                VID data for this platform corresponding to
156  *                      the lowest P state
157  * @max:                VID data corresponding to the highest P State.
158  * @turbo:              VID data for turbo P state
159  * @ratio:              Ratio of (vid max - vid min) /
160  *                      (max P state - Min P State)
161  *
162  * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
163  * This data is used in Atom platforms, where in addition to target P state,
164  * the voltage data needs to be specified to select next P State.
165  */
166 struct vid_data {
167         int min;
168         int max;
169         int turbo;
170         int32_t ratio;
171 };
172
173 /**
174  * struct global_params - Global parameters, mostly tunable via sysfs.
175  * @no_turbo:           Whether or not to use turbo P-states.
176  * @turbo_disabled:     Whether or not turbo P-states are available at all,
177  *                      based on the MSR_IA32_MISC_ENABLE value and whether or
178  *                      not the maximum reported turbo P-state is different from
179  *                      the maximum reported non-turbo one.
180  * @min_perf_pct:       Minimum capacity limit in percent of the maximum turbo
181  *                      P-state capacity.
182  * @max_perf_pct:       Maximum capacity limit in percent of the maximum turbo
183  *                      P-state capacity.
184  */
185 struct global_params {
186         bool no_turbo;
187         bool turbo_disabled;
188         int max_perf_pct;
189         int min_perf_pct;
190 };
191
192 /**
193  * struct cpudata -     Per CPU instance data storage
194  * @cpu:                CPU number for this instance data
195  * @policy:             CPUFreq policy value
196  * @update_util:        CPUFreq utility callback information
197  * @update_util_set:    CPUFreq utility callback is set
198  * @iowait_boost:       iowait-related boost fraction
199  * @last_update:        Time of the last update.
200  * @pstate:             Stores P state limits for this CPU
201  * @vid:                Stores VID limits for this CPU
202  * @last_sample_time:   Last Sample time
203  * @aperf_mperf_shift:  Number of clock cycles after which aperf/mperf is
204  *                      incremented; the mperf delta is shifted left by this
205  *                      amount when calculating CPU busy.
206  * @prev_aperf:         Last APERF value read from APERF MSR
207  * @prev_mperf:         Last MPERF value read from MPERF MSR
208  * @prev_tsc:           Last timestamp counter (TSC) value
209  * @prev_cummulative_iowait: IO Wait time difference from last and
210  *                      current sample
211  * @sample:             Storage for the last sample data
212  * @min_perf_ratio:     Minimum capacity in terms of PERF or HWP ratios
213  * @max_perf_ratio:     Maximum capacity in terms of PERF or HWP ratios
214  * @acpi_perf_data:     Stores ACPI perf information read from _PSS
215  * @valid_pss_table:    Set to true for valid ACPI _PSS entries found
216  * @epp_powersave:      Last saved HWP energy performance preference
217  *                      (EPP) or energy performance bias (EPB),
218  *                      when policy switched to performance
219  * @epp_policy:         Last saved policy used to set EPP/EPB
220  * @epp_default:        Power on default HWP energy performance
221  *                      preference/bias
222  * @epp_saved:          Saved EPP/EPB during system suspend or CPU offline
223  *                      operation
224  * @hwp_req_cached:     Cached value of the last HWP Request MSR
225  * @hwp_cap_cached:     Cached value of the last HWP Capabilities MSR
226  * @last_io_update:     Last time when IO wake flag was set
227  * @sched_flags:        Store scheduler flags for possible cross CPU update
228  * @hwp_boost_min:      Last HWP boosted min performance
229  *
230  * This structure stores per CPU instance data for all CPUs.
231  */
232 struct cpudata {
233         int cpu;
234
235         unsigned int policy;
236         struct update_util_data update_util;
237         bool   update_util_set;
238
239         struct pstate_data pstate;
240         struct vid_data vid;
241
242         u64     last_update;
243         u64     last_sample_time;
244         u64     aperf_mperf_shift;
245         u64     prev_aperf;
246         u64     prev_mperf;
247         u64     prev_tsc;
248         u64     prev_cummulative_iowait;
249         struct sample sample;
250         int32_t min_perf_ratio;
251         int32_t max_perf_ratio;
252 #ifdef CONFIG_ACPI
253         struct acpi_processor_performance acpi_perf_data;
254         bool valid_pss_table;
255 #endif
256         unsigned int iowait_boost;
257         s16 epp_powersave;
258         s16 epp_policy;
259         s16 epp_default;
260         s16 epp_saved;
261         u64 hwp_req_cached;
262         u64 hwp_cap_cached;
263         u64 last_io_update;
264         unsigned int sched_flags;
265         u32 hwp_boost_min;
266 };
267
268 static struct cpudata **all_cpu_data;
269
270 /**
271  * struct pstate_funcs - Per CPU model specific callbacks
272  * @get_max:            Callback to get maximum non turbo effective P state
273  * @get_max_physical:   Callback to get maximum non turbo physical P state
274  * @get_min:            Callback to get minimum P state
275  * @get_turbo:          Callback to get turbo P state
276  * @get_scaling:        Callback to get frequency scaling factor
277  * @get_val:            Callback to convert P state to actual MSR write value
278  * @get_vid:            Callback to get VID data for Atom platforms
279  *
280  * Core and Atom CPU models have different ways to get P State limits. This
281  * structure is used to store those callbacks.
282  */
283 struct pstate_funcs {
284         int (*get_max)(void);
285         int (*get_max_physical)(void);
286         int (*get_min)(void);
287         int (*get_turbo)(void);
288         int (*get_scaling)(void);
289         int (*get_aperf_mperf_shift)(void);
290         u64 (*get_val)(struct cpudata*, int pstate);
291         void (*get_vid)(struct cpudata *);
292 };
293
294 static struct pstate_funcs pstate_funcs __read_mostly;
295
296 static int hwp_active __read_mostly;
297 static int hwp_mode_bdw __read_mostly;
298 static bool per_cpu_limits __read_mostly;
299 static bool hwp_boost __read_mostly;
300
301 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
302
303 #ifdef CONFIG_ACPI
304 static bool acpi_ppc;
305 #endif
306
307 static struct global_params global;
308
309 static DEFINE_MUTEX(intel_pstate_driver_lock);
310 static DEFINE_MUTEX(intel_pstate_limits_lock);
311
312 #ifdef CONFIG_ACPI
313
314 static bool intel_pstate_acpi_pm_profile_server(void)
315 {
316         if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
317             acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
318                 return true;
319
320         return false;
321 }
322
323 static bool intel_pstate_get_ppc_enable_status(void)
324 {
325         if (intel_pstate_acpi_pm_profile_server())
326                 return true;
327
328         return acpi_ppc;
329 }
330
331 #ifdef CONFIG_ACPI_CPPC_LIB
332
333 /* The work item is needed to avoid CPU hotplug locking issues */
334 static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
335 {
336         sched_set_itmt_support();
337 }
338
339 static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
340
341 static void intel_pstate_set_itmt_prio(int cpu)
342 {
343         struct cppc_perf_caps cppc_perf;
344         static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
345         int ret;
346
347         ret = cppc_get_perf_caps(cpu, &cppc_perf);
348         if (ret)
349                 return;
350
351         /*
352          * The priorities can be set regardless of whether or not
353          * sched_set_itmt_support(true) has been called and it is valid to
354          * update them at any time after it has been called.
355          */
356         sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
357
358         if (max_highest_perf <= min_highest_perf) {
359                 if (cppc_perf.highest_perf > max_highest_perf)
360                         max_highest_perf = cppc_perf.highest_perf;
361
362                 if (cppc_perf.highest_perf < min_highest_perf)
363                         min_highest_perf = cppc_perf.highest_perf;
364
365                 if (max_highest_perf > min_highest_perf) {
366                         /*
367                          * This code can be run during CPU online under the
368                          * CPU hotplug locks, so sched_set_itmt_support()
369                          * cannot be called from here.  Queue up a work item
370                          * to invoke it.
371                          */
372                         schedule_work(&sched_itmt_work);
373                 }
374         }
375 }
376
377 static int intel_pstate_get_cppc_guranteed(int cpu)
378 {
379         struct cppc_perf_caps cppc_perf;
380         int ret;
381
382         ret = cppc_get_perf_caps(cpu, &cppc_perf);
383         if (ret)
384                 return ret;
385
386         return cppc_perf.guaranteed_perf;
387 }
388
389 #else
390 static void intel_pstate_set_itmt_prio(int cpu)
391 {
392 }
393
394 static int intel_pstate_get_cppc_guranteed(int cpu)
395 {
396         return -ENOTSUPP;
397 }
398 #endif
399
400 static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
401 {
402         struct cpudata *cpu;
403         int ret;
404         int i;
405
406         if (hwp_active) {
407                 intel_pstate_set_itmt_prio(policy->cpu);
408                 return;
409         }
410
411         if (!intel_pstate_get_ppc_enable_status())
412                 return;
413
414         cpu = all_cpu_data[policy->cpu];
415
416         ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
417                                                   policy->cpu);
418         if (ret)
419                 return;
420
421         /*
422          * Check if the control value in _PSS is for PERF_CTL MSR, which should
423          * guarantee that the states returned by it map to the states in our
424          * list directly.
425          */
426         if (cpu->acpi_perf_data.control_register.space_id !=
427                                                 ACPI_ADR_SPACE_FIXED_HARDWARE)
428                 goto err;
429
430         /*
431          * If there is only one entry in _PSS, simply ignore _PSS and continue as
432          * usual without taking _PSS into account
433          */
434         if (cpu->acpi_perf_data.state_count < 2)
435                 goto err;
436
437         pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
438         for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
439                 pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
440                          (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
441                          (u32) cpu->acpi_perf_data.states[i].core_frequency,
442                          (u32) cpu->acpi_perf_data.states[i].power,
443                          (u32) cpu->acpi_perf_data.states[i].control);
444         }
445
446         /*
447          * The _PSS table doesn't contain the whole turbo frequency range;
448          * its first entry is just +1 MHz above the max non-turbo frequency,
449          * with a control value corresponding to the max turbo ratio. But
450          * when cpufreq set_policy is called with that frequency as the
451          * maximum, performance is reduced, because this driver uses the
452          * real max turbo frequency as the maximum. So correct the frequency
453          * in the first _PSS entry to the real max turbo frequency when
454          * turbo is enabled, converting to MHz as _PSS frequencies are
455          * in MHz.
456          */
457         if (!global.turbo_disabled)
458                 cpu->acpi_perf_data.states[0].core_frequency =
459                                         policy->cpuinfo.max_freq / 1000;
460         cpu->valid_pss_table = true;
461         pr_debug("_PPC limits will be enforced\n");
462
463         return;
464
465  err:
466         cpu->valid_pss_table = false;
467         acpi_processor_unregister_performance(policy->cpu);
468 }
469
470 static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
471 {
472         struct cpudata *cpu;
473
474         cpu = all_cpu_data[policy->cpu];
475         if (!cpu->valid_pss_table)
476                 return;
477
478         acpi_processor_unregister_performance(policy->cpu);
479 }
480 #else
481 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
482 {
483 }
484
485 static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
486 {
487 }
488
489 static inline bool intel_pstate_acpi_pm_profile_server(void)
490 {
491         return false;
492 }
493 #endif
494
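/*
 * Refresh global.turbo_disabled: turbo is treated as unavailable when the
 * MSR_IA32_MISC_ENABLE_TURBO_DISABLE bit is set or when the reported max
 * non-turbo and turbo P-states are the same.
 */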
495 static inline void update_turbo_state(void)
496 {
497         u64 misc_en;
498         struct cpudata *cpu;
499
500         cpu = all_cpu_data[0];
501         rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
502         global.turbo_disabled =
503                 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
504                  cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
505 }
506
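/*
 * Lowest value the min_perf_pct limit may take: the minimum P-state
 * expressed as a percentage of the maximum turbo P-state.
 */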
507 static int min_perf_pct_min(void)
508 {
509         struct cpudata *cpu = all_cpu_data[0];
510         int turbo_pstate = cpu->pstate.turbo_pstate;
511
512         return turbo_pstate ?
513                 (cpu->pstate.min_pstate * 100 / turbo_pstate) : 0;
514 }
515
516 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
517 {
518         u64 epb;
519         int ret;
520
521         if (!static_cpu_has(X86_FEATURE_EPB))
522                 return -ENXIO;
523
524         ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
525         if (ret)
526                 return (s16)ret;
527
528         return (s16)(epb & 0x0f);
529 }
530
531 static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
532 {
533         s16 epp;
534
535         if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
536                 /*
537                  * When hwp_req_data is 0, the caller didn't read
538                  * MSR_HWP_REQUEST, so read it here to get the EPP.
539                  */
540                 if (!hwp_req_data) {
541                         epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
542                                             &hwp_req_data);
543                         if (epp)
544                                 return epp;
545                 }
546                 epp = (hwp_req_data >> 24) & 0xff;
547         } else {
548                 /* When there is no EPP present, HWP uses EPB settings */
549                 epp = intel_pstate_get_epb(cpu_data);
550         }
551
552         return epp;
553 }
554
555 static int intel_pstate_set_epb(int cpu, s16 pref)
556 {
557         u64 epb;
558         int ret;
559
560         if (!static_cpu_has(X86_FEATURE_EPB))
561                 return -ENXIO;
562
563         ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
564         if (ret)
565                 return ret;
566
567         epb = (epb & ~0x0f) | pref;
568         wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
569
570         return 0;
571 }
572
573 /*
574  * EPP/EPB display strings corresponding to EPP index in the
575  * energy_perf_strings[]
576  *      index           String
577  *-------------------------------------
578  *      0               default
579  *      1               performance
580  *      2               balance_performance
581  *      3               balance_power
582  *      4               power
583  */
584 static const char * const energy_perf_strings[] = {
585         "default",
586         "performance",
587         "balance_performance",
588         "balance_power",
589         "power",
590         NULL
591 };
592 static const unsigned int epp_values[] = {
593         HWP_EPP_PERFORMANCE,
594         HWP_EPP_BALANCE_PERFORMANCE,
595         HWP_EPP_BALANCE_POWERSAVE,
596         HWP_EPP_POWERSAVE
597 };
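/*
 * epp_values[i] is the EPP value written for energy_perf_strings[i + 1];
 * for "default" (index 0) the power-on default EPP is used instead.
 */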
598
599 static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
600 {
601         s16 epp;
602         int index = -EINVAL;
603
604         epp = intel_pstate_get_epp(cpu_data, 0);
605         if (epp < 0)
606                 return epp;
607
608         if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
609                 if (epp == HWP_EPP_PERFORMANCE)
610                         return 1;
611                 if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
612                         return 2;
613                 if (epp <= HWP_EPP_BALANCE_POWERSAVE)
614                         return 3;
615                 else
616                         return 4;
617         } else if (static_cpu_has(X86_FEATURE_EPB)) {
618                 /*
619                  * Range:
620                  *      0x00-0x03       :       Performance
621                  *      0x04-0x07       :       Balance performance
622                  *      0x08-0x0B       :       Balance power
623                  *      0x0C-0x0F       :       Power
624                  * The EPB is a 4-bit value, but our ranges restrict the
625                  * values that can be set; effectively only the top two
626                  * bits are used here.
627                  */
628                 index = (epp >> 2) + 1;
629         }
630
631         return index;
632 }
633
634 static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
635                                               int pref_index)
636 {
637         int epp = -EINVAL;
638         int ret;
639
640         if (!pref_index)
641                 epp = cpu_data->epp_default;
642
643         mutex_lock(&intel_pstate_limits_lock);
644
645         if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
646                 u64 value;
647
648                 ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
649                 if (ret)
650                         goto return_pref;
651
652                 value &= ~GENMASK_ULL(31, 24);
653
654                 if (epp == -EINVAL)
655                         epp = epp_values[pref_index - 1];
656
657                 value |= (u64)epp << 24;
658                 ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
659         } else {
660                 if (epp == -EINVAL)
661                         epp = (pref_index - 1) << 2;
662                 ret = intel_pstate_set_epb(cpu_data->cpu, epp);
663         }
664 return_pref:
665         mutex_unlock(&intel_pstate_limits_lock);
666
667         return ret;
668 }
669
670 static ssize_t show_energy_performance_available_preferences(
671                                 struct cpufreq_policy *policy, char *buf)
672 {
673         int i = 0;
674         int ret = 0;
675
676         while (energy_perf_strings[i] != NULL)
677                 ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
678
679         ret += sprintf(&buf[ret], "\n");
680
681         return ret;
682 }
683
684 cpufreq_freq_attr_ro(energy_performance_available_preferences);
685
686 static ssize_t store_energy_performance_preference(
687                 struct cpufreq_policy *policy, const char *buf, size_t count)
688 {
689         struct cpudata *cpu_data = all_cpu_data[policy->cpu];
690         char str_preference[21];
691         int ret;
692
693         ret = sscanf(buf, "%20s", str_preference);
694         if (ret != 1)
695                 return -EINVAL;
696
697         ret = match_string(energy_perf_strings, -1, str_preference);
698         if (ret < 0)
699                 return ret;
700
701         intel_pstate_set_energy_pref_index(cpu_data, ret);
702         return count;
703 }
704
705 static ssize_t show_energy_performance_preference(
706                                 struct cpufreq_policy *policy, char *buf)
707 {
708         struct cpudata *cpu_data = all_cpu_data[policy->cpu];
709         int preference;
710
711         preference = intel_pstate_get_energy_pref_index(cpu_data);
712         if (preference < 0)
713                 return preference;
714
715         return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
716 }
717
718 cpufreq_freq_attr_rw(energy_performance_preference);
719
720 static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
721 {
722         struct cpudata *cpu;
723         u64 cap;
724         int ratio;
725
726         ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
727         if (ratio <= 0) {
728                 rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
729                 ratio = HWP_GUARANTEED_PERF(cap);
730         }
731
732         cpu = all_cpu_data[policy->cpu];
733
734         return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
735 }
736
737 cpufreq_freq_attr_ro(base_frequency);
738
739 static struct freq_attr *hwp_cpufreq_attrs[] = {
740         &energy_performance_preference,
741         &energy_performance_available_preferences,
742         &base_frequency,
743         NULL,
744 };
745
746 static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
747                                      int *current_max)
748 {
749         u64 cap;
750
751         rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
752         WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
753         if (global.no_turbo)
754                 *current_max = HWP_GUARANTEED_PERF(cap);
755         else
756                 *current_max = HWP_HIGHEST_PERF(cap);
757
758         *phy_max = HWP_HIGHEST_PERF(cap);
759 }
760
761 static void intel_pstate_hwp_set(unsigned int cpu)
762 {
763         struct cpudata *cpu_data = all_cpu_data[cpu];
764         int max, min;
765         u64 value;
766         s16 epp;
767
768         max = cpu_data->max_perf_ratio;
769         min = cpu_data->min_perf_ratio;
770
771         if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
772                 min = max;
773
774         rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
775
776         value &= ~HWP_MIN_PERF(~0L);
777         value |= HWP_MIN_PERF(min);
778
779         value &= ~HWP_MAX_PERF(~0L);
780         value |= HWP_MAX_PERF(max);
781
782         if (cpu_data->epp_policy == cpu_data->policy)
783                 goto skip_epp;
784
785         cpu_data->epp_policy = cpu_data->policy;
786
787         if (cpu_data->epp_saved >= 0) {
788                 epp = cpu_data->epp_saved;
789                 cpu_data->epp_saved = -EINVAL;
790                 goto update_epp;
791         }
792
793         if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
794                 epp = intel_pstate_get_epp(cpu_data, value);
795                 cpu_data->epp_powersave = epp;
796                 /* If the EPP read failed, don't try to write */
797                 if (epp < 0)
798                         goto skip_epp;
799
800                 epp = 0;
801         } else {
802                 /* Skip setting EPP when the saved value is invalid */
803                 if (cpu_data->epp_powersave < 0)
804                         goto skip_epp;
805
806                 /*
807                  * No need to restore EPP when it is not zero, which
808                  * means one of the following:
809                  *  - The policy has not changed.
810                  *  - The user has changed it manually.
811                  *  - Reading EPB returned an error.
812                  */
813                 epp = intel_pstate_get_epp(cpu_data, value);
814                 if (epp)
815                         goto skip_epp;
816
817                 epp = cpu_data->epp_powersave;
818         }
819 update_epp:
820         if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
821                 value &= ~GENMASK_ULL(31, 24);
822                 value |= (u64)epp << 24;
823         } else {
824                 intel_pstate_set_epb(cpu, epp);
825         }
826 skip_epp:
827         WRITE_ONCE(cpu_data->hwp_req_cached, value);
828         wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
829 }
830
831 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
832 {
833         struct cpudata *cpu_data = all_cpu_data[policy->cpu];
834
835         if (!hwp_active)
836                 return 0;
837
838         cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
839
840         return 0;
841 }
842
843 static void intel_pstate_hwp_enable(struct cpudata *cpudata);
844
845 static int intel_pstate_resume(struct cpufreq_policy *policy)
846 {
847         if (!hwp_active)
848                 return 0;
849
850         mutex_lock(&intel_pstate_limits_lock);
851
852         if (policy->cpu == 0)
853                 intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);
854
855         all_cpu_data[policy->cpu]->epp_policy = 0;
856         intel_pstate_hwp_set(policy->cpu);
857
858         mutex_unlock(&intel_pstate_limits_lock);
859
860         return 0;
861 }
862
863 static void intel_pstate_update_policies(void)
864 {
865         int cpu;
866
867         for_each_possible_cpu(cpu)
868                 cpufreq_update_policy(cpu);
869 }
870
871 /************************** sysfs begin ************************/
872 #define show_one(file_name, object)                                     \
873         static ssize_t show_##file_name                                 \
874         (struct kobject *kobj, struct attribute *attr, char *buf)       \
875         {                                                               \
876                 return sprintf(buf, "%u\n", global.object);             \
877         }
878
879 static ssize_t intel_pstate_show_status(char *buf);
880 static int intel_pstate_update_status(const char *buf, size_t size);
881
882 static ssize_t show_status(struct kobject *kobj,
883                            struct attribute *attr, char *buf)
884 {
885         ssize_t ret;
886
887         mutex_lock(&intel_pstate_driver_lock);
888         ret = intel_pstate_show_status(buf);
889         mutex_unlock(&intel_pstate_driver_lock);
890
891         return ret;
892 }
893
894 static ssize_t store_status(struct kobject *a, struct attribute *b,
895                             const char *buf, size_t count)
896 {
897         char *p = memchr(buf, '\n', count);
898         int ret;
899
900         mutex_lock(&intel_pstate_driver_lock);
901         ret = intel_pstate_update_status(buf, p ? p - buf : count);
902         mutex_unlock(&intel_pstate_driver_lock);
903
904         return ret < 0 ? ret : count;
905 }
906
907 static ssize_t show_turbo_pct(struct kobject *kobj,
908                                 struct attribute *attr, char *buf)
909 {
910         struct cpudata *cpu;
911         int total, no_turbo, turbo_pct;
912         uint32_t turbo_fp;
913
914         mutex_lock(&intel_pstate_driver_lock);
915
916         if (!intel_pstate_driver) {
917                 mutex_unlock(&intel_pstate_driver_lock);
918                 return -EAGAIN;
919         }
920
921         cpu = all_cpu_data[0];
922
923         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
924         no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
925         turbo_fp = div_fp(no_turbo, total);
926         turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
927
928         mutex_unlock(&intel_pstate_driver_lock);
929
930         return sprintf(buf, "%u\n", turbo_pct);
931 }
932
933 static ssize_t show_num_pstates(struct kobject *kobj,
934                                 struct attribute *attr, char *buf)
935 {
936         struct cpudata *cpu;
937         int total;
938
939         mutex_lock(&intel_pstate_driver_lock);
940
941         if (!intel_pstate_driver) {
942                 mutex_unlock(&intel_pstate_driver_lock);
943                 return -EAGAIN;
944         }
945
946         cpu = all_cpu_data[0];
947         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
948
949         mutex_unlock(&intel_pstate_driver_lock);
950
951         return sprintf(buf, "%u\n", total);
952 }
953
954 static ssize_t show_no_turbo(struct kobject *kobj,
955                              struct attribute *attr, char *buf)
956 {
957         ssize_t ret;
958
959         mutex_lock(&intel_pstate_driver_lock);
960
961         if (!intel_pstate_driver) {
962                 mutex_unlock(&intel_pstate_driver_lock);
963                 return -EAGAIN;
964         }
965
966         update_turbo_state();
967         if (global.turbo_disabled)
968                 ret = sprintf(buf, "%u\n", global.turbo_disabled);
969         else
970                 ret = sprintf(buf, "%u\n", global.no_turbo);
971
972         mutex_unlock(&intel_pstate_driver_lock);
973
974         return ret;
975 }
976
977 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
978                               const char *buf, size_t count)
979 {
980         unsigned int input;
981         int ret;
982
983         ret = sscanf(buf, "%u", &input);
984         if (ret != 1)
985                 return -EINVAL;
986
987         mutex_lock(&intel_pstate_driver_lock);
988
989         if (!intel_pstate_driver) {
990                 mutex_unlock(&intel_pstate_driver_lock);
991                 return -EAGAIN;
992         }
993
994         mutex_lock(&intel_pstate_limits_lock);
995
996         update_turbo_state();
997         if (global.turbo_disabled) {
998                 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
999                 mutex_unlock(&intel_pstate_limits_lock);
1000                 mutex_unlock(&intel_pstate_driver_lock);
1001                 return -EPERM;
1002         }
1003
1004         global.no_turbo = clamp_t(int, input, 0, 1);
1005
1006         if (global.no_turbo) {
1007                 struct cpudata *cpu = all_cpu_data[0];
1008                 int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
1009
1010                 /* Squash the global minimum into the permitted range. */
1011                 if (global.min_perf_pct > pct)
1012                         global.min_perf_pct = pct;
1013         }
1014
1015         mutex_unlock(&intel_pstate_limits_lock);
1016
1017         intel_pstate_update_policies();
1018
1019         mutex_unlock(&intel_pstate_driver_lock);
1020
1021         return count;
1022 }
1023
1024 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
1025                                   const char *buf, size_t count)
1026 {
1027         unsigned int input;
1028         int ret;
1029
1030         ret = sscanf(buf, "%u", &input);
1031         if (ret != 1)
1032                 return -EINVAL;
1033
1034         mutex_lock(&intel_pstate_driver_lock);
1035
1036         if (!intel_pstate_driver) {
1037                 mutex_unlock(&intel_pstate_driver_lock);
1038                 return -EAGAIN;
1039         }
1040
1041         mutex_lock(&intel_pstate_limits_lock);
1042
1043         global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
1044
1045         mutex_unlock(&intel_pstate_limits_lock);
1046
1047         intel_pstate_update_policies();
1048
1049         mutex_unlock(&intel_pstate_driver_lock);
1050
1051         return count;
1052 }
1053
1054 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
1055                                   const char *buf, size_t count)
1056 {
1057         unsigned int input;
1058         int ret;
1059
1060         ret = sscanf(buf, "%u", &input);
1061         if (ret != 1)
1062                 return -EINVAL;
1063
1064         mutex_lock(&intel_pstate_driver_lock);
1065
1066         if (!intel_pstate_driver) {
1067                 mutex_unlock(&intel_pstate_driver_lock);
1068                 return -EAGAIN;
1069         }
1070
1071         mutex_lock(&intel_pstate_limits_lock);
1072
1073         global.min_perf_pct = clamp_t(int, input,
1074                                       min_perf_pct_min(), global.max_perf_pct);
1075
1076         mutex_unlock(&intel_pstate_limits_lock);
1077
1078         intel_pstate_update_policies();
1079
1080         mutex_unlock(&intel_pstate_driver_lock);
1081
1082         return count;
1083 }
1084
1085 static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
1086                                 struct attribute *attr, char *buf)
1087 {
1088         return sprintf(buf, "%u\n", hwp_boost);
1089 }
1090
1091 static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
1092                                        const char *buf, size_t count)
1093 {
1094         unsigned int input;
1095         int ret;
1096
1097         ret = kstrtouint(buf, 10, &input);
1098         if (ret)
1099                 return ret;
1100
1101         mutex_lock(&intel_pstate_driver_lock);
1102         hwp_boost = !!input;
1103         intel_pstate_update_policies();
1104         mutex_unlock(&intel_pstate_driver_lock);
1105
1106         return count;
1107 }
1108
1109 show_one(max_perf_pct, max_perf_pct);
1110 show_one(min_perf_pct, min_perf_pct);
1111
1112 define_one_global_rw(status);
1113 define_one_global_rw(no_turbo);
1114 define_one_global_rw(max_perf_pct);
1115 define_one_global_rw(min_perf_pct);
1116 define_one_global_ro(turbo_pct);
1117 define_one_global_ro(num_pstates);
1118 define_one_global_rw(hwp_dynamic_boost);
1119
1120 static struct attribute *intel_pstate_attributes[] = {
1121         &status.attr,
1122         &no_turbo.attr,
1123         &turbo_pct.attr,
1124         &num_pstates.attr,
1125         NULL
1126 };
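/*
 * max_perf_pct, min_perf_pct and hwp_dynamic_boost are not part of the
 * static attribute group above; they are created individually in
 * intel_pstate_sysfs_expose_params() depending on per_cpu_limits and HWP.
 */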
1127
1128 static const struct attribute_group intel_pstate_attr_group = {
1129         .attrs = intel_pstate_attributes,
1130 };
1131
1132 static void __init intel_pstate_sysfs_expose_params(void)
1133 {
1134         struct kobject *intel_pstate_kobject;
1135         int rc;
1136
1137         intel_pstate_kobject = kobject_create_and_add("intel_pstate",
1138                                                 &cpu_subsys.dev_root->kobj);
1139         if (WARN_ON(!intel_pstate_kobject))
1140                 return;
1141
1142         rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
1143         if (WARN_ON(rc))
1144                 return;
1145
1146         /*
1147          * If per cpu limits are enforced there are no global limits, so
1148          * return without creating max/min_perf_pct attributes
1149          */
1150         if (per_cpu_limits)
1151                 return;
1152
1153         rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
1154         WARN_ON(rc);
1155
1156         rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
1157         WARN_ON(rc);
1158
1159         if (hwp_active) {
1160                 rc = sysfs_create_file(intel_pstate_kobject,
1161                                        &hwp_dynamic_boost.attr);
1162                 WARN_ON(rc);
1163         }
1164 }
1165 /************************** sysfs end ************************/
1166
1167 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
1168 {
1169         /* First disable HWP notification interrupts as we don't process them */
1170         if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
1171                 wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
1172
1173         wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
1174         cpudata->epp_policy = 0;
1175         if (cpudata->epp_default == -EINVAL)
1176                 cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
1177 }
1178
1179 #define MSR_IA32_POWER_CTL_BIT_EE       19
1180
1181 /* Disable energy efficiency optimization */
1182 static void intel_pstate_disable_ee(int cpu)
1183 {
1184         u64 power_ctl;
1185         int ret;
1186
1187         ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl);
1188         if (ret)
1189                 return;
1190
1191         if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) {
1192                 pr_info("Disabling energy efficiency optimization\n");
1193                 power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
1194                 wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl);
1195         }
1196 }
1197
1198 static int atom_get_min_pstate(void)
1199 {
1200         u64 value;
1201
1202         rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1203         return (value >> 8) & 0x7F;
1204 }
1205
1206 static int atom_get_max_pstate(void)
1207 {
1208         u64 value;
1209
1210         rdmsrl(MSR_ATOM_CORE_RATIOS, value);
1211         return (value >> 16) & 0x7F;
1212 }
1213
1214 static int atom_get_turbo_pstate(void)
1215 {
1216         u64 value;
1217
1218         rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value);
1219         return value & 0x7F;
1220 }
1221
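/*
 * Build the PERF_CTL value for Atom: the requested ratio goes in bits 15:8,
 * the matching voltage ID (interpolated between vid.min and vid.max, or
 * vid.turbo above max_pstate) goes in the low bits, and bit 32 requests
 * turbo disengagement when turbo has been disabled in software.
 */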
1222 static u64 atom_get_val(struct cpudata *cpudata, int pstate)
1223 {
1224         u64 val;
1225         int32_t vid_fp;
1226         u32 vid;
1227
1228         val = (u64)pstate << 8;
1229         if (global.no_turbo && !global.turbo_disabled)
1230                 val |= (u64)1 << 32;
1231
1232         vid_fp = cpudata->vid.min + mul_fp(
1233                 int_tofp(pstate - cpudata->pstate.min_pstate),
1234                 cpudata->vid.ratio);
1235
1236         vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
1237         vid = ceiling_fp(vid_fp);
1238
1239         if (pstate > cpudata->pstate.max_pstate)
1240                 vid = cpudata->vid.turbo;
1241
1242         return val | vid;
1243 }
1244
1245 static int silvermont_get_scaling(void)
1246 {
1247         u64 value;
1248         int i;
1249         /* Defined in Table 35-6 from SDM (Sept 2015) */
1250         static int silvermont_freq_table[] = {
1251                 83300, 100000, 133300, 116700, 80000};
1252
1253         rdmsrl(MSR_FSB_FREQ, value);
1254         i = value & 0x7;
1255         WARN_ON(i > 4);
1256
1257         return silvermont_freq_table[i];
1258 }
1259
1260 static int airmont_get_scaling(void)
1261 {
1262         u64 value;
1263         int i;
1264         /* Defined in Table 35-10 from SDM (Sept 2015) */
1265         static int airmont_freq_table[] = {
1266                 83300, 100000, 133300, 116700, 80000,
1267                 93300, 90000, 88900, 87500};
1268
1269         rdmsrl(MSR_FSB_FREQ, value);
1270         i = value & 0xF;
1271         WARN_ON(i > 8);
1272
1273         return airmont_freq_table[i];
1274 }
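/*
 * The scaling factor is the size of one P-state step in kHz, so a ratio is
 * converted to a frequency as ratio * scaling (e.g. ratio 20 with a scaling
 * of 100000 corresponds to 2 GHz).  Core parts always use 100 MHz steps.
 */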
1275
1276 static void atom_get_vid(struct cpudata *cpudata)
1277 {
1278         u64 value;
1279
1280         rdmsrl(MSR_ATOM_CORE_VIDS, value);
1281         cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
1282         cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
1283         cpudata->vid.ratio = div_fp(
1284                 cpudata->vid.max - cpudata->vid.min,
1285                 int_tofp(cpudata->pstate.max_pstate -
1286                         cpudata->pstate.min_pstate));
1287
1288         rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value);
1289         cpudata->vid.turbo = value & 0x7f;
1290 }
1291
1292 static int core_get_min_pstate(void)
1293 {
1294         u64 value;
1295
1296         rdmsrl(MSR_PLATFORM_INFO, value);
1297         return (value >> 40) & 0xFF;
1298 }
1299
1300 static int core_get_max_pstate_physical(void)
1301 {
1302         u64 value;
1303
1304         rdmsrl(MSR_PLATFORM_INFO, value);
1305         return (value >> 8) & 0xFF;
1306 }
1307
1308 static int core_get_tdp_ratio(u64 plat_info)
1309 {
1310         /* Check how many TDP levels are present */
1311         if (plat_info & 0x600000000) {
1312                 u64 tdp_ctrl;
1313                 u64 tdp_ratio;
1314                 int tdp_msr;
1315                 int err;
1316
1317                 /* Get the TDP level (0, 1, 2) to get ratios */
1318                 err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
1319                 if (err)
1320                         return err;
1321
1322                 /* TDP MSRs are contiguous, starting at 0x648 */
1323                 tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03);
1324                 err = rdmsrl_safe(tdp_msr, &tdp_ratio);
1325                 if (err)
1326                         return err;
1327
1328                 /* For levels 1 and 2, bits[23:16] contain the ratio */
1329                 if (tdp_ctrl & 0x03)
1330                         tdp_ratio >>= 16;
1331
1332                 tdp_ratio &= 0xff; /* ratios are only 8 bits long */
1333                 pr_debug("tdp_ratio %x\n", (int)tdp_ratio);
1334
1335                 return (int)tdp_ratio;
1336         }
1337
1338         return -ENXIO;
1339 }
1340
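/*
 * The max non-turbo ratio normally comes from MSR_PLATFORM_INFO[15:8], but
 * when config-TDP levels are present the ratio of the selected level is
 * used instead, cross-checked against MSR_TURBO_ACTIVATION_RATIO on
 * non-HWP systems.
 */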
1341 static int core_get_max_pstate(void)
1342 {
1343         u64 tar;
1344         u64 plat_info;
1345         int max_pstate;
1346         int tdp_ratio;
1347         int err;
1348
1349         rdmsrl(MSR_PLATFORM_INFO, plat_info);
1350         max_pstate = (plat_info >> 8) & 0xFF;
1351
1352         tdp_ratio = core_get_tdp_ratio(plat_info);
1353         if (tdp_ratio <= 0)
1354                 return max_pstate;
1355
1356         if (hwp_active) {
1357                 /* Turbo activation ratio is not used on HWP platforms */
1358                 return tdp_ratio;
1359         }
1360
1361         err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
1362         if (!err) {
1363                 int tar_levels;
1364
1365                 /* Do some sanity checking for safety */
1366                 tar_levels = tar & 0xff;
1367                 if (tdp_ratio - 1 == tar_levels) {
1368                         max_pstate = tar_levels;
1369                         pr_debug("max_pstate=TAC %x\n", max_pstate);
1370                 }
1371         }
1372
1373         return max_pstate;
1374 }
1375
1376 static int core_get_turbo_pstate(void)
1377 {
1378         u64 value;
1379         int nont, ret;
1380
1381         rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1382         nont = core_get_max_pstate();
1383         ret = (value) & 255;
1384         if (ret <= nont)
1385                 ret = nont;
1386         return ret;
1387 }
1388
1389 static inline int core_get_scaling(void)
1390 {
1391         return 100000;
1392 }
1393
1394 static u64 core_get_val(struct cpudata *cpudata, int pstate)
1395 {
1396         u64 val;
1397
1398         val = (u64)pstate << 8;
1399         if (global.no_turbo && !global.turbo_disabled)
1400                 val |= (u64)1 << 32;
1401
1402         return val;
1403 }
1404
1405 static int knl_get_aperf_mperf_shift(void)
1406 {
1407         return 10;
1408 }
1409
1410 static int knl_get_turbo_pstate(void)
1411 {
1412         u64 value;
1413         int nont, ret;
1414
1415         rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
1416         nont = core_get_max_pstate();
1417         ret = (((value) >> 8) & 0xFF);
1418         if (ret <= nont)
1419                 ret = nont;
1420         return ret;
1421 }
1422
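/*
 * Highest P-state that may currently be requested: the max non-turbo
 * P-state when turbo is disabled or unavailable, the max turbo P-state
 * otherwise.
 */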
1423 static int intel_pstate_get_base_pstate(struct cpudata *cpu)
1424 {
1425         return global.no_turbo || global.turbo_disabled ?
1426                         cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
1427 }
1428
1429 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
1430 {
1431         trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
1432         cpu->pstate.current_pstate = pstate;
1433         /*
1434          * Generally, there is no guarantee that this code will always run on
1435          * the CPU being updated, so force the register update to run on the
1436          * right CPU.
1437          */
1438         wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
1439                       pstate_funcs.get_val(cpu, pstate));
1440 }
1441
1442 static void intel_pstate_set_min_pstate(struct cpudata *cpu)
1443 {
1444         intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
1445 }
1446
1447 static void intel_pstate_max_within_limits(struct cpudata *cpu)
1448 {
1449         int pstate;
1450
1451         update_turbo_state();
1452         pstate = intel_pstate_get_base_pstate(cpu);
1453         pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
1454         intel_pstate_set_pstate(cpu, pstate);
1455 }
1456
1457 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1458 {
1459         cpu->pstate.min_pstate = pstate_funcs.get_min();
1460         cpu->pstate.max_pstate = pstate_funcs.get_max();
1461         cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
1462         cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
1463         cpu->pstate.scaling = pstate_funcs.get_scaling();
1464         cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
1465
1466         if (hwp_active && !hwp_mode_bdw) {
1467                 unsigned int phy_max, current_max;
1468
1469                 intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
1470                 cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
1471         } else {
1472                 cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1473         }
1474
1475         if (pstate_funcs.get_aperf_mperf_shift)
1476                 cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
1477
1478         if (pstate_funcs.get_vid)
1479                 pstate_funcs.get_vid(cpu);
1480
1481         intel_pstate_set_min_pstate(cpu);
1482 }
1483
1484 /*
1485  * A long hold time keeps the high perf limits in place for a long
1486  * time, which negatively impacts perf/watt for some workloads, like
1487  * specpower. 3ms is based on experiments with some
1488  * workloads.
1489  */
1490 static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
1491
1492 static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
1493 {
1494         u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
1495         u32 max_limit = (hwp_req & 0xff00) >> 8;
1496         u32 min_limit = (hwp_req & 0xff);
1497         u32 boost_level1;
1498
1499         /*
1500          * Cases to consider (User changes via sysfs or boot time):
1501          * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
1502          *      No boost, return.
1503          * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
1504          *     Should result in one level boost only for P0.
1505          * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
1506          *     Should result in two level boost:
1507          *         (min + p1)/2 and P1.
1508          * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
1509          *     Should result in three level boost:
1510          *        (min + p1)/2, P1 and P0.
1511          */
1512
1513         /* If max and min are equal or already at max, nothing to boost */
1514         if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
1515                 return;
1516
1517         if (!cpu->hwp_boost_min)
1518                 cpu->hwp_boost_min = min_limit;
1519
1520         /* Level at the halfway mark between min and guaranteed */
1521         boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
1522
1523         if (cpu->hwp_boost_min < boost_level1)
1524                 cpu->hwp_boost_min = boost_level1;
1525         else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1526                 cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
1527         else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
1528                  max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1529                 cpu->hwp_boost_min = max_limit;
1530         else
1531                 return;
1532
1533         hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
1534         wrmsrl(MSR_HWP_REQUEST, hwp_req);
1535         cpu->last_update = cpu->sample.time;
1536 }
1537
1538 static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
1539 {
1540         if (cpu->hwp_boost_min) {
1541                 bool expired;
1542
1543                 /* Check if we have been idle long enough to boost down */
1544                 expired = time_after64(cpu->sample.time, cpu->last_update +
1545                                        hwp_boost_hold_time_ns);
1546                 if (expired) {
1547                         wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
1548                         cpu->hwp_boost_min = 0;
1549                 }
1550         }
1551         cpu->last_update = cpu->sample.time;
1552 }
1553
1554 static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
1555                                                       u64 time)
1556 {
1557         cpu->sample.time = time;
1558
1559         if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
1560                 bool do_io = false;
1561
1562                 cpu->sched_flags = 0;
1563                 /*
1564                  * Set the iowait_boost flag and update the time. Since the
1565                  * IO WAIT flag is set all the time, we can't conclude from a
1566                  * single occurrence that some IO-bound activity is scheduled
1567                  * on this CPU. Only if we receive at least two in two
1568                  * consecutive ticks do we treat the CPU as a boost candidate.
1569                  */
1570                 if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
1571                         do_io = true;
1572
1573                 cpu->last_io_update = time;
1574
1575                 if (do_io)
1576                         intel_pstate_hwp_boost_up(cpu);
1577
1578         } else {
1579                 intel_pstate_hwp_boost_down(cpu);
1580         }
1581 }
1582
1583 static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
1584                                                 u64 time, unsigned int flags)
1585 {
1586         struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1587
1588         cpu->sched_flags |= flags;
1589
1590         if (smp_processor_id() == cpu->cpu)
1591                 intel_pstate_update_util_hwp_local(cpu, time);
1592 }
1593
1594 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
1595 {
1596         struct sample *sample = &cpu->sample;
1597
1598         sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
1599 }
1600
1601 static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
1602 {
1603         u64 aperf, mperf;
1604         unsigned long flags;
1605         u64 tsc;
1606
1607         local_irq_save(flags);
1608         rdmsrl(MSR_IA32_APERF, aperf);
1609         rdmsrl(MSR_IA32_MPERF, mperf);
1610         tsc = rdtsc();
1611         if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
1612                 local_irq_restore(flags);
1613                 return false;
1614         }
1615         local_irq_restore(flags);
1616
1617         cpu->last_sample_time = cpu->sample.time;
1618         cpu->sample.time = time;
1619         cpu->sample.aperf = aperf;
1620         cpu->sample.mperf = mperf;
1621         cpu->sample.tsc =  tsc;
1622         cpu->sample.aperf -= cpu->prev_aperf;
1623         cpu->sample.mperf -= cpu->prev_mperf;
1624         cpu->sample.tsc -= cpu->prev_tsc;
1625
1626         cpu->prev_aperf = aperf;
1627         cpu->prev_mperf = mperf;
1628         cpu->prev_tsc = tsc;
1629         /*
1630          * The first time this function runs in a given cycle, all of the
1631          * previous sample data fields are zero or stale and must be
1632          * populated with meaningful numbers for things to work.  Assume
1633          * that sample.time is always reset before the utilization update
1634          * hook is set and make the caller skip the sample in that case.
1635          */
1636         if (cpu->last_sample_time) {
1637                 intel_pstate_calc_avg_perf(cpu);
1638                 return true;
1639         }
1640         return false;
1641 }
1642
1643 static inline int32_t get_avg_frequency(struct cpudata *cpu)
1644 {
1645         return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz);
1646 }
1647
1648 static inline int32_t get_avg_pstate(struct cpudata *cpu)
1649 {
1650         return mul_ext_fp(cpu->pstate.max_pstate_physical,
1651                           cpu->sample.core_avg_perf);
1652 }
1653
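/*
 * Active-mode (non-HWP) P-state selection: busy_frac is the MPERF/TSC ratio
 * (with the model-specific APERF/MPERF shift applied), raised to the decaying
 * iowait boost when that is larger.  The target is 1.25 times the maximum
 * available P-state scaled by busy_frac, floored at the minimum P-state and
 * pulled halfway toward the previous average P-state when that was higher.
 */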
1654 static inline int32_t get_target_pstate(struct cpudata *cpu)
1655 {
1656         struct sample *sample = &cpu->sample;
1657         int32_t busy_frac, boost;
1658         int target, avg_pstate;
1659
1660         busy_frac = div_fp(sample->mperf << cpu->aperf_mperf_shift,
1661                            sample->tsc);
1662
1663         boost = cpu->iowait_boost;
1664         cpu->iowait_boost >>= 1;
1665
1666         if (busy_frac < boost)
1667                 busy_frac = boost;
1668
1669         sample->busy_scaled = busy_frac * 100;
1670
1671         target = global.no_turbo || global.turbo_disabled ?
1672                         cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
1673         target += target >> 2;
1674         target = mul_fp(target, busy_frac);
1675         if (target < cpu->pstate.min_pstate)
1676                 target = cpu->pstate.min_pstate;
1677
1678         /*
1679          * If the average P-state during the previous cycle was higher than the
1680          * current target, add 50% of the difference to the target to reduce
1681          * possible performance oscillations and offset possible performance
1682          * loss related to moving the workload from one CPU to another within
1683          * a package/module.
1684          */
1685         avg_pstate = get_avg_pstate(cpu);
1686         if (avg_pstate > target)
1687                 target += (avg_pstate - target) >> 1;
1688
1689         return target;
1690 }
1691
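/* Clamp a candidate P-state to the currently effective per-CPU limits. */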
1692 static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
1693 {
1694         int max_pstate = intel_pstate_get_base_pstate(cpu);
1695         int min_pstate;
1696
1697         min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
1698         max_pstate = max(min_pstate, cpu->max_perf_ratio);
1699         return clamp_t(int, pstate, min_pstate, max_pstate);
1700 }
1701
1702 static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
1703 {
1704         if (pstate == cpu->pstate.current_pstate)
1705                 return;
1706
1707         cpu->pstate.current_pstate = pstate;
1708         wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
1709 }
1710
1711 static void intel_pstate_adjust_pstate(struct cpudata *cpu)
1712 {
1713         int from = cpu->pstate.current_pstate;
1714         struct sample *sample;
1715         int target_pstate;
1716
1717         update_turbo_state();
1718
1719         target_pstate = get_target_pstate(cpu);
1720         target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
1721         trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu);
1722         intel_pstate_update_pstate(cpu, target_pstate);
1723
1724         sample = &cpu->sample;
1725         trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
1726                 fp_toint(sample->busy_scaled),
1727                 from,
1728                 cpu->pstate.current_pstate,
1729                 sample->mperf,
1730                 sample->aperf,
1731                 sample->tsc,
1732                 get_avg_frequency(cpu),
1733                 fp_toint(cpu->iowait_boost * 100));
1734 }
1735
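/*
 * Utilization update hook for the non-HWP case: remote callbacks are
 * rejected, an IOWAIT update latches the iowait boost (triggering an
 * immediate re-evaluation unless the CPU was already 100% busy), a gap
 * longer than one tick clears the boost, and regular updates are rate
 * limited to INTEL_PSTATE_SAMPLING_INTERVAL before a new sample is taken
 * and the P-state adjusted.
 */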
1736 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
1737                                      unsigned int flags)
1738 {
1739         struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1740         u64 delta_ns;
1741
1742         /* Don't allow remote callbacks */
1743         if (smp_processor_id() != cpu->cpu)
1744                 return;
1745
1746         if (flags & SCHED_CPUFREQ_IOWAIT) {
1747                 cpu->iowait_boost = int_tofp(1);
1748                 cpu->last_update = time;
1749                 /*
1750                  * Busy was 100% during the last period, so the P-state was
1751                  * already at the maximum; skip the computation overhead.
1752                  */
1753                 if (fp_toint(cpu->sample.busy_scaled) == 100)
1754                         return;
1755
1756                 goto set_pstate;
1757         } else if (cpu->iowait_boost) {
1758                 /* Clear iowait_boost if the CPU may have been idle. */
1759                 delta_ns = time - cpu->last_update;
1760                 if (delta_ns > TICK_NSEC)
1761                         cpu->iowait_boost = 0;
1762         }
1763         cpu->last_update = time;
1764         delta_ns = time - cpu->sample.time;
1765         if ((s64)delta_ns < INTEL_PSTATE_SAMPLING_INTERVAL)
1766                 return;
1767
1768 set_pstate:
1769         if (intel_pstate_sample(cpu, time))
1770                 intel_pstate_adjust_pstate(cpu);
1771 }
1772
1773 static struct pstate_funcs core_funcs = {
1774         .get_max = core_get_max_pstate,
1775         .get_max_physical = core_get_max_pstate_physical,
1776         .get_min = core_get_min_pstate,
1777         .get_turbo = core_get_turbo_pstate,
1778         .get_scaling = core_get_scaling,
1779         .get_val = core_get_val,
1780 };
1781
1782 static const struct pstate_funcs silvermont_funcs = {
1783         .get_max = atom_get_max_pstate,
1784         .get_max_physical = atom_get_max_pstate,
1785         .get_min = atom_get_min_pstate,
1786         .get_turbo = atom_get_turbo_pstate,
1787         .get_val = atom_get_val,
1788         .get_scaling = silvermont_get_scaling,
1789         .get_vid = atom_get_vid,
1790 };
1791
1792 static const struct pstate_funcs airmont_funcs = {
1793         .get_max = atom_get_max_pstate,
1794         .get_max_physical = atom_get_max_pstate,
1795         .get_min = atom_get_min_pstate,
1796         .get_turbo = atom_get_turbo_pstate,
1797         .get_val = atom_get_val,
1798         .get_scaling = airmont_get_scaling,
1799         .get_vid = atom_get_vid,
1800 };
1801
1802 static const struct pstate_funcs knl_funcs = {
1803         .get_max = core_get_max_pstate,
1804         .get_max_physical = core_get_max_pstate_physical,
1805         .get_min = core_get_min_pstate,
1806         .get_turbo = knl_get_turbo_pstate,
1807         .get_aperf_mperf_shift = knl_get_aperf_mperf_shift,
1808         .get_scaling = core_get_scaling,
1809         .get_val = core_get_val,
1810 };
1811
1812 #define ICPU(model, policy) \
1813         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
1814                         (unsigned long)&policy }
1815
1816 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
1817         ICPU(INTEL_FAM6_SANDYBRIDGE,            core_funcs),
1818         ICPU(INTEL_FAM6_SANDYBRIDGE_X,          core_funcs),
1819         ICPU(INTEL_FAM6_ATOM_SILVERMONT1,       silvermont_funcs),
1820         ICPU(INTEL_FAM6_IVYBRIDGE,              core_funcs),
1821         ICPU(INTEL_FAM6_HASWELL_CORE,           core_funcs),
1822         ICPU(INTEL_FAM6_BROADWELL_CORE,         core_funcs),
1823         ICPU(INTEL_FAM6_IVYBRIDGE_X,            core_funcs),
1824         ICPU(INTEL_FAM6_HASWELL_X,              core_funcs),
1825         ICPU(INTEL_FAM6_HASWELL_ULT,            core_funcs),
1826         ICPU(INTEL_FAM6_HASWELL_GT3E,           core_funcs),
1827         ICPU(INTEL_FAM6_BROADWELL_GT3E,         core_funcs),
1828         ICPU(INTEL_FAM6_ATOM_AIRMONT,           airmont_funcs),
1829         ICPU(INTEL_FAM6_SKYLAKE_MOBILE,         core_funcs),
1830         ICPU(INTEL_FAM6_BROADWELL_X,            core_funcs),
1831         ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        core_funcs),
1832         ICPU(INTEL_FAM6_BROADWELL_XEON_D,       core_funcs),
1833         ICPU(INTEL_FAM6_XEON_PHI_KNL,           knl_funcs),
1834         ICPU(INTEL_FAM6_XEON_PHI_KNM,           knl_funcs),
1835         ICPU(INTEL_FAM6_ATOM_GOLDMONT,          core_funcs),
1836         ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,       core_funcs),
1837         ICPU(INTEL_FAM6_SKYLAKE_X,              core_funcs),
1838         {}
1839 };
1840 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1841
1842 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
1843         ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs),
1844         ICPU(INTEL_FAM6_BROADWELL_X, core_funcs),
1845         ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
1846         {}
1847 };
1848
1849 static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
1850         ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_funcs),
1851         {}
1852 };
1853
1854 static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
1855         ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
1856         ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
1857         {}
1858 };
1859
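/*
 * Allocate the per-CPU data on first use and (re)initialize it.  With HWP
 * active this also disables the energy-efficiency optimization on the
 * affected models, enables HWP for the CPU and turns on dynamic boost on
 * the listed models when the ACPI PM profile indicates a server.
 */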
1860 static int intel_pstate_init_cpu(unsigned int cpunum)
1861 {
1862         struct cpudata *cpu;
1863
1864         cpu = all_cpu_data[cpunum];
1865
1866         if (!cpu) {
1867                 cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
1868                 if (!cpu)
1869                         return -ENOMEM;
1870
1871                 all_cpu_data[cpunum] = cpu;
1872
1873                 cpu->epp_default = -EINVAL;
1874                 cpu->epp_powersave = -EINVAL;
1875                 cpu->epp_saved = -EINVAL;
1876         }
1877
1878         cpu = all_cpu_data[cpunum];
1879
1880         cpu->cpu = cpunum;
1881
1882         if (hwp_active) {
1883                 const struct x86_cpu_id *id;
1884
1885                 id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
1886                 if (id)
1887                         intel_pstate_disable_ee(cpunum);
1888
1889                 intel_pstate_hwp_enable(cpu);
1890
1891                 id = x86_match_cpu(intel_pstate_hwp_boost_ids);
1892                 if (id && intel_pstate_acpi_pm_profile_server())
1893                         hwp_boost = true;
1894         }
1895
1896         intel_pstate_get_cpu_pstates(cpu);
1897
1898         pr_debug("controlling: cpu %d\n", cpunum);
1899
1900         return 0;
1901 }
1902
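/*
 * Register the scheduler utilization update callback for this CPU, using the
 * HWP variant when HWP is active; with HWP the hook is only needed while
 * dynamic boost is enabled.
 */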
1903 static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
1904 {
1905         struct cpudata *cpu = all_cpu_data[cpu_num];
1906
1907         if (hwp_active && !hwp_boost)
1908                 return;
1909
1910         if (cpu->update_util_set)
1911                 return;
1912
1913         /* Prevent intel_pstate_update_util() from using stale data. */
1914         cpu->sample.time = 0;
1915         cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
1916                                      (hwp_active ?
1917                                       intel_pstate_update_util_hwp :
1918                                       intel_pstate_update_util));
1919         cpu->update_util_set = true;
1920 }
1921
1922 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
1923 {
1924         struct cpudata *cpu_data = all_cpu_data[cpu];
1925
1926         if (!cpu_data->update_util_set)
1927                 return;
1928
1929         cpufreq_remove_update_util_hook(cpu);
1930         cpu_data->update_util_set = false;
1931         synchronize_sched();
1932 }
1933
1934 static int intel_pstate_get_max_freq(struct cpudata *cpu)
1935 {
1936         return global.turbo_disabled || global.no_turbo ?
1937                         cpu->pstate.max_freq : cpu->pstate.turbo_freq;
1938 }
1939
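/*
 * Translate the policy min/max frequencies into P-state ratios and, unless
 * per-CPU limits are in use, combine them with the global min/max percentage
 * limits.
 */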
1940 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
1941                                             struct cpudata *cpu)
1942 {
1943         int max_freq = intel_pstate_get_max_freq(cpu);
1944         int32_t max_policy_perf, min_policy_perf;
1945         int max_state, turbo_max;
1946
1947         /*
1948          * HWP needs some special consideration, because on BDX the
1949          * HWP_REQUEST uses abstract values to represent performance
1950          * rather than pure ratios.
1951          */
1952         if (hwp_active) {
1953                 intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
1954         } else {
1955                 max_state = intel_pstate_get_base_pstate(cpu);
1956                 turbo_max = cpu->pstate.turbo_pstate;
1957         }
1958
1959         max_policy_perf = max_state * policy->max / max_freq;
1960         if (policy->max == policy->min) {
1961                 min_policy_perf = max_policy_perf;
1962         } else {
1963                 min_policy_perf = max_state * policy->min / max_freq;
1964                 min_policy_perf = clamp_t(int32_t, min_policy_perf,
1965                                           0, max_policy_perf);
1966         }
1967
1968         pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
1969                  policy->cpu, max_state,
1970                  min_policy_perf, max_policy_perf);
1971
1972         /* Normalize user input to [min_perf, max_perf] */
1973         if (per_cpu_limits) {
1974                 cpu->min_perf_ratio = min_policy_perf;
1975                 cpu->max_perf_ratio = max_policy_perf;
1976         } else {
1977                 int32_t global_min, global_max;
1978
1979                 /* Global limits are in percent of the maximum turbo P-state. */
1980                 global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
1981                 global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
1982                 global_min = clamp_t(int32_t, global_min, 0, global_max);
1983
1984                 pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu,
1985                          global_min, global_max);
1986
1987                 cpu->min_perf_ratio = max(min_policy_perf, global_min);
1988                 cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
1989                 cpu->max_perf_ratio = min(max_policy_perf, global_max);
1990                 cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
1991
1992                 /* Make sure min_perf <= max_perf */
1993                 cpu->min_perf_ratio = min(cpu->min_perf_ratio,
1994                                           cpu->max_perf_ratio);
1995
1996         }
1997         pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu,
1998                  cpu->max_perf_ratio,
1999                  cpu->min_perf_ratio);
2000 }
2001
2002 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
2003 {
2004         struct cpudata *cpu;
2005
2006         if (!policy->cpuinfo.max_freq)
2007                 return -ENODEV;
2008
2009         pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
2010                  policy->cpuinfo.max_freq, policy->max);
2011
2012         cpu = all_cpu_data[policy->cpu];
2013         cpu->policy = policy->policy;
2014
2015         mutex_lock(&intel_pstate_limits_lock);
2016
2017         intel_pstate_update_perf_limits(policy, cpu);
2018
2019         if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
2020                 /*
2021                  * NOHZ_FULL CPUs need this as the governor callback may not
2022                  * be invoked on them.
2023                  */
2024                 intel_pstate_clear_update_util_hook(policy->cpu);
2025                 intel_pstate_max_within_limits(cpu);
2026         } else {
2027                 intel_pstate_set_update_util_hook(policy->cpu);
2028         }
2029
2030         if (hwp_active) {
2031                 /*
2032                  * If hwp_boost was active before and has since been
2033                  * disabled dynamically, the utilization update hook
2034                  * needs to be cleared here.
2035                  */
2036                 if (!hwp_boost)
2037                         intel_pstate_clear_update_util_hook(policy->cpu);
2038                 intel_pstate_hwp_set(policy->cpu);
2039         }
2040
2041         mutex_unlock(&intel_pstate_limits_lock);
2042
2043         return 0;
2044 }
2045
2046 static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
2047                                          struct cpudata *cpu)
2048 {
2049         if (!hwp_active &&
2050             cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
2051             policy->max < policy->cpuinfo.max_freq &&
2052             policy->max > cpu->pstate.max_freq) {
2053                 pr_debug("policy->max > max non turbo frequency\n");
2054                 policy->max = policy->cpuinfo.max_freq;
2055         }
2056 }
2057
2058 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
2059 {
2060         struct cpudata *cpu = all_cpu_data[policy->cpu];
2061
2062         update_turbo_state();
2063         cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
2064                                      intel_pstate_get_max_freq(cpu));
2065
2066         if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
2067             policy->policy != CPUFREQ_POLICY_PERFORMANCE)
2068                 return -EINVAL;
2069
2070         intel_pstate_adjust_policy_max(policy, cpu);
2071
2072         return 0;
2073 }
2074
2075 static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
2076 {
2077         intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
2078 }
2079
2080 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
2081 {
2082         pr_debug("CPU %d exiting\n", policy->cpu);
2083
2084         intel_pstate_clear_update_util_hook(policy->cpu);
2085         if (hwp_active)
2086                 intel_pstate_hwp_save_state(policy);
2087         else
2088                 intel_cpufreq_stop_cpu(policy);
2089 }
2090
2091 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
2092 {
2093         intel_pstate_exit_perf_limits(policy);
2094
2095         policy->fast_switch_possible = false;
2096
2097         return 0;
2098 }
2099
2100 static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
2101 {
2102         struct cpudata *cpu;
2103         int rc;
2104
2105         rc = intel_pstate_init_cpu(policy->cpu);
2106         if (rc)
2107                 return rc;
2108
2109         cpu = all_cpu_data[policy->cpu];
2110
2111         cpu->max_perf_ratio = 0xFF;
2112         cpu->min_perf_ratio = 0;
2113
2114         policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
2115         policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
2116
2117         /* cpuinfo and default policy values */
2118         policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
2119         update_turbo_state();
2120         policy->cpuinfo.max_freq = global.turbo_disabled ?
2121                         cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
2122         policy->cpuinfo.max_freq *= cpu->pstate.scaling;
2123
2124         if (hwp_active) {
2125                 unsigned int max_freq;
2126
2127                 max_freq = global.turbo_disabled ?
2128                         cpu->pstate.max_freq : cpu->pstate.turbo_freq;
2129                 if (max_freq < policy->cpuinfo.max_freq)
2130                         policy->cpuinfo.max_freq = max_freq;
2131         }
2132
2133         intel_pstate_init_acpi_perf_limits(policy);
2134
2135         policy->fast_switch_possible = true;
2136
2137         return 0;
2138 }
2139
2140 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
2141 {
2142         int ret = __intel_pstate_cpu_init(policy);
2143
2144         if (ret)
2145                 return ret;
2146
2147         if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
2148                 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
2149         else
2150                 policy->policy = CPUFREQ_POLICY_POWERSAVE;
2151
2152         return 0;
2153 }
2154
2155 static struct cpufreq_driver intel_pstate = {
2156         .flags          = CPUFREQ_CONST_LOOPS,
2157         .verify         = intel_pstate_verify_policy,
2158         .setpolicy      = intel_pstate_set_policy,
2159         .suspend        = intel_pstate_hwp_save_state,
2160         .resume         = intel_pstate_resume,
2161         .init           = intel_pstate_cpu_init,
2162         .exit           = intel_pstate_cpu_exit,
2163         .stop_cpu       = intel_pstate_stop_cpu,
2164         .name           = "intel_pstate",
2165 };
2166
2167 static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
2168 {
2169         struct cpudata *cpu = all_cpu_data[policy->cpu];
2170
2171         update_turbo_state();
2172         cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
2173                                      intel_pstate_get_max_freq(cpu));
2174
2175         intel_pstate_adjust_policy_max(policy, cpu);
2176
2177         intel_pstate_update_perf_limits(policy, cpu);
2178
2179         return 0;
2180 }
2181
2182 /* Use of trace in passive mode:
2183  *
2184  * In passive mode the trace core_busy field (also known as the
2185  * performance field, and labelled as such on the graphs; also known as
2186  * core_avg_perf) is not needed and so is re-assigned to indicate if the
2187  * driver call was via the normal or fast switch path. Various graphs
2188  * output from the intel_pstate_tracer.py utility that include core_busy
2189  * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%,
2190  * so we use 10 to indicate the normal path through the driver, and
2191  * 90 to indicate the fast switch path through the driver.
2192  * The scaled_busy field is not used, and is set to 0.
2193  */
2194
2195 #define INTEL_PSTATE_TRACE_TARGET 10
2196 #define INTEL_PSTATE_TRACE_FAST_SWITCH 90
2197
2198 static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
2199 {
2200         struct sample *sample;
2201
2202         if (!trace_pstate_sample_enabled())
2203                 return;
2204
2205         if (!intel_pstate_sample(cpu, ktime_get()))
2206                 return;
2207
2208         sample = &cpu->sample;
2209         trace_pstate_sample(trace_type,
2210                 0,
2211                 old_pstate,
2212                 cpu->pstate.current_pstate,
2213                 sample->mperf,
2214                 sample->aperf,
2215                 sample->tsc,
2216                 get_avg_frequency(cpu),
2217                 fp_toint(cpu->iowait_boost * 100));
2218 }
2219
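/*
 * Passive-mode ->target() callback: convert the requested frequency into a
 * P-state according to the relation (round up for CPUFREQ_RELATION_L, down
 * for CPUFREQ_RELATION_H, to the nearest otherwise), clamp it to the limits
 * and program PERF_CTL on the target CPU.
 */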
2220 static int intel_cpufreq_target(struct cpufreq_policy *policy,
2221                                 unsigned int target_freq,
2222                                 unsigned int relation)
2223 {
2224         struct cpudata *cpu = all_cpu_data[policy->cpu];
2225         struct cpufreq_freqs freqs;
2226         int target_pstate, old_pstate;
2227
2228         update_turbo_state();
2229
2230         freqs.old = policy->cur;
2231         freqs.new = target_freq;
2232
2233         cpufreq_freq_transition_begin(policy, &freqs);
2234         switch (relation) {
2235         case CPUFREQ_RELATION_L:
2236                 target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
2237                 break;
2238         case CPUFREQ_RELATION_H:
2239                 target_pstate = freqs.new / cpu->pstate.scaling;
2240                 break;
2241         default:
2242                 target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
2243                 break;
2244         }
2245         target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2246         old_pstate = cpu->pstate.current_pstate;
2247         if (target_pstate != cpu->pstate.current_pstate) {
2248                 cpu->pstate.current_pstate = target_pstate;
2249                 wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
2250                               pstate_funcs.get_val(cpu, target_pstate));
2251         }
2252         freqs.new = target_pstate * cpu->pstate.scaling;
2253         intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate);
2254         cpufreq_freq_transition_end(policy, &freqs, false);
2255
2256         return 0;
2257 }
2258
2259 static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
2260                                               unsigned int target_freq)
2261 {
2262         struct cpudata *cpu = all_cpu_data[policy->cpu];
2263         int target_pstate, old_pstate;
2264
2265         update_turbo_state();
2266
2267         target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
2268         target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
2269         old_pstate = cpu->pstate.current_pstate;
2270         intel_pstate_update_pstate(cpu, target_pstate);
2271         intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
2272         return target_pstate * cpu->pstate.scaling;
2273 }
2274
2275 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
2276 {
2277         int ret = __intel_pstate_cpu_init(policy);
2278
2279         if (ret)
2280                 return ret;
2281
2282         policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
2283         policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
2284         /* This reflects the intel_pstate_get_cpu_pstates() setting. */
2285         policy->cur = policy->cpuinfo.min_freq;
2286
2287         return 0;
2288 }
2289
2290 static struct cpufreq_driver intel_cpufreq = {
2291         .flags          = CPUFREQ_CONST_LOOPS,
2292         .verify         = intel_cpufreq_verify_policy,
2293         .target         = intel_cpufreq_target,
2294         .fast_switch    = intel_cpufreq_fast_switch,
2295         .init           = intel_cpufreq_cpu_init,
2296         .exit           = intel_pstate_cpu_exit,
2297         .stop_cpu       = intel_cpufreq_stop_cpu,
2298         .name           = "intel_cpufreq",
2299 };
2300
2301 static struct cpufreq_driver *default_driver = &intel_pstate;
2302
2303 static void intel_pstate_driver_cleanup(void)
2304 {
2305         unsigned int cpu;
2306
2307         get_online_cpus();
2308         for_each_online_cpu(cpu) {
2309                 if (all_cpu_data[cpu]) {
2310                         if (intel_pstate_driver == &intel_pstate)
2311                                 intel_pstate_clear_update_util_hook(cpu);
2312
2313                         kfree(all_cpu_data[cpu]);
2314                         all_cpu_data[cpu] = NULL;
2315                 }
2316         }
2317         put_online_cpus();
2318         intel_pstate_driver = NULL;
2319 }
2320
2321 static int intel_pstate_register_driver(struct cpufreq_driver *driver)
2322 {
2323         int ret;
2324
2325         memset(&global, 0, sizeof(global));
2326         global.max_perf_pct = 100;
2327
2328         intel_pstate_driver = driver;
2329         ret = cpufreq_register_driver(intel_pstate_driver);
2330         if (ret) {
2331                 intel_pstate_driver_cleanup();
2332                 return ret;
2333         }
2334
2335         global.min_perf_pct = min_perf_pct_min();
2336
2337         return 0;
2338 }
2339
2340 static int intel_pstate_unregister_driver(void)
2341 {
2342         if (hwp_active)
2343                 return -EBUSY;
2344
2345         cpufreq_unregister_driver(intel_pstate_driver);
2346         intel_pstate_driver_cleanup();
2347
2348         return 0;
2349 }
2350
2351 static ssize_t intel_pstate_show_status(char *buf)
2352 {
2353         if (!intel_pstate_driver)
2354                 return sprintf(buf, "off\n");
2355
2356         return sprintf(buf, "%s\n", intel_pstate_driver == &intel_pstate ?
2357                                         "active" : "passive");
2358 }
2359
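/*
 * Handle writes to the driver's sysfs status attribute: "off" unregisters the
 * current driver, while "active" and "passive" switch to the intel_pstate and
 * intel_cpufreq drivers respectively, unregistering the old one first when
 * necessary.
 */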
2360 static int intel_pstate_update_status(const char *buf, size_t size)
2361 {
2362         int ret;
2363
2364         if (size == 3 && !strncmp(buf, "off", size))
2365                 return intel_pstate_driver ?
2366                         intel_pstate_unregister_driver() : -EINVAL;
2367
2368         if (size == 6 && !strncmp(buf, "active", size)) {
2369                 if (intel_pstate_driver) {
2370                         if (intel_pstate_driver == &intel_pstate)
2371                                 return 0;
2372
2373                         ret = intel_pstate_unregister_driver();
2374                         if (ret)
2375                                 return ret;
2376                 }
2377
2378                 return intel_pstate_register_driver(&intel_pstate);
2379         }
2380
2381         if (size == 7 && !strncmp(buf, "passive", size)) {
2382                 if (intel_pstate_driver) {
2383                         if (intel_pstate_driver == &intel_cpufreq)
2384                                 return 0;
2385
2386                         ret = intel_pstate_unregister_driver();
2387                         if (ret)
2388                                 return ret;
2389                 }
2390
2391                 return intel_pstate_register_driver(&intel_cpufreq);
2392         }
2393
2394         return -EINVAL;
2395 }
2396
2397 static int no_load __initdata;
2398 static int no_hwp __initdata;
2399 static int hwp_only __initdata;
2400 static unsigned int force_load __initdata;
2401
2402 static int __init intel_pstate_msrs_not_valid(void)
2403 {
2404         if (!pstate_funcs.get_max() ||
2405             !pstate_funcs.get_min() ||
2406             !pstate_funcs.get_turbo())
2407                 return -ENODEV;
2408
2409         return 0;
2410 }
2411
2412 static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
2413 {
2414         pstate_funcs.get_max   = funcs->get_max;
2415         pstate_funcs.get_max_physical = funcs->get_max_physical;
2416         pstate_funcs.get_min   = funcs->get_min;
2417         pstate_funcs.get_turbo = funcs->get_turbo;
2418         pstate_funcs.get_scaling = funcs->get_scaling;
2419         pstate_funcs.get_val   = funcs->get_val;
2420         pstate_funcs.get_vid   = funcs->get_vid;
2421         pstate_funcs.get_aperf_mperf_shift = funcs->get_aperf_mperf_shift;
2422 }
2423
2424 #ifdef CONFIG_ACPI
2425
2426 static bool __init intel_pstate_no_acpi_pss(void)
2427 {
2428         int i;
2429
2430         for_each_possible_cpu(i) {
2431                 acpi_status status;
2432                 union acpi_object *pss;
2433                 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
2434                 struct acpi_processor *pr = per_cpu(processors, i);
2435
2436                 if (!pr)
2437                         continue;
2438
2439                 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
2440                 if (ACPI_FAILURE(status))
2441                         continue;
2442
2443                 pss = buffer.pointer;
2444                 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
2445                         kfree(pss);
2446                         return false;
2447                 }
2448
2449                 kfree(pss);
2450         }
2451
2452         return true;
2453 }
2454
2455 static bool __init intel_pstate_no_acpi_pcch(void)
2456 {
2457         acpi_status status;
2458         acpi_handle handle;
2459
2460         status = acpi_get_handle(NULL, "\\_SB", &handle);
2461         if (ACPI_FAILURE(status))
2462                 return true;
2463
2464         return !acpi_has_method(handle, "PCCH");
2465 }
2466
2467 static bool __init intel_pstate_has_acpi_ppc(void)
2468 {
2469         int i;
2470
2471         for_each_possible_cpu(i) {
2472                 struct acpi_processor *pr = per_cpu(processors, i);
2473
2474                 if (!pr)
2475                         continue;
2476                 if (acpi_has_method(pr->handle, "_PPC"))
2477                         return true;
2478         }
2479         return false;
2480 }
2481
2482 enum {
2483         PSS,
2484         PPC,
2485 };
2486
2487 /* Hardware vendors whose platforms have their own power management modes */
2488 static struct acpi_platform_list plat_info[] __initdata = {
2489         {"HP    ", "ProLiant", 0, ACPI_SIG_FADT, all_versions, 0, PSS},
2490         {"ORACLE", "X4-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2491         {"ORACLE", "X4-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2492         {"ORACLE", "X4-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2493         {"ORACLE", "X3-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2494         {"ORACLE", "X3-2L   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2495         {"ORACLE", "X3-2B   ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2496         {"ORACLE", "X4470M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2497         {"ORACLE", "X4270M3 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2498         {"ORACLE", "X4270M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2499         {"ORACLE", "X4170M2 ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2500         {"ORACLE", "X4170 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2501         {"ORACLE", "X4275 M3", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2502         {"ORACLE", "X6-2    ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2503         {"ORACLE", "Sudbury ", 0, ACPI_SIG_FADT, all_versions, 0, PPC},
2504         { } /* End */
2505 };
2506
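/*
 * Check whether the platform firmware manages P-states itself: either the CPU
 * is in the out-of-band (OOB) list and MSR_MISC_PWR_MGMT has the OOB bit set,
 * or the platform matches the vendor list above and the ACPI objects indicate
 * firmware control (no _PSS and no PCCH for the PSS entries, a _PPC method
 * for the PPC entries unless "force" was given on the command line).
 */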
2507 static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
2508 {
2509         const struct x86_cpu_id *id;
2510         u64 misc_pwr;
2511         int idx;
2512
2513         id = x86_match_cpu(intel_pstate_cpu_oob_ids);
2514         if (id) {
2515                 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
2516                 if (misc_pwr & (1 << 8))
2517                         return true;
2518         }
2519
2520         idx = acpi_match_platform_list(plat_info);
2521         if (idx < 0)
2522                 return false;
2523
2524         switch (plat_info[idx].data) {
2525         case PSS:
2526                 if (!intel_pstate_no_acpi_pss())
2527                         return false;
2528
2529                 return intel_pstate_no_acpi_pcch();
2530         case PPC:
2531                 return intel_pstate_has_acpi_ppc() && !force_load;
2532         }
2533
2534         return false;
2535 }
2536
2537 static void intel_pstate_request_control_from_smm(void)
2538 {
2539         /*
2540          * It may be unsafe to request P-states control from SMM if _PPC support
2541          * has not been enabled.
2542          */
2543         if (acpi_ppc)
2544                 acpi_processor_pstate_control();
2545 }
2546 #else /* CONFIG_ACPI not enabled */
2547 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
2548 static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
2549 static inline void intel_pstate_request_control_from_smm(void) {}
2550 #endif /* CONFIG_ACPI */
2551
2552 #define INTEL_PSTATE_HWP_BROADWELL      0x01
2553
2554 #define ICPU_HWP(model, hwp_mode) \
2555         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
2556
2557 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
2558         ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
2559         ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
2560         ICPU_HWP(X86_MODEL_ANY, 0),
2561         {}
2562 };
2563
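/*
 * Driver entry point: prefer HWP when it is supported (unless disabled on the
 * command line), otherwise use the model-specific callbacks from the CPU ID
 * table; bail out if the required MSRs are not usable or the platform
 * firmware manages P-states, then register the selected cpufreq driver.
 */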
2564 static int __init intel_pstate_init(void)
2565 {
2566         const struct x86_cpu_id *id;
2567         int rc;
2568
2569         if (no_load)
2570                 return -ENODEV;
2571
2572         id = x86_match_cpu(hwp_support_ids);
2573         if (id) {
2574                 copy_cpu_funcs(&core_funcs);
2575                 if (!no_hwp) {
2576                         hwp_active++;
2577                         hwp_mode_bdw = id->driver_data;
2578                         intel_pstate.attr = hwp_cpufreq_attrs;
2579                         goto hwp_cpu_matched;
2580                 }
2581         } else {
2582                 id = x86_match_cpu(intel_pstate_cpu_ids);
2583                 if (!id)
2584                         return -ENODEV;
2585
2586                 copy_cpu_funcs((struct pstate_funcs *)id->driver_data);
2587         }
2588
2589         if (intel_pstate_msrs_not_valid())
2590                 return -ENODEV;
2591
2592 hwp_cpu_matched:
2593         /*
2594          * The Intel pstate driver will be ignored if the platform
2595          * firmware has its own power management modes.
2596          */
2597         if (intel_pstate_platform_pwr_mgmt_exists())
2598                 return -ENODEV;
2599
2600         if (!hwp_active && hwp_only)
2601                 return -ENOTSUPP;
2602
2603         pr_info("Intel P-state driver initializing\n");
2604
2605         all_cpu_data = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
2606         if (!all_cpu_data)
2607                 return -ENOMEM;
2608
2609         intel_pstate_request_control_from_smm();
2610
2611         intel_pstate_sysfs_expose_params();
2612
2613         mutex_lock(&intel_pstate_driver_lock);
2614         rc = intel_pstate_register_driver(default_driver);
2615         mutex_unlock(&intel_pstate_driver_lock);
2616         if (rc)
2617                 return rc;
2618
2619         if (hwp_active)
2620                 pr_info("HWP enabled\n");
2621
2622         return 0;
2623 }
2624 device_initcall(intel_pstate_init);
2625
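/*
 * Parse the "intel_pstate=" early parameter: "disable", "passive", "no_hwp",
 * "force", "hwp_only", "per_cpu_perf_limits" and, when ACPI is enabled,
 * "support_acpi_ppc".
 */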
2626 static int __init intel_pstate_setup(char *str)
2627 {
2628         if (!str)
2629                 return -EINVAL;
2630
2631         if (!strcmp(str, "disable")) {
2632                 no_load = 1;
2633         } else if (!strcmp(str, "passive")) {
2634                 pr_info("Passive mode enabled\n");
2635                 default_driver = &intel_cpufreq;
2636                 no_hwp = 1;
2637         }
2638         if (!strcmp(str, "no_hwp")) {
2639                 pr_info("HWP disabled\n");
2640                 no_hwp = 1;
2641         }
2642         if (!strcmp(str, "force"))
2643                 force_load = 1;
2644         if (!strcmp(str, "hwp_only"))
2645                 hwp_only = 1;
2646         if (!strcmp(str, "per_cpu_perf_limits"))
2647                 per_cpu_limits = true;
2648
2649 #ifdef CONFIG_ACPI
2650         if (!strcmp(str, "support_acpi_ppc"))
2651                 acpi_ppc = true;
2652 #endif
2653
2654         return 0;
2655 }
2656 early_param("intel_pstate", intel_pstate_setup);
2657
2658 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
2659 MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
2660 MODULE_LICENSE("GPL");