Merge branches 'pm-avs', 'pm-clk', 'pm-devfreq' and 'pm-sleep'
author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>
           Thu, 24 Mar 2016 23:58:18 +0000 (00:58 +0100)
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>
           Thu, 24 Mar 2016 23:58:18 +0000 (00:58 +0100)
* pm-avs:
  PM / AVS: rockchip-io: add io selectors and supplies for rk3399

* pm-clk:
  PM / clk: Add support for obtaining clocks from device-tree

* pm-devfreq:
  PM / devfreq: Spelling s/frequnecy/frequency/

* pm-sleep:
  ACPI / PM: Runtime resume devices when waking from hibernate
  PM / sleep: Clear pm_suspend_global_flags upon hibernate

13 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
drivers/acpi/sleep.c
drivers/base/power/clock_ops.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/powernv-cpufreq.c
drivers/cpuidle/governors/menu.c
drivers/devfreq/Kconfig
drivers/idle/intel_idle.c
include/linux/pm_clock.h
kernel/power/hibernate.c

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8ee69ecc59a353c700330b7162f50652123..16501334b99fe88544534274c837b822a03d0d70 100644
@@ -271,3 +271,72 @@ Description:       Parameters for the CPU cache attributes
                        - WriteBack: data is written only to the cache line and
                                     the modified cache line is written to main
                                     memory only when it is replaced
+
+What:          /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
+               /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
+Date:          March 2016
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+               Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:   POWERNV CPUFreq driver's frequency throttle stats directory and
+               attributes
+
+               The 'cpuX/cpufreq/throttle_stats' directory contains the CPU frequency
+               throttle stat attributes for the chip. The throttle stats of a CPU
+               are common across all the CPUs belonging to a chip. Below are the
+               throttle attributes exported in the 'throttle_stats' directory:
+
+               - turbo_stat : This file gives the total number of times the max
+               frequency is throttled to a lower frequency in the turbo range
+               of frequencies (at and above nominal frequency).
+
+               - sub_turbo_stat : This file gives the total number of times the
+               max frequency is throttled to a lower frequency in the sub-turbo
+               range of frequencies (below nominal frequency).
+
+               - unthrottle : This file gives the total number of times the max
+               frequency is unthrottled after being throttled.
+
+               - powercap : This file gives the total number of times the max
+               frequency is throttled due to 'Power Capping'.
+
+               - overtemp : This file gives the total number of times the max
+               frequency is throttled due to 'CPU Over Temperature'.
+
+               - supply_fault : This file gives the total number of times the
+               max frequency is throttled due to 'Power Supply Failure'.
+
+               - overcurrent : This file gives the total number of times the
+               max frequency is throttled due to 'Overcurrent'.
+
+               - occ_reset : This file gives the total number of times the max
+               frequency is throttled due to 'OCC Reset'.
+
+               The sysfs attributes representing different throttle reasons like
+               powercap, overtemp, supply_fault, overcurrent and occ_reset map to
+               the reasons provided by OCC firmware for throttling the frequency.
+
+What:          /sys/devices/system/cpu/cpufreq/policyX/throttle_stats
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/turbo_stat
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/sub_turbo_stat
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/unthrottle
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/powercap
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overtemp
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/supply_fault
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overcurrent
+               /sys/devices/system/cpu/cpufreq/policyX/throttle_stats/occ_reset
+Date:          March 2016
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+               Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:   POWERNV CPUFreq driver's frequency throttle stats directory and
+               attributes
+
+               The 'policyX/throttle_stats' directory and all of its attributes are the
+               same as the /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory
+               and attributes, which give the frequency throttle information of the chip.
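
As a quick illustration of how these counters might be consumed from user space, here is a minimal sketch that reads one of the new attributes; the cpu0 path and the presence of the attribute are assumptions (a POWERNV system running a kernel with this change):

#include <stdio.h>

/* Hypothetical reader for one throttle counter documented above. */
int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/cpu0/cpufreq/throttle_stats/powercap";
	unsigned int count;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%u", &count) == 1)
		printf("powercap throttle events: %u\n", count);
	fclose(f);
	return 0;
}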
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index fbfcce3b5227e0c4ea90e5838f8762582978ae99..2a8b5964429753b212a0f28ff9be5f19a7a1b777 100644
@@ -748,6 +748,7 @@ static int acpi_hibernation_enter(void)
 
 static void acpi_hibernation_leave(void)
 {
+       pm_set_resume_via_firmware();
        /*
         * If ACPI is not enabled by the BIOS and the boot kernel, we need to
         * enable it here.
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 272a52ebafc09d0c4a3d974b6ca0bf16aef5e612..0e64a1b5e62a0a768b568da8ed7dbc6554e11060 100644
@@ -137,6 +137,62 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk)
        return __pm_clk_add(dev, NULL, clk);
 }
 
+
+/**
+ * of_pm_clk_add_clks - Start using device clock(s) for power management.
+ * @dev: Device whose clock(s) is going to be used for power management.
+ *
+ * Add a series of clocks described in the 'clocks' device-tree node for
+ * a device to the list of clocks used for the power management of @dev.
+ * On success, returns the number of clocks added. Returns a negative
+ * error code if there are no clocks in the device node for the device
+ * or if adding a clock fails.
+ */
+int of_pm_clk_add_clks(struct device *dev)
+{
+       struct clk **clks;
+       unsigned int i, count;
+       int ret;
+
+       if (!dev || !dev->of_node)
+               return -EINVAL;
+
+       count = of_count_phandle_with_args(dev->of_node, "clocks",
+                                          "#clock-cells");
+       if (count == 0)
+               return -ENODEV;
+
+       clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
+       if (!clks)
+               return -ENOMEM;
+
+       for (i = 0; i < count; i++) {
+               clks[i] = of_clk_get(dev->of_node, i);
+               if (IS_ERR(clks[i])) {
+                       ret = PTR_ERR(clks[i]);
+                       goto error;
+               }
+
+               ret = pm_clk_add_clk(dev, clks[i]);
+               if (ret) {
+                       clk_put(clks[i]);
+                       goto error;
+               }
+       }
+
+       kfree(clks);
+
+       return i;
+
+error:
+       while (i--)
+               pm_clk_remove_clk(dev, clks[i]);
+
+       kfree(clks);
+
+       return ret;
+}
+
 /**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
@@ -197,6 +253,39 @@ void pm_clk_remove(struct device *dev, const char *con_id)
        __pm_clk_remove(ce);
 }
 
+/**
+ * pm_clk_remove_clk - Stop using a device clock for power management.
+ * @dev: Device whose clock should not be used for PM any more.
+ * @clk: Clock pointer
+ *
+ * Remove the clock pointed to by @clk from the list of clocks used for
+ * the power management of @dev.
+ */
+void pm_clk_remove_clk(struct device *dev, struct clk *clk)
+{
+       struct pm_subsys_data *psd = dev_to_psd(dev);
+       struct pm_clock_entry *ce;
+
+       if (!psd || !clk)
+               return;
+
+       spin_lock_irq(&psd->lock);
+
+       list_for_each_entry(ce, &psd->clock_list, node) {
+               if (clk == ce->clk)
+                       goto remove;
+       }
+
+       spin_unlock_irq(&psd->lock);
+       return;
+
+ remove:
+       list_del(&ce->node);
+       spin_unlock_irq(&psd->lock);
+
+       __pm_clk_remove(ce);
+}
+
 /**
  * pm_clk_init - Initialize a device's list of power management clocks.
  * @dev: Device to initialize the list of PM clocks for.
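
A minimal sketch of how a platform driver could combine the new helpers added above; the foo_* names are hypothetical and error handling is abbreviated:

#include <linux/platform_device.h>
#include <linux/pm_clock.h>

/*
 * Hypothetical probe path: create the PM clock list, then populate it
 * with every clock listed in the device's "clocks" DT property.
 */
static int foo_probe(struct platform_device *pdev)
{
	int ret;

	ret = pm_clk_create(&pdev->dev);
	if (ret)
		return ret;

	/* Returns the number of clocks added, or a negative error code. */
	ret = of_pm_clk_add_clks(&pdev->dev);
	if (ret < 0) {
		pm_clk_destroy(&pdev->dev);
		return ret;
	}

	return 0;
}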
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 59a7b380fbe28fcc242e3aa641ff1eac1a09ebcb..fb5712141040abd750829f6a212c98044ddda52f 100644
@@ -245,7 +245,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
        }
 }
 
-u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
 {
        u32 val, dummy;
 
@@ -253,7 +253,7 @@ u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
        return val;
 }
 
-void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
 {
        u32 lo, hi;
 
@@ -262,7 +262,7 @@ void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
        wrmsr(MSR_IA32_PERF_CTL, lo, hi);
 }
 
-u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
 {
        u32 val, dummy;
 
@@ -270,12 +270,12 @@ u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
        return val;
 }
 
-void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
 {
        wrmsr(MSR_AMD_PERF_CTL, val, 0);
 }
 
-u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
 {
        u32 val;
 
@@ -283,7 +283,7 @@ u32 cpu_freq_read_io(struct acpi_pct_register *reg)
        return val;
 }
 
-void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
 {
        acpi_os_write_port(reg->address, val, reg->bit_width);
 }
@@ -514,8 +514,10 @@ static int boost_notify(struct notifier_block *nb, unsigned long action,
         */
 
        switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
+       case CPU_DOWN_FAILED:
+       case CPU_DOWN_FAILED_FROZEN:
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
                boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
                break;
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4c7825856eabebde799913257e936f12b29ec387..b87596b591b3ca9fdf332884700e5a3cc96ebf10 100644
@@ -76,6 +76,7 @@ static inline bool has_target(void)
 /* internal prototypes */
 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
+static int cpufreq_start_governor(struct cpufreq_policy *policy);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -964,10 +965,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
        cpumask_set_cpu(cpu, policy->cpus);
 
        if (has_target()) {
-               ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-               if (!ret)
-                       ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+               ret = cpufreq_start_governor(policy);
                if (ret)
                        pr_err("%s: Failed to start governor\n", __func__);
        }
@@ -1308,10 +1306,7 @@ static void cpufreq_offline(unsigned int cpu)
        /* Start governor again for active policy */
        if (!policy_is_inactive(policy)) {
                if (has_target()) {
-                       ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-                       if (!ret)
-                               ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+                       ret = cpufreq_start_governor(policy);
                        if (ret)
                                pr_err("%s: Failed to start governor\n", __func__);
                }
@@ -1401,9 +1396,17 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
 {
        struct cpufreq_policy *policy;
        unsigned int ret_freq = 0;
+       unsigned long flags;
 
-       if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
-               return cpufreq_driver->get(cpu);
+       read_lock_irqsave(&cpufreq_driver_lock, flags);
+
+       if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) {
+               ret_freq = cpufreq_driver->get(cpu);
+               read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+               return ret_freq;
+       }
+
+       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
        policy = cpufreq_cpu_get(cpu);
        if (policy) {
@@ -1484,6 +1487,24 @@ unsigned int cpufreq_get(unsigned int cpu)
 }
 EXPORT_SYMBOL(cpufreq_get);
 
+static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
+{
+       unsigned int new_freq;
+
+       new_freq = cpufreq_driver->get(policy->cpu);
+       if (!new_freq)
+               return 0;
+
+       if (!policy->cur) {
+               pr_debug("cpufreq: Driver did not initialize current freq\n");
+               policy->cur = new_freq;
+       } else if (policy->cur != new_freq && has_target()) {
+               cpufreq_out_of_sync(policy, new_freq);
+       }
+
+       return new_freq;
+}
+
 static struct subsys_interface cpufreq_interface = {
        .name           = "cpufreq",
        .subsys         = &cpu_subsys,
@@ -1583,9 +1604,7 @@ void cpufreq_resume(void)
                                policy);
                } else {
                        down_write(&policy->rwsem);
-                       ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-                       if (!ret)
-                               cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+                       ret = cpufreq_start_governor(policy);
                        up_write(&policy->rwsem);
 
                        if (ret)
@@ -1593,17 +1612,6 @@ void cpufreq_resume(void)
                                       __func__, policy);
                }
        }
-
-       /*
-        * schedule call cpufreq_update_policy() for first-online CPU, as that
-        * wouldn't be hotplugged-out on suspend. It will verify that the
-        * current freq is in sync with what we believe it to be.
-        */
-       policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
-       if (WARN_ON(!policy))
-               return;
-
-       schedule_work(&policy->update);
 }
 
 /**
@@ -1927,6 +1935,17 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
        return ret;
 }
 
+static int cpufreq_start_governor(struct cpufreq_policy *policy)
+{
+       int ret;
+
+       if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
+               cpufreq_update_current_freq(policy);
+
+       ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
+       return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+}
+
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
        int err;
@@ -2063,8 +2082,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
                return cpufreq_driver->setpolicy(new_policy);
        }
 
-       if (new_policy->governor == policy->governor)
-               goto out;
+       if (new_policy->governor == policy->governor) {
+               pr_debug("cpufreq: governor limits update\n");
+               return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+       }
 
        pr_debug("governor switch\n");
 
@@ -2092,10 +2113,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
        policy->governor = new_policy->governor;
        ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
        if (!ret) {
-               ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-               if (!ret)
-                       goto out;
-
+               ret = cpufreq_start_governor(policy);
+               if (!ret) {
+                       pr_debug("cpufreq: governor change\n");
+                       return 0;
+               }
                cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
        }
 
@@ -2106,14 +2128,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
                if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
                        policy->governor = NULL;
                else
-                       cpufreq_governor(policy, CPUFREQ_GOV_START);
+                       cpufreq_start_governor(policy);
        }
 
        return ret;
-
- out:
-       pr_debug("governor: change or update limits\n");
-       return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }
 
 /**
@@ -2144,19 +2162,11 @@ int cpufreq_update_policy(unsigned int cpu)
         * -> ask driver for current freq and notify governors about a change
         */
        if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
-               new_policy.cur = cpufreq_driver->get(cpu);
+               new_policy.cur = cpufreq_update_current_freq(policy);
                if (WARN_ON(!new_policy.cur)) {
                        ret = -EIO;
                        goto unlock;
                }
-
-               if (!policy->cur) {
-                       pr_debug("Driver did not initialize current freq\n");
-                       policy->cur = new_policy.cur;
-               } else {
-                       if (policy->cur != new_policy.cur && has_target())
-                               cpufreq_out_of_sync(policy, new_policy.cur);
-               }
        }
 
        ret = cpufreq_set_policy(policy, &new_policy);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 1c25ef4056164b2cb7ddc587441637a1973612c1..10a5cfeae8c5e34317805402c63d0c11e3aaac5e 100644
@@ -329,7 +329,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
        struct policy_dbs_info *policy_dbs;
 
        policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
-       schedule_work(&policy_dbs->work);
+       schedule_work_on(smp_processor_id(), &policy_dbs->work);
 }
 
 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cb5607495816d73bfa33e3579962aa5a68ff3af4..4b644526fd5977943761d5430e1967d26997567d 100644
@@ -134,7 +134,7 @@ struct pstate_funcs {
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
-       void (*set)(struct cpudata*, int pstate);
+       u64 (*get_val)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
        int32_t (*get_target_pstate)(struct cpudata *);
 };
@@ -565,7 +565,7 @@ static int atom_get_turbo_pstate(void)
        return value & 0x7F;
 }
 
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 {
        u64 val;
        int32_t vid_fp;
@@ -585,9 +585,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
        if (pstate > cpudata->pstate.max_pstate)
                vid = cpudata->vid.turbo;
 
-       val |= vid;
-
-       wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+       return val | vid;
 }
 
 static int silvermont_get_scaling(void)
@@ -711,7 +709,7 @@ static inline int core_get_scaling(void)
        return 100000;
 }
 
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
 {
        u64 val;
 
@@ -719,7 +717,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
        if (limits->no_turbo && !limits->turbo_disabled)
                val |= (u64)1 << 32;
 
-       wrmsrl(MSR_IA32_PERF_CTL, val);
+       return val;
 }
 
 static int knl_get_turbo_pstate(void)
@@ -750,7 +748,7 @@ static struct cpu_defaults core_params = {
                .get_min = core_get_min_pstate,
                .get_turbo = core_get_turbo_pstate,
                .get_scaling = core_get_scaling,
-               .set = core_set_pstate,
+               .get_val = core_get_val,
                .get_target_pstate = get_target_pstate_use_performance,
        },
 };
@@ -769,7 +767,7 @@ static struct cpu_defaults silvermont_params = {
                .get_max_physical = atom_get_max_pstate,
                .get_min = atom_get_min_pstate,
                .get_turbo = atom_get_turbo_pstate,
-               .set = atom_set_pstate,
+               .get_val = atom_get_val,
                .get_scaling = silvermont_get_scaling,
                .get_vid = atom_get_vid,
                .get_target_pstate = get_target_pstate_use_cpu_load,
@@ -790,7 +788,7 @@ static struct cpu_defaults airmont_params = {
                .get_max_physical = atom_get_max_pstate,
                .get_min = atom_get_min_pstate,
                .get_turbo = atom_get_turbo_pstate,
-               .set = atom_set_pstate,
+               .get_val = atom_get_val,
                .get_scaling = airmont_get_scaling,
                .get_vid = atom_get_vid,
                .get_target_pstate = get_target_pstate_use_cpu_load,
@@ -812,7 +810,7 @@ static struct cpu_defaults knl_params = {
                .get_min = core_get_min_pstate,
                .get_turbo = knl_get_turbo_pstate,
                .get_scaling = core_get_scaling,
-               .set = core_set_pstate,
+               .get_val = core_get_val,
                .get_target_pstate = get_target_pstate_use_performance,
        },
 };
@@ -839,25 +837,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
 {
-       int max_perf, min_perf;
-
-       if (force) {
-               update_turbo_state();
-
-               intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-
-               pstate = clamp_t(int, pstate, min_perf, max_perf);
-
-               if (pstate == cpu->pstate.current_pstate)
-                       return;
-       }
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-
        cpu->pstate.current_pstate = pstate;
+}
 
-       pstate_funcs.set(cpu, pstate);
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+       int pstate = cpu->pstate.min_pstate;
+
+       intel_pstate_record_pstate(cpu, pstate);
+       /*
+        * Generally, there is no guarantee that this code will always run on
+        * the CPU being updated, so force the register update to run on the
+        * right CPU.
+        */
+       wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+                     pstate_funcs.get_val(cpu, pstate));
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -870,7 +867,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 
        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
-       intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+
+       intel_pstate_set_min_pstate(cpu);
 }
 
 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
@@ -997,6 +995,21 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
        return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
 }
 
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+       int max_perf, min_perf;
+
+       update_turbo_state();
+
+       intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+       pstate = clamp_t(int, pstate, min_perf, max_perf);
+       if (pstate == cpu->pstate.current_pstate)
+               return;
+
+       intel_pstate_record_pstate(cpu, pstate);
+       wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+}
+
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
        int from, target_pstate;
@@ -1006,7 +1019,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
        target_pstate = pstate_funcs.get_target_pstate(cpu);
 
-       intel_pstate_set_pstate(cpu, target_pstate, true);
+       intel_pstate_update_pstate(cpu, target_pstate);
 
        sample = &cpu->sample;
        trace_pstate_sample(fp_toint(sample->core_pct_busy),
@@ -1180,7 +1193,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
        if (hwp_active)
                return;
 
-       intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+       intel_pstate_set_min_pstate(cpu);
 }
 
 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -1255,7 +1268,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
        pstate_funcs.get_min   = funcs->get_min;
        pstate_funcs.get_turbo = funcs->get_turbo;
        pstate_funcs.get_scaling = funcs->get_scaling;
-       pstate_funcs.set       = funcs->set;
+       pstate_funcs.get_val   = funcs->get_val;
        pstate_funcs.get_vid   = funcs->get_vid;
        pstate_funcs.get_target_pstate = funcs->get_target_pstate;
 
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 50bf12033bbc080c70b5de72de3f89c2dc8bd3a6..39ac78c94be0f3c191aaf5868cd3c2154ab5fb59 100644
@@ -44,7 +44,6 @@
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
-static unsigned int *core_to_chip_map;
 
 static const char * const throttle_reason[] = {
        "No throttling",
@@ -55,6 +54,16 @@ static const char * const throttle_reason[] = {
        "OCC Reset"
 };
 
+enum throttle_reason_type {
+       NO_THROTTLE = 0,
+       POWERCAP,
+       CPU_OVERTEMP,
+       POWER_SUPPLY_FAILURE,
+       OVERCURRENT,
+       OCC_RESET_THROTTLE,
+       OCC_MAX_REASON
+};
+
 static struct chip {
        unsigned int id;
        bool throttled;
@@ -62,9 +71,13 @@ static struct chip {
        u8 throttle_reason;
        cpumask_t mask;
        struct work_struct throttle;
+       int throttle_turbo;
+       int throttle_sub_turbo;
+       int reason[OCC_MAX_REASON];
 } *chips;
 
 static int nr_chips;
+static DEFINE_PER_CPU(struct chip *, chip_info);
 
 /*
  * Note: The set of pstates consists of contiguous integers, the
@@ -196,6 +209,42 @@ static struct freq_attr *powernv_cpu_freq_attr[] = {
        NULL,
 };
 
+#define throttle_attr(name, member)                                    \
+static ssize_t name##_show(struct cpufreq_policy *policy, char *buf)   \
+{                                                                      \
+       struct chip *chip = per_cpu(chip_info, policy->cpu);            \
+                                                                       \
+       return sprintf(buf, "%u\n", chip->member);                      \
+}                                                                      \
+                                                                       \
+static struct freq_attr throttle_attr_##name = __ATTR_RO(name)         \
+
+throttle_attr(unthrottle, reason[NO_THROTTLE]);
+throttle_attr(powercap, reason[POWERCAP]);
+throttle_attr(overtemp, reason[CPU_OVERTEMP]);
+throttle_attr(supply_fault, reason[POWER_SUPPLY_FAILURE]);
+throttle_attr(overcurrent, reason[OVERCURRENT]);
+throttle_attr(occ_reset, reason[OCC_RESET_THROTTLE]);
+throttle_attr(turbo_stat, throttle_turbo);
+throttle_attr(sub_turbo_stat, throttle_sub_turbo);
+
+static struct attribute *throttle_attrs[] = {
+       &throttle_attr_unthrottle.attr,
+       &throttle_attr_powercap.attr,
+       &throttle_attr_overtemp.attr,
+       &throttle_attr_supply_fault.attr,
+       &throttle_attr_overcurrent.attr,
+       &throttle_attr_occ_reset.attr,
+       &throttle_attr_turbo_stat.attr,
+       &throttle_attr_sub_turbo_stat.attr,
+       NULL,
+};
+
+static const struct attribute_group throttle_attr_grp = {
+       .name   = "throttle_stats",
+       .attrs  = throttle_attrs,
+};
+
 /* Helper routines */
 
 /* Access helpers to power mgt SPR */
@@ -324,34 +373,35 @@ static inline unsigned int get_nominal_index(void)
 
 static void powernv_cpufreq_throttle_check(void *data)
 {
+       struct chip *chip;
        unsigned int cpu = smp_processor_id();
-       unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
        unsigned long pmsr;
-       int pmsr_pmax, i;
+       int pmsr_pmax;
 
        pmsr = get_pmspr(SPRN_PMSR);
-
-       for (i = 0; i < nr_chips; i++)
-               if (chips[i].id == chip_id)
-                       break;
+       chip = this_cpu_read(chip_info);
 
        /* Check for Pmax Capping */
        pmsr_pmax = (s8)PMSR_MAX(pmsr);
        if (pmsr_pmax != powernv_pstate_info.max) {
-               if (chips[i].throttled)
+               if (chip->throttled)
                        goto next;
-               chips[i].throttled = true;
-               if (pmsr_pmax < powernv_pstate_info.nominal)
+               chip->throttled = true;
+               if (pmsr_pmax < powernv_pstate_info.nominal) {
                        pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
-                                    cpu, chips[i].id, pmsr_pmax,
+                                    cpu, chip->id, pmsr_pmax,
                                     powernv_pstate_info.nominal);
-               trace_powernv_throttle(chips[i].id,
-                                     throttle_reason[chips[i].throttle_reason],
+                       chip->throttle_sub_turbo++;
+               } else {
+                       chip->throttle_turbo++;
+               }
+               trace_powernv_throttle(chip->id,
+                                     throttle_reason[chip->throttle_reason],
                                      pmsr_pmax);
-       } else if (chips[i].throttled) {
-               chips[i].throttled = false;
-               trace_powernv_throttle(chips[i].id,
-                                     throttle_reason[chips[i].throttle_reason],
+       } else if (chip->throttled) {
+               chip->throttled = false;
+               trace_powernv_throttle(chip->id,
+                                     throttle_reason[chip->throttle_reason],
                                      pmsr_pmax);
        }
 
@@ -411,6 +461,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
        for (i = 0; i < threads_per_core; i++)
                cpumask_set_cpu(base + i, policy->cpus);
 
+       if (!policy->driver_data) {
+               int ret;
+
+               ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
+               if (ret) {
+                       pr_info("Failed to create throttle stats directory for cpu %d\n",
+                               policy->cpu);
+                       return ret;
+               }
+               /*
+                * policy->driver_data is used as a flag for one-time
+                * creation of throttle sysfs files.
+                */
+               policy->driver_data = policy;
+       }
        return cpufreq_table_validate_and_show(policy, powernv_freqs);
 }
 
@@ -517,8 +582,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
                                break;
 
                if (omsg.throttle_status >= 0 &&
-                   omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
+                   omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) {
                        chips[i].throttle_reason = omsg.throttle_status;
+                       chips[i].reason[omsg.throttle_status]++;
+               }
 
                if (!omsg.throttle_status)
                        chips[i].restore = true;
@@ -558,47 +625,34 @@ static int init_chip_info(void)
        unsigned int chip[256];
        unsigned int cpu, i;
        unsigned int prev_chip_id = UINT_MAX;
-       cpumask_t cpu_mask;
-       int ret = -ENOMEM;
-
-       core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
-                                  GFP_KERNEL);
-       if (!core_to_chip_map)
-               goto out;
 
-       cpumask_copy(&cpu_mask, cpu_possible_mask);
-       for_each_cpu(cpu, &cpu_mask) {
+       for_each_possible_cpu(cpu) {
                unsigned int id = cpu_to_chip_id(cpu);
 
                if (prev_chip_id != id) {
                        prev_chip_id = id;
                        chip[nr_chips++] = id;
                }
-               core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
-               cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
        }
 
        chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
        if (!chips)
-               goto free_chip_map;
+               return -ENOMEM;
 
        for (i = 0; i < nr_chips; i++) {
                chips[i].id = chip[i];
                cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
                INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
+               for_each_cpu(cpu, &chips[i].mask)
+                       per_cpu(chip_info, cpu) =  &chips[i];
        }
 
        return 0;
-free_chip_map:
-       kfree(core_to_chip_map);
-out:
-       return ret;
 }
 
 static inline void clean_chip_info(void)
 {
        kfree(chips);
-       kfree(core_to_chip_map);
 }
 
 static inline void unregister_all_notifiers(void)
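
For reference, here is roughly what one instantiation of the throttle_attr() macro introduced above expands to, using the powercap case:

/* Approximate expansion of throttle_attr(powercap, reason[POWERCAP]): */
static ssize_t powercap_show(struct cpufreq_policy *policy, char *buf)
{
	struct chip *chip = per_cpu(chip_info, policy->cpu);

	return sprintf(buf, "%u\n", chip->reason[POWERCAP]);
}

static struct freq_attr throttle_attr_powercap = __ATTR_RO(powercap);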
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 27fc733cb5b97382b458744432e9c9c348170e84..03d38c291de62d7f6503f13855142fb5948b64a0 100644
@@ -196,7 +196,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static void get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
        int i, divisor;
        unsigned int max, thresh, avg;
@@ -253,9 +253,7 @@ again:
        if (likely(variance <= U64_MAX/36)) {
                if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
                                                        || variance <= 400) {
-                       if (data->next_timer_us > avg)
-                               data->predicted_us = avg;
-                       return;
+                       return avg;
                }
        }
 
@@ -269,7 +267,7 @@ again:
         * with sporadic activity with a bunch of short pauses.
         */
        if ((divisor * 4) <= INTERVALS * 3)
-               return;
+               return UINT_MAX;
 
        thresh = max - 1;
        goto again;
@@ -286,6 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
        int i;
        unsigned int interactivity_req;
+       unsigned int expected_interval;
        unsigned long nr_iowaiters, cpu_load;
 
        if (data->needs_update) {
@@ -312,31 +311,42 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                                         data->correction_factor[data->bucket],
                                         RESOLUTION * DECAY);
 
-       get_typical_interval(data);
-
-       /*
-        * Performance multiplier defines a minimum predicted idle
-        * duration / latency ratio. Adjust the latency limit if
-        * necessary.
-        */
-       interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
-       if (latency_req > interactivity_req)
-               latency_req = interactivity_req;
+       expected_interval = get_typical_interval(data);
+       expected_interval = min(expected_interval, data->next_timer_us);
 
        if (CPUIDLE_DRIVER_STATE_START > 0) {
-               data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
+               struct cpuidle_state *s = &drv->states[CPUIDLE_DRIVER_STATE_START];
+               unsigned int polling_threshold;
+
                /*
                 * We want to default to C1 (hlt), not to busy polling
-                * unless the timer is happening really really soon.
+                * unless the timer is happening really really soon, or
+                * C1's exit latency exceeds the user configured limit.
                 */
-               if (interactivity_req > 20 &&
-                   !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
-                       dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
+               polling_threshold = max_t(unsigned int, 20, s->target_residency);
+               if (data->next_timer_us > polling_threshold &&
+                   latency_req > s->exit_latency && !s->disabled &&
+                   !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable)
                        data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+               else
+                       data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
        } else {
                data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
        }
 
+       /*
+        * Use the lowest expected idle interval to pick the idle state.
+        */
+       data->predicted_us = min(data->predicted_us, expected_interval);
+
+       /*
+        * Use the performance multiplier and the user-configurable
+        * latency_req to determine the maximum exit latency.
+        */
+       interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+       if (latency_req > interactivity_req)
+               latency_req = interactivity_req;
+
        /*
         * Find the idle state with the lowest power while satisfying
         * our constraints.
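
A worked example of the reordered selection logic above, with illustrative numbers:

/*
 * Illustrative numbers only. Suppose get_typical_interval() returns
 * 300 us and data->next_timer_us is 5000 us:
 *
 *   expected_interval  = min(300, 5000) = 300 us
 *   data->predicted_us = min(data->predicted_us, 300)
 *
 * Assuming predicted_us ends up at 300 us and
 * performance_multiplier(nr_iowaiters, cpu_load) evaluates to 10:
 *
 *   interactivity_req = 300 / 10 = 30 us
 *
 * so a user latency limit larger than 30 us is tightened to 30 us
 * before the state table is scanned.
 */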
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 64281bb2f6503c5a63d605ca7249ca86434d5aaf..4de78c552251a5a58803f74957e82f41a76307e8 100644
@@ -61,7 +61,7 @@ config DEVFREQ_GOV_USERSPACE
          Sets the frequency at the user specified one.
          This governor returns the user configured frequency if there
          has been an input to /sys/devices/.../power/devfreq_set_freq.
-         Otherwise, the governor does not change the frequnecy
+         Otherwise, the governor does not change the frequency
          given at the initialization.
 
 comment "DEVFREQ Drivers"
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a6337548d26344b8ccc5cb427ab13dd4ef..ba947df5a8c7d09af2c36a23d5b6a1d5f4df8bb3 100644
@@ -65,7 +65,7 @@
 #include <asm/mwait.h>
 #include <asm/msr.h>
 
-#define INTEL_IDLE_VERSION "0.4"
+#define INTEL_IDLE_VERSION "0.4.1"
 #define PREFIX "intel_idle: "
 
 static struct cpuidle_driver intel_idle_driver = {
@@ -716,6 +716,26 @@ static struct cpuidle_state avn_cstates[] = {
        {
                .enter = NULL }
 };
+static struct cpuidle_state knl_cstates[] = {
+       {
+               .name = "C1-KNL",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00),
+               .exit_latency = 1,
+               .target_residency = 2,
+               .enter = &intel_idle,
+               .enter_freeze = intel_idle_freeze },
+       {
+               .name = "C6-KNL",
+               .desc = "MWAIT 0x10",
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 120,
+               .target_residency = 500,
+               .enter = &intel_idle,
+               .enter_freeze = intel_idle_freeze },
+       {
+               .enter = NULL }
+};
 
 /**
  * intel_idle
@@ -890,6 +910,10 @@ static const struct idle_cpu idle_cpu_avn = {
        .disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_knl = {
+       .state_table = knl_cstates,
+};
+
 #define ICPU(model, cpu) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -921,6 +945,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        ICPU(0x56, idle_cpu_bdw),
        ICPU(0x4e, idle_cpu_skl),
        ICPU(0x5e, idle_cpu_skl),
+       ICPU(0x57, idle_cpu_knl),
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -994,36 +1019,92 @@ static void intel_idle_cpuidle_devices_uninit(void)
 }
 
 /*
- * intel_idle_state_table_update()
+ * ivt_idle_state_table_update(void)
  *
- * Update the default state_table for this CPU-id
- *
- * Currently used to access tuned IVT multi-socket targets
+ * Tune IVT multi-socket targets
  * Assumption: num_sockets == (max_package_num + 1)
  */
-void intel_idle_state_table_update(void)
+static void ivt_idle_state_table_update(void)
 {
        /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
-       if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
-               int cpu, package_num, num_sockets = 1;
-
-               for_each_online_cpu(cpu) {
-                       package_num = topology_physical_package_id(cpu);
-                       if (package_num + 1 > num_sockets) {
-                               num_sockets = package_num + 1;
-
-                               if (num_sockets > 4) {
-                                       cpuidle_state_table = ivt_cstates_8s;
-                                       return;
-                               }
+       int cpu, package_num, num_sockets = 1;
+
+       for_each_online_cpu(cpu) {
+               package_num = topology_physical_package_id(cpu);
+               if (package_num + 1 > num_sockets) {
+                       num_sockets = package_num + 1;
+
+                       if (num_sockets > 4) {
+                               cpuidle_state_table = ivt_cstates_8s;
+                               return;
                        }
                }
+       }
+
+       if (num_sockets > 2)
+               cpuidle_state_table = ivt_cstates_4s;
 
-               if (num_sockets > 2)
-                       cpuidle_state_table = ivt_cstates_4s;
-               /* else, 1 and 2 socket systems use default ivt_cstates */
+       /* else, 1 and 2 socket systems use default ivt_cstates */
+}
+/*
+ * sklh_idle_state_table_update(void)
+ *
+ * On SKL-H (model 0x5e) disable C8 and C9 if:
+ * C10 is enabled and SGX disabled
+ */
+static void sklh_idle_state_table_update(void)
+{
+       unsigned long long msr;
+       unsigned int eax, ebx, ecx, edx;
+
+
+       /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
+       if (max_cstate <= 7)
+               return;
+
+       /* if PC10 not present in CPUID.MWAIT.EDX */
+       if ((mwait_substates & (0xF << 28)) == 0)
+               return;
+
+       rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
+
+       /* PC10 is not enabled in PKG C-state limit */
+       if ((msr & 0xF) != 8)
+               return;
+
+       ecx = 0;
+       cpuid(7, &eax, &ebx, &ecx, &edx);
+
+       /* if SGX is present */
+       if (ebx & (1 << 2)) {
+
+               rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+               /* if SGX is enabled */
+               if (msr & (1 << 18))
+                       return;
+       }
+
+       skl_cstates[5].disabled = 1;    /* C8-SKL */
+       skl_cstates[6].disabled = 1;    /* C9-SKL */
+}
+/*
+ * intel_idle_state_table_update()
+ *
+ * Update the default state_table for this CPU-id
+ */
+
+static void intel_idle_state_table_update(void)
+{
+       switch (boot_cpu_data.x86_model) {
+
+       case 0x3e: /* IVT */
+               ivt_idle_state_table_update();
+               break;
+       case 0x5e: /* SKL-H */
+               sklh_idle_state_table_update();
+               break;
        }
-       return;
 }
 
 /*
@@ -1063,6 +1144,14 @@ static int __init intel_idle_cpuidle_driver_init(void)
                if (num_substates == 0)
                        continue;
 
+               /* if state marked as disabled, skip it */
+               if (cpuidle_state_table[cstate].disabled != 0) {
+                       pr_debug(PREFIX "state %s is disabled\n",
+                               cpuidle_state_table[cstate].name);
+                       continue;
+               }
+
+
                if (((mwait_cstate + 1) > 2) &&
                        !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        mark_tsc_unstable("TSC halts in idle"
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 25266c600021462c8f809a22451231820caf1b6e..308d6044f153b3c2fcfd58b6d6ffa3f5d28fc65a 100644
@@ -42,7 +42,9 @@ extern int pm_clk_create(struct device *dev);
 extern void pm_clk_destroy(struct device *dev);
 extern int pm_clk_add(struct device *dev, const char *con_id);
 extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
+extern int of_pm_clk_add_clks(struct device *dev);
 extern void pm_clk_remove(struct device *dev, const char *con_id);
+extern void pm_clk_remove_clk(struct device *dev, struct clk *clk);
 extern int pm_clk_suspend(struct device *dev);
 extern int pm_clk_resume(struct device *dev);
 #else
@@ -69,11 +71,18 @@ static inline int pm_clk_add_clk(struct device *dev, struct clk *clk)
 {
        return -EINVAL;
 }
+static inline int of_pm_clk_add_clks(struct device *dev)
+{
+       return -EINVAL;
+}
 static inline void pm_clk_remove(struct device *dev, const char *con_id)
 {
 }
 #define pm_clk_suspend NULL
 #define pm_clk_resume  NULL
+static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_CLK
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa0f26b5842659a5c12b794ed8535e77edd44f15..fca9254280ee4fad96bc8f2a61772160975c4cd6 100644
@@ -339,6 +339,7 @@ int hibernation_snapshot(int platform_mode)
        pm_message_t msg;
        int error;
 
+       pm_suspend_clear_flags();
        error = platform_begin(platform_mode);
        if (error)
                goto Close;
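
For context on the pm-sleep changes, below is a sketch of how a driver resume callback could consult the flag that acpi_hibernation_leave() now sets via pm_set_resume_via_firmware(); foo_resume() is hypothetical, while pm_resume_via_firmware() is the existing accessor in <linux/suspend.h>:

#include <linux/device.h>
#include <linux/suspend.h>

/* Hypothetical resume callback checking how the system was woken. */
static int foo_resume(struct device *dev)
{
	/*
	 * True when platform firmware was involved in the wakeup, e.g.
	 * after the flag set in acpi_hibernation_leave() above.
	 */
	if (pm_resume_via_firmware())
		dev_dbg(dev, "resumed via firmware\n");

	return 0;
}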