Add suspend-related notifications for CPU hotplug
[sfrench/cifs-2.6.git] / drivers / cpufreq / cpufreq.c
index 9bdcdbdcc0ad351a5a4a7e4f86f8febefc6936ef..eb37fba9b7efccf163badc70450aee38873b60d3 100644 (file)
@@ -41,8 +41,67 @@ static struct cpufreq_driver *cpufreq_driver;
 static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
 static DEFINE_SPINLOCK(cpufreq_driver_lock);
 
+/*
+ * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ *   do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ *   the policy altogether (eg. CPU hotplug), will hold this lock in write
+ *   mode before doing so.
+ *
+ * Additional rules:
+ * - All holders of the lock should check to make sure that the CPU they
+ *   are concerned with are online after they get the lock.
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ *   take this sem as top level hotplug notifier handler takes this.
+ */
+static DEFINE_PER_CPU(int, policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
+
+#define lock_policy_rwsem(mode, cpu)                                   \
+int lock_policy_rwsem_##mode                                           \
+(int cpu)                                                              \
+{                                                                      \
+       int policy_cpu = per_cpu(policy_cpu, cpu);                      \
+       BUG_ON(policy_cpu == -1);                                       \
+       down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
+       if (unlikely(!cpu_online(cpu))) {                               \
+               up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
+               return -1;                                              \
+       }                                                               \
+                                                                       \
+       return 0;                                                       \
+}
+
+lock_policy_rwsem(read, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
+
+lock_policy_rwsem(write, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
+
+void unlock_policy_rwsem_read(int cpu)
+{
+       int policy_cpu = per_cpu(policy_cpu, cpu);
+       BUG_ON(policy_cpu == -1);
+       up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
+
+void unlock_policy_rwsem_write(int cpu)
+{
+       int policy_cpu = per_cpu(policy_cpu, cpu);
+       BUG_ON(policy_cpu == -1);
+       up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
+
+
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
+static unsigned int __cpufreq_get(unsigned int cpu);
 static void handle_update(struct work_struct *work);
 
 /**
@@ -415,10 +474,8 @@ static ssize_t store_##file_name                                   \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
-       mutex_lock(&policy->lock);                                      \
        ret = __cpufreq_set_policy(policy, &new_policy);                \
        policy->user_policy.object = policy->object;                    \
-       mutex_unlock(&policy->lock);                                    \
                                                                        \
        return ret ? ret : count;                                       \
 }
@@ -432,7 +489,7 @@ store_one(scaling_max_freq,max);
 static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
                                                        char *buf)
 {
-       unsigned int cur_freq = cpufreq_get(policy->cpu);
+       unsigned int cur_freq = __cpufreq_get(policy->cpu);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
@@ -479,12 +536,10 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
 
        /* Do not use cpufreq_set_policy here or the user_policy.max
           will be wrongly overridden */
-       mutex_lock(&policy->lock);
        ret = __cpufreq_set_policy(policy, &new_policy);
 
        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;
-       mutex_unlock(&policy->lock);
 
        if (ret)
                return ret;
@@ -589,11 +644,17 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;
+
+       if (lock_policy_rwsem_read(policy->cpu) < 0)
+               return -EINVAL;
+
        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;
 
+       unlock_policy_rwsem_read(policy->cpu);
+
        cpufreq_cpu_put(policy);
        return ret;
 }
@@ -607,11 +668,17 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr,
        policy = cpufreq_cpu_get(policy->cpu);
        if (!policy)
                return -EINVAL;
+
+       if (lock_policy_rwsem_write(policy->cpu) < 0)
+               return -EINVAL;
+
        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;
 
+       unlock_policy_rwsem_write(policy->cpu);
+
        cpufreq_cpu_put(policy);
        return ret;
 }
@@ -685,8 +752,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
        policy->cpu = cpu;
        policy->cpus = cpumask_of_cpu(cpu);
 
-       mutex_init(&policy->lock);
-       mutex_lock(&policy->lock);
+       /* Initially set CPU itself as the policy_cpu */
+       per_cpu(policy_cpu, cpu) = cpu;
+       lock_policy_rwsem_write(cpu);
+
        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);
 
@@ -696,9 +765,12 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
        ret = cpufreq_driver->init(policy);
        if (ret) {
                dprintk("initialization failed\n");
-               mutex_unlock(&policy->lock);
+               unlock_policy_rwsem_write(cpu);
                goto err_out;
        }
+       policy->user_policy.min = policy->cpuinfo.min_freq;
+       policy->user_policy.max = policy->cpuinfo.max_freq;
+       policy->user_policy.governor = policy->governor;
 
 #ifdef CONFIG_SMP
        for_each_cpu_mask(j, policy->cpus) {
@@ -710,6 +782,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                 */
                managed_policy = cpufreq_cpu_get(j);
                if (unlikely(managed_policy)) {
+
+                       /* Set proper policy_cpu */
+                       unlock_policy_rwsem_write(cpu);
+                       per_cpu(policy_cpu, cpu) = managed_policy->cpu;
+
+                       if (lock_policy_rwsem_write(cpu) < 0)
+                               goto err_out_driver_exit;
+
                        spin_lock_irqsave(&cpufreq_driver_lock, flags);
                        managed_policy->cpus = policy->cpus;
                        cpufreq_cpu_data[cpu] = managed_policy;
@@ -720,13 +800,13 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                                                &managed_policy->kobj,
                                                "cpufreq");
                        if (ret) {
-                               mutex_unlock(&policy->lock);
+                               unlock_policy_rwsem_write(cpu);
                                goto err_out_driver_exit;
                        }
 
                        cpufreq_debug_enable_ratelimit();
-                       mutex_unlock(&policy->lock);
                        ret = 0;
+                       unlock_policy_rwsem_write(cpu);
                        goto err_out_driver_exit; /* call driver->exit() */
                }
        }
@@ -740,7 +820,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
 
        ret = kobject_register(&policy->kobj);
        if (ret) {
-               mutex_unlock(&policy->lock);
+               unlock_policy_rwsem_write(cpu);
                goto err_out_driver_exit;
        }
        /* set up files for this cpu device */
@@ -755,8 +835,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
 
        spin_lock_irqsave(&cpufreq_driver_lock, flags);
-       for_each_cpu_mask(j, policy->cpus)
+       for_each_cpu_mask(j, policy->cpus) {
                cpufreq_cpu_data[j] = policy;
+               per_cpu(policy_cpu, j) = policy->cpu;
+       }
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
        /* symlink affected CPUs */
@@ -772,17 +854,20 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
                ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret) {
-                       mutex_unlock(&policy->lock);
+                       unlock_policy_rwsem_write(cpu);
                        goto err_out_unregister;
                }
        }
 
        policy->governor = NULL; /* to assure that the starting sequence is
                                  * run in cpufreq_set_policy */
-       mutex_unlock(&policy->lock);
 
        /* set default policy */
-       ret = cpufreq_set_policy(&new_policy);
+       ret = __cpufreq_set_policy(policy, &new_policy);
+       policy->user_policy.policy = policy->policy;
+
+       unlock_policy_rwsem_write(cpu);
+
        if (ret) {
                dprintk("setting policy failed\n");
                goto err_out_unregister;
@@ -820,11 +905,13 @@ module_out:
 
 
 /**
- * cpufreq_remove_dev - remove a CPU device
+ * __cpufreq_remove_dev - remove a CPU device
  *
  * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
  */
-static int cpufreq_remove_dev (struct sys_device * sys_dev)
+static int __cpufreq_remove_dev (struct sys_device * sys_dev)
 {
        unsigned int cpu = sys_dev->id;
        unsigned long flags;
@@ -843,6 +930,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        if (!data) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return -EINVAL;
        }
        cpufreq_cpu_data[cpu] = NULL;
@@ -859,6 +947,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
                sysfs_remove_link(&sys_dev->kobj, "cpufreq");
                cpufreq_cpu_put(data);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return 0;
        }
 #endif
@@ -867,6 +956,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        if (!kobject_get(&data->kobj)) {
                spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
                cpufreq_debug_enable_ratelimit();
+               unlock_policy_rwsem_write(cpu);
                return -EFAULT;
        }
 
@@ -900,10 +990,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
        spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 #endif
 
-       mutex_lock(&data->lock);
        if (cpufreq_driver->target)
                __cpufreq_governor(data, CPUFREQ_GOV_STOP);
-       mutex_unlock(&data->lock);
+
+       unlock_policy_rwsem_write(cpu);
 
        kobject_unregister(&data->kobj);
 
@@ -927,6 +1017,22 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
 }
 
 
+static int cpufreq_remove_dev (struct sys_device * sys_dev)
+{
+       unsigned int cpu = sys_dev->id;
+       int retval;
+
+       if (cpu_is_offline(cpu))
+               return 0;
+
+       if (unlikely(lock_policy_rwsem_write(cpu)))
+               BUG();
+
+       retval = __cpufreq_remove_dev(sys_dev);
+       return retval;
+}
+
+
 static void handle_update(struct work_struct *work)
 {
        struct cpufreq_policy *policy =
@@ -974,9 +1080,12 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
        unsigned int ret_freq = 0;
 
        if (policy) {
-               mutex_lock(&policy->lock);
+               if (unlikely(lock_policy_rwsem_read(cpu)))
+                       return ret_freq;
+
                ret_freq = policy->cur;
-               mutex_unlock(&policy->lock);
+
+               unlock_policy_rwsem_read(cpu);
                cpufreq_cpu_put(policy);
        }
 
@@ -985,24 +1094,13 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
 EXPORT_SYMBOL(cpufreq_quick_get);
 
 
-/**
- * cpufreq_get - get the current CPU frequency (in kHz)
- * @cpu: CPU number
- *
- * Get the CPU current (static) CPU frequency
- */
-unsigned int cpufreq_get(unsigned int cpu)
+static unsigned int __cpufreq_get(unsigned int cpu)
 {
-       struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+       struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
        unsigned int ret_freq = 0;
 
-       if (!policy)
-               return 0;
-
        if (!cpufreq_driver->get)
-               goto out;
-
-       mutex_lock(&policy->lock);
+               return (ret_freq);
 
        ret_freq = cpufreq_driver->get(cpu);
 
@@ -1016,11 +1114,33 @@ unsigned int cpufreq_get(unsigned int cpu)
                }
        }
 
-       mutex_unlock(&policy->lock);
+       return (ret_freq);
+}
 
-out:
-       cpufreq_cpu_put(policy);
+/**
+ * cpufreq_get - get the current CPU frequency (in kHz)
+ * @cpu: CPU number
+ *
+ * Get the CPU current (static) CPU frequency
+ */
+unsigned int cpufreq_get(unsigned int cpu)
+{
+       unsigned int ret_freq = 0;
+       struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+       if (!policy)
+               goto out;
 
+       if (unlikely(lock_policy_rwsem_read(cpu)))
+               goto out_policy;
+
+       ret_freq = __cpufreq_get(cpu);
+
+       unlock_policy_rwsem_read(cpu);
+
+out_policy:
+       cpufreq_cpu_put(policy);
+out:
        return (ret_freq);
 }
 EXPORT_SYMBOL(cpufreq_get);
@@ -1297,18 +1417,19 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
        if (!policy)
                return -EINVAL;
 
-       mutex_lock(&policy->lock);
+       if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+               return -EINVAL;
 
        ret = __cpufreq_driver_target(policy, target_freq, relation);
 
-       mutex_unlock(&policy->lock);
+       unlock_policy_rwsem_write(policy->cpu);
 
        cpufreq_cpu_put(policy);
        return ret;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
-int cpufreq_driver_getavg(struct cpufreq_policy *policy)
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
 {
        int ret = 0;
 
@@ -1316,17 +1437,13 @@ int cpufreq_driver_getavg(struct cpufreq_policy *policy)
        if (!policy)
                return -EINVAL;
 
-       mutex_lock(&policy->lock);
-
        if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
                ret = cpufreq_driver->getavg(policy->cpu);
 
-       mutex_unlock(&policy->lock);
-
        cpufreq_cpu_put(policy);
        return ret;
 }
-EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
+EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
 
 /*
  * when "event" is CPUFREQ_GOV_LIMITS
@@ -1410,9 +1527,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
        if (!cpu_policy)
                return -EINVAL;
 
-       mutex_lock(&cpu_policy->lock);
        memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
-       mutex_unlock(&cpu_policy->lock);
 
        cpufreq_cpu_put(cpu_policy);
        return 0;
@@ -1510,42 +1625,6 @@ error_out:
        return ret;
 }
 
-/**
- *     cpufreq_set_policy - set a new CPUFreq policy
- *     @policy: policy to be set.
- *
- *     Sets a new CPU frequency and voltage scaling policy.
- */
-int cpufreq_set_policy(struct cpufreq_policy *policy)
-{
-       int ret = 0;
-       struct cpufreq_policy *data;
-
-       if (!policy)
-               return -EINVAL;
-
-       data = cpufreq_cpu_get(policy->cpu);
-       if (!data)
-               return -EINVAL;
-
-       /* lock this CPU */
-       mutex_lock(&data->lock);
-
-       ret = __cpufreq_set_policy(data, policy);
-       data->user_policy.min = data->min;
-       data->user_policy.max = data->max;
-       data->user_policy.policy = data->policy;
-       data->user_policy.governor = data->governor;
-
-       mutex_unlock(&data->lock);
-
-       cpufreq_cpu_put(data);
-
-       return ret;
-}
-EXPORT_SYMBOL(cpufreq_set_policy);
-
-
 /**
  *     cpufreq_update_policy - re-evaluate an existing cpufreq policy
  *     @cpu: CPU which shall be re-evaluated
@@ -1562,7 +1641,8 @@ int cpufreq_update_policy(unsigned int cpu)
        if (!data)
                return -ENODEV;
 
-       mutex_lock(&data->lock);
+       if (unlikely(lock_policy_rwsem_write(cpu)))
+               return -EINVAL;
 
        dprintk("updating policy for CPU %u\n", cpu);
        memcpy(&policy, data, sizeof(struct cpufreq_policy));
@@ -1587,7 +1667,8 @@ int cpufreq_update_policy(unsigned int cpu)
 
        ret = __cpufreq_set_policy(data, &policy);
 
-       mutex_unlock(&data->lock);
+       unlock_policy_rwsem_write(cpu);
+
        cpufreq_cpu_put(data);
        return ret;
 }
@@ -1597,31 +1678,31 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
        unsigned int cpu = (unsigned long)hcpu;
-       struct cpufreq_policy *policy;
        struct sys_device *sys_dev;
+       struct cpufreq_policy *policy;
 
        sys_dev = get_cpu_sysdev(cpu);
-
        if (sys_dev) {
                switch (action) {
                case CPU_ONLINE:
+               case CPU_ONLINE_FROZEN:
                        cpufreq_add_dev(sys_dev);
                        break;
                case CPU_DOWN_PREPARE:
-                       /*
-                        * We attempt to put this cpu in lowest frequency
-                        * possible before going down. This will permit
-                        * hardware-managed P-State to switch other related
-                        * threads to min or higher speeds if possible.
-                        */
+               case CPU_DOWN_PREPARE_FROZEN:
+                       if (unlikely(lock_policy_rwsem_write(cpu)))
+                               BUG();
+
                        policy = cpufreq_cpu_data[cpu];
                        if (policy) {
-                               cpufreq_driver_target(policy, policy->min,
+                               __cpufreq_driver_target(policy, policy->min,
                                                CPUFREQ_RELATION_H);
                        }
+                       __cpufreq_remove_dev(sys_dev);
                        break;
-               case CPU_DEAD:
-                       cpufreq_remove_dev(sys_dev);
+               case CPU_DOWN_FAILED:
+               case CPU_DOWN_FAILED_FROZEN:
+                       cpufreq_add_dev(sys_dev);
                        break;
                }
        }
@@ -1735,3 +1816,16 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
        return 0;
 }
 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
+
+static int __init cpufreq_core_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               per_cpu(policy_cpu, cpu) = -1;
+               init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
+       }
+       return 0;
+}
+
+core_initcall(cpufreq_core_init);