/*
 * x86_pkg_temp_thermal driver
 * Copyright (c) 2013, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.
 *
 */
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/module.h>
21 #include <linux/init.h>
22 #include <linux/err.h>
23 #include <linux/param.h>
24 #include <linux/device.h>
25 #include <linux/platform_device.h>
26 #include <linux/cpu.h>
27 #include <linux/smp.h>
28 #include <linux/slab.h>
29 #include <linux/pm.h>
30 #include <linux/thermal.h>
31 #include <linux/debugfs.h>
32 #include <asm/cpu_device_id.h>
33 #include <asm/mce.h>
34
/*
 * Rate control delay: the idea is to introduce a debounce effect.
 * This should be long enough to avoid repeated events when a threshold
 * is set to a temperature which is constantly violated, but short
 * enough to still take timely action. The action can be removing the
 * threshold or changing it to the next interesting setting. Based on
 * experiments, a load will produce a significant temperature change
 * roughly every 5 seconds.
 */
/* Default user space notification delay in milliseconds. */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY   5000
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
        "User space notification delay in milli seconds.");

/*
 * Number of trip points in the thermal zone. Currently it can't be more
 * than 2: the MSR only allows setting and getting notifications for 2
 * thresholds. This define enforces that limit in case cpuid reports a
 * wrong number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS     2
57
/* Per-package state, stored in the packages array below. */
struct pkg_device {
        int                             cpu;            /* CPU used for MSR access and work scheduling */
        bool                            work_scheduled; /* true while the notify work is pending */
        u32                             tj_max;         /* max junction temperature in millidegrees C */
        u32                             msr_pkg_therm_low;      /* saved THERM_INTERRUPT MSR, restored on removal */
        u32                             msr_pkg_therm_high;
        struct delayed_work             work;           /* deferred threshold-event handling */
        struct thermal_zone_device      *tzone;
        struct cpumask                  cpumask;        /* online CPUs belonging to this package */
};

static struct thermal_zone_params pkg_temp_tz_params = {
        .no_hwmon       = true,         /* do not expose this zone via hwmon */
};

/* Keep track of how many package pointers we allocated in init() */
static int max_packages __read_mostly;
/* Array of package pointers */
static struct pkg_device **packages;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);

/* The dynamically assigned cpu hotplug state for module_exit() */
static enum cpuhp_state pkg_thermal_hp_state __read_mostly;

/* Debug counters to show using debugfs */
static struct dentry *debugfs;
static unsigned int pkg_interrupt_cnt;
static unsigned int pkg_work_cnt;
89
90 static void pkg_temp_debugfs_init(void)
91 {
92         debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
93
94         debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
95                            &pkg_interrupt_cnt);
96         debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
97                            &pkg_work_cnt);
98 }
99
100 /*
101  * Protection:
102  *
103  * - cpu hotplug: Read serialized by cpu hotplug lock
104  *                Write must hold pkg_temp_lock
105  *
106  * - Other callsites: Must hold pkg_temp_lock
107  */
108 static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
109 {
110         int pkgid = topology_logical_package_id(cpu);
111
112         if (pkgid >= 0 && pkgid < max_packages)
113                 return packages[pkgid];
114         return NULL;
115 }
116
117 /*
118 * tj-max is is interesting because threshold is set relative to this
119 * temperature.
120 */
121 static int get_tj_max(int cpu, u32 *tj_max)
122 {
123         u32 eax, edx, val;
124         int err;
125
126         err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
127         if (err)
128                 return err;
129
130         val = (eax >> 16) & 0xff;
131         *tj_max = val * 1000;
132
133         return val ? 0 : -EINVAL;
134 }
135
136 static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
137 {
138         struct pkg_device *pkgdev = tzd->devdata;
139         u32 eax, edx;
140
141         rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
142         if (eax & 0x80000000) {
143                 *temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
144                 pr_debug("sys_get_curr_temp %d\n", *temp);
145                 return 0;
146         }
147         return -EINVAL;
148 }
149
150 static int sys_get_trip_temp(struct thermal_zone_device *tzd,
151                              int trip, int *temp)
152 {
153         struct pkg_device *pkgdev = tzd->devdata;
154         unsigned long thres_reg_value;
155         u32 mask, shift, eax, edx;
156         int ret;
157
158         if (trip >= MAX_NUMBER_OF_TRIPS)
159                 return -EINVAL;
160
161         if (trip) {
162                 mask = THERM_MASK_THRESHOLD1;
163                 shift = THERM_SHIFT_THRESHOLD1;
164         } else {
165                 mask = THERM_MASK_THRESHOLD0;
166                 shift = THERM_SHIFT_THRESHOLD0;
167         }
168
169         ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
170                            &eax, &edx);
171         if (ret < 0)
172                 return ret;
173
174         thres_reg_value = (eax & mask) >> shift;
175         if (thres_reg_value)
176                 *temp = pkgdev->tj_max - thres_reg_value * 1000;
177         else
178                 *temp = 0;
179         pr_debug("sys_get_trip_temp %d\n", *temp);
180
181         return 0;
182 }
183
184 static int
185 sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
186 {
187         struct pkg_device *pkgdev = tzd->devdata;
188         u32 l, h, mask, shift, intr;
189         int ret;
190
191         if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
192                 return -EINVAL;
193
194         ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
195                            &l, &h);
196         if (ret < 0)
197                 return ret;
198
199         if (trip) {
200                 mask = THERM_MASK_THRESHOLD1;
201                 shift = THERM_SHIFT_THRESHOLD1;
202                 intr = THERM_INT_THRESHOLD1_ENABLE;
203         } else {
204                 mask = THERM_MASK_THRESHOLD0;
205                 shift = THERM_SHIFT_THRESHOLD0;
206                 intr = THERM_INT_THRESHOLD0_ENABLE;
207         }
208         l &= ~mask;
209         /*
210         * When users space sets a trip temperature == 0, which is indication
211         * that, it is no longer interested in receiving notifications.
212         */
213         if (!temp) {
214                 l &= ~intr;
215         } else {
216                 l |= (pkgdev->tj_max - temp)/1000 << shift;
217                 l |= intr;
218         }
219
220         return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
221 }
222
223 static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
224                              enum thermal_trip_type *type)
225 {
226         *type = THERMAL_TRIP_PASSIVE;
227         return 0;
228 }
229
/* Thermal zone callback registry; passed to thermal_zone_device_register() */
static struct thermal_zone_device_ops tzone_ops = {
        .get_temp = sys_get_curr_temp,
        .get_trip_temp = sys_get_trip_temp,
        .get_trip_type = sys_get_trip_type,
        .set_trip_temp = sys_set_trip_temp,
};
237
238 static bool pkg_thermal_rate_control(void)
239 {
240         return true;
241 }
242
243 /* Enable threshold interrupt on local package/cpu */
244 static inline void enable_pkg_thres_interrupt(void)
245 {
246         u8 thres_0, thres_1;
247         u32 l, h;
248
249         rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
250         /* only enable/disable if it had valid threshold value */
251         thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
252         thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
253         if (thres_0)
254                 l |= THERM_INT_THRESHOLD0_ENABLE;
255         if (thres_1)
256                 l |= THERM_INT_THRESHOLD1_ENABLE;
257         wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
258 }
259
260 /* Disable threshold interrupt on local package/cpu */
261 static inline void disable_pkg_thres_interrupt(void)
262 {
263         u32 l, h;
264
265         rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
266
267         l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
268         wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
269 }
270
/*
 * Threshold interrupt bottom half: clears the sticky threshold log bits,
 * re-enables the threshold interrupt (disabled in pkg_thermal_notify())
 * and notifies the thermal core if a threshold actually fired.
 */
static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
        struct thermal_zone_device *tzone = NULL;
        int cpu = smp_processor_id();
        struct pkg_device *pkgdev;
        u64 msr_val, wr_val;

        /* Lock ordering: zone mutex first, then the irq-safe spinlock. */
        mutex_lock(&thermal_zone_mutex);
        spin_lock_irq(&pkg_temp_lock);
        ++pkg_work_cnt;

        /* Package may have gone away via CPU hotplug since scheduling. */
        pkgdev = pkg_temp_thermal_get_dev(cpu);
        if (!pkgdev) {
                spin_unlock_irq(&pkg_temp_lock);
                mutex_unlock(&thermal_zone_mutex);
                return;
        }
        pkgdev->work_scheduled = false;

        /* Clear the threshold log bits; only notify if one was set. */
        rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
        wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
        if (wr_val != msr_val) {
                wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
                tzone = pkgdev->tzone;
        }

        enable_pkg_thres_interrupt();
        spin_unlock_irq(&pkg_temp_lock);

        /*
         * If tzone is not NULL, then thermal_zone_mutex will prevent the
         * concurrent removal in the cpu offline callback.
         */
        if (tzone)
                thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);

        mutex_unlock(&thermal_zone_mutex);
}
309
310 static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
311 {
312         unsigned long ms = msecs_to_jiffies(notify_delay_ms);
313
314         schedule_delayed_work_on(cpu, work, ms);
315 }
316
/*
 * Package threshold interrupt callback, invoked from the thermal vector
 * handler on the interrupted CPU. Masks the threshold interrupt and
 * defers the real handling to the delayed work.
 */
static int pkg_thermal_notify(u64 msr_val)
{
        int cpu = smp_processor_id();
        struct pkg_device *pkgdev;
        unsigned long flags;

        spin_lock_irqsave(&pkg_temp_lock, flags);
        ++pkg_interrupt_cnt;

        /* Keep the interrupt quiet until the work has handled the event. */
        disable_pkg_thres_interrupt();

        /* Work is per package, so scheduling it once is enough. */
        pkgdev = pkg_temp_thermal_get_dev(cpu);
        if (pkgdev && !pkgdev->work_scheduled) {
                pkgdev->work_scheduled = true;
                pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
        }

        spin_unlock_irqrestore(&pkg_temp_lock, flags);
        return 0;
}
338
/*
 * Register a thermal zone for the package of @cpu. Called for the first
 * CPU of a package which comes online.
 */
static int pkg_temp_thermal_device_add(unsigned int cpu)
{
        int pkgid = topology_logical_package_id(cpu);
        u32 tj_max, eax, ebx, ecx, edx;
        struct pkg_device *pkgdev;
        int thres_count, err;

        if (pkgid >= max_packages)
                return -ENOMEM;

        /* CPUID.06H:EBX[2:0] is the number of programmable thresholds. */
        cpuid(6, &eax, &ebx, &ecx, &edx);
        thres_count = ebx & 0x07;
        if (!thres_count)
                return -ENODEV;

        /* The MSR supports at most 2 thresholds; cap whatever cpuid said. */
        thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);

        err = get_tj_max(cpu, &tj_max);
        if (err)
                return err;

        pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
        if (!pkgdev)
                return -ENOMEM;

        INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
        pkgdev->cpu = cpu;
        pkgdev->tj_max = tj_max;
        /* Third argument: writable-trip bitmask, one bit per threshold. */
        pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
                        thres_count,
                        (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
                        pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
        if (IS_ERR(pkgdev->tzone)) {
                err = PTR_ERR(pkgdev->tzone);
                kfree(pkgdev);
                return err;
        }
        /* Store MSR value for package thermal interrupt, to restore at exit */
        rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
              pkgdev->msr_pkg_therm_high);

        cpumask_set_cpu(cpu, &pkgdev->cpumask);
        /* Publish under the lock so interrupt/work see a complete object. */
        spin_lock_irq(&pkg_temp_lock);
        packages[pkgid] = pkgdev;
        spin_unlock_irq(&pkg_temp_lock);
        return 0;
}
386
/*
 * CPU hotplug teardown: migrate the MSR target CPU and, for the last CPU
 * of a package, unregister the zone, restore the saved interrupt MSR and
 * free the package device.
 */
static int pkg_thermal_cpu_offline(unsigned int cpu)
{
        struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
        bool lastcpu, was_target;
        int target;

        if (!pkgdev)
                return 0;

        /* Pick any other online CPU of this package as the new target. */
        target = cpumask_any_but(&pkgdev->cpumask, cpu);
        cpumask_clear_cpu(cpu, &pkgdev->cpumask);
        lastcpu = target >= nr_cpu_ids;
        /*
         * Remove the sysfs files, if this is the last cpu in the package
         * before doing further cleanups.
         */
        if (lastcpu) {
                struct thermal_zone_device *tzone = pkgdev->tzone;

                /*
                 * We must protect against a work function calling
                 * thermal_zone_update, after/while unregister. We null out
                 * the pointer under the zone mutex, so the worker function
                 * won't try to call.
                 */
                mutex_lock(&thermal_zone_mutex);
                pkgdev->tzone = NULL;
                mutex_unlock(&thermal_zone_mutex);

                thermal_zone_device_unregister(tzone);
        }

        /* Protect against work and interrupts */
        spin_lock_irq(&pkg_temp_lock);

        /*
         * Check whether this cpu was the current target and store the new
         * one. When we drop the lock, then the interrupt notify function
         * will see the new target.
         */
        was_target = pkgdev->cpu == cpu;
        pkgdev->cpu = target;

        /*
         * If this is the last CPU in the package remove the package
         * reference from the array and restore the interrupt MSR. When we
         * drop the lock neither the interrupt notify function nor the
         * worker will see the package anymore.
         */
        if (lastcpu) {
                packages[topology_logical_package_id(cpu)] = NULL;
                /* After this point nothing touches the MSR anymore. */
                wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
                      pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
        }

        /*
         * Check whether there is work scheduled and whether the work is
         * targeted at the outgoing CPU.
         */
        if (pkgdev->work_scheduled && was_target) {
                /*
                 * To cancel the work we need to drop the lock, otherwise
                 * we might deadlock if the work needs to be flushed.
                 */
                spin_unlock_irq(&pkg_temp_lock);
                cancel_delayed_work_sync(&pkgdev->work);
                spin_lock_irq(&pkg_temp_lock);
                /*
                 * If this is not the last cpu in the package and the work
                 * did not run after we dropped the lock above, then we
                 * need to reschedule the work, otherwise the interrupt
                 * stays disabled forever.
                 */
                if (!lastcpu && pkgdev->work_scheduled)
                        pkg_thermal_schedule_work(target, &pkgdev->work);
        }

        spin_unlock_irq(&pkg_temp_lock);

        /* Final cleanup if this is the last cpu */
        if (lastcpu)
                kfree(pkgdev);
        return 0;
}
472
473 static int pkg_thermal_cpu_online(unsigned int cpu)
474 {
475         struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
476         struct cpuinfo_x86 *c = &cpu_data(cpu);
477
478         /* Paranoia check */
479         if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
480                 return -ENODEV;
481
482         /* If the package exists, nothing to do */
483         if (pkgdev) {
484                 cpumask_set_cpu(cpu, &pkgdev->cpumask);
485                 return 0;
486         }
487         return pkg_temp_thermal_device_add(cpu);
488 }
489
/* Match any Intel CPU with package thermal status (PTS) support. */
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
        { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
        {}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
495
/*
 * Module init: allocate the package array, install the CPU hotplug
 * callbacks (which create the thermal zones), then hook the package
 * thermal interrupt and create the debugfs counters.
 */
static int __init pkg_temp_thermal_init(void)
{
        int ret;

        if (!x86_match_cpu(pkg_temp_thermal_ids))
                return -ENODEV;

        /* One slot per possible package. */
        max_packages = topology_max_packages();
        packages = kcalloc(max_packages, sizeof(struct pkg_device *),
                           GFP_KERNEL);
        if (!packages)
                return -ENOMEM;

        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
                                pkg_thermal_cpu_online, pkg_thermal_cpu_offline);
        if (ret < 0)
                goto err;

        /* Store the dynamically assigned state for module exit */
        pkg_thermal_hp_state = ret;

        platform_thermal_package_notify = pkg_thermal_notify;
        platform_thermal_package_rate_control = pkg_thermal_rate_control;

         /* Don't care if it fails */
        pkg_temp_debugfs_init();
        return 0;

err:
        kfree(packages);
        return ret;
}
module_init(pkg_temp_thermal_init)
529
/*
 * Module exit: unhook the interrupt callbacks first, then tear down the
 * hotplug state (which unregisters all zones), debugfs and the array.
 */
static void __exit pkg_temp_thermal_exit(void)
{
        platform_thermal_package_notify = NULL;
        platform_thermal_package_rate_control = NULL;

        cpuhp_remove_state(pkg_thermal_hp_state);
        debugfs_remove_recursive(debugfs);
        kfree(packages);
}
module_exit(pkg_temp_thermal_exit)

MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_LICENSE("GPL v2");