e34af7772a666427fa34c5673a478a1cf2e6f350
[sfrench/cifs-2.6.git] / drivers/acpi/processor_idle.c
1 /*
2  * processor_idle - idle state submodule to the ACPI processor driver
3  *
4  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6  *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
7  *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8  *                      - Added processor hotplug support
9  *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
10  *                      - Added support for C3 on SMP
11  *
12  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13  *
14  *  This program is free software; you can redistribute it and/or modify
15  *  it under the terms of the GNU General Public License as published by
16  *  the Free Software Foundation; either version 2 of the License, or (at
17  *  your option) any later version.
18  *
19  *  This program is distributed in the hope that it will be useful, but
20  *  WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  *  General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License along
25  *  with this program; if not, write to the Free Software Foundation, Inc.,
26  *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
27  *
28  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29  */
30
31 #include <linux/kernel.h>
32 #include <linux/module.h>
33 #include <linux/init.h>
34 #include <linux/cpufreq.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/acpi.h>
38 #include <linux/dmi.h>
39 #include <linux/moduleparam.h>
40 #include <linux/sched.h>        /* need_resched() */
41 #include <linux/latency.h>
42
43 /*
44  * Include the apic definitions for x86 to have the APIC timer related defines
45  * available also for UP (on SMP it gets magically included via linux/smp.h).
46  * asm/acpi.h is not an option, as it would require more include magic. Also
47  * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
48  */
49 #ifdef CONFIG_X86
50 #include <asm/apic.h>
51 #endif
52
53 #include <asm/io.h>
54 #include <asm/uaccess.h>
55
56 #include <acpi/acpi_bus.h>
57 #include <acpi/processor.h>
58
59 #define ACPI_PROCESSOR_COMPONENT        0x01000000
60 #define ACPI_PROCESSOR_CLASS            "processor"
61 #define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
62 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
63 ACPI_MODULE_NAME("acpi_processor")
64 #define ACPI_PROCESSOR_FILE_POWER       "power"
65 #define US_TO_PM_TIMER_TICKS(t)         ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
66 #define C2_OVERHEAD                     4       /* 1us (3.579 ticks per us) */
67 #define C3_OVERHEAD                     4       /* 1us (3.579 ticks per us) */
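/*
 * Rough arithmetic behind these constants: the ACPI PM timer runs at
 * PM_TIMER_FREQUENCY (3.579545 MHz), so US_TO_PM_TIMER_TICKS(100) is
 * (100 * 3579) / 1000 = 357 ticks, and the 4-tick C2/C3 overhead above
 * corresponds to a little over one microsecond.
 */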
68 static void (*pm_idle_save) (void) __read_mostly;
69 module_param(max_cstate, uint, 0644);
70
71 static unsigned int nocst __read_mostly;
72 module_param(nocst, uint, 0000);
73
74 /*
75  * bm_history -- bit-mask with a bit per jiffy of bus-master activity
76  * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
77  * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
78  * 100 HZ: 0x0000000F: 4 jiffies = 40ms
79  * reduce history for more aggressive entry into C3
80  */
81 static unsigned int bm_history __read_mostly =
82     (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
83 module_param(bm_history, uint, 0644);
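/*
 * For example, with HZ=250 the expression above yields (1U << 10) - 1 =
 * 0x3FF, i.e. 10 jiffies = 40ms of bus-master history, while any HZ of
 * 800 or more keeps the full 32-bit (32 jiffy) window.
 */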
84 /* --------------------------------------------------------------------------
85                                 Power Management
86    -------------------------------------------------------------------------- */
87
88 /*
89  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
90  * For now disable this. Probably a bug somewhere else.
91  *
92  * To skip this limit, boot/load with a large max_cstate limit.
93  */
94 static int set_max_cstate(struct dmi_system_id *id)
95 {
96         if (max_cstate > ACPI_PROCESSOR_MAX_POWER)
97                 return 0;
98
99         printk(KERN_NOTICE PREFIX "%s detected - limiting to C%ld max_cstate."
100                " Override with \"processor.max_cstate=%d\"\n", id->ident,
101                (long)id->driver_data, ACPI_PROCESSOR_MAX_POWER + 1);
102
103         max_cstate = (long)id->driver_data;
104
105         return 0;
106 }
107
108 /* Actually this shouldn't be __cpuinitdata, would be better to fix the
109    callers to only run once -AK */
110 static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
111         { set_max_cstate, "IBM ThinkPad R40e", {
112           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
113           DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
114         { set_max_cstate, "IBM ThinkPad R40e", {
115           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
116           DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
117         { set_max_cstate, "IBM ThinkPad R40e", {
118           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
119           DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void*)1},
120         { set_max_cstate, "IBM ThinkPad R40e", {
121           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
122           DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void*)1},
123         { set_max_cstate, "IBM ThinkPad R40e", {
124           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
125           DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void*)1},
126         { set_max_cstate, "IBM ThinkPad R40e", {
127           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
128           DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void*)1},
129         { set_max_cstate, "IBM ThinkPad R40e", {
130           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
131           DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void*)1},
132         { set_max_cstate, "IBM ThinkPad R40e", {
133           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
134           DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void*)1},
135         { set_max_cstate, "IBM ThinkPad R40e", {
136           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
137           DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void*)1},
138         { set_max_cstate, "IBM ThinkPad R40e", {
139           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
140           DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void*)1},
141         { set_max_cstate, "IBM ThinkPad R40e", {
142           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
143           DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void*)1},
144         { set_max_cstate, "IBM ThinkPad R40e", {
145           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
146           DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void*)1},
147         { set_max_cstate, "IBM ThinkPad R40e", {
148           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
149           DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void*)1},
150         { set_max_cstate, "IBM ThinkPad R40e", {
151           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
152           DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void*)1},
153         { set_max_cstate, "IBM ThinkPad R40e", {
154           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
155           DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void*)1},
156         { set_max_cstate, "IBM ThinkPad R40e", {
157           DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
158           DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void*)1},
159         { set_max_cstate, "Medion 41700", {
160           DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
161           DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
162         { set_max_cstate, "Clevo 5600D", {
163           DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
164           DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
165          (void *)2},
166         {},
167 };
168
169 static inline u32 ticks_elapsed(u32 t1, u32 t2)
170 {
171         if (t2 >= t1)
172                 return (t2 - t1);
173         else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
174                 return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
175         else
176                 return ((0xFFFFFFFF - t1) + t2);
177 }
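/*
 * Wrap-around example for the 24-bit branch above: with t1 = 0x00FFFF00
 * and t2 = 0x00000100 the function returns ((0x00FFFFFF - t1) + t2)
 * & 0x00FFFFFF = 0x1FF ticks, roughly 143 microseconds at the PM timer
 * rate.
 */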
178
179 static void
180 acpi_processor_power_activate(struct acpi_processor *pr,
181                               struct acpi_processor_cx *new)
182 {
183         struct acpi_processor_cx *old;
184
185         if (!pr || !new)
186                 return;
187
188         old = pr->power.state;
189
190         if (old)
191                 old->promotion.count = 0;
192         new->demotion.count = 0;
193
194         /* Cleanup from old state. */
195         if (old) {
196                 switch (old->type) {
197                 case ACPI_STATE_C3:
198                         /* Disable bus master reload */
199                         if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
200                                 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
201                         break;
202                 }
203         }
204
205         /* Prepare to use new state. */
206         switch (new->type) {
207         case ACPI_STATE_C3:
208                 /* Enable bus master reload */
209                 if ((!old || old->type != ACPI_STATE_C3) && pr->flags.bm_check)
210                         acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
211                 break;
212         }
213
214         pr->power.state = new;
215
216         return;
217 }
218
219 static void acpi_safe_halt(void)
220 {
221         current_thread_info()->status &= ~TS_POLLING;
222         /*
223          * TS_POLLING-cleared state must be visible before we
224          * test NEED_RESCHED:
225          */
226         smp_mb();
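        /*
         * Clearing TS_POLLING first matters because the scheduler's wakeup
         * path skips the reschedule IPI for CPUs it still sees as polling;
         * without the barrier and the re-test below, a wakeup arriving in
         * that window could leave this CPU halted with work pending.
         */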
227         if (!need_resched())
228                 safe_halt();
229         current_thread_info()->status |= TS_POLLING;
230 }
231
232 static atomic_t c3_cpu_count;
233
234 /* Common C-state entry for C2, C3, .. */
235 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
236 {
237         if (cstate->space_id == ACPI_CSTATE_FFH) {
238                 /* Call into architectural FFH based C-state */
239                 acpi_processor_ffh_cstate_enter(cstate);
240         } else {
241                 int unused;
242                 /* IO port based C-state */
243                 inb(cstate->address);
244                 /* Dummy wait op - must do something useless after P_LVL2 read
245                    because chipsets cannot guarantee that STPCLK# signal
246                    gets asserted in time to freeze execution properly. */
247                 unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
248         }
249 }
250
251 static void acpi_processor_idle(void)
252 {
253         struct acpi_processor *pr = NULL;
254         struct acpi_processor_cx *cx = NULL;
255         struct acpi_processor_cx *next_state = NULL;
256         int sleep_ticks = 0;
257         u32 t1, t2 = 0;
258
259         pr = processors[smp_processor_id()];
260         if (!pr)
261                 return;
262
263         /*
264          * Interrupts must be disabled during bus mastering calculations and
265          * for C2/C3 transitions.
266          */
267         local_irq_disable();
268
269         /*
270          * Check whether we truly need to go idle, or should
271          * reschedule:
272          */
273         if (unlikely(need_resched())) {
274                 local_irq_enable();
275                 return;
276         }
277
278         cx = pr->power.state;
279         if (!cx) {
280                 if (pm_idle_save)
281                         pm_idle_save();
282                 else
283                         acpi_safe_halt();
284                 return;
285         }
286
287         /*
288          * Check BM Activity
289          * -----------------
290          * Check for bus mastering activity (if required), record, and check
291          * for demotion.
292          */
293         if (pr->flags.bm_check) {
294                 u32 bm_status = 0;
295                 unsigned long diff = jiffies - pr->power.bm_check_timestamp;
296
297                 if (diff > 31)
298                         diff = 31;
299
300                 pr->power.bm_activity <<= diff;
301
302                 acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
303                 if (bm_status) {
304                         pr->power.bm_activity |= 0x1;
305                         acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
306                 }
307                 /*
308                  * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
309                  * the true state of bus mastering activity, forcing us to
310                  * manually check the BMIDEA bit of each IDE channel.
311                  */
312                 else if (errata.piix4.bmisx) {
313                         if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
314                             || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
315                                 pr->power.bm_activity |= 0x1;
316                 }
317
318                 pr->power.bm_check_timestamp = jiffies;
319
320                 /*
321                  * If bus mastering is or was active this jiffy, demote
322                  * to avoid a faulty transition.  Note that the processor
323                  * won't enter a low-power state during this call (to this
324                  * function) but should upon the next.
325                  *
326                  * TBD: A better policy might be to fall back to the demotion
327                  *      state (use it for this quantum only) instead of
328                  *      demoting -- and rely on duration as our sole demotion
329                  *      qualification.  This may, however, introduce DMA
330                  *      issues (e.g. floppy DMA transfer overrun/underrun).
331                  */
332                 if ((pr->power.bm_activity & 0x1) &&
333                     cx->demotion.threshold.bm) {
334                         local_irq_enable();
335                         next_state = cx->demotion.state;
336                         goto end;
337                 }
338         }
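        /*
         * bm_activity is thus a sliding per-jiffy history: each pass shifts
         * it left by the number of jiffies elapsed and sets bit 0 if bus
         * mastering was seen this jiffy.  The demotion check above looks
         * only at bit 0, while the promotion logic further down consults
         * the whole window through promotion.threshold.bm (bm_history).
         */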
339
340 #ifdef CONFIG_HOTPLUG_CPU
341         /*
342          * Check for P_LVL2_UP flag before entering C2 and above on
343          * an SMP system. We do it here instead of doing it at _CST/P_LVL
344          * detection phase, to work cleanly with logical CPU hotplug.
345          */
346         if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && 
347             !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
348                 cx = &pr->power.states[ACPI_STATE_C1];
349 #endif
350
351         /*
352          * Sleep:
353          * ------
354          * Invoke the current Cx state to put the processor to sleep.
355          */
356         if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
357                 current_thread_info()->status &= ~TS_POLLING;
358                 /*
359                  * TS_POLLING-cleared state must be visible before we
360                  * test NEED_RESCHED:
361                  */
362                 smp_mb();
363                 if (need_resched()) {
364                         current_thread_info()->status |= TS_POLLING;
365                         local_irq_enable();
366                         return;
367                 }
368         }
369
370         switch (cx->type) {
371
372         case ACPI_STATE_C1:
373                 /*
374                  * Invoke C1.
375                  * Use the appropriate idle routine, the one that would
376                  * be used without acpi C-states.
377                  */
378                 if (pm_idle_save)
379                         pm_idle_save();
380                 else
381                         acpi_safe_halt();
382
383                 /*
384                  * TBD: Can't get time duration while in C1, as resumes
385                  *      go to an ISR rather than here.  Need to instrument
386                  *      base interrupt handler.
387                  */
388                 sleep_ticks = 0xFFFFFFFF;
389                 break;
390
391         case ACPI_STATE_C2:
392                 /* Get start time (ticks) */
393                 t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
394                 /* Invoke C2 */
395                 acpi_cstate_enter(cx);
396                 /* Get end time (ticks) */
397                 t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
398
399 #ifdef CONFIG_GENERIC_TIME
400                 /* TSC halts in C2, so notify users */
401                 mark_tsc_unstable();
402 #endif
403                 /* Re-enable interrupts */
404                 local_irq_enable();
405                 current_thread_info()->status |= TS_POLLING;
406                 /* Compute time (ticks) that we were actually asleep */
407                 sleep_ticks =
408                     ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
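                /*
                 * For instance, a 500-tick residency in a C2 state with a
                 * 100us (357-tick) latency leaves 500 - 357 - 4 = 139 ticks
                 * (about 39us) credited as time actually spent asleep.
                 */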
409                 break;
410
411         case ACPI_STATE_C3:
412
413                 if (pr->flags.bm_check) {
414                         if (atomic_inc_return(&c3_cpu_count) ==
415                             num_online_cpus()) {
416                                 /*
417                                  * All CPUs are trying to go to C3
418                                  * Disable bus master arbitration
419                                  */
420                                 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
421                         }
422                 } else {
423                         /* SMP with no shared cache... Invalidate cache  */
424                         ACPI_FLUSH_CPU_CACHE();
425                 }
426
427                 /* Get start time (ticks) */
428                 t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
429                 /* Invoke C3 */
430                 acpi_cstate_enter(cx);
431                 /* Get end time (ticks) */
432                 t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
433                 if (pr->flags.bm_check) {
434                         /* Enable bus master arbitration */
435                         atomic_dec(&c3_cpu_count);
436                         acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
437                 }
438
439 #ifdef CONFIG_GENERIC_TIME
440                 /* TSC halts in C3, so notify users */
441                 mark_tsc_unstable();
442 #endif
443                 /* Re-enable interrupts */
444                 local_irq_enable();
445                 current_thread_info()->status |= TS_POLLING;
446                 /* Compute time (ticks) that we were actually asleep */
447                 sleep_ticks =
448                     ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
449                 break;
450
451         default:
452                 local_irq_enable();
453                 return;
454         }
455         cx->usage++;
456         if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
457                 cx->time += sleep_ticks;
458
459         next_state = pr->power.state;
460
461 #ifdef CONFIG_HOTPLUG_CPU
462         /* Don't do promotion/demotion */
463         if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
464             !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
465                 next_state = cx;
466                 goto end;
467         }
468 #endif
469
470         /*
471          * Promotion?
472          * ----------
473          * Track the number of longs (time asleep is greater than threshold)
474          * and promote when the count threshold is reached.  Note that bus
475          * mastering activity may prevent promotions.
476          * Do not promote above max_cstate.
477          */
478         if (cx->promotion.state &&
479             ((cx->promotion.state - pr->power.states) <= max_cstate)) {
480                 if (sleep_ticks > cx->promotion.threshold.ticks &&
481                   cx->promotion.state->latency <= system_latency_constraint()) {
482                         cx->promotion.count++;
483                         cx->demotion.count = 0;
484                         if (cx->promotion.count >=
485                             cx->promotion.threshold.count) {
486                                 if (pr->flags.bm_check) {
487                                         if (!(pr->power.bm_activity &
488                                               cx->promotion.threshold.bm)) {
490                                                 next_state =
491                                                     cx->promotion.state;
492                                                 goto end;
493                                         }
494                                 } else {
495                                         next_state = cx->promotion.state;
496                                         goto end;
497                                 }
498                         }
499                 }
500         }
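        /*
         * With the default policy built in acpi_processor_set_power_policy()
         * below, this typically means: promote from C2 to C3 after 4
         * consecutive sleeps longer than C2's own latency, provided C3's
         * latency fits the system latency constraint and, when bm_check is
         * set, no bus-master activity fell within the bm_history window.
         */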
501
502         /*
503          * Demotion?
504          * ---------
505          * Track the number of shorts (time asleep is less than time threshold)
506          * and demote when the usage threshold is reached.
507          */
508         if (cx->demotion.state) {
509                 if (sleep_ticks < cx->demotion.threshold.ticks) {
510                         cx->demotion.count++;
511                         cx->promotion.count = 0;
512                         if (cx->demotion.count >= cx->demotion.threshold.count) {
513                                 next_state = cx->demotion.state;
514                                 goto end;
515                         }
516                 }
517         }
518
519       end:
520         /*
521          * Demote if current state exceeds max_cstate
522          * or if the latency of the current state is unacceptable
523          */
524         if ((pr->power.state - pr->power.states) > max_cstate ||
525                 pr->power.state->latency > system_latency_constraint()) {
526                 if (cx->demotion.state)
527                         next_state = cx->demotion.state;
528         }
529
530         /*
531          * New Cx State?
532          * -------------
533          * If we're going to start using a new Cx state we must clean up
534          * from the previous and prepare to use the new.
535          */
536         if (next_state != pr->power.state)
537                 acpi_processor_power_activate(pr, next_state);
538 }
539
540 static int acpi_processor_set_power_policy(struct acpi_processor *pr)
541 {
542         unsigned int i;
543         unsigned int state_is_set = 0;
544         struct acpi_processor_cx *lower = NULL;
545         struct acpi_processor_cx *higher = NULL;
546         struct acpi_processor_cx *cx;
547
548
549         if (!pr)
550                 return -EINVAL;
551
552         /*
553          * This function sets the default Cx state policy (OS idle handler).
554          * Our scheme is to promote quickly to C2 but more conservatively
555          * to C3.  We're favoring C2  for its characteristics of low latency
556          * (quick response), good power savings, and ability to allow bus
557          * mastering activity.  Note that the Cx state policy is completely
558          * customizable and can be altered dynamically.
559          */
560
561         /* startup state */
562         for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
563                 cx = &pr->power.states[i];
564                 if (!cx->valid)
565                         continue;
566
567                 if (!state_is_set)
568                         pr->power.state = cx;
569                 state_is_set++;
570                 break;
571         }
572
573         if (!state_is_set)
574                 return -ENODEV;
575
576         /* demotion */
577         for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
578                 cx = &pr->power.states[i];
579                 if (!cx->valid)
580                         continue;
581
582                 if (lower) {
583                         cx->demotion.state = lower;
584                         cx->demotion.threshold.ticks = cx->latency_ticks;
585                         cx->demotion.threshold.count = 1;
586                         if (cx->type == ACPI_STATE_C3)
587                                 cx->demotion.threshold.bm = bm_history;
588                 }
589
590                 lower = cx;
591         }
592
593         /* promotion */
594         for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) {
595                 cx = &pr->power.states[i];
596                 if (!cx->valid)
597                         continue;
598
599                 if (higher) {
600                         cx->promotion.state = higher;
601                         cx->promotion.threshold.ticks = cx->latency_ticks;
602                         if (cx->type >= ACPI_STATE_C2)
603                                 cx->promotion.threshold.count = 4;
604                         else
605                                 cx->promotion.threshold.count = 10;
606                         if (higher->type == ACPI_STATE_C3)
607                                 cx->promotion.threshold.bm = bm_history;
608                 }
609
610                 higher = cx;
611         }
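        /*
         * The net effect is a chain of valid states in which each state
         * demotes to the next shallower one after a single too-short sleep
         * and promotes to the next deeper one after 4 sufficiently long
         * sleeps (10 when starting from C1).
         */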
612
613         return 0;
614 }
615
616 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
617 {
618
619         if (!pr)
620                 return -EINVAL;
621
622         if (!pr->pblk)
623                 return -ENODEV;
624
625         /* if info is obtained from pblk/fadt, type equals state */
626         pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
627         pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
628
629 #ifndef CONFIG_HOTPLUG_CPU
630         /*
631          * Check for P_LVL2_UP flag before entering C2 and above on
632          * an SMP system. 
633          */
634         if ((num_online_cpus() > 1) &&
635             !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
636                 return -ENODEV;
637 #endif
638
639         /* determine C2 and C3 address from pblk */
640         pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4;
641         pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;
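        /*
         * P_BLK layout per the ACPI specification: P_CNT occupies the first
         * four bytes, so pblk + 4 and pblk + 5 above address the one-byte
         * P_LVL2 and P_LVL3 registers respectively.
         */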
642
643         /* determine latencies from FADT */
644         pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
645         pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;
646
647         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
648                           "lvl2[0x%08x] lvl3[0x%08x]\n",
649                           pr->power.states[ACPI_STATE_C2].address,
650                           pr->power.states[ACPI_STATE_C3].address));
651
652         return 0;
653 }
654
655 static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
656 {
657         if (!pr->power.states[ACPI_STATE_C1].valid) {
658                 /* set the first C-State to C1 */
659                 /* all processors need to support C1 */
660                 pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
661                 pr->power.states[ACPI_STATE_C1].valid = 1;
662         }
663         /* the C0 state only exists as a filler in our array */
664         pr->power.states[ACPI_STATE_C0].valid = 1;
665         return 0;
666 }
667
668 static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
669 {
670         acpi_status status = 0;
671         acpi_integer count;
672         int current_count;
673         int i;
674         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
675         union acpi_object *cst;
676
677
678         if (nocst)
679                 return -ENODEV;
680
681         current_count = 0;
682
683         status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
684         if (ACPI_FAILURE(status)) {
685                 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
686                 return -ENODEV;
687         }
688
689         cst = buffer.pointer;
690
691         /* There must be at least 2 elements */
692         if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
693                 printk(KERN_ERR PREFIX "not enough elements in _CST\n");
694                 status = -EFAULT;
695                 goto end;
696         }
697
698         count = cst->package.elements[0].integer.value;
699
700         /* Validate number of power states. */
701         if (count < 1 || count != cst->package.count - 1) {
702                 printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
703                 status = -EFAULT;
704                 goto end;
705         }
706
707         /* Tell driver that at least _CST is supported. */
708         pr->flags.has_cst = 1;
709
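        /*
         * Each remaining _CST element is expected to be a 4-entry package:
         * { register (buffer), type, latency in microseconds, average power
         *   in milliwatts }; the checks below skip anything else.
         */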
710         for (i = 1; i <= count; i++) {
711                 union acpi_object *element;
712                 union acpi_object *obj;
713                 struct acpi_power_register *reg;
714                 struct acpi_processor_cx cx;
715
716                 memset(&cx, 0, sizeof(cx));
717
718                 element = &(cst->package.elements[i]);
719                 if (element->type != ACPI_TYPE_PACKAGE)
720                         continue;
721
722                 if (element->package.count != 4)
723                         continue;
724
725                 obj = &(element->package.elements[0]);
726
727                 if (obj->type != ACPI_TYPE_BUFFER)
728                         continue;
729
730                 reg = (struct acpi_power_register *)obj->buffer.pointer;
731
732                 if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO &&
733                     (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
734                         continue;
735
736                 /* There should be an easy way to extract an integer... */
737                 obj = &(element->package.elements[1]);
738                 if (obj->type != ACPI_TYPE_INTEGER)
739                         continue;
740
741                 cx.type = obj->integer.value;
742                 /*
743                  * Some buggy BIOSes won't list C1 in _CST -
744                  * Let acpi_processor_get_power_info_default() handle them later
745                  */
746                 if (i == 1 && cx.type != ACPI_STATE_C1)
747                         current_count++;
748
749                 cx.address = reg->address;
750                 cx.index = current_count + 1;
751
752                 cx.space_id = ACPI_CSTATE_SYSTEMIO;
753                 if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
754                         if (acpi_processor_ffh_cstate_probe
755                                         (pr->id, &cx, reg) == 0) {
756                                 cx.space_id = ACPI_CSTATE_FFH;
757                         } else if (cx.type != ACPI_STATE_C1) {
758                                 /*
759                                  * C1 is a special case where FIXED_HARDWARE
760                  * can be handled in a non-MWAIT way as well.
761                                  * In that case, save this _CST entry info.
762                                  * That is, we retain space_id of SYSTEM_IO for
763                                  * halt based C1.
764                                  * Otherwise, ignore this info and continue.
765                                  */
766                                 continue;
767                         }
768                 }
769
770                 obj = &(element->package.elements[2]);
771                 if (obj->type != ACPI_TYPE_INTEGER)
772                         continue;
773
774                 cx.latency = obj->integer.value;
775
776                 obj = &(element->package.elements[3]);
777                 if (obj->type != ACPI_TYPE_INTEGER)
778                         continue;
779
780                 cx.power = obj->integer.value;
781
782                 current_count++;
783                 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
784
785                 /*
786                  * We support a total of ACPI_PROCESSOR_MAX_POWER - 1 states
787                  * (indices 1 through ACPI_PROCESSOR_MAX_POWER - 1)
788                  */
789                 if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
790                         printk(KERN_WARNING
791                                "Limiting number of power states to max (%d)\n",
792                                ACPI_PROCESSOR_MAX_POWER);
793                         printk(KERN_WARNING
794                                "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
795                         break;
796                 }
797         }
798
799         ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
800                           current_count));
801
802         /* Validate number of power states discovered */
803         if (current_count < 2)
804                 status = -EFAULT;
805
806       end:
807         kfree(buffer.pointer);
808
809         return status;
810 }
811
812 static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
813 {
814
815         if (!cx->address)
816                 return;
817
818         /*
819          * C2 latency must be less than or equal to 100
820          * microseconds.
821          */
822         else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
823                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
824                                   "latency too large [%d]\n", cx->latency));
825                 return;
826         }
827
828         /*
829          * Otherwise we've met all of our C2 requirements.
830          * Normalize the C2 latency to expedite policy
831          */
832         cx->valid = 1;
833         cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
834
835         return;
836 }
837
838 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
839                                            struct acpi_processor_cx *cx)
840 {
841         static int bm_check_flag;
842
843
844         if (!cx->address)
845                 return;
846
847         /*
848          * C3 latency must be less than or equal to 1000
849          * microseconds.
850          */
851         else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
852                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
853                                   "latency too large [%d]\n", cx->latency));
854                 return;
855         }
856
857         /*
858          * PIIX4 Erratum #18: We don't support C3 when Type-F (fast)
859          * DMA transfers are used by any ISA device to avoid livelock.
860          * Note that we could disable Type-F DMA (as recommended by
861          * the erratum), but this is known to disrupt certain ISA
862          * devices thus we take the conservative approach.
863          */
864         else if (errata.piix4.fdma) {
865                 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
866                                   "C3 not supported on PIIX4 with Type-F DMA\n"));
867                 return;
868         }
869
870         /* All the logic here assumes flags.bm_check is the same across all CPUs */
871         if (!bm_check_flag) {
872                 /* Determine whether bm_check is needed based on CPU  */
873                 acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
874                 bm_check_flag = pr->flags.bm_check;
875         } else {
876                 pr->flags.bm_check = bm_check_flag;
877         }
878
879         if (pr->flags.bm_check) {
880                 /* bus mastering control is necessary */
881                 if (!pr->flags.bm_control) {
882                         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
883                                           "C3 support requires bus mastering control\n"));
884                         return;
885                 }
886         } else {
887                 /*
888                  * WBINVD should be set in the FADT for the C3 state to be
889                  * supported when bm_check is not required.
890                  */
891                 if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
892                         ACPI_DEBUG_PRINT((ACPI_DB_INFO,
893                                           "Cache invalidation should work properly"
894                                           " for C3 to be enabled on SMP systems\n"));
895                         return;
896                 }
897                 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
898         }
899
900         /*
901          * Otherwise we've met all of our C3 requirements.
902          * Normalize the C3 latency to expedite policy.  Enable
903          * checking of bus mastering status (bm_check) so we can
904          * use this in our C3 policy
905          */
906         cx->valid = 1;
907         cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
908
909         return;
910 }
911
912 static int acpi_processor_power_verify(struct acpi_processor *pr)
913 {
914         unsigned int i;
915         unsigned int working = 0;
916
917 #ifdef ARCH_APICTIMER_STOPS_ON_C3
918         int timer_broadcast = 0;
919         cpumask_t mask = cpumask_of_cpu(pr->id);
920         on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
921 #endif
922
923         for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
924                 struct acpi_processor_cx *cx = &pr->power.states[i];
925
926                 switch (cx->type) {
927                 case ACPI_STATE_C1:
928                         cx->valid = 1;
929                         break;
930
931                 case ACPI_STATE_C2:
932                         acpi_processor_power_verify_c2(cx);
933 #ifdef ARCH_APICTIMER_STOPS_ON_C3
934                         /* Some AMD systems fake C3 as C2, but still
935                            have timer troubles */
936                         if (cx->valid && 
937                                 boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
938                                 timer_broadcast++;
939 #endif
940                         break;
941
942                 case ACPI_STATE_C3:
943                         acpi_processor_power_verify_c3(pr, cx);
944 #ifdef ARCH_APICTIMER_STOPS_ON_C3
945                         if (cx->valid)
946                                 timer_broadcast++;
947 #endif
948                         break;
949                 }
950
951                 if (cx->valid)
952                         working++;
953         }
954
955 #ifdef ARCH_APICTIMER_STOPS_ON_C3
956         if (timer_broadcast)
957                 on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
958 #endif
959
960         return (working);
961 }
962
963 static int acpi_processor_get_power_info(struct acpi_processor *pr)
964 {
965         unsigned int i;
966         int result;
967
968
969         /* NOTE: the idle thread may not be running while calling
970          * this function */
971
972         /* Zero initialize all the C-states info. */
973         memset(pr->power.states, 0, sizeof(pr->power.states));
974
975         result = acpi_processor_get_power_info_cst(pr);
976         if (result == -ENODEV)
977                 result = acpi_processor_get_power_info_fadt(pr);
978
979         if (result)
980                 return result;
981
982         acpi_processor_get_power_info_default(pr);
983
984         pr->power.count = acpi_processor_power_verify(pr);
985
986         /*
987          * Set Default Policy
988          * ------------------
989          * Now that we know which states are supported, set the default
990          * policy.  Note that this policy can be changed dynamically
991          * (e.g. encourage deeper sleeps to conserve battery life when
992          * not on AC).
993          */
994         result = acpi_processor_set_power_policy(pr);
995         if (result)
996                 return result;
997
998         /*
999          * if one state of type C2 or C3 is available, mark this
1000          * CPU as being "idle manageable"
1001          */
1002         for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
1003                 if (pr->power.states[i].valid) {
1004                         pr->power.count = i;
1005                         if (pr->power.states[i].type >= ACPI_STATE_C2)
1006                                 pr->flags.power = 1;
1007                 }
1008         }
1009
1010         return 0;
1011 }
1012
1013 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
1014 {
1015         int result = 0;
1016
1017
1018         if (!pr)
1019                 return -EINVAL;
1020
1021         if (nocst) {
1022                 return -ENODEV;
1023         }
1024
1025         if (!pr->flags.power_setup_done)
1026                 return -ENODEV;
1027
1028         /* Fall back to the default idle loop */
1029         pm_idle = pm_idle_save;
1030         synchronize_sched();    /* Relies on interrupts forcing exit from idle. */
1031
1032         pr->flags.power = 0;
1033         result = acpi_processor_get_power_info(pr);
1034         if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
1035                 pm_idle = acpi_processor_idle;
1036
1037         return result;
1038 }
1039
1040 /* proc interface */
1041
1042 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
1043 {
1044         struct acpi_processor *pr = seq->private;
1045         unsigned int i;
1046
1047
1048         if (!pr)
1049                 goto end;
1050
1051         seq_printf(seq, "active state:            C%zd\n"
1052                    "max_cstate:              C%d\n"
1053                    "bus master activity:     %08x\n"
1054                    "maximum allowed latency: %d usec\n",
1055                    pr->power.state ? pr->power.state - pr->power.states : 0,
1056                    max_cstate, (unsigned)pr->power.bm_activity,
1057                    system_latency_constraint());
1058
1059         seq_puts(seq, "states:\n");
1060
1061         for (i = 1; i <= pr->power.count; i++) {
1062                 seq_printf(seq, "   %cC%d:                  ",
1063                            (&pr->power.states[i] ==
1064                             pr->power.state ? '*' : ' '), i);
1065
1066                 if (!pr->power.states[i].valid) {
1067                         seq_puts(seq, "<not supported>\n");
1068                         continue;
1069                 }
1070
1071                 switch (pr->power.states[i].type) {
1072                 case ACPI_STATE_C1:
1073                         seq_printf(seq, "type[C1] ");
1074                         break;
1075                 case ACPI_STATE_C2:
1076                         seq_printf(seq, "type[C2] ");
1077                         break;
1078                 case ACPI_STATE_C3:
1079                         seq_printf(seq, "type[C3] ");
1080                         break;
1081                 default:
1082                         seq_printf(seq, "type[--] ");
1083                         break;
1084                 }
1085
1086                 if (pr->power.states[i].promotion.state)
1087                         seq_printf(seq, "promotion[C%zd] ",
1088                                    (pr->power.states[i].promotion.state -
1089                                     pr->power.states));
1090                 else
1091                         seq_puts(seq, "promotion[--] ");
1092
1093                 if (pr->power.states[i].demotion.state)
1094                         seq_printf(seq, "demotion[C%zd] ",
1095                                    (pr->power.states[i].demotion.state -
1096                                     pr->power.states));
1097                 else
1098                         seq_puts(seq, "demotion[--] ");
1099
1100                 seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
1101                            pr->power.states[i].latency,
1102                            pr->power.states[i].usage,
1103                            (unsigned long long)pr->power.states[i].time);
1104         }
1105
1106       end:
1107         return 0;
1108 }
1109
1110 static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
1111 {
1112         return single_open(file, acpi_processor_power_seq_show,
1113                            PDE(inode)->data);
1114 }
1115
1116 static const struct file_operations acpi_processor_power_fops = {
1117         .open = acpi_processor_power_open_fs,
1118         .read = seq_read,
1119         .llseek = seq_lseek,
1120         .release = single_release,
1121 };
1122
1123 #ifdef CONFIG_SMP
1124 static void smp_callback(void *v)
1125 {
1126         /* we already woke the CPU up, nothing more to do */
1127 }
1128
1129 /*
1130  * This function gets called when a part of the kernel has a new latency
1131  * requirement.  This means we need to get all processors out of their C-state,
1132  * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
1133  * wakes them all right up.
1134  */
1135 static int acpi_processor_latency_notify(struct notifier_block *b,
1136                 unsigned long l, void *v)
1137 {
1138         smp_call_function(smp_callback, NULL, 0, 1);
1139         return NOTIFY_OK;
1140 }
1141
1142 static struct notifier_block acpi_processor_latency_notifier = {
1143         .notifier_call = acpi_processor_latency_notify,
1144 };
1145 #endif
1146
1147 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
1148                               struct acpi_device *device)
1149 {
1150         acpi_status status = 0;
1151         static int first_run;
1152         struct proc_dir_entry *entry = NULL;
1153         unsigned int i;
1154
1155
1156         if (!first_run) {
1157                 dmi_check_system(processor_power_dmi_table);
1158                 if (max_cstate < ACPI_C_STATES_MAX)
1159                         printk(KERN_NOTICE
1160                                "ACPI: processor limited to max C-state %d\n",
1161                                max_cstate);
1162                 first_run++;
1163 #ifdef CONFIG_SMP
1164                 register_latency_notifier(&acpi_processor_latency_notifier);
1165 #endif
1166         }
1167
1168         if (!pr)
1169                 return -EINVAL;
1170
1171         if (acpi_gbl_FADT.cst_control && !nocst) {
1172                 status =
1173                     acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
1174                 if (ACPI_FAILURE(status)) {
1175                         ACPI_EXCEPTION((AE_INFO, status,
1176                                         "Notifying BIOS of _CST ability failed"));
1177                 }
1178         }
1179
1180         acpi_processor_get_power_info(pr);
1181
1182         /*
1183          * Install the idle handler if processor power management is supported.
1184          * Note that the previously set idle handler will be used on
1185          * platforms that only support C1.
1186          */
1187         if ((pr->flags.power) && (!boot_option_idle_override)) {
1188                 printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
1189                 for (i = 1; i <= pr->power.count; i++)
1190                         if (pr->power.states[i].valid)
1191                                 printk(" C%d[C%d]", i,
1192                                        pr->power.states[i].type);
1193                 printk(")\n");
1194
1195                 if (pr->id == 0) {
1196                         pm_idle_save = pm_idle;
1197                         pm_idle = acpi_processor_idle;
1198                 }
1199         }
1200
1201         /* 'power' [R] */
1202         entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
1203                                   S_IRUGO, acpi_device_dir(device));
1204         if (!entry)
1205                 return -EIO;
1206         else {
1207                 entry->proc_fops = &acpi_processor_power_fops;
1208                 entry->data = acpi_driver_data(device);
1209                 entry->owner = THIS_MODULE;
1210         }
1211
1212         pr->flags.power_setup_done = 1;
1213
1214         return 0;
1215 }
1216
1217 int acpi_processor_power_exit(struct acpi_processor *pr,
1218                               struct acpi_device *device)
1219 {
1220
1221         pr->flags.power_setup_done = 0;
1222
1223         if (acpi_device_dir(device))
1224                 remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
1225                                   acpi_device_dir(device));
1226
1227         /* Unregister the idle handler when processor #0 is removed. */
1228         if (pr->id == 0) {
1229                 pm_idle = pm_idle_save;
1230
1231                 /*
1232                  * We are about to unload the current idle thread pm callback
1233                  * (pm_idle).  Wait for all processors to update cached/local
1234                  * copies of pm_idle before proceeding.
1235                  */
1236                 cpu_idle_wait();
1237 #ifdef CONFIG_SMP
1238                 unregister_latency_notifier(&acpi_processor_latency_notifier);
1239 #endif
1240         }
1241
1242         return 0;
1243 }