Merge tag 'perf-urgent-for-mingo-4.14-20170928' of git://git.kernel.org/pub/scm/linux...
[sfrench/cifs-2.6.git] / arch / powerpc / platforms / powernv / idle.c
1 /*
2  * PowerNV cpuidle code
3  *
4  * Copyright 2015 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/types.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/of.h>
16 #include <linux/device.h>
17 #include <linux/cpu.h>
18
19 #include <asm/firmware.h>
20 #include <asm/machdep.h>
21 #include <asm/opal.h>
22 #include <asm/cputhreads.h>
23 #include <asm/cpuidle.h>
24 #include <asm/code-patching.h>
25 #include <asm/smp.h>
26 #include <asm/runlatch.h>
27
28 #include "powernv.h"
29 #include "subcore.h"
30
/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE  0xF

/*
 * Register identifiers handed to opal_slw_set_reg() on ISA 3.0 CPUs so
 * that the MSR and PSSCR values are programmed alongside the other SPRs
 * saved for deep idle states.
 */
#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR      855

/*
 * OR of the "ibm,cpu-idle-state-flags" entries (OPAL_PM_* bits) read from
 * the device tree. OPAL_PM_LOSE_FULL_CONTEXT is cleared again if the SPRs
 * needed by such states cannot be registered with OPAL.
 */
static u32 supported_cpuidle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 * default_stop_found records whether such a state was found in the
 * device tree.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 * pnv_deepest_stop_flag holds the device-tree flags of that state and
 * deepest_stop_found records whether one was found at all.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
61
62 static int pnv_save_sprs_for_deep_states(void)
63 {
64         int cpu;
65         int rc;
66
67         /*
68          * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
69          * all cpus at boot. Get these reg values of current cpu and use the
70          * same across all cpus.
71          */
72         uint64_t lpcr_val = mfspr(SPRN_LPCR);
73         uint64_t hid0_val = mfspr(SPRN_HID0);
74         uint64_t hid1_val = mfspr(SPRN_HID1);
75         uint64_t hid4_val = mfspr(SPRN_HID4);
76         uint64_t hid5_val = mfspr(SPRN_HID5);
77         uint64_t hmeer_val = mfspr(SPRN_HMEER);
78         uint64_t msr_val = MSR_IDLE;
79         uint64_t psscr_val = pnv_deepest_stop_psscr_val;
80
81         for_each_possible_cpu(cpu) {
82                 uint64_t pir = get_hard_smp_processor_id(cpu);
83                 uint64_t hsprg0_val = (uint64_t)&paca[cpu];
84
85                 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
86                 if (rc != 0)
87                         return rc;
88
89                 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
90                 if (rc != 0)
91                         return rc;
92
93                 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
94                         rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
95                         if (rc)
96                                 return rc;
97
98                         rc = opal_slw_set_reg(pir,
99                                               P9_STOP_SPR_PSSCR, psscr_val);
100
101                         if (rc)
102                                 return rc;
103                 }
104
105                 /* HIDs are per core registers */
106                 if (cpu_thread_in_core(cpu) == 0) {
107
108                         rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
109                         if (rc != 0)
110                                 return rc;
111
112                         rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
113                         if (rc != 0)
114                                 return rc;
115
116                         /* Only p8 needs to set extra HID regiters */
117                         if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
118
119                                 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
120                                 if (rc != 0)
121                                         return rc;
122
123                                 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
124                                 if (rc != 0)
125                                         return rc;
126
127                                 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
128                                 if (rc != 0)
129                                         return rc;
130                         }
131                 }
132         }
133
134         return 0;
135 }
136
137 static void pnv_alloc_idle_core_states(void)
138 {
139         int i, j;
140         int nr_cores = cpu_nr_cores();
141         u32 *core_idle_state;
142
143         /*
144          * core_idle_state - The lower 8 bits track the idle state of
145          * each thread of the core.
146          *
147          * The most significant bit is the lock bit.
148          *
149          * Initially all the bits corresponding to threads_per_core
150          * are set. They are cleared when the thread enters deep idle
151          * state like sleep and winkle/stop.
152          *
153          * Initially the lock bit is cleared.  The lock bit has 2
154          * purposes:
155          *      a. While the first thread in the core waking up from
156          *         idle is restoring core state, it prevents other
157          *         threads in the core from switching to process
158          *         context.
159          *      b. While the last thread in the core is saving the
160          *         core state, it prevents a different thread from
161          *         waking up.
162          */
163         for (i = 0; i < nr_cores; i++) {
164                 int first_cpu = i * threads_per_core;
165                 int node = cpu_to_node(first_cpu);
166                 size_t paca_ptr_array_size;
167
168                 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
169                 *core_idle_state = (1 << threads_per_core) - 1;
170                 paca_ptr_array_size = (threads_per_core *
171                                        sizeof(struct paca_struct *));
172
173                 for (j = 0; j < threads_per_core; j++) {
174                         int cpu = first_cpu + j;
175
176                         paca[cpu].core_idle_state_ptr = core_idle_state;
177                         paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
178                         paca[cpu].thread_mask = 1 << j;
179                         if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
180                                 continue;
181                         paca[cpu].thread_sibling_pacas =
182                                 kmalloc_node(paca_ptr_array_size,
183                                              GFP_KERNEL, node);
184                 }
185         }
186
187         update_subcore_sibling_mask();
188
189         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
190                 int rc = pnv_save_sprs_for_deep_states();
191
192                 if (likely(!rc))
193                         return;
194
195                 /*
196                  * The stop-api is unable to restore hypervisor
197                  * resources on wakeup from platform idle states which
198                  * lose full context. So disable such states.
199                  */
200                 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
201                 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
202                 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
203
204                 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
205                     (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
206                         /*
207                          * Use the default stop state for CPU-Hotplug
208                          * if available.
209                          */
210                         if (default_stop_found) {
211                                 pnv_deepest_stop_psscr_val =
212                                         pnv_default_stop_val;
213                                 pnv_deepest_stop_psscr_mask =
214                                         pnv_default_stop_mask;
215                                 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
216                                         pnv_deepest_stop_psscr_val);
217                         } else { /* Fallback to snooze loop for CPU-Hotplug */
218                                 deepest_stop_found = false;
219                                 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
220                         }
221                 }
222         }
223 }
224
/*
 * Return the current value of supported_cpuidle_states, the file-scope
 * mask of idle-state flags (OPAL_PM_* bits) maintained by this file.
 * Exported (GPL-only) for use outside this file.
 */
u32 pnv_get_supported_cpuidle_states(void)
{
        return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
230
231 static void pnv_fastsleep_workaround_apply(void *info)
232
233 {
234         int rc;
235         int *err = info;
236
237         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
238                                         OPAL_CONFIG_IDLE_APPLY);
239         if (rc)
240                 *err = 1;
241 }
242
243 /*
244  * Used to store fastsleep workaround state
245  * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
246  * 1 - Workaround applied once, never undone.
247  */
248 static u8 fastsleep_workaround_applyonce;
249
250 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
251                 struct device_attribute *attr, char *buf)
252 {
253         return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
254 }
255
256 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
257                 struct device_attribute *attr, const char *buf,
258                 size_t count)
259 {
260         cpumask_t primary_thread_mask;
261         int err;
262         u8 val;
263
264         if (kstrtou8(buf, 0, &val) || val != 1)
265                 return -EINVAL;
266
267         if (fastsleep_workaround_applyonce == 1)
268                 return count;
269
270         /*
271          * fastsleep_workaround_applyonce = 1 implies
272          * fastsleep workaround needs to be left in 'applied' state on all
273          * the cores. Do this by-
274          * 1. Patching out the call to 'undo' workaround in fastsleep exit path
275          * 2. Sending ipi to all the cores which have at least one online thread
276          * 3. Patching out the call to 'apply' workaround in fastsleep entry
277          * path
278          * There is no need to send ipi to cores which have all threads
279          * offlined, as last thread of the core entering fastsleep or deeper
280          * state would have applied workaround.
281          */
282         err = patch_instruction(
283                 (unsigned int *)pnv_fastsleep_workaround_at_exit,
284                 PPC_INST_NOP);
285         if (err) {
286                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
287                 goto fail;
288         }
289
290         get_online_cpus();
291         primary_thread_mask = cpu_online_cores_map();
292         on_each_cpu_mask(&primary_thread_mask,
293                                 pnv_fastsleep_workaround_apply,
294                                 &err, 1);
295         put_online_cpus();
296         if (err) {
297                 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
298                 goto fail;
299         }
300
301         err = patch_instruction(
302                 (unsigned int *)pnv_fastsleep_workaround_at_entry,
303                 PPC_INST_NOP);
304         if (err) {
305                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
306                 goto fail;
307         }
308
309         fastsleep_workaround_applyonce = 1;
310
311         return count;
312 fail:
313         return -EIO;
314 }
315
316 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
317                         show_fastsleep_workaround_applyonce,
318                         store_fastsleep_workaround_applyonce);
319
/*
 * Enter the POWER7-style idle state selected by @type with the runlatch
 * turned off for the duration.
 *
 * Returns the value handed back by power7_idle_insn(), or 0 when
 * prep_irq_for_idle_irqsoff() refuses and idle is not entered at all.
 */
static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long wake_srr1 = 0;

	if (prep_irq_for_idle_irqsoff()) {
		__ppc64_runlatch_off();
		wake_srr1 = power7_idle_insn(type);
		__ppc64_runlatch_on();

		fini_irq_for_idle_irqsoff();
	}

	return wake_srr1;
}
335
/*
 * Enter the idle state selected by @type and pass the resulting SRR1
 * value to irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	irq_set_pending_from_srr1(__power7_idle_type(type));
}
343
344 void power7_idle(void)
345 {
346         if (!powersave_nap)
347                 return;
348
349         power7_idle_type(PNV_THREAD_NAP);
350 }
351
352 static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
353                                       unsigned long stop_psscr_mask)
354 {
355         unsigned long psscr;
356         unsigned long srr1;
357
358         if (!prep_irq_for_idle_irqsoff())
359                 return 0;
360
361         psscr = mfspr(SPRN_PSSCR);
362         psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
363
364         __ppc64_runlatch_off();
365         srr1 = power9_idle_stop(psscr);
366         __ppc64_runlatch_on();
367
368         fini_irq_for_idle_irqsoff();
369
370         return srr1;
371 }
372
/*
 * Enter the stop state described by @stop_psscr_val / @stop_psscr_mask
 * and pass the resulting SRR1 value to irq_set_pending_from_srr1().
 */
void power9_idle_type(unsigned long stop_psscr_val,
				      unsigned long stop_psscr_mask)
{
	irq_set_pending_from_srr1(__power9_idle_type(stop_psscr_val,
						     stop_psscr_mask));
}
381
/*
 * Used for ppc_md.power_save which needs a function with no parameters.
 *
 * Enters the default stop state, i.e. the one described by
 * pnv_default_stop_val and pnv_default_stop_mask.
 */
void power9_idle(void)
{
        power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
389
390 #ifdef CONFIG_HOTPLUG_CPU
391 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
392 {
393         u64 pir = get_hard_smp_processor_id(cpu);
394
395         mtspr(SPRN_LPCR, lpcr_val);
396
397         /*
398          * Program the LPCR via stop-api only if the deepest stop state
399          * can lose hypervisor context.
400          */
401         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
402                 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
403 }
404
405 /*
406  * pnv_cpu_offline: A function that puts the CPU into the deepest
407  * available platform idle state on a CPU-Offline.
408  * interrupts hard disabled and no lazy irq pending.
409  */
410 unsigned long pnv_cpu_offline(unsigned int cpu)
411 {
412         unsigned long srr1;
413         u32 idle_states = pnv_get_supported_cpuidle_states();
414         u64 lpcr_val;
415
416         /*
417          * We don't want to take decrementer interrupts while we are
418          * offline, so clear LPCR:PECE1. We keep PECE2 (and
419          * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
420          *
421          * If the CPU gets woken up by a special wakeup, ensure that
422          * the SLW engine sets LPCR with decrementer bit cleared, else
423          * the CPU will come back to the kernel due to a spurious
424          * wakeup.
425          */
426         lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
427         pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
428
429         __ppc64_runlatch_off();
430
431         if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
432                 unsigned long psscr;
433
434                 psscr = mfspr(SPRN_PSSCR);
435                 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
436                                                 pnv_deepest_stop_psscr_val;
437                 srr1 = power9_idle_stop(psscr);
438
439         } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
440                    (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
441                 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
442         } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
443                    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
444                 srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
445         } else if (idle_states & OPAL_PM_NAP_ENABLED) {
446                 srr1 = power7_idle_insn(PNV_THREAD_NAP);
447         } else {
448                 /* This is the fallback method. We emulate snooze */
449                 while (!generic_check_cpu_restart(cpu)) {
450                         HMT_low();
451                         HMT_very_low();
452                 }
453                 srr1 = 0;
454                 HMT_medium();
455         }
456
457         __ppc64_runlatch_on();
458
459         /*
460          * Re-enable decrementer interrupts in LPCR.
461          *
462          * Further, we want stop states to be woken up by decrementer
463          * for non-hotplug cases. So program the LPCR via stop api as
464          * well.
465          */
466         lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
467         pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
468
469         return srr1;
470 }
471 #endif
472
473 /*
474  * Power ISA 3.0 idle initialization.
475  *
476  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
477  * Register (PSSCR) to control idle behavior.
478  *
479  * PSSCR layout:
480  * ----------------------------------------------------------
481  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
482  * ----------------------------------------------------------
483  * 0      4     41   42    43   44     48    54   56    60
484  *
485  * PSSCR key fields:
486  *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
487  *      lowest power-saving state the thread entered since stop instruction was
488  *      last executed.
489  *
490  *      Bit 41 - Status Disable(SD)
491  *      0 - Shows PLS entries
492  *      1 - PLS entries are all 0
493  *
494  *      Bit 42 - Enable State Loss
495  *      0 - No state is lost irrespective of other fields
496  *      1 - Allows state loss
497  *
498  *      Bit 43 - Exit Criterion
499  *      0 - Exit from power-save mode on any interrupt
500  *      1 - Exit from power-save mode controlled by LPCR's PECE bits
501  *
502  *      Bits 44:47 - Power-Saving Level Limit
503  *      This limits the power-saving level that can be entered into.
504  *
505  *      Bits 60:63 - Requested Level
506  *      Used to specify which power-saving level must be entered on executing
507  *      stop instruction
508  */
509
510 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
511 {
512         int err = 0;
513
514         /*
515          * psscr_mask == 0xf indicates an older firmware.
516          * Set remaining fields of psscr to the default values.
517          * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
518          */
519         if (*psscr_mask == 0xf) {
520                 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
521                 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
522                 return err;
523         }
524
525         /*
526          * New firmware is expected to set the psscr_val bits correctly.
527          * Validate that the following invariants are correctly maintained by
528          * the new firmware.
529          * - ESL bit value matches the EC bit value.
530          * - ESL bit is set for all the deep stop states.
531          */
532         if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
533                 err = ERR_EC_ESL_MISMATCH;
534         } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
535                 GET_PSSCR_ESL(*psscr_val) == 0) {
536                 err = ERR_DEEP_STATE_ESL_MISMATCH;
537         }
538
539         return err;
540 }
541
542 /*
543  * pnv_arch300_idle_init: Initializes the default idle state, first
544  *                        deep idle state and deepest idle state on
545  *                        ISA 3.0 CPUs.
546  *
547  * @np: /ibm,opal/power-mgt device node
548  * @flags: cpu-idle-state-flags array
549  * @dt_idle_states: Number of idle state entries
550  * Returns 0 on success
551  */
552 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
553                                         int dt_idle_states)
554 {
555         u64 *psscr_val = NULL;
556         u64 *psscr_mask = NULL;
557         u32 *residency_ns = NULL;
558         u64 max_residency_ns = 0;
559         int rc = 0, i;
560
561         psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
562         psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
563         residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
564                                GFP_KERNEL);
565
566         if (!psscr_val || !psscr_mask || !residency_ns) {
567                 rc = -1;
568                 goto out;
569         }
570
571         if (of_property_read_u64_array(np,
572                 "ibm,cpu-idle-state-psscr",
573                 psscr_val, dt_idle_states)) {
574                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
575                 rc = -1;
576                 goto out;
577         }
578
579         if (of_property_read_u64_array(np,
580                                        "ibm,cpu-idle-state-psscr-mask",
581                                        psscr_mask, dt_idle_states)) {
582                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
583                 rc = -1;
584                 goto out;
585         }
586
587         if (of_property_read_u32_array(np,
588                                        "ibm,cpu-idle-state-residency-ns",
589                                         residency_ns, dt_idle_states)) {
590                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
591                 rc = -1;
592                 goto out;
593         }
594
595         /*
596          * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
597          * and the pnv_default_stop_{val,mask}.
598          *
599          * pnv_first_deep_stop_state should be set to the first stop
600          * level to cause hypervisor state loss.
601          *
602          * pnv_deepest_stop_{val,mask} should be set to values corresponding to
603          * the deepest stop state.
604          *
605          * pnv_default_stop_{val,mask} should be set to values corresponding to
606          * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
607          */
608         pnv_first_deep_stop_state = MAX_STOP_STATE;
609         for (i = 0; i < dt_idle_states; i++) {
610                 int err;
611                 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
612
613                 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
614                      (pnv_first_deep_stop_state > psscr_rl))
615                         pnv_first_deep_stop_state = psscr_rl;
616
617                 err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
618                                               flags[i]);
619                 if (err) {
620                         report_invalid_psscr_val(psscr_val[i], err);
621                         continue;
622                 }
623
624                 if (max_residency_ns < residency_ns[i]) {
625                         max_residency_ns = residency_ns[i];
626                         pnv_deepest_stop_psscr_val = psscr_val[i];
627                         pnv_deepest_stop_psscr_mask = psscr_mask[i];
628                         pnv_deepest_stop_flag = flags[i];
629                         deepest_stop_found = true;
630                 }
631
632                 if (!default_stop_found &&
633                     (flags[i] & OPAL_PM_STOP_INST_FAST)) {
634                         pnv_default_stop_val = psscr_val[i];
635                         pnv_default_stop_mask = psscr_mask[i];
636                         default_stop_found = true;
637                 }
638         }
639
640         if (unlikely(!default_stop_found)) {
641                 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
642         } else {
643                 ppc_md.power_save = power9_idle;
644                 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
645                         pnv_default_stop_val, pnv_default_stop_mask);
646         }
647
648         if (unlikely(!deepest_stop_found)) {
649                 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
650         } else {
651                 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
652                         pnv_deepest_stop_psscr_val,
653                         pnv_deepest_stop_psscr_mask);
654         }
655
656         pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
657                 pnv_first_deep_stop_state);
658 out:
659         kfree(psscr_val);
660         kfree(psscr_mask);
661         kfree(residency_ns);
662         return rc;
663 }
664
665 /*
666  * Probe device tree for supported idle states
667  */
668 static void __init pnv_probe_idle_states(void)
669 {
670         struct device_node *np;
671         int dt_idle_states;
672         u32 *flags = NULL;
673         int i;
674
675         np = of_find_node_by_path("/ibm,opal/power-mgt");
676         if (!np) {
677                 pr_warn("opal: PowerMgmt Node not found\n");
678                 goto out;
679         }
680         dt_idle_states = of_property_count_u32_elems(np,
681                         "ibm,cpu-idle-state-flags");
682         if (dt_idle_states < 0) {
683                 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
684                 goto out;
685         }
686
687         flags = kcalloc(dt_idle_states, sizeof(*flags),  GFP_KERNEL);
688
689         if (of_property_read_u32_array(np,
690                         "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
691                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
692                 goto out;
693         }
694
695         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
696                 if (pnv_power9_idle_init(np, flags, dt_idle_states))
697                         goto out;
698         }
699
700         for (i = 0; i < dt_idle_states; i++)
701                 supported_cpuidle_states |= flags[i];
702
703 out:
704         kfree(flags);
705 }
706 static int __init pnv_init_idle_states(void)
707 {
708
709         supported_cpuidle_states = 0;
710
711         if (cpuidle_disable != IDLE_NO_OVERRIDE)
712                 goto out;
713
714         pnv_probe_idle_states();
715
716         if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
717                 patch_instruction(
718                         (unsigned int *)pnv_fastsleep_workaround_at_entry,
719                         PPC_INST_NOP);
720                 patch_instruction(
721                         (unsigned int *)pnv_fastsleep_workaround_at_exit,
722                         PPC_INST_NOP);
723         } else {
724                 /*
725                  * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
726                  * workaround is needed to use fastsleep. Provide sysfs
727                  * control to choose how this workaround has to be applied.
728                  */
729                 device_create_file(cpu_subsys.dev_root,
730                                 &dev_attr_fastsleep_workaround_applyonce);
731         }
732
733         pnv_alloc_idle_core_states();
734
735         /*
736          * For each CPU, record its PACA address in each of it's
737          * sibling thread's PACA at the slot corresponding to this
738          * CPU's index in the core.
739          */
740         if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
741                 int cpu;
742
743                 pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
744                 for_each_possible_cpu(cpu) {
745                         int base_cpu = cpu_first_thread_sibling(cpu);
746                         int idx = cpu_thread_in_core(cpu);
747                         int i;
748
749                         for (i = 0; i < threads_per_core; i++) {
750                                 int j = base_cpu + i;
751
752                                 paca[j].thread_sibling_pacas[idx] = &paca[cpu];
753                         }
754                 }
755         }
756
757         if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
758                 ppc_md.power_save = power7_idle;
759
760 out:
761         return 0;
762 }
763 machine_subsys_initcall(powernv, pnv_init_idle_states);