tools/power turbostat: decode Baytrail CC6 and MC6 demotion configuration
[jlayton/linux.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/resource.h>
33 #include <fcntl.h>
34 #include <signal.h>
35 #include <sys/time.h>
36 #include <stdlib.h>
37 #include <getopt.h>
38 #include <dirent.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include <sched.h>
42 #include <time.h>
43 #include <cpuid.h>
44 #include <linux/capability.h>
45 #include <errno.h>
46
/*
 * Program-wide state.
 */

/* files and descriptors */
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;		/* /dev/cpu/N/msr descriptors by cpu; 0 = not opened yet, see get_msr_fd() */

/* measurement interval: 5 sec, 0 nsec */
struct timespec interval_ts = {5, 0};

/* output selection */
unsigned int debug;		/* when clear, print_header() stops after TSC_MHz */
unsigned int rapl_joules;	/* print_header() emits *_J rather than *Watt titles */
unsigned int summary_only;
unsigned int dump_only;

/* C-state residency column selection */
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;	/* suppresses the CPU%c3 title */
unsigned int do_pc2;
unsigned int do_pc3;
unsigned int do_pc6;
unsigned int do_pc7;
unsigned int do_c8_c9_c10;
unsigned int do_skl_residency;	/* Totl%C0, Any%C0, GFX%C0, CPUGFX% */
unsigned int do_slm_cstates;	/* suppresses the CPU%c3 title */
unsigned int use_c1_residency_msr;

/* processor capabilities */
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;

/* clocks */
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;

unsigned int show_pkg_only;
unsigned int show_core_only;

/* formatted output is appended at outp */
char *output_buffer;
char *outp;

unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;

/* graphics */
unsigned long long gfx_cur_rc6_ms;
unsigned int gfx_cur_mhz;

/* thermal */
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;

/* RAPL scaling factors */
double rapl_power_units;
double rapl_time_units;
double rapl_dram_energy_units;
double rapl_energy_units;
double rapl_joule_counter_range;

unsigned int do_core_perf_limit_reasons;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;

unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;

double discover_bclk(unsigned int family, unsigned int model);

/* hardware P-states */
unsigned int has_hwp;			/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
					/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
105
/*
 * RAPL feature bits, each followed by the MSRs it covers.
 */

/* 0x610 MSR_PKG_POWER_LIMIT, 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG			(1 << 0)
/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_PERF_STATUS		(1 << 1)
/* 0x614 MSR_PKG_POWER_INFO */
#define RAPL_PKG_POWER_INFO		(1 << 2)

/* 0x618 MSR_DRAM_POWER_LIMIT, 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM			(1 << 3)
/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_PERF_STATUS		(1 << 4)
/* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_POWER_INFO		(1 << 5)

/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORES_POWER_LIMIT		(1 << 6)
/* 0x63a MSR_PP0_POLICY */
#define RAPL_CORE_POLICY		(1 << 7)

/* 0x640 MSR_PP1_POWER_LIMIT, 0x641 MSR_PP1_ENERGY_STATUS, 0x642 MSR_PP1_POLICY */
#define RAPL_GFX			(1 << 8)

/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORES_ENERGY_STATUS	(1 << 9)

/* both PP0 (cores) capabilities together */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)

#define TJMAX_DEFAULT	100

/* NOTE: evaluates each argument twice; do not pass expressions with side effects */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
138
/*
 * Size of the buffer sscanf() fills with the name of an added column.
 * Names are usually truncated to 7 characters when printed, but raw
 * 64-bit counters are printed 18 columns wide.
 */
#define NAME_BYTES 20

int backwards_count;
char *progname;

/* cpu masks and their allocation sizes, for the CPU_*_S() macros */
cpu_set_t *cpu_present_set;
cpu_set_t *cpu_affinity_set;
size_t cpu_present_setsize;
size_t cpu_affinity_setsize;
/* capacity of the counter[] array in each of the thread/core/package records */
#define MAX_ADDED_COUNTERS 16

/* bits in thread_data.flags */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4

/* counters collected for one logical cpu */
struct thread_data {
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned int irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int flags;	/* CPU_IS_FIRST_* */
	unsigned long long counter[MAX_ADDED_COUNTERS];	/* added counters, in sys.tp list order */
};

/* presumably the two alternating sample sets -- TODO confirm against the allocator */
struct thread_data *thread_even;
struct thread_data *thread_odd;
165
166 struct core_data {
167         unsigned long long c3;
168         unsigned long long c6;
169         unsigned long long c7;
170         unsigned int core_temp_c;
171         unsigned int core_id;
172         unsigned long long counter[MAX_ADDED_COUNTERS];
173 } *core_even, *core_odd;
174
175 struct pkg_data {
176         unsigned long long pc2;
177         unsigned long long pc3;
178         unsigned long long pc6;
179         unsigned long long pc7;
180         unsigned long long pc8;
181         unsigned long long pc9;
182         unsigned long long pc10;
183         unsigned long long pkg_wtd_core_c0;
184         unsigned long long pkg_any_core_c0;
185         unsigned long long pkg_any_gfxe_c0;
186         unsigned long long pkg_both_core_gfxe_c0;
187         long long gfx_rc6_ms;
188         unsigned int gfx_mhz;
189         unsigned int package_id;
190         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
191         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
192         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
193         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
194         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
195         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
196         unsigned int pkg_temp_c;
197         unsigned long long counter[MAX_ADDED_COUNTERS];
198 } *package_even, *package_odd;
199
/* argument triples for functions taking (thread, core, package) base pointers */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Locate one record inside a flat array laid out package-major, then
 * core, then thread, using the dimensions in the global "topo".
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "i + 1", "n << 1", a cast) are evaluated as a unit.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
210
/* which record (thread, core or package) an added counter belongs to */
enum counter_scope {
	SCOPE_CPU,
	SCOPE_CORE,
	SCOPE_PACKAGE
};

enum counter_type {
	COUNTER_CYCLES,
	COUNTER_SECONDS
};

/* how an added counter is printed: raw hex, plain number, or percent of TSC */
enum counter_format {
	FORMAT_RAW,
	FORMAT_DELTA,
	FORMAT_PERCENT
};
214
215 struct msr_counter {
216         unsigned int msr_num;
217         char name[NAME_BYTES];
218         unsigned int width;
219         enum counter_type type;
220         enum counter_format format;
221         struct msr_counter *next;
222         unsigned int flags;
223 #define FLAGS_HIDE      (1 << 0)
224 #define FLAGS_SHOW      (1 << 1)
225 };
226
/* heads of the added-counter lists, one list per scope */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;		/* per-thread list */
	struct msr_counter *cp;		/* per-core list */
	struct msr_counter *pp;		/* per-package list */
};

struct sys_counters sys;
235
236 struct system_summary {
237         struct thread_data threads;
238         struct core_data cores;
239         struct pkg_data packages;
240 } average;
241
242
/* system topology; the per-pkg and per-core counts size the GET_*() array layout */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
};

struct topo_params topo;
251
/* sample timestamps; format_counters() turns tv_delta into seconds */
struct timeval tv_even;
struct timeval tv_odd;
struct timeval tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;	/* indexed by cpu_num */

void setup_all_buffers(void);
258
259 int cpu_is_not_present(int cpu)
260 {
261         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
262 }
263 /*
264  * run func(thread, core, package) in topology order
265  * skip non-present cpus
266  */
267
268 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
269         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
270 {
271         int retval, pkg_no, core_no, thread_no;
272
273         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
274                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
275                         for (thread_no = 0; thread_no <
276                                 topo.num_threads_per_core; ++thread_no) {
277                                 struct thread_data *t;
278                                 struct core_data *c;
279                                 struct pkg_data *p;
280
281                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
282
283                                 if (cpu_is_not_present(t->cpu_id))
284                                         continue;
285
286                                 c = GET_CORE(core_base, core_no, pkg_no);
287                                 p = GET_PKG(pkg_base, pkg_no);
288
289                                 retval = func(t, c, p);
290                                 if (retval)
291                                         return retval;
292                         }
293                 }
294         }
295         return 0;
296 }
297
298 int cpu_migrate(int cpu)
299 {
300         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
301         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
302         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
303                 return -1;
304         else
305                 return 0;
306 }
307 int get_msr_fd(int cpu)
308 {
309         char pathname[32];
310         int fd;
311
312         fd = fd_percpu[cpu];
313
314         if (fd)
315                 return fd;
316
317         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
318         fd = open(pathname, O_RDONLY);
319         if (fd < 0)
320                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
321
322         fd_percpu[cpu] = fd;
323
324         return fd;
325 }
326
/*
 * Read the 64-bit MSR at "offset" on "cpu" into *msr.
 * Exits via err() unless exactly sizeof(*msr) bytes are read,
 * so a return always means success (0).
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
338
339 /*
340  * Each string in this array is compared in --show and --hide cmdline.
341  * Thus, strings that are proper sub-sets must follow their more specific peers.
342  */
343 struct msr_counter bic[] = {
344         { 0x0, "Package" },
345         { 0x0, "Avg_MHz" },
346         { 0x0, "Bzy_MHz" },
347         { 0x0, "TSC_MHz" },
348         { 0x0, "IRQ" },
349         { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL},
350         { 0x0, "Busy%" },
351         { 0x0, "CPU%c1" },
352         { 0x0, "CPU%c3" },
353         { 0x0, "CPU%c6" },
354         { 0x0, "CPU%c7" },
355         { 0x0, "ThreadC" },
356         { 0x0, "CoreTmp" },
357         { 0x0, "CoreCnt" },
358         { 0x0, "PkgTmp" },
359         { 0x0, "GFX%rc6" },
360         { 0x0, "GFXMHz" },
361         { 0x0, "Pkg%pc2" },
362         { 0x0, "Pkg%pc3" },
363         { 0x0, "Pkg%pc6" },
364         { 0x0, "Pkg%pc7" },
365         { 0x0, "PkgWatt" },
366         { 0x0, "CorWatt" },
367         { 0x0, "GFXWatt" },
368         { 0x0, "PkgCnt" },
369         { 0x0, "RAMWatt" },
370         { 0x0, "PKG_%" },
371         { 0x0, "RAM_%" },
372         { 0x0, "Pkg_J" },
373         { 0x0, "Cor_J" },
374         { 0x0, "GFX_J" },
375         { 0x0, "RAM_J" },
376         { 0x0, "Core" },
377         { 0x0, "CPU" },
378 };
379
/* number of entries in bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.
 *
 * bic_lookup() returns (1ULL << i) for the name stored at bic[i], so the
 * bit number of every BIC_* below must equal the index of the matching
 * name in bic[], and no two definitions may share a bit.
 */
#define BIC_Package	(1ULL << 0)
#define BIC_Avg_MHz	(1ULL << 1)
#define BIC_Bzy_MHz	(1ULL << 2)
#define BIC_TSC_MHz	(1ULL << 3)
#define BIC_IRQ		(1ULL << 4)
#define BIC_SMI		(1ULL << 5)
#define BIC_Busy	(1ULL << 6)
#define BIC_CPU_c1	(1ULL << 7)
#define BIC_CPU_c3	(1ULL << 8)
#define BIC_CPU_c6	(1ULL << 9)
#define BIC_CPU_c7	(1ULL << 10)
#define BIC_ThreadC	(1ULL << 11)
#define BIC_CoreTmp	(1ULL << 12)
#define BIC_CoreCnt	(1ULL << 13)
#define BIC_PkgTmp	(1ULL << 14)
#define BIC_GFX_rc6	(1ULL << 15)
#define BIC_GFXMHz	(1ULL << 16)
#define BIC_Pkgpc2	(1ULL << 17)
#define BIC_Pkgpc3	(1ULL << 18)
#define BIC_Pkgpc6	(1ULL << 19)
#define BIC_Pkgpc7	(1ULL << 20)
#define BIC_PkgWatt	(1ULL << 21)
#define BIC_CorWatt	(1ULL << 22)
#define BIC_GFXWatt	(1ULL << 23)
#define BIC_PkgCnt	(1ULL << 24)
#define BIC_RAMWatt	(1ULL << 25)
#define BIC_PKG__	(1ULL << 26)
#define BIC_RAM__	(1ULL << 27)
#define BIC_Pkg_J	(1ULL << 28)
#define BIC_Cor_J	(1ULL << 29)
#define BIC_GFX_J	(1ULL << 30)
#define BIC_RAM_J	(1ULL << 31)
#define BIC_Core	(1ULL << 32)
#define BIC_CPU		(1ULL << 33)

/* counters requested for display; every counter is enabled by default */
unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
/* counters marked as available through BIC_PRESENT() */
unsigned long long bic_present;

/* non-zero when the counter is both enabled and present */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
/* mark a counter as present */
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
421
422 /*
423  * bic_lookup
424  * for all the strings in comma separate name_list,
425  * set the approprate bit in return value.
426  */
427 unsigned long long bic_lookup(char *name_list)
428 {
429         int i;
430         unsigned long long retval = 0;
431
432         while (name_list) {
433                 char *comma;
434
435                 comma = strchr(name_list, ',');
436
437                 if (comma)
438                         *comma = '\0';
439
440                 for (i = 0; i < MAX_BIC; ++i) {
441                         if (!strcmp(name_list, bic[i].name)) {
442                                 retval |= (1ULL << i);
443                                 break;
444                         }
445                 }
446                 if (i == MAX_BIC) {
447                         fprintf(stderr, "Invalid counter name: %s\n", name_list);
448                         exit(-1);
449                 }
450
451                 name_list = comma;
452                 if (name_list)
453                         name_list++;
454
455         }
456         return retval;
457 }
458
459 void print_header(void)
460 {
461         struct msr_counter *mp;
462
463         if (DO_BIC(BIC_Package))
464                 outp += sprintf(outp, "\tPackage");
465         if (DO_BIC(BIC_Core))
466                 outp += sprintf(outp, "\tCore");
467         if (DO_BIC(BIC_CPU))
468                 outp += sprintf(outp, "\tCPU");
469         if (DO_BIC(BIC_Avg_MHz))
470                 outp += sprintf(outp, "\tAvg_MHz");
471         if (DO_BIC(BIC_Busy))
472                 outp += sprintf(outp, "\tBusy%%");
473         if (DO_BIC(BIC_Bzy_MHz))
474                 outp += sprintf(outp, "\tBzy_MHz");
475         if (DO_BIC(BIC_TSC_MHz))
476                 outp += sprintf(outp, "\tTSC_MHz");
477
478         if (!debug)
479                 goto done;
480
481         if (DO_BIC(BIC_IRQ))
482                 outp += sprintf(outp, "\tIRQ");
483         if (DO_BIC(BIC_SMI))
484                 outp += sprintf(outp, "\tSMI");
485
486         if (DO_BIC(BIC_CPU_c1))
487                 outp += sprintf(outp, "\tCPU%%c1");
488
489         for (mp = sys.tp; mp; mp = mp->next) {
490                 if (mp->format == FORMAT_RAW) {
491                         if (mp->width == 64)
492                                 outp += sprintf(outp, "\t%18.18s", mp->name);
493                         else
494                                 outp += sprintf(outp, "\t%10.10s", mp->name);
495                 } else {
496                         outp += sprintf(outp, "\t%-7.7s", mp->name);
497                 }
498         }
499
500         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
501                 outp += sprintf(outp, "\tCPU%%c3");
502         if (DO_BIC(BIC_CPU_c6))
503                 outp += sprintf(outp, "\tCPU%%c6");
504         if (DO_BIC(BIC_CPU_c7))
505                 outp += sprintf(outp, "\tCPU%%c7");
506
507
508         if (DO_BIC(BIC_CoreTmp))
509                 outp += sprintf(outp, "\tCoreTmp");
510
511         for (mp = sys.cp; mp; mp = mp->next) {
512                 if (mp->format == FORMAT_RAW) {
513                         if (mp->width == 64)
514                                 outp += sprintf(outp, "\t%18.18s", mp->name);
515                         else
516                                 outp += sprintf(outp, "\t%10.10s", mp->name);
517                 } else {
518                         outp += sprintf(outp, "\t%-7.7s", mp->name);
519                 }
520         }
521
522         if (DO_BIC(BIC_PkgTmp))
523                 outp += sprintf(outp, "\tPkgTmp");
524
525         if (DO_BIC(BIC_GFX_rc6))
526                 outp += sprintf(outp, "\tGFX%%rc6");
527
528         if (DO_BIC(BIC_GFXMHz))
529                 outp += sprintf(outp, "\tGFXMHz");
530
531         if (do_skl_residency) {
532                 outp += sprintf(outp, "\tTotl%%C0");
533                 outp += sprintf(outp, "\tAny%%C0");
534                 outp += sprintf(outp, "\tGFX%%C0");
535                 outp += sprintf(outp, "\tCPUGFX%%");
536         }
537
538         if (do_pc2)
539                 outp += sprintf(outp, "\tPkg%%pc2");
540         if (do_pc3)
541                 outp += sprintf(outp, "\tPkg%%pc3");
542         if (do_pc6)
543                 outp += sprintf(outp, "\tPkg%%pc6");
544         if (do_pc7)
545                 outp += sprintf(outp, "\tPkg%%pc7");
546         if (do_c8_c9_c10) {
547                 outp += sprintf(outp, "\tPkg%%pc8");
548                 outp += sprintf(outp, "\tPkg%%pc9");
549                 outp += sprintf(outp, "\tPk%%pc10");
550         }
551
552         if (do_rapl && !rapl_joules) {
553                 if (DO_BIC(BIC_PkgWatt))
554                         outp += sprintf(outp, "\tPkgWatt");
555                 if (DO_BIC(BIC_CorWatt))
556                         outp += sprintf(outp, "\tCorWatt");
557                 if (DO_BIC(BIC_GFXWatt))
558                         outp += sprintf(outp, "\tGFXWatt");
559                 if (DO_BIC(BIC_RAMWatt))
560                         outp += sprintf(outp, "\tRAMWatt");
561                 if (DO_BIC(BIC_PKG__))
562                         outp += sprintf(outp, "\tPKG_%%");
563                 if (DO_BIC(BIC_RAM__))
564                         outp += sprintf(outp, "\tRAM_%%");
565         } else if (do_rapl && rapl_joules) {
566                 if (DO_BIC(BIC_Pkg_J))
567                         outp += sprintf(outp, "\tPkg_J");
568                 if (DO_BIC(BIC_Cor_J))
569                         outp += sprintf(outp, "\tCor_J");
570                 if (DO_BIC(BIC_GFX_J))
571                         outp += sprintf(outp, "\tGFX_J");
572                 if (DO_BIC(BIC_RAM_J))
573                         outp += sprintf(outp, "\tRAM_J");
574                 if (DO_BIC(BIC_PKG__))
575                         outp += sprintf(outp, "\tPKG_%%");
576                 if (DO_BIC(BIC_RAM__))
577                         outp += sprintf(outp, "\tRAM_%%");
578         }
579         for (mp = sys.pp; mp; mp = mp->next) {
580                 if (mp->format == FORMAT_RAW) {
581                         if (mp->width == 64)
582                                 outp += sprintf(outp, "\t%18.18s", mp->name);
583                         else
584                                 outp += sprintf(outp, "\t%10.10s", mp->name);
585                 } else {
586                         outp += sprintf(outp, "\t%-7.7s", mp->name);
587                 }
588         }
589
590 done:
591         outp += sprintf(outp, "\n");
592 }
593
594 int dump_counters(struct thread_data *t, struct core_data *c,
595         struct pkg_data *p)
596 {
597         int i;
598         struct msr_counter *mp;
599
600         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
601
602         if (t) {
603                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
604                         t->cpu_id, t->flags);
605                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
606                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
607                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
608                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
609
610                 if (DO_BIC(BIC_IRQ))
611                         outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
612                 if (DO_BIC(BIC_SMI))
613                         outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
614
615                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
616                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
617                                 i, mp->msr_num, t->counter[i]);
618                 }
619         }
620
621         if (c) {
622                 outp += sprintf(outp, "core: %d\n", c->core_id);
623                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
624                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
625                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
626                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
627
628                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
629                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
630                                 i, mp->msr_num, c->counter[i]);
631                 }
632         }
633
634         if (p) {
635                 outp += sprintf(outp, "package: %d\n", p->package_id);
636
637                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
638                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
639                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
640                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
641
642                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
643                 if (do_pc3)
644                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
645                 if (do_pc6)
646                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
647                 if (do_pc7)
648                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
649                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
650                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
651                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
652                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
653                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
654                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
655                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
656                 outp += sprintf(outp, "Throttle PKG: %0X\n",
657                         p->rapl_pkg_perf_status);
658                 outp += sprintf(outp, "Throttle RAM: %0X\n",
659                         p->rapl_dram_perf_status);
660                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
661
662                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
663                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
664                                 i, mp->msr_num, p->counter[i]);
665                 }
666         }
667
668         outp += sprintf(outp, "\n");
669
670         return 0;
671 }
672
673 /*
674  * column formatting convention & formats
675  */
676 int format_counters(struct thread_data *t, struct core_data *c,
677         struct pkg_data *p)
678 {
679         double interval_float;
680         char *fmt8;
681         int i;
682         struct msr_counter *mp;
683
684          /* if showing only 1st thread in core and this isn't one, bail out */
685         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
686                 return 0;
687
688          /* if showing only 1st thread in pkg and this isn't one, bail out */
689         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
690                 return 0;
691
692         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
693
694         /* topo columns, print blanks on 1st (average) line */
695         if (t == &average.threads) {
696                 if (DO_BIC(BIC_Package))
697                         outp += sprintf(outp, "\t-");
698                 if (DO_BIC(BIC_Core))
699                         outp += sprintf(outp, "\t-");
700                 if (DO_BIC(BIC_CPU))
701                         outp += sprintf(outp, "\t-");
702         } else {
703                 if (DO_BIC(BIC_Package)) {
704                         if (p)
705                                 outp += sprintf(outp, "\t%d", p->package_id);
706                         else
707                                 outp += sprintf(outp, "\t-");
708                 }
709                 if (DO_BIC(BIC_Core)) {
710                         if (c)
711                                 outp += sprintf(outp, "\t%d", c->core_id);
712                         else
713                                 outp += sprintf(outp, "\t-");
714                 }
715                 if (DO_BIC(BIC_CPU))
716                         outp += sprintf(outp, "\t%d", t->cpu_id);
717         }
718
719         if (DO_BIC(BIC_Avg_MHz))
720                 outp += sprintf(outp, "\t%.0f",
721                         1.0 / units * t->aperf / interval_float);
722
723         if (DO_BIC(BIC_Busy))
724                 outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
725
726         if (DO_BIC(BIC_Bzy_MHz)) {
727                 if (has_base_hz)
728                         outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
729                 else
730                         outp += sprintf(outp, "\t%.0f",
731                                 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
732         }
733
734         if (DO_BIC(BIC_TSC_MHz))
735                 outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
736
737         if (!debug)
738                 goto done;
739
740         /* IRQ */
741         if (DO_BIC(BIC_IRQ))
742                 outp += sprintf(outp, "\t%d", t->irq_count);
743
744         /* SMI */
745         if (DO_BIC(BIC_SMI))
746                 outp += sprintf(outp, "\t%d", t->smi_count);
747
748         /* C1 */
749         if (DO_BIC(BIC_CPU_c1))
750                 outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
751
752         /* Added counters */
753         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
754                 if (mp->format == FORMAT_RAW) {
755                         if (mp->width == 32)
756                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
757                         else
758                                 outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
759                 } else if (mp->format == FORMAT_DELTA) {
760                         outp += sprintf(outp, "\t%lld", t->counter[i]);
761                 } else if (mp->format == FORMAT_PERCENT) {
762                         outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc);
763                 }
764         }
765
766         /* print per-core data only for 1st thread in core */
767         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
768                 goto done;
769
770         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
771                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
772         if (DO_BIC(BIC_CPU_c6))
773                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
774         if (DO_BIC(BIC_CPU_c7))
775                 outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
776
777         if (DO_BIC(BIC_CoreTmp))
778                 outp += sprintf(outp, "\t%d", c->core_temp_c);
779
780         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
781                 if (mp->format == FORMAT_RAW) {
782                         if (mp->width == 32)
783                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
784                         else
785                                 outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
786                 } else if (mp->format == FORMAT_DELTA) {
787                         outp += sprintf(outp, "\t%lld", c->counter[i]);
788                 } else if (mp->format == FORMAT_PERCENT) {
789                         outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc);
790                 }
791         }
792
793         /* print per-package data only for 1st core in package */
794         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
795                 goto done;
796
797         /* PkgTmp */
798         if (DO_BIC(BIC_PkgTmp))
799                 outp += sprintf(outp, "\t%d", p->pkg_temp_c);
800
801         /* GFXrc6 */
802         if (DO_BIC(BIC_GFX_rc6)) {
803                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
804                         outp += sprintf(outp, "\t**.**");
805                 } else {
806                         outp += sprintf(outp, "\t%.2f",
807                                 p->gfx_rc6_ms / 10.0 / interval_float);
808                 }
809         }
810
811         /* GFXMHz */
812         if (DO_BIC(BIC_GFXMHz))
813                 outp += sprintf(outp, "\t%d", p->gfx_mhz);
814
815         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
816         if (do_skl_residency) {
817                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
818                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
819                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
820                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
821         }
822
823         if (do_pc2)
824                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
825         if (do_pc3)
826                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
827         if (do_pc6)
828                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
829         if (do_pc7)
830                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
831         if (do_c8_c9_c10) {
832                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
833                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
834                 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
835         }
836
837         /*
838          * If measurement interval exceeds minimum RAPL Joule Counter range,
839          * indicate that results are suspect by printing "**" in fraction place.
840          */
841         if (interval_float < rapl_joule_counter_range)
842                 fmt8 = "\t%.2f";
843         else
844                 fmt8 = "%6.0f**";
845
846         if (DO_BIC(BIC_PkgWatt))
847                 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
848         if (DO_BIC(BIC_CorWatt))
849                 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
850         if (DO_BIC(BIC_GFXWatt))
851                 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
852         if (DO_BIC(BIC_RAMWatt))
853                 outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
854         if (DO_BIC(BIC_Pkg_J))
855                 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units);
856         if (DO_BIC(BIC_Cor_J))
857                 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units);
858         if (DO_BIC(BIC_GFX_J))
859                 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units);
860         if (DO_BIC(BIC_RAM_J))
861                 outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units);
862         if (DO_BIC(BIC_PKG__))
863                 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
864         if (DO_BIC(BIC_RAM__))
865                 outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
866
867         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
868                 if (mp->format == FORMAT_RAW) {
869                         if (mp->width == 32)
870                                 outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
871                         else
872                                 outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
873                 } else if (mp->format == FORMAT_DELTA) {
874                         outp += sprintf(outp, "\t%lld", p->counter[i]);
875                 } else if (mp->format == FORMAT_PERCENT) {
876                         outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc);
877                 }
878         }
879
880 done:
881         outp += sprintf(outp, "\n");
882
883         return 0;
884 }
885
886 void flush_output_stdout(void)
887 {
888         FILE *filep;
889
890         if (outf == stderr)
891                 filep = stdout;
892         else
893                 filep = outf;
894
895         fputs(output_buffer, filep);
896         fflush(filep);
897
898         outp = output_buffer;
899 }
900 void flush_output_stderr(void)
901 {
902         fputs(output_buffer, outf);
903         fflush(outf);
904         outp = output_buffer;
905 }
906 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
907 {
908         static int printed;
909
910         if (!printed || !summary_only)
911                 print_header();
912
913         if (topo.num_cpus > 1)
914                 format_counters(&average.threads, &average.cores,
915                         &average.packages);
916
917         printed = 1;
918
919         if (summary_only)
920                 return;
921
922         for_all_cpus(format_counters, t, c, p);
923 }
924
/*
 * DELTA_WRAP32(new, old): old = new - old, for a counter that wraps at 2^32.
 *
 * If new is below old the counter is taken to have wrapped once and 2^32 is
 * added back in.  Equal samples give a delta of 0.
 *
 * Wrapped in do { } while (0) so the macro is a single statement and can be
 * used safely as the body of an unbraced if/else.  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) >= (old))				\
			(old) = (new) - (old);			\
		else						\
			(old) = 0x100000000ULL + (new) - (old);	\
	} while (0)
931
932 int
933 delta_package(struct pkg_data *new, struct pkg_data *old)
934 {
935         int i;
936         struct msr_counter *mp;
937
938         if (do_skl_residency) {
939                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
940                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
941                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
942                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
943         }
944         old->pc2 = new->pc2 - old->pc2;
945         if (do_pc3)
946                 old->pc3 = new->pc3 - old->pc3;
947         if (do_pc6)
948                 old->pc6 = new->pc6 - old->pc6;
949         if (do_pc7)
950                 old->pc7 = new->pc7 - old->pc7;
951         old->pc8 = new->pc8 - old->pc8;
952         old->pc9 = new->pc9 - old->pc9;
953         old->pc10 = new->pc10 - old->pc10;
954         old->pkg_temp_c = new->pkg_temp_c;
955
956         /* flag an error when rc6 counter resets/wraps */
957         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
958                 old->gfx_rc6_ms = -1;
959         else
960                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
961
962         old->gfx_mhz = new->gfx_mhz;
963
964         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
965         DELTA_WRAP32(new->energy_cores, old->energy_cores);
966         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
967         DELTA_WRAP32(new->energy_dram, old->energy_dram);
968         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
969         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
970
971         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
972                 if (mp->format == FORMAT_RAW)
973                         old->counter[i] = new->counter[i];
974                 else
975                         old->counter[i] = new->counter[i] - old->counter[i];
976         }
977
978         return 0;
979 }
980
981 void
982 delta_core(struct core_data *new, struct core_data *old)
983 {
984         int i;
985         struct msr_counter *mp;
986
987         old->c3 = new->c3 - old->c3;
988         old->c6 = new->c6 - old->c6;
989         old->c7 = new->c7 - old->c7;
990         old->core_temp_c = new->core_temp_c;
991
992         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
993                 if (mp->format == FORMAT_RAW)
994                         old->counter[i] = new->counter[i];
995                 else
996                         old->counter[i] = new->counter[i] - old->counter[i];
997         }
998 }
999
1000 /*
1001  * old = new - old
1002  */
1003 int
1004 delta_thread(struct thread_data *new, struct thread_data *old,
1005         struct core_data *core_delta)
1006 {
1007         int i;
1008         struct msr_counter *mp;
1009
1010         old->tsc = new->tsc - old->tsc;
1011
1012         /* check for TSC < 1 Mcycles over interval */
1013         if (old->tsc < (1000 * 1000))
1014                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1015                      "You can disable all c-states by booting with \"idle=poll\"\n"
1016                      "or just the deep ones with \"processor.max_cstate=1\"");
1017
1018         old->c1 = new->c1 - old->c1;
1019
1020         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1021                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1022                         old->aperf = new->aperf - old->aperf;
1023                         old->mperf = new->mperf - old->mperf;
1024                 } else {
1025                         return -1;
1026                 }
1027         }
1028
1029
1030         if (use_c1_residency_msr) {
1031                 /*
1032                  * Some models have a dedicated C1 residency MSR,
1033                  * which should be more accurate than the derivation below.
1034                  */
1035         } else {
1036                 /*
1037                  * As counter collection is not atomic,
1038                  * it is possible for mperf's non-halted cycles + idle states
1039                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1040                  */
1041                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
1042                         old->c1 = 0;
1043                 else {
1044                         /* normal case, derive c1 */
1045                         old->c1 = old->tsc - old->mperf - core_delta->c3
1046                                 - core_delta->c6 - core_delta->c7;
1047                 }
1048         }
1049
1050         if (old->mperf == 0) {
1051                 if (debug > 1)
1052                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1053                 old->mperf = 1; /* divide by 0 protection */
1054         }
1055
1056         if (DO_BIC(BIC_IRQ))
1057                 old->irq_count = new->irq_count - old->irq_count;
1058
1059         if (DO_BIC(BIC_SMI))
1060                 old->smi_count = new->smi_count - old->smi_count;
1061
1062         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1063                 if (mp->format == FORMAT_RAW)
1064                         old->counter[i] = new->counter[i];
1065                 else
1066                         old->counter[i] = new->counter[i] - old->counter[i];
1067         }
1068         return 0;
1069 }
1070
1071 int delta_cpu(struct thread_data *t, struct core_data *c,
1072         struct pkg_data *p, struct thread_data *t2,
1073         struct core_data *c2, struct pkg_data *p2)
1074 {
1075         int retval = 0;
1076
1077         /* calculate core delta only for 1st thread in core */
1078         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1079                 delta_core(c, c2);
1080
1081         /* always calculate thread delta */
1082         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1083         if (retval)
1084                 return retval;
1085
1086         /* calculate package delta only for 1st core in package */
1087         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1088                 retval = delta_package(p, p2);
1089
1090         return retval;
1091 }
1092
1093 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1094 {
1095         int i;
1096         struct msr_counter  *mp;
1097
1098         t->tsc = 0;
1099         t->aperf = 0;
1100         t->mperf = 0;
1101         t->c1 = 0;
1102
1103         t->irq_count = 0;
1104         t->smi_count = 0;
1105
1106         /* tells format_counters to dump all fields from this set */
1107         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1108
1109         c->c3 = 0;
1110         c->c6 = 0;
1111         c->c7 = 0;
1112         c->core_temp_c = 0;
1113
1114         p->pkg_wtd_core_c0 = 0;
1115         p->pkg_any_core_c0 = 0;
1116         p->pkg_any_gfxe_c0 = 0;
1117         p->pkg_both_core_gfxe_c0 = 0;
1118
1119         p->pc2 = 0;
1120         if (do_pc3)
1121                 p->pc3 = 0;
1122         if (do_pc6)
1123                 p->pc6 = 0;
1124         if (do_pc7)
1125                 p->pc7 = 0;
1126         p->pc8 = 0;
1127         p->pc9 = 0;
1128         p->pc10 = 0;
1129
1130         p->energy_pkg = 0;
1131         p->energy_dram = 0;
1132         p->energy_cores = 0;
1133         p->energy_gfx = 0;
1134         p->rapl_pkg_perf_status = 0;
1135         p->rapl_dram_perf_status = 0;
1136         p->pkg_temp_c = 0;
1137
1138         p->gfx_rc6_ms = 0;
1139         p->gfx_mhz = 0;
1140         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1141                 t->counter[i] = 0;
1142
1143         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1144                 c->counter[i] = 0;
1145
1146         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1147                 p->counter[i] = 0;
1148 }
1149 int sum_counters(struct thread_data *t, struct core_data *c,
1150         struct pkg_data *p)
1151 {
1152         int i;
1153         struct msr_counter *mp;
1154
1155         average.threads.tsc += t->tsc;
1156         average.threads.aperf += t->aperf;
1157         average.threads.mperf += t->mperf;
1158         average.threads.c1 += t->c1;
1159
1160         average.threads.irq_count += t->irq_count;
1161         average.threads.smi_count += t->smi_count;
1162
1163         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1164                 if (mp->format == FORMAT_RAW)
1165                         continue;
1166                 average.threads.counter[i] += t->counter[i];
1167         }
1168
1169         /* sum per-core values only for 1st thread in core */
1170         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1171                 return 0;
1172
1173         average.cores.c3 += c->c3;
1174         average.cores.c6 += c->c6;
1175         average.cores.c7 += c->c7;
1176
1177         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1178
1179         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1180                 if (mp->format == FORMAT_RAW)
1181                         continue;
1182                 average.cores.counter[i] += c->counter[i];
1183         }
1184
1185         /* sum per-pkg values only for 1st core in pkg */
1186         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1187                 return 0;
1188
1189         if (do_skl_residency) {
1190                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1191                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1192                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1193                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1194         }
1195
1196         average.packages.pc2 += p->pc2;
1197         if (do_pc3)
1198                 average.packages.pc3 += p->pc3;
1199         if (do_pc6)
1200                 average.packages.pc6 += p->pc6;
1201         if (do_pc7)
1202                 average.packages.pc7 += p->pc7;
1203         average.packages.pc8 += p->pc8;
1204         average.packages.pc9 += p->pc9;
1205         average.packages.pc10 += p->pc10;
1206
1207         average.packages.energy_pkg += p->energy_pkg;
1208         average.packages.energy_dram += p->energy_dram;
1209         average.packages.energy_cores += p->energy_cores;
1210         average.packages.energy_gfx += p->energy_gfx;
1211
1212         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1213         average.packages.gfx_mhz = p->gfx_mhz;
1214
1215         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1216
1217         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1218         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1219
1220         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1221                 if (mp->format == FORMAT_RAW)
1222                         continue;
1223                 average.packages.counter[i] += p->counter[i];
1224         }
1225         return 0;
1226 }
1227 /*
1228  * sum the counters for all cpus in the system
1229  * compute the weighted average
1230  */
1231 void compute_average(struct thread_data *t, struct core_data *c,
1232         struct pkg_data *p)
1233 {
1234         int i;
1235         struct msr_counter *mp;
1236
1237         clear_counters(&average.threads, &average.cores, &average.packages);
1238
1239         for_all_cpus(sum_counters, t, c, p);
1240
1241         average.threads.tsc /= topo.num_cpus;
1242         average.threads.aperf /= topo.num_cpus;
1243         average.threads.mperf /= topo.num_cpus;
1244         average.threads.c1 /= topo.num_cpus;
1245
1246         average.cores.c3 /= topo.num_cores;
1247         average.cores.c6 /= topo.num_cores;
1248         average.cores.c7 /= topo.num_cores;
1249
1250         if (do_skl_residency) {
1251                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1252                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1253                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1254                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1255         }
1256
1257         average.packages.pc2 /= topo.num_packages;
1258         if (do_pc3)
1259                 average.packages.pc3 /= topo.num_packages;
1260         if (do_pc6)
1261                 average.packages.pc6 /= topo.num_packages;
1262         if (do_pc7)
1263                 average.packages.pc7 /= topo.num_packages;
1264
1265         average.packages.pc8 /= topo.num_packages;
1266         average.packages.pc9 /= topo.num_packages;
1267         average.packages.pc10 /= topo.num_packages;
1268
1269         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1270                 if (mp->format == FORMAT_RAW)
1271                         continue;
1272                 average.threads.counter[i] /= topo.num_cpus;
1273         }
1274         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1275                 if (mp->format == FORMAT_RAW)
1276                         continue;
1277                 average.cores.counter[i] /= topo.num_cores;
1278         }
1279         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1280                 if (mp->format == FORMAT_RAW)
1281                         continue;
1282                 average.packages.counter[i] /= topo.num_packages;
1283         }
1284 }
1285
1286 static unsigned long long rdtsc(void)
1287 {
1288         unsigned int low, high;
1289
1290         asm volatile("rdtsc" : "=a" (low), "=d" (high));
1291
1292         return low | ((unsigned long long)high) << 32;
1293 }
1294
1295 /*
1296  * get_counters(...)
1297  * migrate to cpu
1298  * acquire and record local counters for that cpu
1299  */
1300 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1301 {
1302         int cpu = t->cpu_id;
1303         unsigned long long msr;
1304         int aperf_mperf_retry_count = 0;
1305         struct msr_counter *mp;
1306         int i;
1307
1308         if (cpu_migrate(cpu)) {
1309                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1310                 return -1;
1311         }
1312
1313 retry:
1314         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1315
1316         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1317                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1318
1319                 /*
1320                  * The TSC, APERF and MPERF must be read together for
1321                  * APERF/MPERF and MPERF/TSC to give accurate results.
1322                  *
1323                  * Unfortunately, APERF and MPERF are read by
1324                  * individual system call, so delays may occur
1325                  * between them.  If the time to read them
1326                  * varies by a large amount, we re-read them.
1327                  */
1328
1329                 /*
1330                  * This initial dummy APERF read has been seen to
1331                  * reduce jitter in the subsequent reads.
1332                  */
1333
1334                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1335                         return -3;
1336
1337                 t->tsc = rdtsc();       /* re-read close to APERF */
1338
1339                 tsc_before = t->tsc;
1340
1341                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1342                         return -3;
1343
1344                 tsc_between = rdtsc();
1345
1346                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1347                         return -4;
1348
1349                 tsc_after = rdtsc();
1350
1351                 aperf_time = tsc_between - tsc_before;
1352                 mperf_time = tsc_after - tsc_between;
1353
1354                 /*
1355                  * If the system call latency to read APERF and MPERF
1356                  * differ by more than 2x, then try again.
1357                  */
1358                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1359                         aperf_mperf_retry_count++;
1360                         if (aperf_mperf_retry_count < 5)
1361                                 goto retry;
1362                         else
1363                                 warnx("cpu%d jitter %lld %lld",
1364                                         cpu, aperf_time, mperf_time);
1365                 }
1366                 aperf_mperf_retry_count = 0;
1367
1368                 t->aperf = t->aperf * aperf_mperf_multiplier;
1369                 t->mperf = t->mperf * aperf_mperf_multiplier;
1370         }
1371
1372         if (DO_BIC(BIC_IRQ))
1373                 t->irq_count = irqs_per_cpu[cpu];
1374         if (DO_BIC(BIC_SMI)) {
1375                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1376                         return -5;
1377                 t->smi_count = msr & 0xFFFFFFFF;
1378         }
1379
1380         if (use_c1_residency_msr) {
1381                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1382                         return -6;
1383         }
1384
1385         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1386                 if (get_msr(cpu, mp->msr_num, &t->counter[i]))
1387                         return -10;
1388         }
1389
1390
1391         /* collect core counters only for 1st thread in core */
1392         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1393                 return 0;
1394
1395         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
1396                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1397                         return -6;
1398         }
1399
1400         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1401                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1402                         return -7;
1403         } else if (do_knl_cstates) {
1404                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1405                         return -7;
1406         }
1407
1408         if (DO_BIC(BIC_CPU_c7))
1409                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1410                         return -8;
1411
1412         if (DO_BIC(BIC_CoreTmp)) {
1413                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1414                         return -9;
1415                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1416         }
1417
1418         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1419                 if (get_msr(cpu, mp->msr_num, &c->counter[i]))
1420                         return -10;
1421         }
1422
1423         /* collect package counters only for 1st core in package */
1424         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1425                 return 0;
1426
1427         if (do_skl_residency) {
1428                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1429                         return -10;
1430                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1431                         return -11;
1432                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1433                         return -12;
1434                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1435                         return -13;
1436         }
1437         if (do_pc3)
1438                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1439                         return -9;
1440         if (do_pc6)
1441                 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1442                         return -10;
1443         if (do_pc2)
1444                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1445                         return -11;
1446         if (do_pc7)
1447                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1448                         return -12;
1449         if (do_c8_c9_c10) {
1450                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1451                         return -13;
1452                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1453                         return -13;
1454                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1455                         return -13;
1456         }
1457         if (do_rapl & RAPL_PKG) {
1458                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1459                         return -13;
1460                 p->energy_pkg = msr & 0xFFFFFFFF;
1461         }
1462         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1463                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1464                         return -14;
1465                 p->energy_cores = msr & 0xFFFFFFFF;
1466         }
1467         if (do_rapl & RAPL_DRAM) {
1468                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1469                         return -15;
1470                 p->energy_dram = msr & 0xFFFFFFFF;
1471         }
1472         if (do_rapl & RAPL_GFX) {
1473                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1474                         return -16;
1475                 p->energy_gfx = msr & 0xFFFFFFFF;
1476         }
1477         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1478                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1479                         return -16;
1480                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1481         }
1482         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1483                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1484                         return -16;
1485                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1486         }
1487         if (DO_BIC(BIC_PkgTmp)) {
1488                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1489                         return -17;
1490                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1491         }
1492
1493         if (DO_BIC(BIC_GFX_rc6))
1494                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1495
1496         if (DO_BIC(BIC_GFXMHz))
1497                 p->gfx_mhz = gfx_cur_mhz;
1498
1499         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1500                 if (get_msr(cpu, mp->msr_num, &p->counter[i]))
1501                         return -10;
1502         }
1503
1504         return 0;
1505 }
1506
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* value, indexed by that value.
 * Designated initializers bind every string to its constant, so that
 * PCLUKN (0) reads "unknown" and PCLRSV (1) reads "reserved".
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};
1532
1533 int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1534 int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1535 int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1536 int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1537 int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1538 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1539 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1540 int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1541
1542
1543 static void
1544 calculate_tsc_tweak()
1545 {
1546         tsc_tweak = base_hz / tsc_hz;
1547 }
1548
1549 static void
1550 dump_nhm_platform_info(void)
1551 {
1552         unsigned long long msr;
1553         unsigned int ratio;
1554
1555         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1556
1557         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1558
1559         ratio = (msr >> 40) & 0xFF;
1560         fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1561                 ratio, bclk, ratio * bclk);
1562
1563         ratio = (msr >> 8) & 0xFF;
1564         fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
1565                 ratio, bclk, ratio * bclk);
1566
1567         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1568         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1569                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1570
1571         return;
1572 }
1573
1574 static void
1575 dump_hsw_turbo_ratio_limits(void)
1576 {
1577         unsigned long long msr;
1578         unsigned int ratio;
1579
1580         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1581
1582         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1583
1584         ratio = (msr >> 8) & 0xFF;
1585         if (ratio)
1586                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1587                         ratio, bclk, ratio * bclk);
1588
1589         ratio = (msr >> 0) & 0xFF;
1590         if (ratio)
1591                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1592                         ratio, bclk, ratio * bclk);
1593         return;
1594 }
1595
1596 static void
1597 dump_ivt_turbo_ratio_limits(void)
1598 {
1599         unsigned long long msr;
1600         unsigned int ratio;
1601
1602         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1603
1604         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1605
1606         ratio = (msr >> 56) & 0xFF;
1607         if (ratio)
1608                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1609                         ratio, bclk, ratio * bclk);
1610
1611         ratio = (msr >> 48) & 0xFF;
1612         if (ratio)
1613                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1614                         ratio, bclk, ratio * bclk);
1615
1616         ratio = (msr >> 40) & 0xFF;
1617         if (ratio)
1618                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1619                         ratio, bclk, ratio * bclk);
1620
1621         ratio = (msr >> 32) & 0xFF;
1622         if (ratio)
1623                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1624                         ratio, bclk, ratio * bclk);
1625
1626         ratio = (msr >> 24) & 0xFF;
1627         if (ratio)
1628                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1629                         ratio, bclk, ratio * bclk);
1630
1631         ratio = (msr >> 16) & 0xFF;
1632         if (ratio)
1633                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1634                         ratio, bclk, ratio * bclk);
1635
1636         ratio = (msr >> 8) & 0xFF;
1637         if (ratio)
1638                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1639                         ratio, bclk, ratio * bclk);
1640
1641         ratio = (msr >> 0) & 0xFF;
1642         if (ratio)
1643                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1644                         ratio, bclk, ratio * bclk);
1645         return;
1646 }
1647
1648 static void
1649 dump_nhm_turbo_ratio_limits(void)
1650 {
1651         unsigned long long msr;
1652         unsigned int ratio;
1653
1654         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1655
1656         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1657
1658         ratio = (msr >> 56) & 0xFF;
1659         if (ratio)
1660                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1661                         ratio, bclk, ratio * bclk);
1662
1663         ratio = (msr >> 48) & 0xFF;
1664         if (ratio)
1665                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1666                         ratio, bclk, ratio * bclk);
1667
1668         ratio = (msr >> 40) & 0xFF;
1669         if (ratio)
1670                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1671                         ratio, bclk, ratio * bclk);
1672
1673         ratio = (msr >> 32) & 0xFF;
1674         if (ratio)
1675                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1676                         ratio, bclk, ratio * bclk);
1677
1678         ratio = (msr >> 24) & 0xFF;
1679         if (ratio)
1680                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1681                         ratio, bclk, ratio * bclk);
1682
1683         ratio = (msr >> 16) & 0xFF;
1684         if (ratio)
1685                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1686                         ratio, bclk, ratio * bclk);
1687
1688         ratio = (msr >> 8) & 0xFF;
1689         if (ratio)
1690                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1691                         ratio, bclk, ratio * bclk);
1692
1693         ratio = (msr >> 0) & 0xFF;
1694         if (ratio)
1695                 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1696                         ratio, bclk, ratio * bclk);
1697         return;
1698 }
1699
1700 static void
1701 dump_knl_turbo_ratio_limits(void)
1702 {
1703         const unsigned int buckets_no = 7;
1704
1705         unsigned long long msr;
1706         int delta_cores, delta_ratio;
1707         int i, b_nr;
1708         unsigned int cores[buckets_no];
1709         unsigned int ratio[buckets_no];
1710
1711         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1712
1713         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1714                 base_cpu, msr);
1715
1716         /**
1717          * Turbo encoding in KNL is as follows:
1718          * [0] -- Reserved
1719          * [7:1] -- Base value of number of active cores of bucket 1.
1720          * [15:8] -- Base value of freq ratio of bucket 1.
1721          * [20:16] -- +ve delta of number of active cores of bucket 2.
1722          * i.e. active cores of bucket 2 =
1723          * active cores of bucket 1 + delta
1724          * [23:21] -- Negative delta of freq ratio of bucket 2.
1725          * i.e. freq ratio of bucket 2 =
1726          * freq ratio of bucket 1 - delta
1727          * [28:24]-- +ve delta of number of active cores of bucket 3.
1728          * [31:29]-- -ve delta of freq ratio of bucket 3.
1729          * [36:32]-- +ve delta of number of active cores of bucket 4.
1730          * [39:37]-- -ve delta of freq ratio of bucket 4.
1731          * [44:40]-- +ve delta of number of active cores of bucket 5.
1732          * [47:45]-- -ve delta of freq ratio of bucket 5.
1733          * [52:48]-- +ve delta of number of active cores of bucket 6.
1734          * [55:53]-- -ve delta of freq ratio of bucket 6.
1735          * [60:56]-- +ve delta of number of active cores of bucket 7.
1736          * [63:61]-- -ve delta of freq ratio of bucket 7.
1737          */
1738
1739         b_nr = 0;
1740         cores[b_nr] = (msr & 0xFF) >> 1;
1741         ratio[b_nr] = (msr >> 8) & 0xFF;
1742
1743         for (i = 16; i < 64; i += 8) {
1744                 delta_cores = (msr >> i) & 0x1F;
1745                 delta_ratio = (msr >> (i + 5)) & 0x7;
1746
1747                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
1748                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1749                 b_nr++;
1750         }
1751
1752         for (i = buckets_no - 1; i >= 0; i--)
1753                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1754                         fprintf(outf,
1755                                 "%d * %.0f = %.0f MHz max turbo %d active cores\n",
1756                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
1757 }
1758
1759 static void
1760 dump_nhm_cst_cfg(void)
1761 {
1762         unsigned long long msr;
1763
1764         get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1765
1766 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1767 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1768
1769         fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
1770
1771         fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1772                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1773                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1774                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1775                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1776                 (msr & (1 << 15)) ? "" : "UN",
1777                 (unsigned int)msr & 0xF,
1778                 pkg_cstate_limit_strings[pkg_cstate_limit]);
1779         return;
1780 }
1781
1782 static void
1783 dump_config_tdp(void)
1784 {
1785         unsigned long long msr;
1786
1787         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1788         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1789         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1790
1791         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1792         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1793         if (msr) {
1794                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1795                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1796                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1797                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1798         }
1799         fprintf(outf, ")\n");
1800
1801         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1802         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1803         if (msr) {
1804                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1805                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1806                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1807                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1808         }
1809         fprintf(outf, ")\n");
1810
1811         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1812         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1813         if ((msr) & 0x3)
1814                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1815         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1816         fprintf(outf, ")\n");
1817
1818         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1819         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1820         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1821         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1822         fprintf(outf, ")\n");
1823 }
1824
1825 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1826
1827 void print_irtl(void)
1828 {
1829         unsigned long long msr;
1830
1831         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
1832         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
1833         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1834                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1835
1836         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
1837         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
1838         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1839                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1840
1841         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
1842         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
1843         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1844                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1845
1846         if (!do_irtl_hsw)
1847                 return;
1848
1849         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
1850         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
1851         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1852                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1853
1854         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
1855         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
1856         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1857                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1858
1859         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
1860         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
1861         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1862                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1863
1864 }
1865 void free_fd_percpu(void)
1866 {
1867         int i;
1868
1869         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
1870                 if (fd_percpu[i] != 0)
1871                         close(fd_percpu[i]);
1872         }
1873
1874         free(fd_percpu);
1875 }
1876
1877 void free_all_buffers(void)
1878 {
1879         CPU_FREE(cpu_present_set);
1880         cpu_present_set = NULL;
1881         cpu_present_setsize = 0;
1882
1883         CPU_FREE(cpu_affinity_set);
1884         cpu_affinity_set = NULL;
1885         cpu_affinity_setsize = 0;
1886
1887         free(thread_even);
1888         free(core_even);
1889         free(package_even);
1890
1891         thread_even = NULL;
1892         core_even = NULL;
1893         package_even = NULL;
1894
1895         free(thread_odd);
1896         free(core_odd);
1897         free(package_odd);
1898
1899         thread_odd = NULL;
1900         core_odd = NULL;
1901         package_odd = NULL;
1902
1903         free(output_buffer);
1904         output_buffer = NULL;
1905         outp = NULL;
1906
1907         free_fd_percpu();
1908
1909         free(irq_column_2_cpu);
1910         free(irqs_per_cpu);
1911 }
1912
/*
 * fopen() wrapper that never returns NULL: if the file cannot be opened,
 * report "<path>: open failed" through err() and exit with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
1923
1924 /*
1925  * Parse a file containing a single int.
1926  */
1927 int parse_int_file(const char *fmt, ...)
1928 {
1929         va_list args;
1930         char path[PATH_MAX];
1931         FILE *filep;
1932         int value;
1933
1934         va_start(args, fmt);
1935         vsnprintf(path, sizeof(path), fmt, args);
1936         va_end(args);
1937         filep = fopen_or_die(path, "r");
1938         if (fscanf(filep, "%d", &value) != 1)
1939                 err(1, "%s: failed to parse number from file", path);
1940         fclose(filep);
1941         return value;
1942 }
1943
1944 /*
1945  * get_cpu_position_in_core(cpu)
1946  * return the position of the CPU among its HT siblings in the core
1947  * return -1 if the sibling is not in list
1948  */
1949 int get_cpu_position_in_core(int cpu)
1950 {
1951         char path[64];
1952         FILE *filep;
1953         int this_cpu;
1954         char character;
1955         int i;
1956
1957         sprintf(path,
1958                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
1959                 cpu);
1960         filep = fopen(path, "r");
1961         if (filep == NULL) {
1962                 perror(path);
1963                 exit(1);
1964         }
1965
1966         for (i = 0; i < topo.num_threads_per_core; i++) {
1967                 fscanf(filep, "%d", &this_cpu);
1968                 if (this_cpu == cpu) {
1969                         fclose(filep);
1970                         return i;
1971                 }
1972
1973                 /* Account for no separator after last thread*/
1974                 if (i != (topo.num_threads_per_core - 1))
1975                         fscanf(filep, "%c", &character);
1976         }
1977
1978         fclose(filep);
1979         return -1;
1980 }
1981
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if the leading number in the cpu's core_siblings_list
 * is the cpu itself, else return 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
1990
/*
 * return the number stored in the cpu's topology/physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
1995
/*
 * return the number stored in the cpu's topology/core_id file
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2000
/*
 * Count the CPUs named by a cpulist string: decimal numbers and
 * inclusive "first-last" ranges separated by ',' (eg "1-2" or "1,3,4,5").
 * Parsing stops at the first character that does not fit that format,
 * such as the trailing newline.
 */
static int count_cpulist_entries(const char *list)
{
	const char *p = list;
	int count = 0;

	for (;;) {
		char *end;
		long first, last;

		first = strtol(p, &end, 10);
		if (end == p)
			break;		/* no number here */
		p = end;
		last = first;

		if (*p == '-') {
			last = strtol(p + 1, &end, 10);
			if (end == p + 1)
				break;	/* dangling '-' */
			p = end;
		}

		if (last >= first)
			count += (int)(last - first) + 1;

		if (*p != ',')
			break;
		p++;
	}

	return count;
}

/*
 * return the number of CPUs listed in the cpu's
 * topology/thread_siblings_list file
 *
 * Always returns at least 1, even if the list is empty or unparseable.
 * Exits (via fopen_or_die) if the file cannot be opened.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	int siblings = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	/*
	 * Counting separator characters is not enough: a one-CPU list has
	 * none, and a range such as "0-3" has one but names four CPUs.
	 */
	if (fgets(str, sizeof(str), filep) != NULL)
		siblings = count_cpulist_entries(str);

	fclose(filep);

	return siblings > 0 ? siblings : 1;
}
2031
2032 /*
2033  * run func(thread, core, package) in topology order
2034  * skip non-present cpus
2035  */
2036
2037 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2038         struct pkg_data *, struct thread_data *, struct core_data *,
2039         struct pkg_data *), struct thread_data *thread_base,
2040         struct core_data *core_base, struct pkg_data *pkg_base,
2041         struct thread_data *thread_base2, struct core_data *core_base2,
2042         struct pkg_data *pkg_base2)
2043 {
2044         int retval, pkg_no, core_no, thread_no;
2045
2046         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2047                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
2048                         for (thread_no = 0; thread_no <
2049                                 topo.num_threads_per_core; ++thread_no) {
2050                                 struct thread_data *t, *t2;
2051                                 struct core_data *c, *c2;
2052                                 struct pkg_data *p, *p2;
2053
2054                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
2055
2056                                 if (cpu_is_not_present(t->cpu_id))
2057                                         continue;
2058
2059                                 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
2060
2061                                 c = GET_CORE(core_base, core_no, pkg_no);
2062                                 c2 = GET_CORE(core_base2, core_no, pkg_no);
2063
2064                                 p = GET_PKG(pkg_base, pkg_no);
2065                                 p2 = GET_PKG(pkg_base2, pkg_no);
2066
2067                                 retval = func(t, c, p, t2, c2, p2);
2068                                 if (retval)
2069                                         return retval;
2070                         }
2071                 }
2072         }
2073         return 0;
2074 }
2075
2076 /*
2077  * run func(cpu) on every cpu in /proc/stat
2078  * return max_cpu number
2079  */
2080 int for_all_proc_cpus(int (func)(int))
2081 {
2082         FILE *fp;
2083         int cpu_num;
2084         int retval;
2085
2086         fp = fopen_or_die(proc_stat, "r");
2087
2088         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2089         if (retval != 0)
2090                 err(1, "%s: failed to parse format", proc_stat);
2091
2092         while (1) {
2093                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2094                 if (retval != 1)
2095                         break;
2096
2097                 retval = func(cpu_num);
2098                 if (retval) {
2099                         fclose(fp);
2100                         return(retval);
2101                 }
2102         }
2103         fclose(fp);
2104         return 0;
2105 }
2106
2107 void re_initialize(void)
2108 {
2109         free_all_buffers();
2110         setup_all_buffers();
2111         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2112 }
2113
2114
2115 /*
2116  * count_cpus()
2117  * remember the last one seen, it will be the max
2118  */
2119 int count_cpus(int cpu)
2120 {
2121         if (topo.max_cpu_num < cpu)
2122                 topo.max_cpu_num = cpu;
2123
2124         topo.num_cpus += 1;
2125         return 0;
2126 }
2127 int mark_cpu_present(int cpu)
2128 {
2129         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2130         return 0;
2131 }
2132
2133 /*
2134  * snapshot_proc_interrupts()
2135  *
2136  * read and record summary of /proc/interrupts
2137  *
2138  * return 1 if config change requires a restart, else return 0
2139  */
2140 int snapshot_proc_interrupts(void)
2141 {
2142         static FILE *fp;
2143         int column, retval;
2144
2145         if (fp == NULL)
2146                 fp = fopen_or_die("/proc/interrupts", "r");
2147         else
2148                 rewind(fp);
2149
2150         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2151         for (column = 0; column < topo.num_cpus; ++column) {
2152                 int cpu_number;
2153
2154                 retval = fscanf(fp, " CPU%d", &cpu_number);
2155                 if (retval != 1)
2156                         break;
2157
2158                 if (cpu_number > topo.max_cpu_num) {
2159                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2160                         return 1;
2161                 }
2162
2163                 irq_column_2_cpu[column] = cpu_number;
2164                 irqs_per_cpu[cpu_number] = 0;
2165         }
2166
2167         /* read /proc/interrupt count lines and sum up irqs per cpu */
2168         while (1) {
2169                 int column;
2170                 char buf[64];
2171
2172                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2173                 if (retval != 1)
2174                         break;
2175
2176                 /* read the count per cpu */
2177                 for (column = 0; column < topo.num_cpus; ++column) {
2178
2179                         int cpu_number, irq_count;
2180
2181                         retval = fscanf(fp, " %d", &irq_count);
2182                         if (retval != 1)
2183                                 break;
2184
2185                         cpu_number = irq_column_2_cpu[column];
2186                         irqs_per_cpu[cpu_number] += irq_count;
2187
2188                 }
2189
2190                 while (getc(fp) != '\n')
2191                         ;       /* flush interrupt description */
2192
2193         }
2194         return 0;
2195 }
2196 /*
2197  * snapshot_gfx_rc6_ms()
2198  *
2199  * record snapshot of
2200  * /sys/class/drm/card0/power/rc6_residency_ms
2201  *
2202  * return 1 if config change requires a restart, else return 0
2203  */
2204 int snapshot_gfx_rc6_ms(void)
2205 {
2206         FILE *fp;
2207         int retval;
2208
2209         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2210
2211         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2212         if (retval != 1)
2213                 err(1, "GFX rc6");
2214
2215         fclose(fp);
2216
2217         return 0;
2218 }
2219 /*
2220  * snapshot_gfx_mhz()
2221  *
2222  * record snapshot of
2223  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2224  *
2225  * return 1 if config change requires a restart, else return 0
2226  */
2227 int snapshot_gfx_mhz(void)
2228 {
2229         static FILE *fp;
2230         int retval;
2231
2232         if (fp == NULL)
2233                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2234         else
2235                 rewind(fp);
2236
2237         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2238         if (retval != 1)
2239                 err(1, "GFX MHz");
2240
2241         return 0;
2242 }
2243
2244 /*
2245  * snapshot /proc and /sys files
2246  *
2247  * return 1 if configuration restart needed, else return 0
2248  */
2249 int snapshot_proc_sysfs_files(void)
2250 {
2251         if (snapshot_proc_interrupts())
2252                 return 1;
2253
2254         if (DO_BIC(BIC_GFX_rc6))
2255                 snapshot_gfx_rc6_ms();
2256
2257         if (DO_BIC(BIC_GFXMHz))
2258                 snapshot_gfx_mhz();
2259
2260         return 0;
2261 }
2262
2263 void turbostat_loop()
2264 {
2265         int retval;
2266         int restarted = 0;
2267
2268 restart:
2269         restarted++;
2270
2271         snapshot_proc_sysfs_files();
2272         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2273         if (retval < -1) {
2274                 exit(retval);
2275         } else if (retval == -1) {
2276                 if (restarted > 1) {
2277                         exit(retval);
2278                 }
2279                 re_initialize();
2280                 goto restart;
2281         }
2282         restarted = 0;
2283         gettimeofday(&tv_even, (struct timezone *)NULL);
2284
2285         while (1) {
2286                 if (for_all_proc_cpus(cpu_is_not_present)) {
2287                         re_initialize();
2288                         goto restart;
2289                 }
2290                 nanosleep(&interval_ts, NULL);
2291                 if (snapshot_proc_sysfs_files())
2292                         goto restart;
2293                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
2294                 if (retval < -1) {
2295                         exit(retval);
2296                 } else if (retval == -1) {
2297                         re_initialize();
2298                         goto restart;
2299                 }
2300                 gettimeofday(&tv_odd, (struct timezone *)NULL);
2301                 timersub(&tv_odd, &tv_even, &tv_delta);
2302                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2303                         re_initialize();
2304                         goto restart;
2305                 }
2306                 compute_average(EVEN_COUNTERS);
2307                 format_all_counters(EVEN_COUNTERS);
2308                 flush_output_stdout();
2309                 nanosleep(&interval_ts, NULL);
2310                 if (snapshot_proc_sysfs_files())
2311                         goto restart;
2312                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2313                 if (retval < -1) {
2314                         exit(retval);
2315                 } else if (retval == -1) {
2316                         re_initialize();
2317                         goto restart;
2318                 }
2319                 gettimeofday(&tv_even, (struct timezone *)NULL);
2320                 timersub(&tv_even, &tv_odd, &tv_delta);
2321                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2322                         re_initialize();
2323                         goto restart;
2324                 }
2325                 compute_average(ODD_COUNTERS);
2326                 format_all_counters(ODD_COUNTERS);
2327                 flush_output_stdout();
2328         }
2329 }
2330
2331 void check_dev_msr()
2332 {
2333         struct stat sb;
2334         char pathname[32];
2335
2336         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2337         if (stat(pathname, &sb))
2338                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2339                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2340 }
2341
2342 void check_permissions()
2343 {
2344         struct __user_cap_header_struct cap_header_data;
2345         cap_user_header_t cap_header = &cap_header_data;
2346         struct __user_cap_data_struct cap_data_data;
2347         cap_user_data_t cap_data = &cap_data_data;
2348         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2349         int do_exit = 0;
2350         char pathname[32];
2351
2352         /* check for CAP_SYS_RAWIO */
2353         cap_header->pid = getpid();
2354         cap_header->version = _LINUX_CAPABILITY_VERSION;
2355         if (capget(cap_header, cap_data) < 0)
2356                 err(-6, "capget(2) failed");
2357
2358         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2359                 do_exit++;
2360                 warnx("capget(CAP_SYS_RAWIO) failed,"
2361                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2362         }
2363
2364         /* test file permissions */
2365         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2366         if (euidaccess(pathname, R_OK)) {
2367                 do_exit++;
2368                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2369         }
2370
2371         /* if all else fails, thell them to be root */
2372         if (do_exit)
2373                 if (getuid() != 0)
2374                         warnx("... or simply run as root");
2375
2376         if (do_exit)
2377                 exit(-6);
2378 }
2379
2380 /*
2381  * NHM adds support for additional MSRs:
2382  *
2383  * MSR_SMI_COUNT                   0x00000034
2384  *
2385  * MSR_PLATFORM_INFO               0x000000ce
2386  * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
2387  *
2388  * MSR_MISC_PWR_MGMT               0x000001aa
2389  *
2390  * MSR_PKG_C3_RESIDENCY            0x000003f8
2391  * MSR_PKG_C6_RESIDENCY            0x000003f9
2392  * MSR_CORE_C3_RESIDENCY           0x000003fc
2393  * MSR_CORE_C6_RESIDENCY           0x000003fd
2394  *
2395  * Side effect:
2396  * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
2397  */
2398 int probe_nhm_msrs(unsigned int family, unsigned int model)
2399 {
2400         unsigned long long msr;
2401         unsigned int base_ratio;
2402         int *pkg_cstate_limits;
2403
2404         if (!genuine_intel)
2405                 return 0;
2406
2407         if (family != 6)
2408                 return 0;
2409
2410         bclk = discover_bclk(family, model);
2411
2412         switch (model) {
2413         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2414         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2415         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
2416         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
2417         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
2418         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2419         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2420                 pkg_cstate_limits = nhm_pkg_cstate_limits;
2421                 break;
2422         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
2423         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
2424         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2425         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2426                 pkg_cstate_limits = snb_pkg_cstate_limits;
2427                 break;
2428         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2429         case INTEL_FAM6_HASWELL_X:      /* HSX */
2430         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2431         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2432         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2433         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2434         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2435         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2436         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2437         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2438         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2439         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2440                 pkg_cstate_limits = hsw_pkg_cstate_limits;
2441                 break;
2442         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2443                 pkg_cstate_limits = skx_pkg_cstate_limits;
2444                 break;
2445         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
2446                 no_MSR_MISC_PWR_MGMT = 1;
2447         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
2448                 pkg_cstate_limits = slv_pkg_cstate_limits;
2449                 break;
2450         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
2451                 pkg_cstate_limits = amt_pkg_cstate_limits;
2452                 no_MSR_MISC_PWR_MGMT = 1;
2453                 break;
2454         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
2455         case INTEL_FAM6_XEON_PHI_KNM:
2456                 pkg_cstate_limits = phi_pkg_cstate_limits;
2457                 break;
2458         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
2459         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2460                 pkg_cstate_limits = bxt_pkg_cstate_limits;
2461                 break;
2462         default:
2463                 return 0;
2464         }
2465         get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
2466         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2467
2468         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2469         base_ratio = (msr >> 8) & 0xFF;
2470
2471         base_hz = base_ratio * bclk * 1000000;
2472         has_base_hz = 1;
2473         return 1;
2474 }
2475 int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
2476 {
2477         switch (model) {
2478         /* Nehalem compatible, but do not include turbo-ratio limit support */
2479         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2480         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2481         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
2482         case INTEL_FAM6_XEON_PHI_KNM:
2483                 return 0;
2484         default:
2485                 return 1;
2486         }
2487 }
2488 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2489 {
2490         if (!genuine_intel)
2491                 return 0;
2492
2493         if (family != 6)
2494                 return 0;
2495
2496         switch (model) {
2497         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2498         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2499                 return 1;
2500         default:
2501                 return 0;
2502         }
2503 }
2504 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2505 {
2506         if (!genuine_intel)
2507                 return 0;
2508
2509         if (family != 6)
2510                 return 0;
2511
2512         switch (model) {
2513         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2514                 return 1;
2515         default:
2516                 return 0;
2517         }
2518 }
2519
2520 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2521 {
2522         if (!genuine_intel)
2523                 return 0;
2524
2525         if (family != 6)
2526                 return 0;
2527
2528         switch (model) {
2529         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2530         case INTEL_FAM6_XEON_PHI_KNM:
2531                 return 1;
2532         default:
2533                 return 0;
2534         }
2535 }
2536 int has_config_tdp(unsigned int family, unsigned int model)
2537 {
2538         if (!genuine_intel)
2539                 return 0;
2540
2541         if (family != 6)
2542                 return 0;
2543
2544         switch (model) {
2545         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2546         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2547         case INTEL_FAM6_HASWELL_X:      /* HSX */
2548         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2549         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2550         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2551         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2552         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2553         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2554         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2555         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2556         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2557         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2558         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2559
2560         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
2561         case INTEL_FAM6_XEON_PHI_KNM:
2562                 return 1;
2563         default:
2564                 return 0;
2565         }
2566 }
2567
2568 static void
2569 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2570 {
2571         if (!do_nhm_platform_info)
2572                 return;
2573
2574         dump_nhm_platform_info();
2575
2576         if (has_hsw_turbo_ratio_limit(family, model))
2577                 dump_hsw_turbo_ratio_limits();
2578
2579         if (has_ivt_turbo_ratio_limit(family, model))
2580                 dump_ivt_turbo_ratio_limits();
2581
2582         if (has_nhm_turbo_ratio_limit(family, model))
2583                 dump_nhm_turbo_ratio_limits();
2584
2585         if (has_knl_turbo_ratio_limit(family, model))
2586                 dump_knl_turbo_ratio_limits();
2587
2588         if (has_config_tdp(family, model))
2589                 dump_config_tdp();
2590
2591         dump_nhm_cst_cfg();
2592 }
2593
2594
2595 /*
2596  * print_epb()
2597  * Decode the ENERGY_PERF_BIAS MSR
2598  */
2599 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2600 {
2601         unsigned long long msr;
2602         char *epb_string;
2603         int cpu;
2604
2605         if (!has_epb)
2606                 return 0;
2607
2608         cpu = t->cpu_id;
2609
2610         /* EPB is per-package */
2611         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2612                 return 0;
2613
2614         if (cpu_migrate(cpu)) {
2615                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2616                 return -1;
2617         }
2618
2619         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
2620                 return 0;
2621
2622         switch (msr & 0xF) {
2623         case ENERGY_PERF_BIAS_PERFORMANCE:
2624                 epb_string = "performance";
2625                 break;
2626         case ENERGY_PERF_BIAS_NORMAL:
2627                 epb_string = "balanced";
2628                 break;
2629         case ENERGY_PERF_BIAS_POWERSAVE:
2630                 epb_string = "powersave";
2631                 break;
2632         default:
2633                 epb_string = "custom";
2634                 break;
2635         }
2636         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2637
2638         return 0;
2639 }
2640 /*
2641  * print_hwp()
2642  * Decode the MSR_HWP_CAPABILITIES
2643  */
2644 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2645 {
2646         unsigned long long msr;
2647         int cpu;
2648
2649         if (!has_hwp)
2650                 return 0;
2651
2652         cpu = t->cpu_id;
2653
2654         /* MSR_HWP_CAPABILITIES is per-package */
2655         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2656                 return 0;
2657
2658         if (cpu_migrate(cpu)) {
2659                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2660                 return -1;
2661         }
2662
2663         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2664                 return 0;
2665
2666         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2667                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2668
2669         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
2670         if ((msr & (1 << 0)) == 0)
2671                 return 0;
2672
2673         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2674                 return 0;
2675
2676         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2677                         "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2678                         cpu, msr,
2679                         (unsigned int)HWP_HIGHEST_PERF(msr),
2680                         (unsigned int)HWP_GUARANTEED_PERF(msr),
2681                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2682                         (unsigned int)HWP_LOWEST_PERF(msr));
2683
2684         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2685                 return 0;
2686
2687         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2688                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2689                         cpu, msr,
2690                         (unsigned int)(((msr) >> 0) & 0xff),
2691                         (unsigned int)(((msr) >> 8) & 0xff),
2692                         (unsigned int)(((msr) >> 16) & 0xff),
2693                         (unsigned int)(((msr) >> 24) & 0xff),
2694                         (unsigned int)(((msr) >> 32) & 0xff3),
2695                         (unsigned int)(((msr) >> 42) & 0x1));
2696
2697         if (has_hwp_pkg) {
2698                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2699                         return 0;
2700
2701                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2702                         "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2703                         cpu, msr,
2704                         (unsigned int)(((msr) >> 0) & 0xff),
2705                         (unsigned int)(((msr) >> 8) & 0xff),
2706                         (unsigned int)(((msr) >> 16) & 0xff),
2707                         (unsigned int)(((msr) >> 24) & 0xff),
2708                         (unsigned int)(((msr) >> 32) & 0xff3));
2709         }
2710         if (has_hwp_notify) {
2711                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2712                         return 0;
2713
2714                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2715                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2716                         cpu, msr,
2717                         ((msr) & 0x1) ? "EN" : "Dis",
2718                         ((msr) & 0x2) ? "EN" : "Dis");
2719         }
2720         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2721                 return 0;
2722
2723         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2724                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2725                         cpu, msr,
2726                         ((msr) & 0x1) ? "" : "No-",
2727                         ((msr) & 0x2) ? "" : "No-");
2728
2729         return 0;
2730 }
2731
2732 /*
2733  * print_perf_limit()
2734  */
2735 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2736 {
2737         unsigned long long msr;
2738         int cpu;
2739
2740         cpu = t->cpu_id;
2741
2742         /* per-package */
2743         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2744                 return 0;
2745
2746         if (cpu_migrate(cpu)) {
2747                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2748                 return -1;
2749         }
2750
2751         if (do_core_perf_limit_reasons) {
2752                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2753                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2754                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2755                         (msr & 1 << 15) ? "bit15, " : "",
2756                         (msr & 1 << 14) ? "bit14, " : "",
2757                         (msr & 1 << 13) ? "Transitions, " : "",
2758                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
2759                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
2760                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2761                         (msr & 1 << 9) ? "CorePwr, " : "",
2762                         (msr & 1 << 8) ? "Amps, " : "",
2763                         (msr & 1 << 6) ? "VR-Therm, " : "",
2764                         (msr & 1 << 5) ? "Auto-HWP, " : "",
2765                         (msr & 1 << 4) ? "Graphics, " : "",
2766                         (msr & 1 << 2) ? "bit2, " : "",
2767                         (msr & 1 << 1) ? "ThermStatus, " : "",
2768                         (msr & 1 << 0) ? "PROCHOT, " : "");
2769                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2770                         (msr & 1 << 31) ? "bit31, " : "",
2771                         (msr & 1 << 30) ? "bit30, " : "",
2772                         (msr & 1 << 29) ? "Transitions, " : "",
2773                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
2774                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
2775                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2776                         (msr & 1 << 25) ? "CorePwr, " : "",
2777                         (msr & 1 << 24) ? "Amps, " : "",
2778                         (msr & 1 << 22) ? "VR-Therm, " : "",
2779                         (msr & 1 << 21) ? "Auto-HWP, " : "",
2780                         (msr & 1 << 20) ? "Graphics, " : "",
2781                         (msr & 1 << 18) ? "bit18, " : "",
2782                         (msr & 1 << 17) ? "ThermStatus, " : "",
2783                         (msr & 1 << 16) ? "PROCHOT, " : "");
2784
2785         }
2786         if (do_gfx_perf_limit_reasons) {
2787                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2788                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2789                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2790                         (msr & 1 << 0) ? "PROCHOT, " : "",
2791                         (msr & 1 << 1) ? "ThermStatus, " : "",
2792                         (msr & 1 << 4) ? "Graphics, " : "",
2793                         (msr & 1 << 6) ? "VR-Therm, " : "",
2794                         (msr & 1 << 8) ? "Amps, " : "",
2795                         (msr & 1 << 9) ? "GFXPwr, " : "",
2796                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2797                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
2798                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2799                         (msr & 1 << 16) ? "PROCHOT, " : "",
2800                         (msr & 1 << 17) ? "ThermStatus, " : "",
2801                         (msr & 1 << 20) ? "Graphics, " : "",
2802                         (msr & 1 << 22) ? "VR-Therm, " : "",
2803                         (msr & 1 << 24) ? "Amps, " : "",
2804                         (msr & 1 << 25) ? "GFXPwr, " : "",
2805                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2806                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
2807         }
2808         if (do_ring_perf_limit_reasons) {
2809                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2810                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2811                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
2812                         (msr & 1 << 0) ? "PROCHOT, " : "",
2813                         (msr & 1 << 1) ? "ThermStatus, " : "",
2814                         (msr & 1 << 6) ? "VR-Therm, " : "",
2815                         (msr & 1 << 8) ? "Amps, " : "",
2816                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
2817                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
2818                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
2819                         (msr & 1 << 16) ? "PROCHOT, " : "",
2820                         (msr & 1 << 17) ? "ThermStatus, " : "",
2821                         (msr & 1 << 22) ? "VR-Therm, " : "",
2822                         (msr & 1 << 24) ? "Amps, " : "",
2823                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
2824                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
2825         }
2826         return 0;
2827 }
2828
2829 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15-bit mask applied to RAPL power fields before scaling by rapl_power_units */
2830 #define RAPL_TIME_GRANULARITY   0x3F /* 6-bit mask applied to RAPL time fields before scaling by rapl_time_units */
2831
2832 double get_tdp(unsigned int model)
2833 {
2834         unsigned long long msr;
2835
2836         if (do_rapl & RAPL_PKG_POWER_INFO)
2837                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
2838                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
2839
2840         switch (model) {
2841         case INTEL_FAM6_ATOM_SILVERMONT1:
2842         case INTEL_FAM6_ATOM_SILVERMONT2:
2843                 return 30.0;
2844         default:
2845                 return 135.0;
2846         }
2847 }
2848
2849 /*
2850  * rapl_dram_energy_units_probe()
2851  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
2852  */
2853 static double
2854 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
2855 {
2856         /* only called for genuine_intel, family 6 */
2857
2858         switch (model) {
2859         case INTEL_FAM6_HASWELL_X:      /* HSX */
2860         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2861         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2862         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
2863         case INTEL_FAM6_XEON_PHI_KNM:
2864                 return (rapl_dram_energy_units = 15.3 / 1000000);
2865         default:
2866                 return (rapl_energy_units);
2867         }
2868 }
2869
2870
2871 /*
2872  * rapl_probe()
2873  *
2874  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
2875  */
2876 void rapl_probe(unsigned int family, unsigned int model)
2877 {
2878         unsigned long long msr;
2879         unsigned int time_unit;
2880         double tdp;
2881
2882         if (!genuine_intel)
2883                 return;
2884
2885         if (family != 6)
2886                 return;
2887
2888         switch (model) {
2889         case INTEL_FAM6_SANDYBRIDGE:
2890         case INTEL_FAM6_IVYBRIDGE:
2891         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2892         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2893         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2894         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2895         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2896                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
2897                 if (rapl_joules) {
2898                         BIC_PRESENT(BIC_Pkg_J);
2899                         BIC_PRESENT(BIC_Cor_J);
2900                         BIC_PRESENT(BIC_GFX_J);
2901                 } else {
2902                         BIC_PRESENT(BIC_PkgWatt);
2903                         BIC_PRESENT(BIC_CorWatt);
2904                         BIC_PRESENT(BIC_GFXWatt);
2905                 }
2906                 break;
2907         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
2908                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
2909                 if (rapl_joules)
2910                         BIC_PRESENT(BIC_Pkg_J);
2911                 else
2912                         BIC_PRESENT(BIC_PkgWatt);
2913                 break;
2914         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2915         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2916         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2917         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2918                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2919                 BIC_PRESENT(BIC_PKG__);
2920                 BIC_PRESENT(BIC_RAM__);
2921                 if (rapl_joules) {
2922                         BIC_PRESENT(BIC_Pkg_J);
2923                         BIC_PRESENT(BIC_Cor_J);
2924                         BIC_PRESENT(BIC_RAM_J);
2925                 } else {
2926                         BIC_PRESENT(BIC_PkgWatt);
2927                         BIC_PRESENT(BIC_CorWatt);
2928                         BIC_PRESENT(BIC_RAMWatt);
2929                 }
2930                 break;
2931         case INTEL_FAM6_HASWELL_X:      /* HSX */
2932         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2933         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2934         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2935         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
2936         case INTEL_FAM6_XEON_PHI_KNM:
2937                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2938                 BIC_PRESENT(BIC_PKG__);
2939                 BIC_PRESENT(BIC_RAM__);
2940                 if (rapl_joules) {
2941                         BIC_PRESENT(BIC_Pkg_J);
2942                         BIC_PRESENT(BIC_RAM_J);
2943                 } else {
2944                         BIC_PRESENT(BIC_PkgWatt);
2945                         BIC_PRESENT(BIC_RAMWatt);
2946                 }
2947                 break;
2948         case INTEL_FAM6_SANDYBRIDGE_X:
2949         case INTEL_FAM6_IVYBRIDGE_X:
2950                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
2951                 BIC_PRESENT(BIC_PKG__);
2952                 BIC_PRESENT(BIC_RAM__);
2953                 if (rapl_joules) {
2954                         BIC_PRESENT(BIC_Pkg_J);
2955                         BIC_PRESENT(BIC_Cor_J);
2956                         BIC_PRESENT(BIC_RAM_J);
2957                 } else {
2958                         BIC_PRESENT(BIC_PkgWatt);
2959                         BIC_PRESENT(BIC_CorWatt);
2960                         BIC_PRESENT(BIC_RAMWatt);
2961                 }
2962                 break;
2963         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
2964         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
2965                 do_rapl = RAPL_PKG | RAPL_CORES;
2966                 if (rapl_joules) {
2967                         BIC_PRESENT(BIC_Pkg_J);
2968                         BIC_PRESENT(BIC_Cor_J);
2969                 } else {
2970                         BIC_PRESENT(BIC_PkgWatt);
2971                         BIC_PRESENT(BIC_CorWatt);
2972                 }
2973                 break;
2974         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2975                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
2976                 BIC_PRESENT(BIC_PKG__);
2977                 BIC_PRESENT(BIC_RAM__);
2978                 if (rapl_joules) {
2979                         BIC_PRESENT(BIC_Pkg_J);
2980                         BIC_PRESENT(BIC_Cor_J);
2981                         BIC_PRESENT(BIC_RAM_J);
2982                 } else {
2983                         BIC_PRESENT(BIC_PkgWatt);
2984                         BIC_PRESENT(BIC_CorWatt);
2985                         BIC_PRESENT(BIC_RAMWatt);
2986                 }
2987                 break;
2988         default:
2989                 return;
2990         }
2991
2992         /* units on package 0, verify later other packages match */
2993         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
2994                 return;
2995
2996         rapl_power_units = 1.0 / (1 << (msr & 0xF));
2997         if (model == INTEL_FAM6_ATOM_SILVERMONT1)
2998                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
2999         else
3000                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3001
3002         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3003
3004         time_unit = msr >> 16 & 0xF;
3005         if (time_unit == 0)
3006                 time_unit = 0xA;
3007
3008         rapl_time_units = 1.0 / (1 << (time_unit));
3009
3010         tdp = get_tdp(model);
3011
3012         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3013         if (debug)
3014                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3015
3016         return;
3017 }
3018
3019 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3020 {
3021         if (!genuine_intel)
3022                 return;
3023
3024         if (family != 6)
3025                 return;
3026
3027         switch (model) {
3028         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3029         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3030         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3031                 do_gfx_perf_limit_reasons = 1;
3032         case INTEL_FAM6_HASWELL_X:      /* HSX */
3033                 do_core_perf_limit_reasons = 1;
3034                 do_ring_perf_limit_reasons = 1;
3035         default:
3036                 return;
3037         }
3038 }
3039
3040 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3041 {
3042         unsigned long long msr;
3043         unsigned int dts;
3044         int cpu;
3045
3046         if (!(do_dts || do_ptm))
3047                 return 0;
3048
3049         cpu = t->cpu_id;
3050
3051         /* DTS is per-core, no need to print for each thread */
3052         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3053                 return 0;
3054
3055         if (cpu_migrate(cpu)) {
3056                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3057                 return -1;
3058         }
3059
3060         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3061                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3062                         return 0;
3063
3064                 dts = (msr >> 16) & 0x7F;
3065                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3066                         cpu, msr, tcc_activation_temp - dts);
3067
3068 #ifdef  THERM_DEBUG
3069                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3070                         return 0;
3071
3072                 dts = (msr >> 16) & 0x7F;
3073                 dts2 = (msr >> 8) & 0x7F;
3074                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3075                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3076 #endif
3077         }
3078
3079
3080         if (do_dts) {
3081                 unsigned int resolution;
3082
3083                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3084                         return 0;
3085
3086                 dts = (msr >> 16) & 0x7F;
3087                 resolution = (msr >> 27) & 0xF;
3088                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3089                         cpu, msr, tcc_activation_temp - dts, resolution);
3090
3091 #ifdef THERM_DEBUG
3092                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3093                         return 0;
3094
3095                 dts = (msr >> 16) & 0x7F;
3096                 dts2 = (msr >> 8) & 0x7F;
3097                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3098                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3099 #endif
3100         }
3101
3102         return 0;
3103 }
3104
3105 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3106 {
3107         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3108                 cpu, label,
3109                 ((msr >> 15) & 1) ? "EN" : "DIS",
3110                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
3111                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3112                 (((msr >> 16) & 1) ? "EN" : "DIS"));
3113
3114         return;
3115 }
3116
3117 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3118 {
3119         unsigned long long msr;
3120         int cpu;
3121
3122         if (!do_rapl)
3123                 return 0;
3124
3125         /* RAPL counters are per package, so print only for 1st thread/package */
3126         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3127                 return 0;
3128
3129         cpu = t->cpu_id;
3130         if (cpu_migrate(cpu)) {
3131                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3132                 return -1;
3133         }
3134
3135         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3136                 return -1;
3137
3138         if (debug) {
3139                 fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
3140                         "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3141                         rapl_power_units, rapl_energy_units, rapl_time_units);
3142         }
3143         if (do_rapl & RAPL_PKG_POWER_INFO) {
3144
3145                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3146                         return -5;
3147
3148
3149                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3150                         cpu, msr,
3151                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3152                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3153                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3154                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3155
3156         }
3157         if (do_rapl & RAPL_PKG) {
3158
3159                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3160                         return -9;
3161
3162                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3163                         cpu, msr, (msr >> 63) & 1 ? "": "UN");
3164
3165                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
3166                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3167                         cpu,
3168                         ((msr >> 47) & 1) ? "EN" : "DIS",
3169                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
3170                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3171                         ((msr >> 48) & 1) ? "EN" : "DIS");
3172         }
3173
3174         if (do_rapl & RAPL_DRAM_POWER_INFO) {
3175                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3176                         return -6;
3177
3178                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3179                         cpu, msr,
3180                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3181                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3182                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3183                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3184         }
3185         if (do_rapl & RAPL_DRAM) {
3186                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
3187                         return -9;
3188                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3189                                 cpu, msr, (msr >> 31) & 1 ? "": "UN");
3190
3191                 print_power_limit_msr(cpu, msr, "DRAM Limit");
3192         }
3193         if (do_rapl & RAPL_CORE_POLICY) {
3194                 if (debug) {
3195                         if (get_msr(cpu, MSR_PP0_POLICY, &msr))
3196                                 return -7;
3197
3198                         fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3199                 }
3200         }
3201         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3202                 if (debug) {
3203                         if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
3204                                 return -9;
3205                         fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
3206                                         cpu, msr, (msr >> 31) & 1 ? "": "UN");
3207                         print_power_limit_msr(cpu, msr, "Cores Limit");
3208                 }
3209         }
3210         if (do_rapl & RAPL_GFX) {
3211                 if (debug) {
3212                         if (get_msr(cpu, MSR_PP1_POLICY, &msr))
3213                                 return -8;
3214
3215                         fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3216
3217                         if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
3218                                 return -9;
3219                         fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
3220                                         cpu, msr, (msr >> 31) & 1 ? "": "UN");
3221                         print_power_limit_msr(cpu, msr, "GFX Limit");
3222                 }
3223         }
3224         return 0;
3225 }
3226
3227 /*
3228  * SNB adds support for additional MSRs:
3229  *
3230  * MSR_PKG_C7_RESIDENCY            0x000003fa
3231  * MSR_CORE_C7_RESIDENCY           0x000003fe
3232  * MSR_PKG_C2_RESIDENCY            0x0000060d
3233  */
3234
3235 int has_snb_msrs(unsigned int family, unsigned int model)
3236 {
3237         if (!genuine_intel)
3238                 return 0;
3239
3240         switch (model) {
3241         case INTEL_FAM6_SANDYBRIDGE:
3242         case INTEL_FAM6_SANDYBRIDGE_X:
3243         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3244         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3245         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3246         case INTEL_FAM6_HASWELL_X:      /* HSW */
3247         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3248         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3249         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3250         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3251         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3252         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3253         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3254         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3255         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3256         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3257         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3258         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3259         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3260                 return 1;
3261         }
3262         return 0;
3263 }
3264
3265 /*
3266  * SLV client has supporet for unique MSRs:
3267  *
3268  * MSR_CC6_DEMOTION_POLICY_CONFIG
3269  * MSR_MC6_DEMOTION_POLICY_CONFIG
3270  */
3271
3272 int has_slv_msrs(unsigned int family, unsigned int model)
3273 {
3274         if (!genuine_intel)
3275                 return 0;
3276
3277         switch (model) {
3278         case INTEL_FAM6_ATOM_SILVERMONT1:
3279         case INTEL_FAM6_ATOM_MERRIFIELD:
3280         case INTEL_FAM6_ATOM_MOOREFIELD:
3281                 return 1;
3282         }
3283         return 0;
3284 }
3285
3286 /*
3287  * HSW adds support for additional MSRs:
3288  *
3289  * MSR_PKG_C8_RESIDENCY         0x00000630
3290  * MSR_PKG_C9_RESIDENCY         0x00000631
3291  * MSR_PKG_C10_RESIDENCY        0x00000632
3292  *
3293  * MSR_PKGC8_IRTL               0x00000633
3294  * MSR_PKGC9_IRTL               0x00000634
3295  * MSR_PKGC10_IRTL              0x00000635
3296  *
3297  */
3298 int has_hsw_msrs(unsigned int family, unsigned int model)
3299 {
3300         if (!genuine_intel)
3301                 return 0;
3302
3303         switch (model) {
3304         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3305         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3306         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3307         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3308         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3309         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3310         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3311                 return 1;
3312         }
3313         return 0;
3314 }
3315
3316 /*
3317  * SKL adds support for additional MSRS:
3318  *
3319  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
3320  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
3321  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
3322  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
3323  */
3324 int has_skl_msrs(unsigned int family, unsigned int model)
3325 {
3326         if (!genuine_intel)
3327                 return 0;
3328
3329         switch (model) {
3330         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3331         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3332         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3333         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3334                 return 1;
3335         }
3336         return 0;
3337 }
3338
3339
3340
3341 int is_slm(unsigned int family, unsigned int model)
3342 {
3343         if (!genuine_intel)
3344                 return 0;
3345         switch (model) {
3346         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3347         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3348                 return 1;
3349         }
3350         return 0;
3351 }
3352
3353 int is_knl(unsigned int family, unsigned int model)
3354 {
3355         if (!genuine_intel)
3356                 return 0;
3357         switch (model) {
3358         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3359         case INTEL_FAM6_XEON_PHI_KNM:
3360                 return 1;
3361         }
3362         return 0;
3363 }
3364
/*
 * get_aperf_mperf_multiplier()
 *
 * Returns 1024 when is_knl() accepts this family/model, and 1 for
 * every other processor.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
3371
3372 #define SLM_BCLK_FREQS 5
3373 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
3374
3375 double slm_bclk(void)
3376 {
3377         unsigned long long msr = 3;
3378         unsigned int i;
3379         double freq;
3380
3381         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
3382                 fprintf(outf, "SLM BCLK: unknown\n");
3383
3384         i = msr & 0xf;
3385         if (i >= SLM_BCLK_FREQS) {
3386                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3387                 i = 3;
3388         }
3389         freq = slm_freq_table[i];
3390
3391         fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
3392
3393         return freq;
3394 }
3395
/*
 * discover_bclk()
 *
 * Pick the bus clock (in MHz) for this processor:
 *  - 100.00 when has_snb_msrs() or is_knl() accepts the model,
 *  - whatever slm_bclk() reports when is_slm() accepts it,
 *  - 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model))
		return 100.00;

	if (is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
3405
3406 /*
3407  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
3408  * the Thermal Control Circuit (TCC) activates.
3409  * This is usually equal to tjMax.
3410  *
3411  * Older processors do not have this MSR, so there we guess,
3412  * but also allow cmdline over-ride with -T.
3413  *
3414  * Several MSR temperature values are in units of degrees-C
3415  * below this value, including the Digital Thermal Sensor (DTS),
3416  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
3417  */
3418 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3419 {
3420         unsigned long long msr;
3421         unsigned int target_c_local;
3422         int cpu;
3423
3424         /* tcc_activation_temp is used only for dts or ptm */
3425         if (!(do_dts || do_ptm))
3426                 return 0;
3427
3428         /* this is a per-package concept */
3429         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3430                 return 0;
3431
3432         cpu = t->cpu_id;
3433         if (cpu_migrate(cpu)) {
3434                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3435                 return -1;
3436         }
3437
3438         if (tcc_activation_temp_override != 0) {
3439                 tcc_activation_temp = tcc_activation_temp_override;
3440                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3441                         cpu, tcc_activation_temp);
3442                 return 0;
3443         }
3444
3445         /* Temperature Target MSR is Nehalem and newer only */
3446         if (!do_nhm_platform_info)
3447                 goto guess;
3448
3449         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3450                 goto guess;
3451
3452         target_c_local = (msr >> 16) & 0xFF;
3453
3454         if (debug)
3455                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3456                         cpu, msr, target_c_local);
3457
3458         if (!target_c_local)
3459                 goto guess;
3460
3461         tcc_activation_temp = target_c_local;
3462
3463         return 0;
3464
3465 guess:
3466         tcc_activation_temp = TJMAX_DEFAULT;
3467         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3468                 cpu, tcc_activation_temp);
3469
3470         return 0;
3471 }
3472
3473 void decode_feature_control_msr(void)
3474 {
3475         unsigned long long msr;
3476
3477         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3478                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3479                         base_cpu, msr,
3480                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3481                         msr & (1 << 18) ? "SGX" : "");
3482 }
3483
3484 void decode_misc_enable_msr(void)
3485 {
3486         unsigned long long msr;
3487
3488         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3489                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
3490                         base_cpu, msr,
3491                         msr & (1 << 3) ? "TCC" : "",
3492                         msr & (1 << 16) ? "EIST" : "",
3493                         msr & (1 << 18) ? "MONITOR" : "");
3494 }
3495
3496 /*
3497  * Decode MSR_MISC_PWR_MGMT
3498  *
3499  * Decode the bits according to the Nehalem documentation
3500  * bit[0] seems to continue to have same meaning going forward
3501  * bit[1] less so...
3502  */
3503 void decode_misc_pwr_mgmt_msr(void)
3504 {
3505         unsigned long long msr;
3506
3507         if (!do_nhm_platform_info)
3508                 return;
3509
3510         if (no_MSR_MISC_PWR_MGMT)
3511                 return;
3512
3513         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3514                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
3515                         base_cpu, msr,
3516                         msr & (1 << 0) ? "DIS" : "EN",
3517                         msr & (1 << 1) ? "EN" : "DIS",
3518                         msr & (1 << 8) ? "EN" : "DIS");
3519 }
3520 /*
3521  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
3522  *
3523  * This MSRs are present on Silvermont processors,
3524  * Intel Atom processor E3000 series (Baytrail), and friends.
3525  */
3526 void decode_c6_demotion_policy_msr(void)
3527 {
3528         unsigned long long msr;
3529
3530         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
3531                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
3532                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
3533
3534         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
3535                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
3536                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
3537 }
3538
3539 void process_cpuid()
3540 {
3541         unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
3542         unsigned int fms, family, model, stepping;
3543
3544         eax = ebx = ecx = edx = 0;
3545
3546         __cpuid(0, max_level, ebx, ecx, edx);
3547
3548         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
3549                 genuine_intel = 1;
3550
3551         if (debug)
3552                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
3553                         (char *)&ebx, (char *)&edx, (char *)&ecx);
3554
3555         __cpuid(1, fms, ebx, ecx, edx);
3556         family = (fms >> 8) & 0xf;
3557         model = (fms >> 4) & 0xf;
3558         stepping = fms & 0xf;
3559         if (family == 6 || family == 0xf)
3560                 model += ((fms >> 16) & 0xf) << 4;
3561
3562         if (debug) {
3563                 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
3564                         max_level, family, model, stepping, family, model, stepping);
3565                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3566                         ecx & (1 << 0) ? "SSE3" : "-",
3567                         ecx & (1 << 3) ? "MONITOR" : "-",
3568                         ecx & (1 << 6) ? "SMX" : "-",
3569                         ecx & (1 << 7) ? "EIST" : "-",
3570                         ecx & (1 << 8) ? "TM2" : "-",
3571                         edx & (1 << 4) ? "TSC" : "-",
3572                         edx & (1 << 5) ? "MSR" : "-",
3573                         edx & (1 << 22) ? "ACPI-TM" : "-",
3574                         edx & (1 << 29) ? "TM" : "-");
3575         }
3576
3577         if (!(edx & (1 << 5)))
3578                 errx(1, "CPUID: no MSR");
3579
3580         /*
3581          * check max extended function levels of CPUID.
3582          * This is needed to check for invariant TSC.
3583          * This check is valid for both Intel and AMD.
3584          */
3585         ebx = ecx = edx = 0;
3586         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
3587
3588         if (max_extended_level >= 0x80000007) {
3589
3590                 /*
3591                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3592                  * this check is valid for both Intel and AMD
3593                  */
3594                 __cpuid(0x80000007, eax, ebx, ecx, edx);
3595                 has_invariant_tsc = edx & (1 << 8);
3596         }
3597
3598         /*
3599          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3600          * this check is valid for both Intel and AMD
3601          */
3602
3603         __cpuid(0x6, eax, ebx, ecx, edx);
3604         has_aperf = ecx & (1 << 0);
3605         if (has_aperf) {
3606                 BIC_PRESENT(BIC_Avg_MHz);
3607                 BIC_PRESENT(BIC_Busy);
3608                 BIC_PRESENT(BIC_Bzy_MHz);
3609         }
3610         do_dts = eax & (1 << 0);
3611         if (do_dts)
3612                 BIC_PRESENT(BIC_CoreTmp);
3613         do_ptm = eax & (1 << 6);
3614         if (do_ptm)
3615                 BIC_PRESENT(BIC_PkgTmp);
3616         has_hwp = eax & (1 << 7);
3617         has_hwp_notify = eax & (1 << 8);
3618         has_hwp_activity_window = eax & (1 << 9);
3619         has_hwp_epp = eax & (1 << 10);
3620         has_hwp_pkg = eax & (1 << 11);
3621         has_epb = ecx & (1 << 3);
3622
3623         if (debug)
3624                 fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
3625                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
3626                         has_aperf ? "" : "No-",
3627                         do_dts ? "" : "No-",
3628                         do_ptm ? "" : "No-",
3629                         has_hwp ? "" : "No-",
3630                         has_hwp_notify ? "" : "No-",
3631                         has_hwp_activity_window ? "" : "No-",
3632                         has_hwp_epp ? "" : "No-",
3633                         has_hwp_pkg ? "" : "No-",
3634                         has_epb ? "" : "No-");
3635
3636         if (debug)
3637                 decode_misc_enable_msr();
3638
3639         if (max_level >= 0x7 && debug) {
3640                 int has_sgx;
3641
3642                 ecx = 0;
3643
3644                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3645
3646                 has_sgx = ebx & (1 << 2);
3647                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3648
3649                 if (has_sgx)
3650                         decode_feature_control_msr();
3651         }
3652
3653         if (max_level >= 0x15) {
3654                 unsigned int eax_crystal;
3655                 unsigned int ebx_tsc;
3656
3657                 /*
3658                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
3659                  */
3660                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
3661                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
3662
3663                 if (ebx_tsc != 0) {
3664
3665                         if (debug && (ebx != 0))
3666                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
3667                                         eax_crystal, ebx_tsc, crystal_hz);
3668
3669                         if (crystal_hz == 0)
3670                                 switch(model) {
3671                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3672                                 case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3673                                 case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3674                                 case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3675                                         crystal_hz = 24000000;  /* 24.0 MHz */
3676                                         break;
3677                                 case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3678                                 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3679                                         crystal_hz = 25000000;  /* 25.0 MHz */
3680                                         break;
3681                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3682                                         crystal_hz = 19200000;  /* 19.2 MHz */
3683                                         break;
3684                                 default:
3685                                         crystal_hz = 0;
3686                         }
3687
3688                         if (crystal_hz) {
3689                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
3690                                 if (debug)
3691                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
3692                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
3693                         }
3694                 }
3695         }
3696         if (max_level >= 0x16) {
3697                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
3698
3699                 /*
3700                  * CPUID 16H Base MHz, Max MHz, Bus MHz
3701                  */
3702                 base_mhz = max_mhz = bus_mhz = edx = 0;
3703
3704                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3705                 if (debug)
3706                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3707                                 base_mhz, max_mhz, bus_mhz);
3708         }
3709
3710         if (has_aperf)
3711                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
3712
3713         BIC_PRESENT(BIC_IRQ);
3714         BIC_PRESENT(BIC_TSC_MHz);
3715
3716         if (probe_nhm_msrs(family, model)) {
3717                 do_nhm_platform_info = 1;
3718                 BIC_PRESENT(BIC_CPU_c1);
3719                 BIC_PRESENT(BIC_CPU_c3);
3720                 BIC_PRESENT(BIC_CPU_c6);
3721                 BIC_PRESENT(BIC_SMI);
3722         }
3723         do_snb_cstates = has_snb_msrs(family, model);
3724
3725         if (do_snb_cstates)
3726                 BIC_PRESENT(BIC_CPU_c7);
3727
3728         do_irtl_snb = has_snb_msrs(family, model);
3729         do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
3730         do_pc3 = (pkg_cstate_limit >= PCL__3);
3731         do_pc6 = (pkg_cstate_limit >= PCL__6);
3732         do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
3733         do_c8_c9_c10 = has_hsw_msrs(family, model);
3734         do_irtl_hsw = has_hsw_msrs(family, model);
3735         do_skl_residency = has_skl_msrs(family, model);
3736         do_slm_cstates = is_slm(family, model);
3737         do_knl_cstates  = is_knl(family, model);
3738
3739         if (debug)
3740                 decode_misc_pwr_mgmt_msr();
3741
3742         if (debug && has_slv_msrs(family, model))
3743                 decode_c6_demotion_policy_msr();
3744
3745         rapl_probe(family, model);
3746         perf_limit_reasons_probe(family, model);
3747
3748         if (debug)
3749                 dump_cstate_pstate_config_info(family, model);
3750
3751         if (has_skl_msrs(family, model))
3752                 calculate_tsc_tweak();
3753
3754         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
3755                 BIC_PRESENT(BIC_GFX_rc6);
3756
3757         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
3758                 BIC_PRESENT(BIC_GFXMHz);
3759
3760         return;
3761 }
3762
3763 void help()
3764 {
3765         fprintf(outf,
3766         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
3767         "\n"
3768         "Turbostat forks the specified COMMAND and prints statistics\n"
3769         "when COMMAND completes.\n"
3770         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
3771         "to print statistics, until interrupted.\n"
3772         "--add          add a counter\n"
3773         "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
3774         "--debug        run in \"debug\" mode\n"
3775         "--interval sec Override default 5-second measurement interval\n"
3776         "--help         print this help message\n"
3777         "--out file     create or truncate \"file\" for all output\n"
3778         "--version      print version information\n"
3779         "\n"
3780         "For more help, run \"man turbostat\"\n");
3781 }
3782
3783
/*
 * dir_filter()
 *
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of d_name is inspected.
 * Returns 1 when it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() is only defined for EOF and values representable as
	 * unsigned char; a plain char holding a byte >= 0x80 may be
	 * negative, so convert before the call.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
3795
/*
 * open_dev_cpu_msr()
 *
 * Ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
3800
/*
 * topology_probe()
 *
 * Size the global "topo" description of the machine:
 * num_cpus / max_cpu_num (filled in through the count_cpus callback),
 * num_cores_per_pkg, num_packages and num_threads_per_core.
 * Also allocates the global cpu_present_set and cpu_affinity_set.
 *
 * Exits via err() if an allocation fails.
 */
void topology_probe()
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_siblings = 0;
	/* scratch per-cpu table; allocated and freed inside this function */
	struct cpu_topology {
		int core_id;
		int physical_package_id;
	} *cpus;

	/* Initialize num_cpus, max_cpu_num */
	topo.num_cpus = 0;
	topo.max_cpu_num = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only && topo.num_cpus > 1)
		BIC_PRESENT(BIC_CPU);

	if (debug > 1)
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

	/* one zeroed slot per possible cpu number, 0..max_cpu_num inclusive */
	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
	if (cpus == NULL)
		err(1, "calloc cpus");

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

	/*
	 * Allocate and initialize cpu_affinity_set
	 * (left empty here; nothing in this function sets a bit in it)
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);


	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		/* holes in the cpu numbering are skipped */
		if (cpu_is_not_present(i)) {
			if (debug > 1)
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
			continue;
		}
		cpus[i].core_id = get_core_id(i);
		if (cpus[i].core_id > max_core_id)
			max_core_id = cpus[i].core_id;

		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		siblings = get_num_ht_siblings(i);
		if (siblings > max_siblings)
			max_siblings = siblings;
		if (debug > 1)
			fprintf(outf, "cpu %d pkg %d core %d\n",
				i, cpus[i].physical_package_id, cpus[i].core_id);
	}
	/* sized from the largest id seen, so sparse ids still fit */
	topo.num_cores_per_pkg = max_core_id + 1;
	if (debug > 1)
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
			max_core_id, topo.num_cores_per_pkg);
	/* note: the Core column is enabled only when debug is set */
	if (debug && !summary_only && topo.num_cores_per_pkg > 1)
		BIC_PRESENT(BIC_Core);

	topo.num_packages = max_package_id + 1;
	if (debug > 1)
		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
			max_package_id, topo.num_packages);
	/* note: the Package column is enabled only when debug is set */
	if (debug && !summary_only && topo.num_packages > 1)
		BIC_PRESENT(BIC_Package);

	topo.num_threads_per_core = max_siblings;
	if (debug > 1)
		fprintf(outf, "max_siblings %d\n", max_siblings);

	free(cpus);
}
3893
3894 void
3895 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
3896 {
3897         int i;
3898
3899         *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
3900                 topo.num_packages, sizeof(struct thread_data));
3901         if (*t == NULL)
3902                 goto error;
3903
3904         for (i = 0; i < topo.num_threads_per_core *
3905                 topo.num_cores_per_pkg * topo.num_packages; i++)
3906                 (*t)[i].cpu_id = -1;
3907
3908         *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
3909                 sizeof(struct core_data));
3910         if (*c == NULL)
3911                 goto error;
3912
3913         for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
3914                 (*c)[i].core_id = -1;
3915
3916         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
3917         if (*p == NULL)
3918                 goto error;
3919
3920         for (i = 0; i < topo.num_packages; i++)
3921                 (*p)[i].package_id = i;
3922
3923         return;
3924 error:
3925         err(1, "calloc counters");
3926 }
3927 /*
3928  * init_counter()
3929  *
3930  * set cpu_id, core_num, pkg_num
3931  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
3932  *
3933  * increment topo.num_cores when 1st core in pkg seen
3934  */
3935 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
3936         struct pkg_data *pkg_base, int thread_num, int core_num,
3937         int pkg_num, int cpu_id)
3938 {
3939         struct thread_data *t;
3940         struct core_data *c;
3941         struct pkg_data *p;
3942
3943         t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
3944         c = GET_CORE(core_base, core_num, pkg_num);
3945         p = GET_PKG(pkg_base, pkg_num);
3946
3947         t->cpu_id = cpu_id;
3948         if (thread_num == 0) {
3949                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
3950                 if (cpu_is_first_core_in_package(cpu_id))
3951                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
3952         }
3953
3954         c->core_id = core_num;
3955         p->package_id = pkg_num;
3956 }
3957
3958
3959 int initialize_counters(int cpu_id)
3960 {
3961         int my_thread_id, my_core_id, my_package_id;
3962
3963         my_package_id = get_physical_package_id(cpu_id);
3964         my_core_id = get_core_id(cpu_id);
3965         my_thread_id = get_cpu_position_in_core(cpu_id);
3966         if (!my_thread_id)
3967                 topo.num_cores++;
3968
3969         init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3970         init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3971         return 0;
3972 }
3973
3974 void allocate_output_buffer()
3975 {
3976         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
3977         outp = output_buffer;
3978         if (outp == NULL)
3979                 err(-1, "calloc output buffer");
3980 }
3981 void allocate_fd_percpu(void)
3982 {
3983         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3984         if (fd_percpu == NULL)
3985                 err(-1, "calloc fd_percpu");
3986 }
3987 void allocate_irq_buffers(void)
3988 {
3989         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
3990         if (irq_column_2_cpu == NULL)
3991                 err(-1, "calloc %d", topo.num_cpus);
3992
3993         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3994         if (irqs_per_cpu == NULL)
3995                 err(-1, "calloc %d", topo.max_cpu_num + 1);
3996 }
/*
 * setup_all_buffers()
 *
 * Probe the topology, then allocate every buffer sized from it and
 * initialize the per-cpu counter entries.
 */
void setup_all_buffers(void)
{
	/* must run first: the allocators below size themselves from "topo" */
	topology_probe();
	allocate_irq_buffers();
	allocate_fd_percpu();
	/* two complete counter sets, even and odd */
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	/* fill in the counter entries for each cpu via initialize_counters() */
	for_all_proc_cpus(initialize_counters);
}
4007
4008 void set_base_cpu(void)
4009 {
4010         base_cpu = sched_getcpu();
4011         if (base_cpu < 0)
4012                 err(-ENODEV, "No valid cpus found");
4013
4014         if (debug > 1)
4015                 fprintf(outf, "base_cpu = %d\n", base_cpu);
4016 }
4017
4018 void turbostat_init()
4019 {
4020         setup_all_buffers();
4021         set_base_cpu();
4022         check_dev_msr();
4023         check_permissions();
4024         process_cpuid();
4025
4026
4027         if (debug)
4028                 for_all_cpus(print_hwp, ODD_COUNTERS);
4029
4030         if (debug)
4031                 for_all_cpus(print_epb, ODD_COUNTERS);
4032
4033         if (debug)
4034                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
4035
4036         if (debug)
4037                 for_all_cpus(print_rapl, ODD_COUNTERS);
4038
4039         for_all_cpus(set_temperature_target, ODD_COUNTERS);
4040
4041         if (debug)
4042                 for_all_cpus(print_thermal, ODD_COUNTERS);
4043
4044         if (debug && do_irtl_snb)
4045                 print_irtl();
4046 }
4047
4048 int fork_it(char **argv)
4049 {
4050         pid_t child_pid;
4051         int status;
4052
4053         status = for_all_cpus(get_counters, EVEN_COUNTERS);
4054         if (status)
4055                 exit(status);
4056         /* clear affinity side-effect of get_counters() */
4057         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4058         gettimeofday(&tv_even, (struct timezone *)NULL);
4059
4060         child_pid = fork();
4061         if (!child_pid) {
4062                 /* child */
4063                 execvp(argv[0], argv);
4064         } else {
4065
4066                 /* parent */
4067                 if (child_pid == -1)
4068                         err(1, "fork");
4069
4070                 signal(SIGINT, SIG_IGN);
4071                 signal(SIGQUIT, SIG_IGN);
4072                 if (waitpid(child_pid, &status, 0) == -1)
4073                         err(status, "waitpid");
4074         }
4075         /*
4076          * n.b. fork_it() does not check for errors from for_all_cpus()
4077          * because re-starting is problematic when forking
4078          */
4079         for_all_cpus(get_counters, ODD_COUNTERS);
4080         gettimeofday(&tv_odd, (struct timezone *)NULL);
4081         timersub(&tv_odd, &tv_even, &tv_delta);
4082         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4083                 fprintf(outf, "%s: Counter reset detected\n", progname);
4084         else {
4085                 compute_average(EVEN_COUNTERS);
4086                 format_all_counters(EVEN_COUNTERS);
4087         }
4088
4089         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4090
4091         flush_output_stderr();
4092
4093         return status;
4094 }
4095
4096 int get_and_dump_counters(void)
4097 {
4098         int status;
4099
4100         status = for_all_cpus(get_counters, ODD_COUNTERS);
4101         if (status)
4102                 return status;
4103
4104         status = for_all_cpus(dump_counters, ODD_COUNTERS);
4105         if (status)
4106                 return status;
4107
4108         flush_output_stdout();
4109
4110         return status;
4111 }
4112
4113 void print_version() {
4114         fprintf(outf, "turbostat version 4.17 1 Jan 2017"
4115                 " - Len Brown <lenb@kernel.org>\n");
4116 }
4117
4118 int add_counter(unsigned int msr_num, char *name, unsigned int width,
4119         enum counter_scope scope, enum counter_type type,
4120         enum counter_format format)
4121 {
4122         struct msr_counter *msrp;
4123
4124         msrp = calloc(1, sizeof(struct msr_counter));
4125         if (msrp == NULL) {
4126                 perror("calloc");
4127                 exit(1);
4128         }
4129
4130         msrp->msr_num = msr_num;
4131         strncpy(msrp->name, name, NAME_BYTES);
4132         msrp->width = width;
4133         msrp->type = type;
4134         msrp->format = format;
4135
4136         switch (scope) {
4137
4138         case SCOPE_CPU:
4139                 msrp->next = sys.tp;
4140                 sys.tp = msrp;
4141                 sys.added_thread_counters++;
4142                 if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
4143                         fprintf(stderr, "exceeded max %d added thread counters\n",
4144                                 MAX_ADDED_COUNTERS);
4145                         exit(-1);
4146                 }
4147                 break;
4148
4149         case SCOPE_CORE:
4150                 msrp->next = sys.cp;
4151                 sys.cp = msrp;
4152                 sys.added_core_counters++;
4153                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
4154                         fprintf(stderr, "exceeded max %d added core counters\n",
4155                                 MAX_ADDED_COUNTERS);
4156                         exit(-1);
4157                 }
4158                 break;
4159
4160         case SCOPE_PACKAGE:
4161                 msrp->next = sys.pp;
4162                 sys.pp = msrp;
4163                 sys.added_package_counters++;
4164                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
4165                         fprintf(stderr, "exceeded max %d added package counters\n",
4166                                 MAX_ADDED_COUNTERS);
4167                         exit(-1);
4168                 }
4169                 break;
4170         }
4171
4172         return 0;
4173 }
4174
4175 void parse_add_command(char *add_command)
4176 {
4177         int msr_num = 0;
4178         char name_buffer[NAME_BYTES];
4179         int width = 64;
4180         int fail = 0;
4181         enum counter_scope scope = SCOPE_CPU;
4182         enum counter_type type = COUNTER_CYCLES;
4183         enum counter_format format = FORMAT_DELTA;
4184
4185         while (add_command) {
4186
4187                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
4188                         goto next;
4189
4190                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
4191                         goto next;
4192
4193                 if (sscanf(add_command, "u%d", &width) == 1) {
4194                         if ((width == 32) || (width == 64))
4195                                 goto next;
4196                         width = 64;
4197                 }
4198                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
4199                         scope = SCOPE_CPU;
4200                         goto next;
4201                 }
4202                 if (!strncmp(add_command, "core", strlen("core"))) {
4203                         scope = SCOPE_CORE;
4204                         goto next;
4205                 }
4206                 if (!strncmp(add_command, "package", strlen("package"))) {
4207                         scope = SCOPE_PACKAGE;
4208                         goto next;
4209                 }
4210                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
4211                         type = COUNTER_CYCLES;
4212                         goto next;
4213                 }
4214                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
4215                         type = COUNTER_SECONDS;
4216                         goto next;
4217                 }
4218                 if (!strncmp(add_command, "raw", strlen("raw"))) {
4219                         format = FORMAT_RAW;
4220                         goto next;
4221                 }
4222                 if (!strncmp(add_command, "delta", strlen("delta"))) {
4223                         format = FORMAT_DELTA;
4224                         goto next;
4225                 }
4226                 if (!strncmp(add_command, "percent", strlen("percent"))) {
4227                         format = FORMAT_PERCENT;
4228                         goto next;
4229                 }
4230
4231                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
4232                         char *eos;
4233
4234                         eos = strchr(name_buffer, ',');
4235                         if (eos)
4236                                 *eos = '\0';
4237                         goto next;
4238                 }
4239
4240 next:
4241                 add_command = strchr(add_command, ',');
4242                 if (add_command)
4243                         add_command++;
4244
4245         }
4246         if (msr_num == 0) {
4247                 fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
4248                 fail++;
4249         }
4250
4251         /* generate default column header */
4252         if (*name_buffer == '\0') {
4253                 if (format == FORMAT_RAW) {
4254                         if (width == 32)
4255                                 sprintf(name_buffer, "msr%d", msr_num);
4256                         else
4257                                 sprintf(name_buffer, "MSR%d", msr_num);
4258                 } else if (format == FORMAT_DELTA) {
4259                         if (width == 32)
4260                                 sprintf(name_buffer, "cnt%d", msr_num);
4261                         else
4262                                 sprintf(name_buffer, "CNT%d", msr_num);
4263                 } else if (format == FORMAT_PERCENT) {
4264                         if (width == 32)
4265                                 sprintf(name_buffer, "msr%d%%", msr_num);
4266                         else
4267                                 sprintf(name_buffer, "MSR%d%%", msr_num);
4268                 }
4269         }
4270
4271         if (add_counter(msr_num, name_buffer, width, scope, type, format))
4272                 fail++;
4273
4274         if (fail) {
4275                 help();
4276                 exit(1);
4277         }
4278 }
/*
 * How a counter list given on the command line is interpreted:
 *   HIDE_LIST - hide the listed counters, show the rest [default]
 *   SHOW_LIST - show the listed counters, hide the rest
 */
enum show_hide_mode {
	SHOW_LIST,
	HIDE_LIST
} global_show_hide_mode = HIDE_LIST;

/* set to 1 by parse_show_hide() once a SHOW_LIST request has been handled */
int shown;
4286 /*
4287  * parse_show_hide() - process cmdline to set default counter action
4288  */
4289 void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
4290 {
4291         /*
4292          * --show: show only those specified
4293          *  The 1st invocation will clear and replace the enabled mask
4294          *  subsequent invocations can add to it.
4295          */
4296         if (new_mode == SHOW_LIST) {
4297                 if (shown == 0)
4298                         bic_enabled = bic_lookup(optarg);
4299                 else
4300                         bic_enabled |= bic_lookup(optarg);
4301                 shown = 1;
4302
4303                 return;
4304         }
4305
4306         /*
4307          * --hide: do not show those specified
4308          *  multiple invocations simply clear more bits in enabled mask
4309          */
4310         bic_enabled &= ~bic_lookup(optarg);
4311 }
4312
4313 void cmdline(int argc, char **argv)
4314 {
4315         int opt;
4316         int option_index = 0;
4317         static struct option long_options[] = {
4318                 {"add",         required_argument,      0, 'a'},
4319                 {"Dump",        no_argument,            0, 'D'},
4320                 {"debug",       no_argument,            0, 'd'},
4321                 {"interval",    required_argument,      0, 'i'},
4322                 {"help",        no_argument,            0, 'h'},
4323                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
4324                 {"Joules",      no_argument,            0, 'J'},
4325                 {"out",         required_argument,      0, 'o'},
4326                 {"Package",     no_argument,            0, 'p'},
4327                 {"processor",   no_argument,            0, 'p'},
4328                 {"show",        required_argument,      0, 's'},
4329                 {"Summary",     no_argument,            0, 'S'},
4330                 {"TCC",         required_argument,      0, 'T'},
4331                 {"version",     no_argument,            0, 'v' },
4332                 {0,             0,                      0,  0 }
4333         };
4334
4335         progname = argv[0];
4336
4337         while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
4338                                 long_options, &option_index)) != -1) {
4339                 switch (opt) {
4340                 case 'a':
4341                         parse_add_command(optarg);
4342                         break;
4343                 case 'D':
4344                         dump_only++;
4345                         break;
4346                 case 'd':
4347                         debug++;
4348                         break;
4349                 case 'H':
4350                         parse_show_hide(optarg, HIDE_LIST);
4351                         break;
4352                 case 'h':
4353                 default:
4354                         help();
4355                         exit(1);
4356                 case 'i':
4357                         {
4358                                 double interval = strtod(optarg, NULL);
4359
4360                                 if (interval < 0.001) {
4361                                         fprintf(outf, "interval %f seconds is too small\n",
4362                                                 interval);
4363                                         exit(2);
4364                                 }
4365
4366                                 interval_ts.tv_sec = interval;
4367                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
4368                         }
4369                         break;
4370                 case 'J':
4371                         rapl_joules++;
4372                         break;
4373                 case 'o':
4374                         outf = fopen_or_die(optarg, "w");
4375                         break;
4376                 case 'P':
4377                         show_pkg_only++;
4378                         break;
4379                 case 'p':
4380                         show_core_only++;
4381                         break;
4382                 case 's':
4383                         parse_show_hide(optarg, SHOW_LIST);
4384                         break;
4385                 case 'S':
4386                         summary_only++;
4387                         break;
4388                 case 'T':
4389                         tcc_activation_temp_override = atoi(optarg);
4390                         break;
4391                 case 'v':
4392                         print_version();
4393                         exit(0);
4394                         break;
4395                 }
4396         }
4397 }
4398
4399 int main(int argc, char **argv)
4400 {
4401         outf = stderr;
4402
4403         cmdline(argc, argv);
4404
4405         if (debug)
4406                 print_version();
4407
4408         turbostat_init();
4409
4410         /* dump counters and exit */
4411         if (dump_only)
4412                 return get_and_dump_counters();
4413
4414         /*
4415          * if any params left, it must be a command to fork
4416          */
4417         if (argc - optind)
4418                 return fork_it(argv + optind);
4419         else
4420                 turbostat_loop();
4421
4422         return 0;
4423 }