tools/power turbostat: fix MSR_IA32_MISC_ENABLE MWAIT printout
[sfrench/cifs-2.6.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47
48 char *proc_stat = "/proc/stat";
49 FILE *outf;
50 int *fd_percpu;
51 struct timeval interval_tv = {5, 0};
52 struct timespec interval_ts = {5, 0};
53 struct timespec one_msec = {0, 1000000};
54 unsigned int debug;
55 unsigned int quiet;
56 unsigned int shown;
57 unsigned int sums_need_wide_columns;
58 unsigned int rapl_joules;
59 unsigned int summary_only;
60 unsigned int list_header_only;
61 unsigned int dump_only;
62 unsigned int do_snb_cstates;
63 unsigned int do_knl_cstates;
64 unsigned int do_slm_cstates;
65 unsigned int use_c1_residency_msr;
66 unsigned int has_aperf;
67 unsigned int has_epb;
68 unsigned int do_irtl_snb;
69 unsigned int do_irtl_hsw;
70 unsigned int units = 1000000;   /* MHz etc */
71 unsigned int genuine_intel;
72 unsigned int has_invariant_tsc;
73 unsigned int do_nhm_platform_info;
74 unsigned int no_MSR_MISC_PWR_MGMT;
75 unsigned int aperf_mperf_multiplier = 1;
76 double bclk;
77 double base_hz;
78 unsigned int has_base_hz;
79 double tsc_tweak = 1.0;
80 unsigned int show_pkg_only;
81 unsigned int show_core_only;
82 char *output_buffer, *outp;
83 unsigned int do_rapl;
84 unsigned int do_dts;
85 unsigned int do_ptm;
86 unsigned long long  gfx_cur_rc6_ms;
87 unsigned int gfx_cur_mhz;
88 unsigned int tcc_activation_temp;
89 unsigned int tcc_activation_temp_override;
90 double rapl_power_units, rapl_time_units;
91 double rapl_dram_energy_units, rapl_energy_units;
92 double rapl_joule_counter_range;
93 unsigned int do_core_perf_limit_reasons;
94 unsigned int do_gfx_perf_limit_reasons;
95 unsigned int do_ring_perf_limit_reasons;
96 unsigned int crystal_hz;
97 unsigned long long tsc_hz;
98 int base_cpu;
99 double discover_bclk(unsigned int family, unsigned int model);
100 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
101                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
102 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
103 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
104 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
105 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
106 unsigned int has_misc_feature_control;
107
108 #define RAPL_PKG                (1 << 0)
109                                         /* 0x610 MSR_PKG_POWER_LIMIT */
110                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
111 #define RAPL_PKG_PERF_STATUS    (1 << 1)
112                                         /* 0x613 MSR_PKG_PERF_STATUS */
113 #define RAPL_PKG_POWER_INFO     (1 << 2)
114                                         /* 0x614 MSR_PKG_POWER_INFO */
115
116 #define RAPL_DRAM               (1 << 3)
117                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
118                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
119 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
120                                         /* 0x61b MSR_DRAM_PERF_STATUS */
121 #define RAPL_DRAM_POWER_INFO    (1 << 5)
122                                         /* 0x61c MSR_DRAM_POWER_INFO */
123
124 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
125                                         /* 0x638 MSR_PP0_POWER_LIMIT */
126 #define RAPL_CORE_POLICY        (1 << 7)
127                                         /* 0x63a MSR_PP0_POLICY */
128
129 #define RAPL_GFX                (1 << 8)
130                                         /* 0x640 MSR_PP1_POWER_LIMIT */
131                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
132                                         /* 0x642 MSR_PP1_POLICY */
133
134 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
135                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
136 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
137 #define TJMAX_DEFAULT   100
138
139 #define MAX(a, b) ((a) > (b) ? (a) : (b))
140
141 /*
142  * buffer size used by sscanf() for added column names
143  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
144  */
145 #define NAME_BYTES 20
146 #define PATH_BYTES 128
147
148 int backwards_count;
149 char *progname;
150
151 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
152 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
153 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
154 #define MAX_ADDED_COUNTERS 16
155
/*
 * Per-thread (logical CPU) counter record.
 *
 * Two arrays of these exist, thread_even and thread_odd; elements are
 * located with GET_THREAD().
 *
 * tv_begin/tv_end: format_counters() prints (tv_end - tv_begin) in
 *	microseconds for the "usec" column and tv_end for the
 *	Time_Of_Day_Seconds column.
 * flags: holds the CPU_IS_FIRST_* bits below, which format_counters()
 *	tests when show_core_only / show_pkg_only are set.
 * counter[i]: value of the i-th entry on the sys.tp list, as indexed by
 *	dump_counters().
 */
156 struct thread_data {
157         struct timeval tv_begin;
158         struct timeval tv_end;
159         unsigned long long tsc;
160         unsigned long long aperf;
161         unsigned long long mperf;
162         unsigned long long c1;
163         unsigned long long  irq_count;
164         unsigned int smi_count;
165         unsigned int cpu_id;
166         unsigned int flags;
167 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2
168 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
169         unsigned long long counter[MAX_ADDED_COUNTERS];
170 } *thread_even, *thread_odd;
171
/*
 * Per-core counter record.
 *
 * Two arrays of these exist, core_even and core_odd; elements are located
 * with GET_CORE().
 *
 * core_temp_c: dumped by dump_counters() under the "DTS" label.
 * counter[i]: value of the i-th entry on the sys.cp list, as indexed by
 *	dump_counters().
 */
172 struct core_data {
173         unsigned long long c3;
174         unsigned long long c6;
175         unsigned long long c7;
176         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
177         unsigned int core_temp_c;
178         unsigned int core_id;
179         unsigned long long counter[MAX_ADDED_COUNTERS];
180 } *core_even, *core_odd;
181
/*
 * Per-package counter record.
 *
 * Two arrays of these exist, package_even and package_odd; elements are
 * located with GET_PKG().
 *
 * pkg_temp_c: dumped by dump_counters() under the "PTM" label.
 * counter[i]: value of the i-th entry on the sys.pp list, as indexed by
 *	dump_counters().
 */
182 struct pkg_data {
183         unsigned long long pc2;
184         unsigned long long pc3;
185         unsigned long long pc6;
186         unsigned long long pc7;
187         unsigned long long pc8;
188         unsigned long long pc9;
189         unsigned long long pc10;
190         unsigned long long pkg_wtd_core_c0;
191         unsigned long long pkg_any_core_c0;
192         unsigned long long pkg_any_gfxe_c0;
193         unsigned long long pkg_both_core_gfxe_c0;
194         long long gfx_rc6_ms;
195         unsigned int gfx_mhz;
196         unsigned int package_id;
197         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
198         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
199         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
200         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
201         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
202         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
203         unsigned int pkg_temp_c;
204         unsigned long long counter[MAX_ADDED_COUNTERS];
205 } *package_even, *package_odd;
206
207 #define ODD_COUNTERS thread_odd, core_odd, package_odd
208 #define EVEN_COUNTERS thread_even, core_even, package_even
209
210 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
211         (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
212                 topo.num_threads_per_core + \
213                 (core_no) * topo.num_threads_per_core + (thread_no))
214 #define GET_CORE(core_base, core_no, pkg_no) \
215         (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
216 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
217
218 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
219 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
220 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
221
222 struct msr_counter {
223         unsigned int msr_num;
224         char name[NAME_BYTES];
225         char path[PATH_BYTES];
226         unsigned int width;
227         enum counter_type type;
228         enum counter_format format;
229         struct msr_counter *next;
230         unsigned int flags;
231 #define FLAGS_HIDE      (1 << 0)
232 #define FLAGS_SHOW      (1 << 1)
233 #define SYSFS_PERCPU    (1 << 1)
234 };
235
/*
 * Registry of added counters, one singly linked list per scope.
 *
 * tp, cp, pp: heads of the thread-, core- and package-scope lists;
 *	print_header() and dump_counters() walk them through ->next, and
 *	the position of an entry on its list is its index into the
 *	matching counter[] array.
 * added_*_counters: NOTE(review): presumably the length of the matching
 *	list -- confirm against the code that adds counters.
 */
236 struct sys_counters {
237         unsigned int added_thread_counters;
238         unsigned int added_core_counters;
239         unsigned int added_package_counters;
240         struct msr_counter *tp;
241         struct msr_counter *cp;
242         struct msr_counter *pp;
243 } sys;
244
/*
 * One thread, core and package record for the system-wide summary row.
 * format_counters() recognizes the summary row by comparing its thread
 * pointer against &average.threads.
 */
245 struct system_summary {
246         struct thread_data threads;
247         struct core_data cores;
248         struct pkg_data packages;
249 } average;
250
251
/*
 * System topology dimensions.
 *
 * num_packages, num_cores_per_pkg and num_threads_per_core bound the
 * nested loops in for_all_cpus(); the latter two are also the strides in
 * the GET_THREAD() and GET_CORE() index arithmetic.
 */
252 struct topo_params {
253         int num_packages;
254         int num_cpus;
255         int num_cores;
256         int max_cpu_num;
257         int num_cores_per_pkg;
258         int num_threads_per_core;
259 } topo;
260
261 struct timeval tv_even, tv_odd, tv_delta;
262
263 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
264 int *irqs_per_cpu;              /* indexed by cpu_num */
265
266 void setup_all_buffers(void);
267
268 int cpu_is_not_present(int cpu)
269 {
270         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
271 }
272 /*
273  * run func(thread, core, package) in topology order
274  * skip non-present cpus
275  */
276
277 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
278         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
279 {
280         int retval, pkg_no, core_no, thread_no;
281
282         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
283                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
284                         for (thread_no = 0; thread_no <
285                                 topo.num_threads_per_core; ++thread_no) {
286                                 struct thread_data *t;
287                                 struct core_data *c;
288                                 struct pkg_data *p;
289
290                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
291
292                                 if (cpu_is_not_present(t->cpu_id))
293                                         continue;
294
295                                 c = GET_CORE(core_base, core_no, pkg_no);
296                                 p = GET_PKG(pkg_base, pkg_no);
297
298                                 retval = func(t, c, p);
299                                 if (retval)
300                                         return retval;
301                         }
302                 }
303         }
304         return 0;
305 }
306
307 int cpu_migrate(int cpu)
308 {
309         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
310         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
311         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
312                 return -1;
313         else
314                 return 0;
315 }
316 int get_msr_fd(int cpu)
317 {
318         char pathname[32];
319         int fd;
320
321         fd = fd_percpu[cpu];
322
323         if (fd)
324                 return fd;
325
326         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
327         fd = open(pathname, O_RDONLY);
328         if (fd < 0)
329                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
330
331         fd_percpu[cpu] = fd;
332
333         return fd;
334 }
335
/*
 * Read the 64-bit MSR at @offset on @cpu into *@msr.
 *
 * Exits through err() unless pread() returns exactly sizeof(*msr) bytes,
 * so a return to the caller always means success and the value is 0.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
347
348 /*
349  * Each string in this array is compared in --show and --hide cmdline.
350  * Thus, strings that are proper sub-sets must follow their more specific peers.
351  */
/*
 * Built-in column table.
 * bic_lookup() maps the entry at index i to bit (1ULL << i), so the order
 * here must stay in step with the BIC_* bit definitions.
 */
352 struct msr_counter bic[] = {
353         { 0x0, "usec" },
354         { 0x0, "Time_Of_Day_Seconds" },
355         { 0x0, "Package" },
356         { 0x0, "Avg_MHz" },
357         { 0x0, "Bzy_MHz" },
358         { 0x0, "TSC_MHz" },
359         { 0x0, "IRQ" },
360         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
361         { 0x0, "Busy%" },
362         { 0x0, "CPU%c1" },
363         { 0x0, "CPU%c3" },
364         { 0x0, "CPU%c6" },
365         { 0x0, "CPU%c7" },
366         { 0x0, "ThreadC" },
367         { 0x0, "CoreTmp" },
368         { 0x0, "CoreCnt" },
369         { 0x0, "PkgTmp" },
370         { 0x0, "GFX%rc6" },
371         { 0x0, "GFXMHz" },
372         { 0x0, "Pkg%pc2" },
373         { 0x0, "Pkg%pc3" },
374         { 0x0, "Pkg%pc6" },
375         { 0x0, "Pkg%pc7" },
376         { 0x0, "Pkg%pc8" },
377         { 0x0, "Pkg%pc9" },
378         { 0x0, "Pkg%pc10" },
379         { 0x0, "PkgWatt" },
380         { 0x0, "CorWatt" },
381         { 0x0, "GFXWatt" },
382         { 0x0, "PkgCnt" },
383         { 0x0, "RAMWatt" },
384         { 0x0, "PKG_%" },
385         { 0x0, "RAM_%" },
386         { 0x0, "Pkg_J" },
387         { 0x0, "Cor_J" },
388         { 0x0, "GFX_J" },
389         { 0x0, "RAM_J" },
390         { 0x0, "Core" },
391         { 0x0, "CPU" },
392         { 0x0, "Mod%c6" },
393         { 0x0, "sysfs" },
394         { 0x0, "Totl%C0" },
395         { 0x0, "Any%C0" },
396         { 0x0, "GFX%C0" },
397         { 0x0, "CPUGFX%" },
398 };
399
400
401
402 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
403 #define BIC_USEC        (1ULL << 0)
404 #define BIC_TOD         (1ULL << 1)
405 #define BIC_Package     (1ULL << 2)
406 #define BIC_Avg_MHz     (1ULL << 3)
407 #define BIC_Bzy_MHz     (1ULL << 4)
408 #define BIC_TSC_MHz     (1ULL << 5)
409 #define BIC_IRQ         (1ULL << 6)
410 #define BIC_SMI         (1ULL << 7)
411 #define BIC_Busy        (1ULL << 8)
412 #define BIC_CPU_c1      (1ULL << 9)
413 #define BIC_CPU_c3      (1ULL << 10)
414 #define BIC_CPU_c6      (1ULL << 11)
415 #define BIC_CPU_c7      (1ULL << 12)
416 #define BIC_ThreadC     (1ULL << 13)
417 #define BIC_CoreTmp     (1ULL << 14)
418 #define BIC_CoreCnt     (1ULL << 15)
419 #define BIC_PkgTmp      (1ULL << 16)
420 #define BIC_GFX_rc6     (1ULL << 17)
421 #define BIC_GFXMHz      (1ULL << 18)
422 #define BIC_Pkgpc2      (1ULL << 19)
423 #define BIC_Pkgpc3      (1ULL << 20)
424 #define BIC_Pkgpc6      (1ULL << 21)
425 #define BIC_Pkgpc7      (1ULL << 22)
426 #define BIC_Pkgpc8      (1ULL << 23)
427 #define BIC_Pkgpc9      (1ULL << 24)
428 #define BIC_Pkgpc10     (1ULL << 25)
429 #define BIC_PkgWatt     (1ULL << 26)
430 #define BIC_CorWatt     (1ULL << 27)
431 #define BIC_GFXWatt     (1ULL << 28)
432 #define BIC_PkgCnt      (1ULL << 29)
433 #define BIC_RAMWatt     (1ULL << 30)
434 #define BIC_PKG__       (1ULL << 31)
435 #define BIC_RAM__       (1ULL << 32)
436 #define BIC_Pkg_J       (1ULL << 33)
437 #define BIC_Cor_J       (1ULL << 34)
438 #define BIC_GFX_J       (1ULL << 35)
439 #define BIC_RAM_J       (1ULL << 36)
440 #define BIC_Core        (1ULL << 37)
441 #define BIC_CPU         (1ULL << 38)
442 #define BIC_Mod_c6      (1ULL << 39)
443 #define BIC_sysfs       (1ULL << 40)
444 #define BIC_Totl_c0     (1ULL << 41)
445 #define BIC_Any_c0      (1ULL << 42)
446 #define BIC_GFX_c0      (1ULL << 43)
447 #define BIC_CPUGFX      (1ULL << 44)
448
449 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD)
450
451 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
452 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
453
454 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
455 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
456 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
457 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
458
459
460 #define MAX_DEFERRED 16
461 char *deferred_skip_names[MAX_DEFERRED];
462 int deferred_skip_index;
463
464 /*
465  * HIDE_LIST - hide this list of counters, show the rest [default]
466  * SHOW_LIST - show this list of counters, hide the rest
467  */
468 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
469
470 void help(void)
471 {
472         fprintf(outf,
473         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
474         "\n"
475         "Turbostat forks the specified COMMAND and prints statistics\n"
476         "when COMMAND completes.\n"
477         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
478         "to print statistics, until interrupted.\n"
479         "--add          add a counter\n"
480         "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
481         "--cpu  cpu-set limit output to summary plus cpu-set:\n"
482         "               {core | package | j,k,l..m,n-p }\n"
483         "--quiet        skip decoding system configuration header\n"
484         "--interval sec.subsec  Override default 5-second measurement interval\n"
485         "--help         print this help message\n"
486         "--list         list column headers only\n"
487         "--out file     create or truncate \"file\" for all output\n"
488         "--version      print version information\n"
489         "\n"
490         "For more help, run \"man turbostat\"\n");
491 }
492
493 /*
494  * bic_lookup
495  * for all the strings in comma separate name_list,
496  * set the approprate bit in return value.
497  */
498 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
499 {
500         int i;
501         unsigned long long retval = 0;
502
503         while (name_list) {
504                 char *comma;
505
506                 comma = strchr(name_list, ',');
507
508                 if (comma)
509                         *comma = '\0';
510
511                 if (!strcmp(name_list, "all"))
512                         return ~0;
513
514                 for (i = 0; i < MAX_BIC; ++i) {
515                         if (!strcmp(name_list, bic[i].name)) {
516                                 retval |= (1ULL << i);
517                                 break;
518                         }
519                 }
520                 if (i == MAX_BIC) {
521                         if (mode == SHOW_LIST) {
522                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
523                                 exit(-1);
524                         }
525                         deferred_skip_names[deferred_skip_index++] = name_list;
526                         if (debug)
527                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
528                         if (deferred_skip_index >= MAX_DEFERRED) {
529                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
530                                         MAX_DEFERRED, name_list);
531                                 help();
532                                 exit(1);
533                         }
534                 }
535
536                 name_list = comma;
537                 if (name_list)
538                         name_list++;
539
540         }
541         return retval;
542 }
543
544
545 void print_header(char *delim)
546 {
547         struct msr_counter *mp;
548         int printed = 0;
549
550         if (DO_BIC(BIC_USEC))
551                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
552         if (DO_BIC(BIC_TOD))
553                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
554         if (DO_BIC(BIC_Package))
555                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
556         if (DO_BIC(BIC_Core))
557                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
558         if (DO_BIC(BIC_CPU))
559                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
560         if (DO_BIC(BIC_Avg_MHz))
561                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
562         if (DO_BIC(BIC_Busy))
563                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
564         if (DO_BIC(BIC_Bzy_MHz))
565                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
566         if (DO_BIC(BIC_TSC_MHz))
567                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
568
569         if (DO_BIC(BIC_IRQ)) {
570                 if (sums_need_wide_columns)
571                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
572                 else
573                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
574         }
575
576         if (DO_BIC(BIC_SMI))
577                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
578
579         for (mp = sys.tp; mp; mp = mp->next) {
580
581                 if (mp->format == FORMAT_RAW) {
582                         if (mp->width == 64)
583                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
584                         else
585                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
586                 } else {
587                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
588                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
589                         else
590                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
591                 }
592         }
593
594         if (DO_BIC(BIC_CPU_c1))
595                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
596         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
597                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
598         if (DO_BIC(BIC_CPU_c6))
599                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
600         if (DO_BIC(BIC_CPU_c7))
601                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
602
603         if (DO_BIC(BIC_Mod_c6))
604                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
605
606         if (DO_BIC(BIC_CoreTmp))
607                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
608
609         for (mp = sys.cp; mp; mp = mp->next) {
610                 if (mp->format == FORMAT_RAW) {
611                         if (mp->width == 64)
612                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
613                         else
614                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
615                 } else {
616                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
617                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
618                         else
619                                 outp += sprintf(outp, "%s%s", delim, mp->name);
620                 }
621         }
622
623         if (DO_BIC(BIC_PkgTmp))
624                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
625
626         if (DO_BIC(BIC_GFX_rc6))
627                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
628
629         if (DO_BIC(BIC_GFXMHz))
630                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
631
632         if (DO_BIC(BIC_Totl_c0))
633                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
634         if (DO_BIC(BIC_Any_c0))
635                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
636         if (DO_BIC(BIC_GFX_c0))
637                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
638         if (DO_BIC(BIC_CPUGFX))
639                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
640
641         if (DO_BIC(BIC_Pkgpc2))
642                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
643         if (DO_BIC(BIC_Pkgpc3))
644                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
645         if (DO_BIC(BIC_Pkgpc6))
646                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
647         if (DO_BIC(BIC_Pkgpc7))
648                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
649         if (DO_BIC(BIC_Pkgpc8))
650                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
651         if (DO_BIC(BIC_Pkgpc9))
652                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
653         if (DO_BIC(BIC_Pkgpc10))
654                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
655
656         if (do_rapl && !rapl_joules) {
657                 if (DO_BIC(BIC_PkgWatt))
658                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
659                 if (DO_BIC(BIC_CorWatt))
660                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
661                 if (DO_BIC(BIC_GFXWatt))
662                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
663                 if (DO_BIC(BIC_RAMWatt))
664                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
665                 if (DO_BIC(BIC_PKG__))
666                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
667                 if (DO_BIC(BIC_RAM__))
668                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
669         } else if (do_rapl && rapl_joules) {
670                 if (DO_BIC(BIC_Pkg_J))
671                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
672                 if (DO_BIC(BIC_Cor_J))
673                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
674                 if (DO_BIC(BIC_GFX_J))
675                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
676                 if (DO_BIC(BIC_RAM_J))
677                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
678                 if (DO_BIC(BIC_PKG__))
679                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
680                 if (DO_BIC(BIC_RAM__))
681                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
682         }
683         for (mp = sys.pp; mp; mp = mp->next) {
684                 if (mp->format == FORMAT_RAW) {
685                         if (mp->width == 64)
686                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
687                         else
688                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
689                 } else {
690                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
691                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
692                         else
693                                 outp += sprintf(outp, "%s%s", delim, mp->name);
694                 }
695         }
696
697         outp += sprintf(outp, "\n");
698 }
699
700 int dump_counters(struct thread_data *t, struct core_data *c,
701         struct pkg_data *p)
702 {
703         int i;
704         struct msr_counter *mp;
705
706         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
707
708         if (t) {
709                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
710                         t->cpu_id, t->flags);
711                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
712                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
713                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
714                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
715
716                 if (DO_BIC(BIC_IRQ))
717                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
718                 if (DO_BIC(BIC_SMI))
719                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
720
721                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
722                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
723                                 i, mp->msr_num, t->counter[i]);
724                 }
725         }
726
727         if (c) {
728                 outp += sprintf(outp, "core: %d\n", c->core_id);
729                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
730                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
731                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
732                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
733
734                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
735                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
736                                 i, mp->msr_num, c->counter[i]);
737                 }
738                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
739         }
740
741         if (p) {
742                 outp += sprintf(outp, "package: %d\n", p->package_id);
743
744                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
745                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
746                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
747                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
748
749                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
750                 if (DO_BIC(BIC_Pkgpc3))
751                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
752                 if (DO_BIC(BIC_Pkgpc6))
753                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
754                 if (DO_BIC(BIC_Pkgpc7))
755                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
756                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
757                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
758                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
759                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
760                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
761                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
762                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
763                 outp += sprintf(outp, "Throttle PKG: %0X\n",
764                         p->rapl_pkg_perf_status);
765                 outp += sprintf(outp, "Throttle RAM: %0X\n",
766                         p->rapl_dram_perf_status);
767                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
768
769                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
770                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
771                                 i, mp->msr_num, p->counter[i]);
772                 }
773         }
774
775         outp += sprintf(outp, "\n");
776
777         return 0;
778 }
779
780 /*
781  * column formatting convention & formats
782  */
783 int format_counters(struct thread_data *t, struct core_data *c,
784         struct pkg_data *p)
785 {
786         double interval_float, tsc;
787         char *fmt8;
788         int i;
789         struct msr_counter *mp;
790         char *delim = "\t";
791         int printed = 0;
792
793          /* if showing only 1st thread in core and this isn't one, bail out */
794         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
795                 return 0;
796
797          /* if showing only 1st thread in pkg and this isn't one, bail out */
798         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
799                 return 0;
800
801         /*if not summary line and --cpu is used */
802         if ((t != &average.threads) &&
803                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
804                 return 0;
805
806         if (DO_BIC(BIC_USEC)) {
807                 /* on each row, print how many usec each timestamp took to gather */
808                 struct timeval tv;
809
810                 timersub(&t->tv_end, &t->tv_begin, &tv);
811                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
812         }
813
814         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
815         if (DO_BIC(BIC_TOD))
816                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
817
818         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
819
820         tsc = t->tsc * tsc_tweak;
821
822         /* topo columns, print blanks on 1st (average) line */
823         if (t == &average.threads) {
824                 if (DO_BIC(BIC_Package))
825                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
826                 if (DO_BIC(BIC_Core))
827                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
828                 if (DO_BIC(BIC_CPU))
829                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
830         } else {
831                 if (DO_BIC(BIC_Package)) {
832                         if (p)
833                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
834                         else
835                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
836                 }
837                 if (DO_BIC(BIC_Core)) {
838                         if (c)
839                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
840                         else
841                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
842                 }
843                 if (DO_BIC(BIC_CPU))
844                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
845         }
846
847         if (DO_BIC(BIC_Avg_MHz))
848                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
849                         1.0 / units * t->aperf / interval_float);
850
851         if (DO_BIC(BIC_Busy))
852                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
853
854         if (DO_BIC(BIC_Bzy_MHz)) {
855                 if (has_base_hz)
856                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
857                 else
858                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
859                                 tsc / units * t->aperf / t->mperf / interval_float);
860         }
861
862         if (DO_BIC(BIC_TSC_MHz))
863                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
864
865         /* IRQ */
866         if (DO_BIC(BIC_IRQ)) {
867                 if (sums_need_wide_columns)
868                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
869                 else
870                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
871         }
872
873         /* SMI */
874         if (DO_BIC(BIC_SMI))
875                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
876
877         /* Added counters */
878         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
879                 if (mp->format == FORMAT_RAW) {
880                         if (mp->width == 32)
881                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
882                         else
883                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
884                 } else if (mp->format == FORMAT_DELTA) {
885                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
886                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
887                         else
888                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
889                 } else if (mp->format == FORMAT_PERCENT) {
890                         if (mp->type == COUNTER_USEC)
891                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
892                         else
893                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
894                 }
895         }
896
897         /* C1 */
898         if (DO_BIC(BIC_CPU_c1))
899                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
900
901
902         /* print per-core data only for 1st thread in core */
903         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
904                 goto done;
905
906         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
907                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
908         if (DO_BIC(BIC_CPU_c6))
909                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
910         if (DO_BIC(BIC_CPU_c7))
911                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
912
913         /* Mod%c6 */
914         if (DO_BIC(BIC_Mod_c6))
915                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
916
917         if (DO_BIC(BIC_CoreTmp))
918                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
919
920         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
921                 if (mp->format == FORMAT_RAW) {
922                         if (mp->width == 32)
923                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
924                         else
925                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
926                 } else if (mp->format == FORMAT_DELTA) {
927                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
928                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
929                         else
930                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
931                 } else if (mp->format == FORMAT_PERCENT) {
932                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
933                 }
934         }
935
936         /* print per-package data only for 1st core in package */
937         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
938                 goto done;
939
940         /* PkgTmp */
941         if (DO_BIC(BIC_PkgTmp))
942                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
943
944         /* GFXrc6 */
945         if (DO_BIC(BIC_GFX_rc6)) {
946                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
947                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
948                 } else {
949                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
950                                 p->gfx_rc6_ms / 10.0 / interval_float);
951                 }
952         }
953
954         /* GFXMHz */
955         if (DO_BIC(BIC_GFXMHz))
956                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
957
958         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
959         if (DO_BIC(BIC_Totl_c0))
960                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
961         if (DO_BIC(BIC_Any_c0))
962                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
963         if (DO_BIC(BIC_GFX_c0))
964                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
965         if (DO_BIC(BIC_CPUGFX))
966                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
967
968         if (DO_BIC(BIC_Pkgpc2))
969                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
970         if (DO_BIC(BIC_Pkgpc3))
971                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
972         if (DO_BIC(BIC_Pkgpc6))
973                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
974         if (DO_BIC(BIC_Pkgpc7))
975                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
976         if (DO_BIC(BIC_Pkgpc8))
977                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
978         if (DO_BIC(BIC_Pkgpc9))
979                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
980         if (DO_BIC(BIC_Pkgpc10))
981                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
982
983         /*
984          * If measurement interval exceeds minimum RAPL Joule Counter range,
985          * indicate that results are suspect by printing "**" in fraction place.
986          */
987         if (interval_float < rapl_joule_counter_range)
988                 fmt8 = "%s%.2f";
989         else
990                 fmt8 = "%6.0f**";
991
992         if (DO_BIC(BIC_PkgWatt))
993                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
994         if (DO_BIC(BIC_CorWatt))
995                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
996         if (DO_BIC(BIC_GFXWatt))
997                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
998         if (DO_BIC(BIC_RAMWatt))
999                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1000         if (DO_BIC(BIC_Pkg_J))
1001                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1002         if (DO_BIC(BIC_Cor_J))
1003                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1004         if (DO_BIC(BIC_GFX_J))
1005                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1006         if (DO_BIC(BIC_RAM_J))
1007                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1008         if (DO_BIC(BIC_PKG__))
1009                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1010         if (DO_BIC(BIC_RAM__))
1011                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1012
1013         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1014                 if (mp->format == FORMAT_RAW) {
1015                         if (mp->width == 32)
1016                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1017                         else
1018                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1019                 } else if (mp->format == FORMAT_DELTA) {
1020                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1021                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1022                         else
1023                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1024                 } else if (mp->format == FORMAT_PERCENT) {
1025                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1026                 }
1027         }
1028
1029 done:
1030         outp += sprintf(outp, "\n");
1031
1032         return 0;
1033 }
1034
1035 void flush_output_stdout(void)
1036 {
1037         FILE *filep;
1038
1039         if (outf == stderr)
1040                 filep = stdout;
1041         else
1042                 filep = outf;
1043
1044         fputs(output_buffer, filep);
1045         fflush(filep);
1046
1047         outp = output_buffer;
1048 }
1049 void flush_output_stderr(void)
1050 {
1051         fputs(output_buffer, outf);
1052         fflush(outf);
1053         outp = output_buffer;
1054 }
1055 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1056 {
1057         static int printed;
1058
1059         if (!printed || !summary_only)
1060                 print_header("\t");
1061
1062         if (topo.num_cpus > 1)
1063                 format_counters(&average.threads, &average.cores,
1064                         &average.packages);
1065
1066         printed = 1;
1067
1068         if (summary_only)
1069                 return;
1070
1071         for_all_cpus(format_counters, t, c, p);
1072 }
1073
/*
 * DELTA_WRAP32(new, old)
 *
 * Replace "old" with the distance from "old" to "new" for a counter that
 * wraps at 2^32: when "new" is not greater than "old", 2^32 is added
 * before subtracting.
 *
 * "old" must be a modifiable lvalue; both arguments are evaluated more
 * than once, so they must be free of side effects.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old))				\
			(old) = (new) - (old);			\
		else						\
			(old) = 0x100000000ULL + (new) - (old);	\
	} while (0)
1080
1081 int
1082 delta_package(struct pkg_data *new, struct pkg_data *old)
1083 {
1084         int i;
1085         struct msr_counter *mp;
1086
1087
1088         if (DO_BIC(BIC_Totl_c0))
1089                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1090         if (DO_BIC(BIC_Any_c0))
1091                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1092         if (DO_BIC(BIC_GFX_c0))
1093                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1094         if (DO_BIC(BIC_CPUGFX))
1095                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1096
1097         old->pc2 = new->pc2 - old->pc2;
1098         if (DO_BIC(BIC_Pkgpc3))
1099                 old->pc3 = new->pc3 - old->pc3;
1100         if (DO_BIC(BIC_Pkgpc6))
1101                 old->pc6 = new->pc6 - old->pc6;
1102         if (DO_BIC(BIC_Pkgpc7))
1103                 old->pc7 = new->pc7 - old->pc7;
1104         old->pc8 = new->pc8 - old->pc8;
1105         old->pc9 = new->pc9 - old->pc9;
1106         old->pc10 = new->pc10 - old->pc10;
1107         old->pkg_temp_c = new->pkg_temp_c;
1108
1109         /* flag an error when rc6 counter resets/wraps */
1110         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1111                 old->gfx_rc6_ms = -1;
1112         else
1113                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1114
1115         old->gfx_mhz = new->gfx_mhz;
1116
1117         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1118         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1119         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1120         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1121         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1122         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1123
1124         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1125                 if (mp->format == FORMAT_RAW)
1126                         old->counter[i] = new->counter[i];
1127                 else
1128                         old->counter[i] = new->counter[i] - old->counter[i];
1129         }
1130
1131         return 0;
1132 }
1133
1134 void
1135 delta_core(struct core_data *new, struct core_data *old)
1136 {
1137         int i;
1138         struct msr_counter *mp;
1139
1140         old->c3 = new->c3 - old->c3;
1141         old->c6 = new->c6 - old->c6;
1142         old->c7 = new->c7 - old->c7;
1143         old->core_temp_c = new->core_temp_c;
1144         old->mc6_us = new->mc6_us - old->mc6_us;
1145
1146         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1147                 if (mp->format == FORMAT_RAW)
1148                         old->counter[i] = new->counter[i];
1149                 else
1150                         old->counter[i] = new->counter[i] - old->counter[i];
1151         }
1152 }
1153
1154 /*
1155  * old = new - old
1156  */
1157 int
1158 delta_thread(struct thread_data *new, struct thread_data *old,
1159         struct core_data *core_delta)
1160 {
1161         int i;
1162         struct msr_counter *mp;
1163
1164         /*
1165          * the timestamps from start of measurement interval are in "old"
1166          * the timestamp from end of measurement interval are in "new"
1167          * over-write old w/ new so we can print end of interval values
1168          */
1169
1170         old->tv_begin = new->tv_begin;
1171         old->tv_end = new->tv_end;
1172
1173         old->tsc = new->tsc - old->tsc;
1174
1175         /* check for TSC < 1 Mcycles over interval */
1176         if (old->tsc < (1000 * 1000))
1177                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1178                      "You can disable all c-states by booting with \"idle=poll\"\n"
1179                      "or just the deep ones with \"processor.max_cstate=1\"");
1180
1181         old->c1 = new->c1 - old->c1;
1182
1183         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1184                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1185                         old->aperf = new->aperf - old->aperf;
1186                         old->mperf = new->mperf - old->mperf;
1187                 } else {
1188                         return -1;
1189                 }
1190         }
1191
1192
1193         if (use_c1_residency_msr) {
1194                 /*
1195                  * Some models have a dedicated C1 residency MSR,
1196                  * which should be more accurate than the derivation below.
1197                  */
1198         } else {
1199                 /*
1200                  * As counter collection is not atomic,
1201                  * it is possible for mperf's non-halted cycles + idle states
1202                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1203                  */
1204                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1205                         old->c1 = 0;
1206                 else {
1207                         /* normal case, derive c1 */
1208                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1209                                 - core_delta->c6 - core_delta->c7;
1210                 }
1211         }
1212
1213         if (old->mperf == 0) {
1214                 if (debug > 1)
1215                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1216                 old->mperf = 1; /* divide by 0 protection */
1217         }
1218
1219         if (DO_BIC(BIC_IRQ))
1220                 old->irq_count = new->irq_count - old->irq_count;
1221
1222         if (DO_BIC(BIC_SMI))
1223                 old->smi_count = new->smi_count - old->smi_count;
1224
1225         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1226                 if (mp->format == FORMAT_RAW)
1227                         old->counter[i] = new->counter[i];
1228                 else
1229                         old->counter[i] = new->counter[i] - old->counter[i];
1230         }
1231         return 0;
1232 }
1233
1234 int delta_cpu(struct thread_data *t, struct core_data *c,
1235         struct pkg_data *p, struct thread_data *t2,
1236         struct core_data *c2, struct pkg_data *p2)
1237 {
1238         int retval = 0;
1239
1240         /* calculate core delta only for 1st thread in core */
1241         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1242                 delta_core(c, c2);
1243
1244         /* always calculate thread delta */
1245         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1246         if (retval)
1247                 return retval;
1248
1249         /* calculate package delta only for 1st core in package */
1250         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1251                 retval = delta_package(p, p2);
1252
1253         return retval;
1254 }
1255
1256 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1257 {
1258         int i;
1259         struct msr_counter  *mp;
1260
1261         t->tv_begin.tv_sec = 0;
1262         t->tv_begin.tv_usec = 0;
1263         t->tv_end.tv_sec = 0;
1264         t->tv_end.tv_usec = 0;
1265
1266         t->tsc = 0;
1267         t->aperf = 0;
1268         t->mperf = 0;
1269         t->c1 = 0;
1270
1271         t->irq_count = 0;
1272         t->smi_count = 0;
1273
1274         /* tells format_counters to dump all fields from this set */
1275         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1276
1277         c->c3 = 0;
1278         c->c6 = 0;
1279         c->c7 = 0;
1280         c->mc6_us = 0;
1281         c->core_temp_c = 0;
1282
1283         p->pkg_wtd_core_c0 = 0;
1284         p->pkg_any_core_c0 = 0;
1285         p->pkg_any_gfxe_c0 = 0;
1286         p->pkg_both_core_gfxe_c0 = 0;
1287
1288         p->pc2 = 0;
1289         if (DO_BIC(BIC_Pkgpc3))
1290                 p->pc3 = 0;
1291         if (DO_BIC(BIC_Pkgpc6))
1292                 p->pc6 = 0;
1293         if (DO_BIC(BIC_Pkgpc7))
1294                 p->pc7 = 0;
1295         p->pc8 = 0;
1296         p->pc9 = 0;
1297         p->pc10 = 0;
1298
1299         p->energy_pkg = 0;
1300         p->energy_dram = 0;
1301         p->energy_cores = 0;
1302         p->energy_gfx = 0;
1303         p->rapl_pkg_perf_status = 0;
1304         p->rapl_dram_perf_status = 0;
1305         p->pkg_temp_c = 0;
1306
1307         p->gfx_rc6_ms = 0;
1308         p->gfx_mhz = 0;
1309         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1310                 t->counter[i] = 0;
1311
1312         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1313                 c->counter[i] = 0;
1314
1315         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1316                 p->counter[i] = 0;
1317 }
1318 int sum_counters(struct thread_data *t, struct core_data *c,
1319         struct pkg_data *p)
1320 {
1321         int i;
1322         struct msr_counter *mp;
1323
1324         /* remember first tv_begin */
1325         if (average.threads.tv_begin.tv_sec == 0)
1326                 average.threads.tv_begin = t->tv_begin;
1327
1328         /* remember last tv_end */
1329         average.threads.tv_end = t->tv_end;
1330
1331         average.threads.tsc += t->tsc;
1332         average.threads.aperf += t->aperf;
1333         average.threads.mperf += t->mperf;
1334         average.threads.c1 += t->c1;
1335
1336         average.threads.irq_count += t->irq_count;
1337         average.threads.smi_count += t->smi_count;
1338
1339         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1340                 if (mp->format == FORMAT_RAW)
1341                         continue;
1342                 average.threads.counter[i] += t->counter[i];
1343         }
1344
1345         /* sum per-core values only for 1st thread in core */
1346         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1347                 return 0;
1348
1349         average.cores.c3 += c->c3;
1350         average.cores.c6 += c->c6;
1351         average.cores.c7 += c->c7;
1352         average.cores.mc6_us += c->mc6_us;
1353
1354         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1355
1356         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1357                 if (mp->format == FORMAT_RAW)
1358                         continue;
1359                 average.cores.counter[i] += c->counter[i];
1360         }
1361
1362         /* sum per-pkg values only for 1st core in pkg */
1363         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1364                 return 0;
1365
1366         if (DO_BIC(BIC_Totl_c0))
1367                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1368         if (DO_BIC(BIC_Any_c0))
1369                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1370         if (DO_BIC(BIC_GFX_c0))
1371                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1372         if (DO_BIC(BIC_CPUGFX))
1373                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1374
1375         average.packages.pc2 += p->pc2;
1376         if (DO_BIC(BIC_Pkgpc3))
1377                 average.packages.pc3 += p->pc3;
1378         if (DO_BIC(BIC_Pkgpc6))
1379                 average.packages.pc6 += p->pc6;
1380         if (DO_BIC(BIC_Pkgpc7))
1381                 average.packages.pc7 += p->pc7;
1382         average.packages.pc8 += p->pc8;
1383         average.packages.pc9 += p->pc9;
1384         average.packages.pc10 += p->pc10;
1385
1386         average.packages.energy_pkg += p->energy_pkg;
1387         average.packages.energy_dram += p->energy_dram;
1388         average.packages.energy_cores += p->energy_cores;
1389         average.packages.energy_gfx += p->energy_gfx;
1390
1391         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1392         average.packages.gfx_mhz = p->gfx_mhz;
1393
1394         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1395
1396         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1397         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1398
1399         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1400                 if (mp->format == FORMAT_RAW)
1401                         continue;
1402                 average.packages.counter[i] += p->counter[i];
1403         }
1404         return 0;
1405 }
1406 /*
1407  * sum the counters for all cpus in the system
1408  * compute the weighted average
1409  */
1410 void compute_average(struct thread_data *t, struct core_data *c,
1411         struct pkg_data *p)
1412 {
1413         int i;
1414         struct msr_counter *mp;
1415
1416         clear_counters(&average.threads, &average.cores, &average.packages);
1417
1418         for_all_cpus(sum_counters, t, c, p);
1419
1420         average.threads.tsc /= topo.num_cpus;
1421         average.threads.aperf /= topo.num_cpus;
1422         average.threads.mperf /= topo.num_cpus;
1423         average.threads.c1 /= topo.num_cpus;
1424
1425         if (average.threads.irq_count > 9999999)
1426                 sums_need_wide_columns = 1;
1427
1428         average.cores.c3 /= topo.num_cores;
1429         average.cores.c6 /= topo.num_cores;
1430         average.cores.c7 /= topo.num_cores;
1431         average.cores.mc6_us /= topo.num_cores;
1432
1433         if (DO_BIC(BIC_Totl_c0))
1434                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1435         if (DO_BIC(BIC_Any_c0))
1436                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1437         if (DO_BIC(BIC_GFX_c0))
1438                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1439         if (DO_BIC(BIC_CPUGFX))
1440                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1441
1442         average.packages.pc2 /= topo.num_packages;
1443         if (DO_BIC(BIC_Pkgpc3))
1444                 average.packages.pc3 /= topo.num_packages;
1445         if (DO_BIC(BIC_Pkgpc6))
1446                 average.packages.pc6 /= topo.num_packages;
1447         if (DO_BIC(BIC_Pkgpc7))
1448                 average.packages.pc7 /= topo.num_packages;
1449
1450         average.packages.pc8 /= topo.num_packages;
1451         average.packages.pc9 /= topo.num_packages;
1452         average.packages.pc10 /= topo.num_packages;
1453
1454         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1455                 if (mp->format == FORMAT_RAW)
1456                         continue;
1457                 if (mp->type == COUNTER_ITEMS) {
1458                         if (average.threads.counter[i] > 9999999)
1459                                 sums_need_wide_columns = 1;
1460                         continue;
1461                 }
1462                 average.threads.counter[i] /= topo.num_cpus;
1463         }
1464         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1465                 if (mp->format == FORMAT_RAW)
1466                         continue;
1467                 if (mp->type == COUNTER_ITEMS) {
1468                         if (average.cores.counter[i] > 9999999)
1469                                 sums_need_wide_columns = 1;
1470                 }
1471                 average.cores.counter[i] /= topo.num_cores;
1472         }
1473         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1474                 if (mp->format == FORMAT_RAW)
1475                         continue;
1476                 if (mp->type == COUNTER_ITEMS) {
1477                         if (average.packages.counter[i] > 9999999)
1478                                 sums_need_wide_columns = 1;
1479                 }
1480                 average.packages.counter[i] /= topo.num_packages;
1481         }
1482 }
1483
1484 static unsigned long long rdtsc(void)
1485 {
1486         unsigned int low, high;
1487
1488         asm volatile("rdtsc" : "=a" (low), "=d" (high));
1489
1490         return low | ((unsigned long long)high) << 32;
1491 }
1492
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: when the file cannot be
 * opened, the program exits with status 1 via err().
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Read a single unsigned decimal value from the file at "path" and
 * return it.
 *
 * Exits with status 1 when the file cannot be opened or does not start
 * with a number.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu, not %lld: the destination is an unsigned long long */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1525
1526 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1527 {
1528         if (mp->msr_num != 0) {
1529                 if (get_msr(cpu, mp->msr_num, counterp))
1530                         return -1;
1531         } else {
1532                 char path[128];
1533
1534                 if (mp->flags & SYSFS_PERCPU) {
1535                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1536                                  cpu, mp->path);
1537
1538                         *counterp = snapshot_sysfs_counter(path);
1539                 } else {
1540                         *counterp = snapshot_sysfs_counter(mp->path);
1541                 }
1542         }
1543
1544         return 0;
1545 }
1546
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 *
 * Per-thread values are always collected into *t.  Per-core values are
 * collected into *c only when t->flags has CPU_IS_FIRST_THREAD_IN_CORE,
 * and per-package values into *p only when t->flags also has
 * CPU_IS_FIRST_CORE_IN_PACKAGE.  t->tv_begin and t->tv_end bracket the
 * collection.
 *
 * Returns 0 on success, -1 if migration to the CPU fails, and another
 * negative value if one of the counter reads fails (the codes are not
 * unique per read).
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	int aperf_mperf_retry_count = 0;
	struct msr_counter *mp;
	int i;


	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

retry:
	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;

		/*
		 * The TSC, APERF and MPERF must be read together for
		 * APERF/MPERF and MPERF/TSC to give accurate results.
		 *
		 * Unfortunately, APERF and MPERF are read by
		 * individual system call, so delays may occur
		 * between them.  If the time to read them
		 * varies by a large amount, we re-read them.
		 */

		/*
		 * This initial dummy APERF read has been seen to
		 * reduce jitter in the subsequent reads.
		 */

		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		t->tsc = rdtsc();	/* re-read close to APERF */

		tsc_before = t->tsc;

		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		tsc_between = rdtsc();

		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
			return -4;

		tsc_after = rdtsc();

		/* TSC ticks spent on each of the two MSR reads */
		aperf_time = tsc_between - tsc_before;
		mperf_time = tsc_after - tsc_between;

		/*
		 * If the system call latency to read APERF and MPERF
		 * differ by more than 2x, then try again.
		 */
		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
			aperf_mperf_retry_count++;
			/* after 5 jittery attempts, warn and keep the last sample */
			if (aperf_mperf_retry_count < 5)
				goto retry;
			else
				warnx("cpu%d jitter %lld %lld",
					cpu, aperf_time, mperf_time);
		}
		aperf_mperf_retry_count = 0;

		t->aperf = t->aperf * aperf_mperf_multiplier;
		t->mperf = t->mperf * aperf_mperf_multiplier;
	}

	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_SMI)) {
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		/* only the low 32 bits of the MSR are kept */
		t->smi_count = msr & 0xFFFFFFFF;
	}
	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
			return -6;
	}

	/* walk the sys.tp list; entry i is stored in t->counter[i] */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i]))
			return -10;
	}

	/* collect core counters only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		goto done;

	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
	}

	/* when do_knl_cstates is set, c6 is read from the KNL-specific MSR instead */
	if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	} else if (do_knl_cstates) {
		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	}

	if (DO_BIC(BIC_CPU_c7))
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;

	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* bits 22:16 of the MSR are subtracted from tcc_activation_temp */
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

	/* walk the sys.cp list; entry i is stored in c->counter[i] */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i]))
			return -10;
	}

	/* collect package counters only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}
	if (DO_BIC(BIC_Pkgpc3))
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
	if (DO_BIC(BIC_Pkgpc6)) {
		/* do_slm_cstates selects the ATOM variant of the PC6 residency MSR */
		if (do_slm_cstates) {
			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		} else {
			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		}
	}

	if (DO_BIC(BIC_Pkgpc2))
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
	if (DO_BIC(BIC_Pkgpc7))
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
	if (DO_BIC(BIC_Pkgpc8))
		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
			return -13;
	if (DO_BIC(BIC_Pkgpc9))
		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
			return -13;
	if (DO_BIC(BIC_Pkgpc10))
		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
			return -13;

	/* each RAPL value below keeps only the low 32 bits of its MSR */
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
			return -14;
		p->energy_cores = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
			return -15;
		p->energy_dram = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
			return -16;
		p->energy_gfx = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_PKG_PERF_STATUS) {
		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
			return -16;
		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
			return -16;
		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
	}
	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		/* same decoding as the core temperature above: bits 22:16 */
		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

	/* graphics values are copied from globals, not read here */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_cur_rc6_ms;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_cur_mhz;

	/* walk the sys.pp list; entry i is stored in p->counter[i] */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &p->counter[i]))
			return -10;
	}
done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}
1780
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable names, indexed by the PCL* values above.  The order of the
 * entries must match the numeric order of the defines: entry 0 is
 * PCLUKN ("unknown") and entry 1 is PCLRSV ("reserved").
 */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};

/*
 * Per-platform translation tables: each maps a 4-bit limit field value
 * (0..15) to one of the PCL* codes above.
 */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1815
1816
1817 static void
1818 calculate_tsc_tweak()
1819 {
1820         tsc_tweak = base_hz / tsc_hz;
1821 }
1822
1823 static void
1824 dump_nhm_platform_info(void)
1825 {
1826         unsigned long long msr;
1827         unsigned int ratio;
1828
1829         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1830
1831         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1832
1833         ratio = (msr >> 40) & 0xFF;
1834         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1835                 ratio, bclk, ratio * bclk);
1836
1837         ratio = (msr >> 8) & 0xFF;
1838         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1839                 ratio, bclk, ratio * bclk);
1840
1841         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1842         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1843                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1844
1845         return;
1846 }
1847
1848 static void
1849 dump_hsw_turbo_ratio_limits(void)
1850 {
1851         unsigned long long msr;
1852         unsigned int ratio;
1853
1854         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1855
1856         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1857
1858         ratio = (msr >> 8) & 0xFF;
1859         if (ratio)
1860                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1861                         ratio, bclk, ratio * bclk);
1862
1863         ratio = (msr >> 0) & 0xFF;
1864         if (ratio)
1865                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1866                         ratio, bclk, ratio * bclk);
1867         return;
1868 }
1869
1870 static void
1871 dump_ivt_turbo_ratio_limits(void)
1872 {
1873         unsigned long long msr;
1874         unsigned int ratio;
1875
1876         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1877
1878         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1879
1880         ratio = (msr >> 56) & 0xFF;
1881         if (ratio)
1882                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1883                         ratio, bclk, ratio * bclk);
1884
1885         ratio = (msr >> 48) & 0xFF;
1886         if (ratio)
1887                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1888                         ratio, bclk, ratio * bclk);
1889
1890         ratio = (msr >> 40) & 0xFF;
1891         if (ratio)
1892                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1893                         ratio, bclk, ratio * bclk);
1894
1895         ratio = (msr >> 32) & 0xFF;
1896         if (ratio)
1897                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1898                         ratio, bclk, ratio * bclk);
1899
1900         ratio = (msr >> 24) & 0xFF;
1901         if (ratio)
1902                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1903                         ratio, bclk, ratio * bclk);
1904
1905         ratio = (msr >> 16) & 0xFF;
1906         if (ratio)
1907                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1908                         ratio, bclk, ratio * bclk);
1909
1910         ratio = (msr >> 8) & 0xFF;
1911         if (ratio)
1912                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1913                         ratio, bclk, ratio * bclk);
1914
1915         ratio = (msr >> 0) & 0xFF;
1916         if (ratio)
1917                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
1918                         ratio, bclk, ratio * bclk);
1919         return;
1920 }
1921 int has_turbo_ratio_group_limits(int family, int model)
1922 {
1923
1924         if (!genuine_intel)
1925                 return 0;
1926
1927         switch (model) {
1928         case INTEL_FAM6_ATOM_GOLDMONT:
1929         case INTEL_FAM6_SKYLAKE_X:
1930         case INTEL_FAM6_ATOM_DENVERTON:
1931                 return 1;
1932         }
1933         return 0;
1934 }
1935
1936 static void
1937 dump_turbo_ratio_limits(int family, int model)
1938 {
1939         unsigned long long msr, core_counts;
1940         unsigned int ratio, group_size;
1941
1942         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1943         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1944
1945         if (has_turbo_ratio_group_limits(family, model)) {
1946                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
1947                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
1948         } else {
1949                 core_counts = 0x0807060504030201;
1950         }
1951
1952         ratio = (msr >> 56) & 0xFF;
1953         group_size = (core_counts >> 56) & 0xFF;
1954         if (ratio)
1955                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1956                         ratio, bclk, ratio * bclk, group_size);
1957
1958         ratio = (msr >> 48) & 0xFF;
1959         group_size = (core_counts >> 48) & 0xFF;
1960         if (ratio)
1961                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1962                         ratio, bclk, ratio * bclk, group_size);
1963
1964         ratio = (msr >> 40) & 0xFF;
1965         group_size = (core_counts >> 40) & 0xFF;
1966         if (ratio)
1967                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1968                         ratio, bclk, ratio * bclk, group_size);
1969
1970         ratio = (msr >> 32) & 0xFF;
1971         group_size = (core_counts >> 32) & 0xFF;
1972         if (ratio)
1973                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1974                         ratio, bclk, ratio * bclk, group_size);
1975
1976         ratio = (msr >> 24) & 0xFF;
1977         group_size = (core_counts >> 24) & 0xFF;
1978         if (ratio)
1979                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1980                         ratio, bclk, ratio * bclk, group_size);
1981
1982         ratio = (msr >> 16) & 0xFF;
1983         group_size = (core_counts >> 16) & 0xFF;
1984         if (ratio)
1985                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1986                         ratio, bclk, ratio * bclk, group_size);
1987
1988         ratio = (msr >> 8) & 0xFF;
1989         group_size = (core_counts >> 8) & 0xFF;
1990         if (ratio)
1991                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1992                         ratio, bclk, ratio * bclk, group_size);
1993
1994         ratio = (msr >> 0) & 0xFF;
1995         group_size = (core_counts >> 0) & 0xFF;
1996         if (ratio)
1997                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
1998                         ratio, bclk, ratio * bclk, group_size);
1999         return;
2000 }
2001
2002 static void
2003 dump_atom_turbo_ratio_limits(void)
2004 {
2005         unsigned long long msr;
2006         unsigned int ratio;
2007
2008         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2009         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2010
2011         ratio = (msr >> 0) & 0x3F;
2012         if (ratio)
2013                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2014                         ratio, bclk, ratio * bclk);
2015
2016         ratio = (msr >> 8) & 0x3F;
2017         if (ratio)
2018                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2019                         ratio, bclk, ratio * bclk);
2020
2021         ratio = (msr >> 16) & 0x3F;
2022         if (ratio)
2023                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2024                         ratio, bclk, ratio * bclk);
2025
2026         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2027         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2028
2029         ratio = (msr >> 24) & 0x3F;
2030         if (ratio)
2031                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2032                         ratio, bclk, ratio * bclk);
2033
2034         ratio = (msr >> 16) & 0x3F;
2035         if (ratio)
2036                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2037                         ratio, bclk, ratio * bclk);
2038
2039         ratio = (msr >> 8) & 0x3F;
2040         if (ratio)
2041                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2042                         ratio, bclk, ratio * bclk);
2043
2044         ratio = (msr >> 0) & 0x3F;
2045         if (ratio)
2046                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2047                         ratio, bclk, ratio * bclk);
2048 }
2049
2050 static void
2051 dump_knl_turbo_ratio_limits(void)
2052 {
2053         const unsigned int buckets_no = 7;
2054
2055         unsigned long long msr;
2056         int delta_cores, delta_ratio;
2057         int i, b_nr;
2058         unsigned int cores[buckets_no];
2059         unsigned int ratio[buckets_no];
2060
2061         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2062
2063         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2064                 base_cpu, msr);
2065
2066         /**
2067          * Turbo encoding in KNL is as follows:
2068          * [0] -- Reserved
2069          * [7:1] -- Base value of number of active cores of bucket 1.
2070          * [15:8] -- Base value of freq ratio of bucket 1.
2071          * [20:16] -- +ve delta of number of active cores of bucket 2.
2072          * i.e. active cores of bucket 2 =
2073          * active cores of bucket 1 + delta
2074          * [23:21] -- Negative delta of freq ratio of bucket 2.
2075          * i.e. freq ratio of bucket 2 =
2076          * freq ratio of bucket 1 - delta
2077          * [28:24]-- +ve delta of number of active cores of bucket 3.
2078          * [31:29]-- -ve delta of freq ratio of bucket 3.
2079          * [36:32]-- +ve delta of number of active cores of bucket 4.
2080          * [39:37]-- -ve delta of freq ratio of bucket 4.
2081          * [44:40]-- +ve delta of number of active cores of bucket 5.
2082          * [47:45]-- -ve delta of freq ratio of bucket 5.
2083          * [52:48]-- +ve delta of number of active cores of bucket 6.
2084          * [55:53]-- -ve delta of freq ratio of bucket 6.
2085          * [60:56]-- +ve delta of number of active cores of bucket 7.
2086          * [63:61]-- -ve delta of freq ratio of bucket 7.
2087          */
2088
2089         b_nr = 0;
2090         cores[b_nr] = (msr & 0xFF) >> 1;
2091         ratio[b_nr] = (msr >> 8) & 0xFF;
2092
2093         for (i = 16; i < 64; i += 8) {
2094                 delta_cores = (msr >> i) & 0x1F;
2095                 delta_ratio = (msr >> (i + 5)) & 0x7;
2096
2097                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2098                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2099                 b_nr++;
2100         }
2101
2102         for (i = buckets_no - 1; i >= 0; i--)
2103                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2104                         fprintf(outf,
2105                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2106                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2107 }
2108
2109 static void
2110 dump_nhm_cst_cfg(void)
2111 {
2112         unsigned long long msr;
2113
2114         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2115
2116 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
2117 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
2118
2119         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2120
2121         fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
2122                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2123                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2124                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2125                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2126                 (msr & (1 << 15)) ? "" : "UN",
2127                 (unsigned int)msr & 0xF,
2128                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2129         return;
2130 }
2131
2132 static void
2133 dump_config_tdp(void)
2134 {
2135         unsigned long long msr;
2136
2137         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2138         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2139         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2140
2141         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2142         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2143         if (msr) {
2144                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2145                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2146                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2147                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2148         }
2149         fprintf(outf, ")\n");
2150
2151         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2152         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2153         if (msr) {
2154                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2155                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2156                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2157                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2158         }
2159         fprintf(outf, ")\n");
2160
2161         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2162         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2163         if ((msr) & 0x3)
2164                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2165         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2166         fprintf(outf, ")\n");
2167
2168         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2169         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2170         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2171         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2172         fprintf(outf, ")\n");
2173 }
2174
2175 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2176
2177 void print_irtl(void)
2178 {
2179         unsigned long long msr;
2180
2181         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2182         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2183         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2184                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2185
2186         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2187         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2188         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2189                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2190
2191         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2192         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2193         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2194                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2195
2196         if (!do_irtl_hsw)
2197                 return;
2198
2199         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2200         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2201         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2202                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2203
2204         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2205         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2206         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2207                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2208
2209         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2210         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2211         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2212                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2213
2214 }
2215 void free_fd_percpu(void)
2216 {
2217         int i;
2218
2219         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2220                 if (fd_percpu[i] != 0)
2221                         close(fd_percpu[i]);
2222         }
2223
2224         free(fd_percpu);
2225 }
2226
2227 void free_all_buffers(void)
2228 {
2229         CPU_FREE(cpu_present_set);
2230         cpu_present_set = NULL;
2231         cpu_present_setsize = 0;
2232
2233         CPU_FREE(cpu_affinity_set);
2234         cpu_affinity_set = NULL;
2235         cpu_affinity_setsize = 0;
2236
2237         free(thread_even);
2238         free(core_even);
2239         free(package_even);
2240
2241         thread_even = NULL;
2242         core_even = NULL;
2243         package_even = NULL;
2244
2245         free(thread_odd);
2246         free(core_odd);
2247         free(package_odd);
2248
2249         thread_odd = NULL;
2250         core_odd = NULL;
2251         package_odd = NULL;
2252
2253         free(output_buffer);
2254         output_buffer = NULL;
2255         outp = NULL;
2256
2257         free_fd_percpu();
2258
2259         free(irq_column_2_cpu);
2260         free(irqs_per_cpu);
2261 }
2262
2263
2264 /*
2265  * Parse a file containing a single int.
2266  */
2267 int parse_int_file(const char *fmt, ...)
2268 {
2269         va_list args;
2270         char path[PATH_MAX];
2271         FILE *filep;
2272         int value;
2273
2274         va_start(args, fmt);
2275         vsnprintf(path, sizeof(path), fmt, args);
2276         va_end(args);
2277         filep = fopen_or_die(path, "r");
2278         if (fscanf(filep, "%d", &value) != 1)
2279                 err(1, "%s: failed to parse number from file", path);
2280         fclose(filep);
2281         return value;
2282 }
2283
2284 /*
2285  * get_cpu_position_in_core(cpu)
2286  * return the position of the CPU among its HT siblings in the core
2287  * return -1 if the sibling is not in list
2288  */
2289 int get_cpu_position_in_core(int cpu)
2290 {
2291         char path[64];
2292         FILE *filep;
2293         int this_cpu;
2294         char character;
2295         int i;
2296
2297         sprintf(path,
2298                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
2299                 cpu);
2300         filep = fopen(path, "r");
2301         if (filep == NULL) {
2302                 perror(path);
2303                 exit(1);
2304         }
2305
2306         for (i = 0; i < topo.num_threads_per_core; i++) {
2307                 fscanf(filep, "%d", &this_cpu);
2308                 if (this_cpu == cpu) {
2309                         fclose(filep);
2310                         return i;
2311                 }
2312
2313                 /* Account for no separator after last thread*/
2314                 if (i != (topo.num_threads_per_core - 1))
2315                         fscanf(filep, "%c", &character);
2316         }
2317
2318         fclose(filep);
2319         return -1;
2320 }
2321
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if the first number in the CPU's core_siblings_list
 * is the CPU itself, else return 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_listed;

	first_listed = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
	if (first_listed == cpu)
		return 1;

	return 0;
}
2330
/*
 * get_physical_package_id(cpu)
 * return the int read from the CPU's topology/physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
2335
/*
 * get_core_id(cpu)
 * return the int read from the CPU's topology/core_id file
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2340
/*
 * get_num_ht_siblings(cpu)
 * return the number of CPUs named in the CPU's thread_siblings_list
 *
 * file format:
 * A ',' separated or '-' separated set of numbers
 * (eg 1-2 or 1,3,4,5)
 *
 * Each ','-separated entry is either a single CPU number or an
 * inclusive "first-last" range.  A list holding only the CPU itself
 * (eg "0") counts as 1, and a range such as "0-3" counts every CPU
 * it spans.  An empty or unparsable list also yields 1.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	char *s;
	int count = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	/* an unreadable file is treated like an empty list */
	if (fgets(str, sizeof(str), filep) == NULL)
		str[0] = '\0';
	fclose(filep);

	s = str;
	while (*s != '\0') {
		char *end;
		long first, last;

		first = strtol(s, &end, 10);
		if (end == s)
			break;		/* no number here: stop parsing */
		last = first;
		s = end;

		/* optional "-last" turns the entry into an inclusive range */
		if (*s == '-') {
			last = strtol(s + 1, &end, 10);
			if (end == s + 1)
				break;
			s = end;
		}

		if (last >= first)
			count += (int)(last - first) + 1;

		/* only ',' continues the list; newline or NUL ends it */
		if (*s != ',')
			break;
		s++;
	}

	/* the CPU is always its own sibling */
	return count > 0 ? count : 1;
}
2371
2372 /*
2373  * run func(thread, core, package) in topology order
2374  * skip non-present cpus
2375  */
2376
2377 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2378         struct pkg_data *, struct thread_data *, struct core_data *,
2379         struct pkg_data *), struct thread_data *thread_base,
2380         struct core_data *core_base, struct pkg_data *pkg_base,
2381         struct thread_data *thread_base2, struct core_data *core_base2,
2382         struct pkg_data *pkg_base2)
2383 {
2384         int retval, pkg_no, core_no, thread_no;
2385
2386         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2387                 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
2388                         for (thread_no = 0; thread_no <
2389                                 topo.num_threads_per_core; ++thread_no) {
2390                                 struct thread_data *t, *t2;
2391                                 struct core_data *c, *c2;
2392                                 struct pkg_data *p, *p2;
2393
2394                                 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
2395
2396                                 if (cpu_is_not_present(t->cpu_id))
2397                                         continue;
2398
2399                                 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
2400
2401                                 c = GET_CORE(core_base, core_no, pkg_no);
2402                                 c2 = GET_CORE(core_base2, core_no, pkg_no);
2403
2404                                 p = GET_PKG(pkg_base, pkg_no);
2405                                 p2 = GET_PKG(pkg_base2, pkg_no);
2406
2407                                 retval = func(t, c, p, t2, c2, p2);
2408                                 if (retval)
2409                                         return retval;
2410                         }
2411                 }
2412         }
2413         return 0;
2414 }
2415
2416 /*
2417  * run func(cpu) on every cpu in /proc/stat
2418  * return max_cpu number
2419  */
2420 int for_all_proc_cpus(int (func)(int))
2421 {
2422         FILE *fp;
2423         int cpu_num;
2424         int retval;
2425
2426         fp = fopen_or_die(proc_stat, "r");
2427
2428         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2429         if (retval != 0)
2430                 err(1, "%s: failed to parse format", proc_stat);
2431
2432         while (1) {
2433                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2434                 if (retval != 1)
2435                         break;
2436
2437                 retval = func(cpu_num);
2438                 if (retval) {
2439                         fclose(fp);
2440                         return(retval);
2441                 }
2442         }
2443         fclose(fp);
2444         return 0;
2445 }
2446
2447 void re_initialize(void)
2448 {
2449         free_all_buffers();
2450         setup_all_buffers();
2451         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2452 }
2453
2454
2455 /*
2456  * count_cpus()
2457  * remember the last one seen, it will be the max
2458  */
2459 int count_cpus(int cpu)
2460 {
2461         if (topo.max_cpu_num < cpu)
2462                 topo.max_cpu_num = cpu;
2463
2464         topo.num_cpus += 1;
2465         return 0;
2466 }
2467 int mark_cpu_present(int cpu)
2468 {
2469         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2470         return 0;
2471 }
2472
2473 /*
2474  * snapshot_proc_interrupts()
2475  *
2476  * read and record summary of /proc/interrupts
2477  *
2478  * return 1 if config change requires a restart, else return 0
2479  */
2480 int snapshot_proc_interrupts(void)
2481 {
2482         static FILE *fp;
2483         int column, retval;
2484
2485         if (fp == NULL)
2486                 fp = fopen_or_die("/proc/interrupts", "r");
2487         else
2488                 rewind(fp);
2489
2490         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2491         for (column = 0; column < topo.num_cpus; ++column) {
2492                 int cpu_number;
2493
2494                 retval = fscanf(fp, " CPU%d", &cpu_number);
2495                 if (retval != 1)
2496                         break;
2497
2498                 if (cpu_number > topo.max_cpu_num) {
2499                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2500                         return 1;
2501                 }
2502
2503                 irq_column_2_cpu[column] = cpu_number;
2504                 irqs_per_cpu[cpu_number] = 0;
2505         }
2506
2507         /* read /proc/interrupt count lines and sum up irqs per cpu */
2508         while (1) {
2509                 int column;
2510                 char buf[64];
2511
2512                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2513                 if (retval != 1)
2514                         break;
2515
2516                 /* read the count per cpu */
2517                 for (column = 0; column < topo.num_cpus; ++column) {
2518
2519                         int cpu_number, irq_count;
2520
2521                         retval = fscanf(fp, " %d", &irq_count);
2522                         if (retval != 1)
2523                                 break;
2524
2525                         cpu_number = irq_column_2_cpu[column];
2526                         irqs_per_cpu[cpu_number] += irq_count;
2527
2528                 }
2529
2530                 while (getc(fp) != '\n')
2531                         ;       /* flush interrupt description */
2532
2533         }
2534         return 0;
2535 }
2536 /*
2537  * snapshot_gfx_rc6_ms()
2538  *
2539  * record snapshot of
2540  * /sys/class/drm/card0/power/rc6_residency_ms
2541  *
2542  * return 1 if config change requires a restart, else return 0
2543  */
2544 int snapshot_gfx_rc6_ms(void)
2545 {
2546         FILE *fp;
2547         int retval;
2548
2549         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2550
2551         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2552         if (retval != 1)
2553                 err(1, "GFX rc6");
2554
2555         fclose(fp);
2556
2557         return 0;
2558 }
2559 /*
2560  * snapshot_gfx_mhz()
2561  *
2562  * record snapshot of
2563  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2564  *
2565  * return 1 if config change requires a restart, else return 0
2566  */
2567 int snapshot_gfx_mhz(void)
2568 {
2569         static FILE *fp;
2570         int retval;
2571
2572         if (fp == NULL)
2573                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2574         else {
2575                 rewind(fp);
2576                 fflush(fp);
2577         }
2578
2579         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2580         if (retval != 1)
2581                 err(1, "GFX MHz");
2582
2583         return 0;
2584 }
2585
2586 /*
2587  * snapshot /proc and /sys files
2588  *
2589  * return 1 if configuration restart needed, else return 0
2590  */
2591 int snapshot_proc_sysfs_files(void)
2592 {
2593         if (DO_BIC(BIC_IRQ))
2594                 if (snapshot_proc_interrupts())
2595                         return 1;
2596
2597         if (DO_BIC(BIC_GFX_rc6))
2598                 snapshot_gfx_rc6_ms();
2599
2600         if (DO_BIC(BIC_GFXMHz))
2601                 snapshot_gfx_mhz();
2602
2603         return 0;
2604 }
2605
2606 int exit_requested;
2607
2608 static void signal_handler (int signal)
2609 {
2610         switch (signal) {
2611         case SIGINT:
2612                 exit_requested = 1;
2613                 if (debug)
2614                         fprintf(stderr, " SIGINT\n");
2615                 break;
2616         case SIGUSR1:
2617                 if (debug > 1)
2618                         fprintf(stderr, "SIGUSR1\n");
2619                 break;
2620         }
2621         /* make sure this manually-invoked interval is at least 1ms long */
2622         nanosleep(&one_msec, NULL);
2623 }
2624
2625 void setup_signal_handler(void)
2626 {
2627         struct sigaction sa;
2628
2629         memset(&sa, 0, sizeof(sa));
2630
2631         sa.sa_handler = &signal_handler;
2632
2633         if (sigaction(SIGINT, &sa, NULL) < 0)
2634                 err(1, "sigaction SIGINT");
2635         if (sigaction(SIGUSR1, &sa, NULL) < 0)
2636                 err(1, "sigaction SIGUSR1");
2637 }
2638
2639 void do_sleep(void)
2640 {
2641         struct timeval select_timeout;
2642         fd_set readfds;
2643         int retval;
2644
2645         FD_ZERO(&readfds);
2646         FD_SET(0, &readfds);
2647
2648         if (!isatty(fileno(stdin))) {
2649                 nanosleep(&interval_ts, NULL);
2650                 return;
2651         }
2652
2653         select_timeout = interval_tv;
2654         retval = select(1, &readfds, NULL, NULL, &select_timeout);
2655
2656         if (retval == 1) {
2657                 switch (getc(stdin)) {
2658                 case 'q':
2659                         exit_requested = 1;
2660                         break;
2661                 }
2662                 /* make sure this manually-invoked interval is at least 1ms long */
2663                 nanosleep(&one_msec, NULL);
2664         }
2665 }
2666
2667 void turbostat_loop()
2668 {
2669         int retval;
2670         int restarted = 0;
2671
2672         setup_signal_handler();
2673
2674 restart:
2675         restarted++;
2676
2677         snapshot_proc_sysfs_files();
2678         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2679         if (retval < -1) {
2680                 exit(retval);
2681         } else if (retval == -1) {
2682                 if (restarted > 1) {
2683                         exit(retval);
2684                 }
2685                 re_initialize();
2686                 goto restart;
2687         }
2688         restarted = 0;
2689         gettimeofday(&tv_even, (struct timezone *)NULL);
2690
2691         while (1) {
2692                 if (for_all_proc_cpus(cpu_is_not_present)) {
2693                         re_initialize();
2694                         goto restart;
2695                 }
2696                 do_sleep();
2697                 if (snapshot_proc_sysfs_files())
2698                         goto restart;
2699                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
2700                 if (retval < -1) {
2701                         exit(retval);
2702                 } else if (retval == -1) {
2703                         re_initialize();
2704                         goto restart;
2705                 }
2706                 gettimeofday(&tv_odd, (struct timezone *)NULL);
2707                 timersub(&tv_odd, &tv_even, &tv_delta);
2708                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2709                         re_initialize();
2710                         goto restart;
2711                 }
2712                 compute_average(EVEN_COUNTERS);
2713                 format_all_counters(EVEN_COUNTERS);
2714                 flush_output_stdout();
2715                 if (exit_requested)
2716                         break;
2717                 do_sleep();
2718                 if (snapshot_proc_sysfs_files())
2719                         goto restart;
2720                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2721                 if (retval < -1) {
2722                         exit(retval);
2723                 } else if (retval == -1) {
2724                         re_initialize();
2725                         goto restart;
2726                 }
2727                 gettimeofday(&tv_even, (struct timezone *)NULL);
2728                 timersub(&tv_even, &tv_odd, &tv_delta);
2729                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2730                         re_initialize();
2731                         goto restart;
2732                 }
2733                 compute_average(ODD_COUNTERS);
2734                 format_all_counters(ODD_COUNTERS);
2735                 flush_output_stdout();
2736                 if (exit_requested)
2737                         break;
2738         }
2739 }
2740
2741 void check_dev_msr()
2742 {
2743         struct stat sb;
2744         char pathname[32];
2745
2746         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2747         if (stat(pathname, &sb))
2748                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2749                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2750 }
2751
2752 void check_permissions()
2753 {
2754         struct __user_cap_header_struct cap_header_data;
2755         cap_user_header_t cap_header = &cap_header_data;
2756         struct __user_cap_data_struct cap_data_data;
2757         cap_user_data_t cap_data = &cap_data_data;
2758         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2759         int do_exit = 0;
2760         char pathname[32];
2761
2762         /* check for CAP_SYS_RAWIO */
2763         cap_header->pid = getpid();
2764         cap_header->version = _LINUX_CAPABILITY_VERSION;
2765         if (capget(cap_header, cap_data) < 0)
2766                 err(-6, "capget(2) failed");
2767
2768         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2769                 do_exit++;
2770                 warnx("capget(CAP_SYS_RAWIO) failed,"
2771                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
2772         }
2773
2774         /* test file permissions */
2775         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2776         if (euidaccess(pathname, R_OK)) {
2777                 do_exit++;
2778                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2779         }
2780
2781         /* if all else fails, thell them to be root */
2782         if (do_exit)
2783                 if (getuid() != 0)
2784                         warnx("... or simply run as root");
2785
2786         if (do_exit)
2787                 exit(-6);
2788 }
2789
2790 /*
2791  * NHM adds support for additional MSRs:
2792  *
2793  * MSR_SMI_COUNT                   0x00000034
2794  *
2795  * MSR_PLATFORM_INFO               0x000000ce
2796  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
2797  *
2798  * MSR_MISC_PWR_MGMT               0x000001aa
2799  *
2800  * MSR_PKG_C3_RESIDENCY            0x000003f8
2801  * MSR_PKG_C6_RESIDENCY            0x000003f9
2802  * MSR_CORE_C3_RESIDENCY           0x000003fc
2803  * MSR_CORE_C6_RESIDENCY           0x000003fd
2804  *
2805  * Side effect:
2806  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
2807  * sets has_misc_feature_control
2808  */
2809 int probe_nhm_msrs(unsigned int family, unsigned int model)
2810 {
2811         unsigned long long msr;
2812         unsigned int base_ratio;
2813         int *pkg_cstate_limits;
2814
2815         if (!genuine_intel)
2816                 return 0;
2817
2818         if (family != 6)
2819                 return 0;
2820
2821         bclk = discover_bclk(family, model);
2822
2823         switch (model) {
2824         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2825         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2826         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
2827         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
2828         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
2829         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2830         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2831                 pkg_cstate_limits = nhm_pkg_cstate_limits;
2832                 break;
2833         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
2834         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
2835         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
2836         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2837                 pkg_cstate_limits = snb_pkg_cstate_limits;
2838                 has_misc_feature_control = 1;
2839                 break;
2840         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
2841         case INTEL_FAM6_HASWELL_X:      /* HSX */
2842         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
2843         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
2844         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2845         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2846         case INTEL_FAM6_BROADWELL_X:    /* BDX */
2847         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
2848         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2849         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
2850         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
2851         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
2852                 pkg_cstate_limits = hsw_pkg_cstate_limits;
2853                 has_misc_feature_control = 1;
2854                 break;
2855         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
2856                 pkg_cstate_limits = skx_pkg_cstate_limits;
2857                 has_misc_feature_control = 1;
2858                 break;
2859         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
2860                 no_MSR_MISC_PWR_MGMT = 1;
2861         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
2862                 pkg_cstate_limits = slv_pkg_cstate_limits;
2863                 break;
2864         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
2865                 pkg_cstate_limits = amt_pkg_cstate_limits;
2866                 no_MSR_MISC_PWR_MGMT = 1;
2867                 break;
2868         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
2869         case INTEL_FAM6_XEON_PHI_KNM:
2870                 pkg_cstate_limits = phi_pkg_cstate_limits;
2871                 break;
2872         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
2873         case INTEL_FAM6_ATOM_GEMINI_LAKE:
2874         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2875                 pkg_cstate_limits = bxt_pkg_cstate_limits;
2876                 break;
2877         default:
2878                 return 0;
2879         }
2880         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2881         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2882
2883         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2884         base_ratio = (msr >> 8) & 0xFF;
2885
2886         base_hz = base_ratio * bclk * 1000000;
2887         has_base_hz = 1;
2888         return 1;
2889 }
2890 /*
2891  * SLV client has support for unique MSRs:
2892  *
2893  * MSR_CC6_DEMOTION_POLICY_CONFIG
2894  * MSR_MC6_DEMOTION_POLICY_CONFIG
2895  */
2896
2897 int has_slv_msrs(unsigned int family, unsigned int model)
2898 {
2899         if (!genuine_intel)
2900                 return 0;
2901
2902         switch (model) {
2903         case INTEL_FAM6_ATOM_SILVERMONT1:
2904         case INTEL_FAM6_ATOM_MERRIFIELD:
2905         case INTEL_FAM6_ATOM_MOOREFIELD:
2906                 return 1;
2907         }
2908         return 0;
2909 }
2910 int is_dnv(unsigned int family, unsigned int model)
2911 {
2912
2913         if (!genuine_intel)
2914                 return 0;
2915
2916         switch (model) {
2917         case INTEL_FAM6_ATOM_DENVERTON:
2918                 return 1;
2919         }
2920         return 0;
2921 }
2922 int is_bdx(unsigned int family, unsigned int model)
2923 {
2924
2925         if (!genuine_intel)
2926                 return 0;
2927
2928         switch (model) {
2929         case INTEL_FAM6_BROADWELL_X:
2930         case INTEL_FAM6_BROADWELL_XEON_D:
2931                 return 1;
2932         }
2933         return 0;
2934 }
2935 int is_skx(unsigned int family, unsigned int model)
2936 {
2937
2938         if (!genuine_intel)
2939                 return 0;
2940
2941         switch (model) {
2942         case INTEL_FAM6_SKYLAKE_X:
2943                 return 1;
2944         }
2945         return 0;
2946 }
2947
2948 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
2949 {
2950         if (has_slv_msrs(family, model))
2951                 return 0;
2952
2953         switch (model) {
2954         /* Nehalem compatible, but do not include turbo-ratio limit support */
2955         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
2956         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
2957         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
2958         case INTEL_FAM6_XEON_PHI_KNM:
2959                 return 0;
2960         default:
2961                 return 1;
2962         }
2963 }
/*
 * has_atom_turbo_ratio_limit(family, model)
 * return 1 exactly when has_slv_msrs() is non-zero for this model,
 * else return 0
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
2971 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2972 {
2973         if (!genuine_intel)
2974                 return 0;
2975
2976         if (family != 6)
2977                 return 0;
2978
2979         switch (model) {
2980         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
2981         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2982                 return 1;
2983         default:
2984                 return 0;
2985         }
2986 }
2987 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2988 {
2989         if (!genuine_intel)
2990                 return 0;
2991
2992         if (family != 6)
2993                 return 0;
2994
2995         switch (model) {
2996         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
2997                 return 1;
2998         default:
2999                 return 0;
3000         }
3001 }
3002
3003 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3004 {
3005         if (!genuine_intel)
3006                 return 0;
3007
3008         if (family != 6)
3009                 return 0;
3010
3011         switch (model) {
3012         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3013         case INTEL_FAM6_XEON_PHI_KNM:
3014                 return 1;
3015         default:
3016                 return 0;
3017         }
3018 }
3019 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3020 {
3021         if (!genuine_intel)
3022                 return 0;
3023
3024         if (family != 6)
3025                 return 0;
3026
3027         switch (model) {
3028         case INTEL_FAM6_ATOM_GOLDMONT:
3029         case INTEL_FAM6_SKYLAKE_X:
3030                 return 1;
3031         default:
3032                 return 0;
3033         }
3034 }
3035 int has_config_tdp(unsigned int family, unsigned int model)
3036 {
3037         if (!genuine_intel)
3038                 return 0;
3039
3040         if (family != 6)
3041                 return 0;
3042
3043         switch (model) {
3044         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3045         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3046         case INTEL_FAM6_HASWELL_X:      /* HSX */
3047         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3048         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3049         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3050         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3051         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3052         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3053         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3054         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3055         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3056         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3057         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3058
3059         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3060         case INTEL_FAM6_XEON_PHI_KNM:
3061                 return 1;
3062         default:
3063                 return 0;
3064         }
3065 }
3066
3067 static void
3068 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3069 {
3070         if (!do_nhm_platform_info)
3071                 return;
3072
3073         dump_nhm_platform_info();
3074
3075         if (has_hsw_turbo_ratio_limit(family, model))
3076                 dump_hsw_turbo_ratio_limits();
3077
3078         if (has_ivt_turbo_ratio_limit(family, model))
3079                 dump_ivt_turbo_ratio_limits();
3080
3081         if (has_turbo_ratio_limit(family, model))
3082                 dump_turbo_ratio_limits(family, model);
3083
3084         if (has_atom_turbo_ratio_limit(family, model))
3085                 dump_atom_turbo_ratio_limits();
3086
3087         if (has_knl_turbo_ratio_limit(family, model))
3088                 dump_knl_turbo_ratio_limits();
3089
3090         if (has_config_tdp(family, model))
3091                 dump_config_tdp();
3092
3093         dump_nhm_cst_cfg();
3094 }
3095
3096 static void
3097 dump_sysfs_cstate_config(void)
3098 {
3099         char path[64];
3100         char name_buf[16];
3101         char desc[64];
3102         FILE *input;
3103         int state;
3104         char *sp;
3105
3106         if (!DO_BIC(BIC_sysfs))
3107                 return;
3108
3109         for (state = 0; state < 10; ++state) {
3110
3111                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3112                         base_cpu, state);
3113                 input = fopen(path, "r");
3114                 if (input == NULL)
3115                         continue;
3116                 fgets(name_buf, sizeof(name_buf), input);
3117
3118                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3119                 sp = strchr(name_buf, '-');
3120                 if (!sp)
3121                         sp = strchrnul(name_buf, '\n');
3122                 *sp = '\0';
3123
3124                 fclose(input);
3125
3126                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3127                         base_cpu, state);
3128                 input = fopen(path, "r");
3129                 if (input == NULL)
3130                         continue;
3131                 fgets(desc, sizeof(desc), input);
3132
3133                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3134                 fclose(input);
3135         }
3136 }
3137 static void
3138 dump_sysfs_pstate_config(void)
3139 {
3140         char path[64];
3141         char driver_buf[64];
3142         char governor_buf[64];
3143         FILE *input;
3144         int turbo;
3145
3146         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3147                         base_cpu);
3148         input = fopen(path, "r");
3149         if (input == NULL) {
3150                 fprintf(stderr, "NSFOD %s\n", path);
3151                 return;
3152         }
3153         fgets(driver_buf, sizeof(driver_buf), input);
3154         fclose(input);
3155
3156         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3157                         base_cpu);
3158         input = fopen(path, "r");
3159         if (input == NULL) {
3160                 fprintf(stderr, "NSFOD %s\n", path);
3161                 return;
3162         }
3163         fgets(governor_buf, sizeof(governor_buf), input);
3164         fclose(input);
3165
3166         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3167         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3168
3169         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3170         input = fopen(path, "r");
3171         if (input != NULL) {
3172                 fscanf(input, "%d", &turbo);
3173                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3174                 fclose(input);
3175         }
3176
3177         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3178         input = fopen(path, "r");
3179         if (input != NULL) {
3180                 fscanf(input, "%d", &turbo);
3181                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3182                 fclose(input);
3183         }
3184 }
3185
3186
3187 /*
3188  * print_epb()
3189  * Decode the ENERGY_PERF_BIAS MSR
3190  */
3191 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3192 {
3193         unsigned long long msr;
3194         char *epb_string;
3195         int cpu;
3196
3197         if (!has_epb)
3198                 return 0;
3199
3200         cpu = t->cpu_id;
3201
3202         /* EPB is per-package */
3203         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3204                 return 0;
3205
3206         if (cpu_migrate(cpu)) {
3207                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3208                 return -1;
3209         }
3210
3211         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3212                 return 0;
3213
3214         switch (msr & 0xF) {
3215         case ENERGY_PERF_BIAS_PERFORMANCE:
3216                 epb_string = "performance";
3217                 break;
3218         case ENERGY_PERF_BIAS_NORMAL:
3219                 epb_string = "balanced";
3220                 break;
3221         case ENERGY_PERF_BIAS_POWERSAVE:
3222                 epb_string = "powersave";
3223                 break;
3224         default:
3225                 epb_string = "custom";
3226                 break;
3227         }
3228         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3229
3230         return 0;
3231 }
3232 /*
3233  * print_hwp()
3234  * Decode the MSR_HWP_CAPABILITIES
3235  */
3236 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3237 {
3238         unsigned long long msr;
3239         int cpu;
3240
3241         if (!has_hwp)
3242                 return 0;
3243
3244         cpu = t->cpu_id;
3245
3246         /* MSR_HWP_CAPABILITIES is per-package */
3247         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3248                 return 0;
3249
3250         if (cpu_migrate(cpu)) {
3251                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3252                 return -1;
3253         }
3254
3255         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3256                 return 0;
3257
3258         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3259                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3260
3261         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3262         if ((msr & (1 << 0)) == 0)
3263                 return 0;
3264
3265         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3266                 return 0;
3267
3268         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3269                         "(high %d guar %d eff %d low %d)\n",
3270                         cpu, msr,
3271                         (unsigned int)HWP_HIGHEST_PERF(msr),
3272                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3273                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3274                         (unsigned int)HWP_LOWEST_PERF(msr));
3275
3276         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3277                 return 0;
3278
3279         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3280                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3281                         cpu, msr,
3282                         (unsigned int)(((msr) >> 0) & 0xff),
3283                         (unsigned int)(((msr) >> 8) & 0xff),
3284                         (unsigned int)(((msr) >> 16) & 0xff),
3285                         (unsigned int)(((msr) >> 24) & 0xff),
3286                         (unsigned int)(((msr) >> 32) & 0xff3),
3287                         (unsigned int)(((msr) >> 42) & 0x1));
3288
3289         if (has_hwp_pkg) {
3290                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3291                         return 0;
3292
3293                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3294                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3295                         cpu, msr,
3296                         (unsigned int)(((msr) >> 0) & 0xff),
3297                         (unsigned int)(((msr) >> 8) & 0xff),
3298                         (unsigned int)(((msr) >> 16) & 0xff),
3299                         (unsigned int)(((msr) >> 24) & 0xff),
3300                         (unsigned int)(((msr) >> 32) & 0xff3));
3301         }
3302         if (has_hwp_notify) {
3303                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3304                         return 0;
3305
3306                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3307                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3308                         cpu, msr,
3309                         ((msr) & 0x1) ? "EN" : "Dis",
3310                         ((msr) & 0x2) ? "EN" : "Dis");
3311         }
3312         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3313                 return 0;
3314
3315         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3316                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3317                         cpu, msr,
3318                         ((msr) & 0x1) ? "" : "No-",
3319                         ((msr) & 0x2) ? "" : "No-");
3320
3321         return 0;
3322 }
3323
3324 /*
3325  * print_perf_limit()
3326  */
3327 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3328 {
3329         unsigned long long msr;
3330         int cpu;
3331
3332         cpu = t->cpu_id;
3333
3334         /* per-package */
3335         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3336                 return 0;
3337
3338         if (cpu_migrate(cpu)) {
3339                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3340                 return -1;
3341         }
3342
3343         if (do_core_perf_limit_reasons) {
3344                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3345                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3346                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3347                         (msr & 1 << 15) ? "bit15, " : "",
3348                         (msr & 1 << 14) ? "bit14, " : "",
3349                         (msr & 1 << 13) ? "Transitions, " : "",
3350                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3351                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3352                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3353                         (msr & 1 << 9) ? "CorePwr, " : "",
3354                         (msr & 1 << 8) ? "Amps, " : "",
3355                         (msr & 1 << 6) ? "VR-Therm, " : "",
3356                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3357                         (msr & 1 << 4) ? "Graphics, " : "",
3358                         (msr & 1 << 2) ? "bit2, " : "",
3359                         (msr & 1 << 1) ? "ThermStatus, " : "",
3360                         (msr & 1 << 0) ? "PROCHOT, " : "");
3361                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3362                         (msr & 1 << 31) ? "bit31, " : "",
3363                         (msr & 1 << 30) ? "bit30, " : "",
3364                         (msr & 1 << 29) ? "Transitions, " : "",
3365                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3366                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3367                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3368                         (msr & 1 << 25) ? "CorePwr, " : "",
3369                         (msr & 1 << 24) ? "Amps, " : "",
3370                         (msr & 1 << 22) ? "VR-Therm, " : "",
3371                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3372                         (msr & 1 << 20) ? "Graphics, " : "",
3373                         (msr & 1 << 18) ? "bit18, " : "",
3374                         (msr & 1 << 17) ? "ThermStatus, " : "",
3375                         (msr & 1 << 16) ? "PROCHOT, " : "");
3376
3377         }
3378         if (do_gfx_perf_limit_reasons) {
3379                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3380                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3381                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3382                         (msr & 1 << 0) ? "PROCHOT, " : "",
3383                         (msr & 1 << 1) ? "ThermStatus, " : "",
3384                         (msr & 1 << 4) ? "Graphics, " : "",
3385                         (msr & 1 << 6) ? "VR-Therm, " : "",
3386                         (msr & 1 << 8) ? "Amps, " : "",
3387                         (msr & 1 << 9) ? "GFXPwr, " : "",
3388                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3389                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3390                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3391                         (msr & 1 << 16) ? "PROCHOT, " : "",
3392                         (msr & 1 << 17) ? "ThermStatus, " : "",
3393                         (msr & 1 << 20) ? "Graphics, " : "",
3394                         (msr & 1 << 22) ? "VR-Therm, " : "",
3395                         (msr & 1 << 24) ? "Amps, " : "",
3396                         (msr & 1 << 25) ? "GFXPwr, " : "",
3397                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3398                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3399         }
3400         if (do_ring_perf_limit_reasons) {
3401                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3402                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3403                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3404                         (msr & 1 << 0) ? "PROCHOT, " : "",
3405                         (msr & 1 << 1) ? "ThermStatus, " : "",
3406                         (msr & 1 << 6) ? "VR-Therm, " : "",
3407                         (msr & 1 << 8) ? "Amps, " : "",
3408                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3409                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3410                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3411                         (msr & 1 << 16) ? "PROCHOT, " : "",
3412                         (msr & 1 << 17) ? "ThermStatus, " : "",
3413                         (msr & 1 << 22) ? "VR-Therm, " : "",
3414                         (msr & 1 << 24) ? "Amps, " : "",
3415                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3416                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3417         }
3418         return 0;
3419 }
3420
3421 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3422 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3423
3424 double get_tdp(unsigned int model)
3425 {
3426         unsigned long long msr;
3427
3428         if (do_rapl & RAPL_PKG_POWER_INFO)
3429                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3430                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3431
3432         switch (model) {
3433         case INTEL_FAM6_ATOM_SILVERMONT1:
3434         case INTEL_FAM6_ATOM_SILVERMONT2:
3435                 return 30.0;
3436         default:
3437                 return 135.0;
3438         }
3439 }
3440
3441 /*
3442  * rapl_dram_energy_units_probe()
3443  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3444  */
3445 static double
3446 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3447 {
3448         /* only called for genuine_intel, family 6 */
3449
3450         switch (model) {
3451         case INTEL_FAM6_HASWELL_X:      /* HSX */
3452         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3453         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3454         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3455         case INTEL_FAM6_XEON_PHI_KNM:
3456                 return (rapl_dram_energy_units = 15.3 / 1000000);
3457         default:
3458                 return (rapl_energy_units);
3459         }
3460 }
3461
3462
3463 /*
3464  * rapl_probe()
3465  *
3466  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3467  */
3468 void rapl_probe(unsigned int family, unsigned int model)
3469 {
3470         unsigned long long msr;
3471         unsigned int time_unit;
3472         double tdp;
3473
3474         if (!genuine_intel)
3475                 return;
3476
3477         if (family != 6)
3478                 return;
3479
3480         switch (model) {
3481         case INTEL_FAM6_SANDYBRIDGE:
3482         case INTEL_FAM6_IVYBRIDGE:
3483         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3484         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3485         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3486         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3487         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3488                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3489                 if (rapl_joules) {
3490                         BIC_PRESENT(BIC_Pkg_J);
3491                         BIC_PRESENT(BIC_Cor_J);
3492                         BIC_PRESENT(BIC_GFX_J);
3493                 } else {
3494                         BIC_PRESENT(BIC_PkgWatt);
3495                         BIC_PRESENT(BIC_CorWatt);
3496                         BIC_PRESENT(BIC_GFXWatt);
3497                 }
3498                 break;
3499         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3500         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3501                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3502                 if (rapl_joules)
3503                         BIC_PRESENT(BIC_Pkg_J);
3504                 else
3505                         BIC_PRESENT(BIC_PkgWatt);
3506                 break;
3507         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3508         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3509         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3510         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3511                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3512                 BIC_PRESENT(BIC_PKG__);
3513                 BIC_PRESENT(BIC_RAM__);
3514                 if (rapl_joules) {
3515                         BIC_PRESENT(BIC_Pkg_J);
3516                         BIC_PRESENT(BIC_Cor_J);
3517                         BIC_PRESENT(BIC_RAM_J);
3518                         BIC_PRESENT(BIC_GFX_J);
3519                 } else {
3520                         BIC_PRESENT(BIC_PkgWatt);
3521                         BIC_PRESENT(BIC_CorWatt);
3522                         BIC_PRESENT(BIC_RAMWatt);
3523                         BIC_PRESENT(BIC_GFXWatt);
3524                 }
3525                 break;
3526         case INTEL_FAM6_HASWELL_X:      /* HSX */
3527         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3528         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3529         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3530         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3531         case INTEL_FAM6_XEON_PHI_KNM:
3532                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3533                 BIC_PRESENT(BIC_PKG__);
3534                 BIC_PRESENT(BIC_RAM__);
3535                 if (rapl_joules) {
3536                         BIC_PRESENT(BIC_Pkg_J);
3537                         BIC_PRESENT(BIC_RAM_J);
3538                 } else {
3539                         BIC_PRESENT(BIC_PkgWatt);
3540                         BIC_PRESENT(BIC_RAMWatt);
3541                 }
3542                 break;
3543         case INTEL_FAM6_SANDYBRIDGE_X:
3544         case INTEL_FAM6_IVYBRIDGE_X:
3545                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3546                 BIC_PRESENT(BIC_PKG__);
3547                 BIC_PRESENT(BIC_RAM__);
3548                 if (rapl_joules) {
3549                         BIC_PRESENT(BIC_Pkg_J);
3550                         BIC_PRESENT(BIC_Cor_J);
3551                         BIC_PRESENT(BIC_RAM_J);
3552                 } else {
3553                         BIC_PRESENT(BIC_PkgWatt);
3554                         BIC_PRESENT(BIC_CorWatt);
3555                         BIC_PRESENT(BIC_RAMWatt);
3556                 }
3557                 break;
3558         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3559         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3560                 do_rapl = RAPL_PKG | RAPL_CORES;
3561                 if (rapl_joules) {
3562                         BIC_PRESENT(BIC_Pkg_J);
3563                         BIC_PRESENT(BIC_Cor_J);
3564                 } else {
3565                         BIC_PRESENT(BIC_PkgWatt);
3566                         BIC_PRESENT(BIC_CorWatt);
3567                 }
3568                 break;
3569         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3570                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3571                 BIC_PRESENT(BIC_PKG__);
3572                 BIC_PRESENT(BIC_RAM__);
3573                 if (rapl_joules) {
3574                         BIC_PRESENT(BIC_Pkg_J);
3575                         BIC_PRESENT(BIC_Cor_J);
3576                         BIC_PRESENT(BIC_RAM_J);
3577                 } else {
3578                         BIC_PRESENT(BIC_PkgWatt);
3579                         BIC_PRESENT(BIC_CorWatt);
3580                         BIC_PRESENT(BIC_RAMWatt);
3581                 }
3582                 break;
3583         default:
3584                 return;
3585         }
3586
3587         /* units on package 0, verify later other packages match */
3588         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3589                 return;
3590
3591         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3592         if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3593                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3594         else
3595                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3596
3597         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3598
3599         time_unit = msr >> 16 & 0xF;
3600         if (time_unit == 0)
3601                 time_unit = 0xA;
3602
3603         rapl_time_units = 1.0 / (1 << (time_unit));
3604
3605         tdp = get_tdp(model);
3606
3607         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3608         if (!quiet)
3609                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3610
3611         return;
3612 }
3613
3614 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3615 {
3616         if (!genuine_intel)
3617                 return;
3618
3619         if (family != 6)
3620                 return;
3621
3622         switch (model) {
3623         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3624         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3625         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3626                 do_gfx_perf_limit_reasons = 1;
3627         case INTEL_FAM6_HASWELL_X:      /* HSX */
3628                 do_core_perf_limit_reasons = 1;
3629                 do_ring_perf_limit_reasons = 1;
3630         default:
3631                 return;
3632         }
3633 }
3634
3635 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3636 {
3637         unsigned long long msr;
3638         unsigned int dts, dts2;
3639         int cpu;
3640
3641         if (!(do_dts || do_ptm))
3642                 return 0;
3643
3644         cpu = t->cpu_id;
3645
3646         /* DTS is per-core, no need to print for each thread */
3647         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3648                 return 0;
3649
3650         if (cpu_migrate(cpu)) {
3651                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3652                 return -1;
3653         }
3654
3655         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3656                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3657                         return 0;
3658
3659                 dts = (msr >> 16) & 0x7F;
3660                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3661                         cpu, msr, tcc_activation_temp - dts);
3662
3663                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3664                         return 0;
3665
3666                 dts = (msr >> 16) & 0x7F;
3667                 dts2 = (msr >> 8) & 0x7F;
3668                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3669                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3670         }
3671
3672
3673         if (do_dts && debug) {
3674                 unsigned int resolution;
3675
3676                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3677                         return 0;
3678
3679                 dts = (msr >> 16) & 0x7F;
3680                 resolution = (msr >> 27) & 0xF;
3681                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3682                         cpu, msr, tcc_activation_temp - dts, resolution);
3683
3684                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3685                         return 0;
3686
3687                 dts = (msr >> 16) & 0x7F;
3688                 dts2 = (msr >> 8) & 0x7F;
3689                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3690                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3691         }
3692
3693         return 0;
3694 }
3695
3696 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3697 {
3698         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3699                 cpu, label,
3700                 ((msr >> 15) & 1) ? "EN" : "DIS",
3701                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
3702                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3703                 (((msr >> 16) & 1) ? "EN" : "DIS"));
3704
3705         return;
3706 }
3707
3708 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3709 {
3710         unsigned long long msr;
3711         int cpu;
3712
3713         if (!do_rapl)
3714                 return 0;
3715
3716         /* RAPL counters are per package, so print only for 1st thread/package */
3717         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3718                 return 0;
3719
3720         cpu = t->cpu_id;
3721         if (cpu_migrate(cpu)) {
3722                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3723                 return -1;
3724         }
3725
3726         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3727                 return -1;
3728
3729         fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3730                 rapl_power_units, rapl_energy_units, rapl_time_units);
3731
3732         if (do_rapl & RAPL_PKG_POWER_INFO) {
3733
3734                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3735                         return -5;
3736
3737
3738                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3739                         cpu, msr,
3740                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3741                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3742                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3743                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3744
3745         }
3746         if (do_rapl & RAPL_PKG) {
3747
3748                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3749                         return -9;
3750
3751                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3752                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3753
3754                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
3755                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3756                         cpu,
3757                         ((msr >> 47) & 1) ? "EN" : "DIS",
3758                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
3759                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3760                         ((msr >> 48) & 1) ? "EN" : "DIS");
3761         }
3762
3763         if (do_rapl & RAPL_DRAM_POWER_INFO) {
3764                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3765                         return -6;
3766
3767                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3768                         cpu, msr,
3769                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3770                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3771                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3772                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3773         }
3774         if (do_rapl & RAPL_DRAM) {
3775                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
3776                         return -9;
3777                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3778                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3779
3780                 print_power_limit_msr(cpu, msr, "DRAM Limit");
3781         }
3782         if (do_rapl & RAPL_CORE_POLICY) {
3783                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
3784                         return -7;
3785
3786                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3787         }
3788         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3789                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
3790                         return -9;
3791                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
3792                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3793                 print_power_limit_msr(cpu, msr, "Cores Limit");
3794         }
3795         if (do_rapl & RAPL_GFX) {
3796                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
3797                         return -8;
3798
3799                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3800
3801                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
3802                         return -9;
3803                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
3804                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3805                 print_power_limit_msr(cpu, msr, "GFX Limit");
3806         }
3807         return 0;
3808 }
3809
3810 /*
3811  * SNB adds support for additional MSRs:
3812  *
3813  * MSR_PKG_C7_RESIDENCY            0x000003fa
3814  * MSR_CORE_C7_RESIDENCY           0x000003fe
3815  * MSR_PKG_C2_RESIDENCY            0x0000060d
3816  */
3817
3818 int has_snb_msrs(unsigned int family, unsigned int model)
3819 {
3820         if (!genuine_intel)
3821                 return 0;
3822
3823         switch (model) {
3824         case INTEL_FAM6_SANDYBRIDGE:
3825         case INTEL_FAM6_SANDYBRIDGE_X:
3826         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3827         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3828         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3829         case INTEL_FAM6_HASWELL_X:      /* HSW */
3830         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3831         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3832         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3833         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3834         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3835         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
3836         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3837         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3838         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3839         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3840         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3841         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3842         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3843         case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3844                 return 1;
3845         }
3846         return 0;
3847 }
3848
3849 /*
3850  * HSW adds support for additional MSRs:
3851  *
3852  * MSR_PKG_C8_RESIDENCY         0x00000630
3853  * MSR_PKG_C9_RESIDENCY         0x00000631
3854  * MSR_PKG_C10_RESIDENCY        0x00000632
3855  *
3856  * MSR_PKGC8_IRTL               0x00000633
3857  * MSR_PKGC9_IRTL               0x00000634
3858  * MSR_PKGC10_IRTL              0x00000635
3859  *
3860  */
3861 int has_hsw_msrs(unsigned int family, unsigned int model)
3862 {
3863         if (!genuine_intel)
3864                 return 0;
3865
3866         switch (model) {
3867         case INTEL_FAM6_HASWELL_ULT:    /* HSW */
3868         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3869         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3870         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3871         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3872         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3873         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3874         case INTEL_FAM6_ATOM_GEMINI_LAKE:
3875                 return 1;
3876         }
3877         return 0;
3878 }
3879
3880 /*
3881  * SKL adds support for additional MSRS:
3882  *
3883  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
3884  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
3885  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
3886  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
3887  */
3888 int has_skl_msrs(unsigned int family, unsigned int model)
3889 {
3890         if (!genuine_intel)
3891                 return 0;
3892
3893         switch (model) {
3894         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3895         case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
3896         case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
3897         case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
3898                 return 1;
3899         }
3900         return 0;
3901 }
3902
3903 int is_slm(unsigned int family, unsigned int model)
3904 {
3905         if (!genuine_intel)
3906                 return 0;
3907         switch (model) {
3908         case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
3909         case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
3910                 return 1;
3911         }
3912         return 0;
3913 }
3914
3915 int is_knl(unsigned int family, unsigned int model)
3916 {
3917         if (!genuine_intel)
3918                 return 0;
3919         switch (model) {
3920         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3921         case INTEL_FAM6_XEON_PHI_KNM:
3922                 return 1;
3923         }
3924         return 0;
3925 }
3926
/*
 * get_aperf_mperf_multiplier()
 * Returns 1024 when is_knl() reports a KNL-class CPU, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
3933
3934 #define SLM_BCLK_FREQS 5
3935 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
3936
3937 double slm_bclk(void)
3938 {
3939         unsigned long long msr = 3;
3940         unsigned int i;
3941         double freq;
3942
3943         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
3944                 fprintf(outf, "SLM BCLK: unknown\n");
3945
3946         i = msr & 0xf;
3947         if (i >= SLM_BCLK_FREQS) {
3948                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3949                 i = 3;
3950         }
3951         freq = slm_freq_table[i];
3952
3953         if (!quiet)
3954                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
3955
3956         return freq;
3957 }
3958
/*
 * discover_bclk()
 * Return the bus clock for this CPU: 100.00 when has_snb_msrs() or
 * is_knl() matches, the value from slm_bclk() when is_slm() matches,
 * and 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
3968
3969 /*
3970  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
3971  * the Thermal Control Circuit (TCC) activates.
3972  * This is usually equal to tjMax.
3973  *
3974  * Older processors do not have this MSR, so there we guess,
3975  * but also allow cmdline over-ride with -T.
3976  *
3977  * Several MSR temperature values are in units of degrees-C
3978  * below this value, including the Digital Thermal Sensor (DTS),
3979  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
3980  */
3981 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3982 {
3983         unsigned long long msr;
3984         unsigned int target_c_local;
3985         int cpu;
3986
3987         /* tcc_activation_temp is used only for dts or ptm */
3988         if (!(do_dts || do_ptm))
3989                 return 0;
3990
3991         /* this is a per-package concept */
3992         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3993                 return 0;
3994
3995         cpu = t->cpu_id;
3996         if (cpu_migrate(cpu)) {
3997                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3998                 return -1;
3999         }
4000
4001         if (tcc_activation_temp_override != 0) {
4002                 tcc_activation_temp = tcc_activation_temp_override;
4003                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4004                         cpu, tcc_activation_temp);
4005                 return 0;
4006         }
4007
4008         /* Temperature Target MSR is Nehalem and newer only */
4009         if (!do_nhm_platform_info)
4010                 goto guess;
4011
4012         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4013                 goto guess;
4014
4015         target_c_local = (msr >> 16) & 0xFF;
4016
4017         if (!quiet)
4018                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4019                         cpu, msr, target_c_local);
4020
4021         if (!target_c_local)
4022                 goto guess;
4023
4024         tcc_activation_temp = target_c_local;
4025
4026         return 0;
4027
4028 guess:
4029         tcc_activation_temp = TJMAX_DEFAULT;
4030         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4031                 cpu, tcc_activation_temp);
4032
4033         return 0;
4034 }
4035
4036 void decode_feature_control_msr(void)
4037 {
4038         unsigned long long msr;
4039
4040         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4041                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4042                         base_cpu, msr,
4043                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4044                         msr & (1 << 18) ? "SGX" : "");
4045 }
4046
4047 void decode_misc_enable_msr(void)
4048 {
4049         unsigned long long msr;
4050
4051         if (!genuine_intel)
4052                 return;
4053
4054         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4055                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4056                         base_cpu, msr,
4057                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4058                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4059                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4060                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4061                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4062 }
4063
4064 void decode_misc_feature_control(void)
4065 {
4066         unsigned long long msr;
4067
4068         if (!has_misc_feature_control)
4069                 return;
4070
4071         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4072                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4073                         base_cpu, msr,
4074                         msr & (0 << 0) ? "No-" : "",
4075                         msr & (1 << 0) ? "No-" : "",
4076                         msr & (2 << 0) ? "No-" : "",
4077                         msr & (3 << 0) ? "No-" : "");
4078 }
4079 /*
4080  * Decode MSR_MISC_PWR_MGMT
4081  *
4082  * Decode the bits according to the Nehalem documentation
4083  * bit[0] seems to continue to have same meaning going forward
4084  * bit[1] less so...
4085  */
4086 void decode_misc_pwr_mgmt_msr(void)
4087 {
4088         unsigned long long msr;
4089
4090         if (!do_nhm_platform_info)
4091                 return;
4092
4093         if (no_MSR_MISC_PWR_MGMT)
4094                 return;
4095
4096         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4097                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4098                         base_cpu, msr,
4099                         msr & (1 << 0) ? "DIS" : "EN",
4100                         msr & (1 << 1) ? "EN" : "DIS",
4101                         msr & (1 << 8) ? "EN" : "DIS");
4102 }
4103 /*
4104  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4105  *
4106  * This MSRs are present on Silvermont processors,
4107  * Intel Atom processor E3000 series (Baytrail), and friends.
4108  */
4109 void decode_c6_demotion_policy_msr(void)
4110 {
4111         unsigned long long msr;
4112
4113         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4114                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4115                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4116
4117         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4118                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4119                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4120 }
4121
4122 void process_cpuid()
4123 {
4124         unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
4125         unsigned int fms, family, model, stepping;
4126         unsigned int has_turbo;
4127
4128         eax = ebx = ecx = edx = 0;
4129
4130         __cpuid(0, max_level, ebx, ecx, edx);
4131
4132         if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
4133                 genuine_intel = 1;
4134
4135         if (!quiet)
4136                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4137                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4138
4139         __cpuid(1, fms, ebx, ecx, edx);
4140         family = (fms >> 8) & 0xf;
4141         model = (fms >> 4) & 0xf;
4142         stepping = fms & 0xf;
4143         if (family == 6 || family == 0xf)
4144                 model += ((fms >> 16) & 0xf) << 4;
4145
4146         if (!quiet) {
4147                 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4148                         max_level, family, model, stepping, family, model, stepping);
4149                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
4150                         ecx & (1 << 0) ? "SSE3" : "-",
4151                         ecx & (1 << 3) ? "MONITOR" : "-",
4152                         ecx & (1 << 6) ? "SMX" : "-",
4153                         ecx & (1 << 7) ? "EIST" : "-",
4154                         ecx & (1 << 8) ? "TM2" : "-",
4155                         edx & (1 << 4) ? "TSC" : "-",
4156                         edx & (1 << 5) ? "MSR" : "-",
4157                         edx & (1 << 22) ? "ACPI-TM" : "-",
4158                         edx & (1 << 29) ? "TM" : "-");
4159         }
4160
4161         if (!(edx & (1 << 5)))
4162                 errx(1, "CPUID: no MSR");
4163
4164         /*
4165          * check max extended function levels of CPUID.
4166          * This is needed to check for invariant TSC.
4167          * This check is valid for both Intel and AMD.
4168          */
4169         ebx = ecx = edx = 0;
4170         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4171
4172         if (max_extended_level >= 0x80000007) {
4173
4174                 /*
4175                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4176                  * this check is valid for both Intel and AMD
4177                  */
4178                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4179                 has_invariant_tsc = edx & (1 << 8);
4180         }
4181
4182         /*
4183          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4184          * this check is valid for both Intel and AMD
4185          */
4186
4187         __cpuid(0x6, eax, ebx, ecx, edx);
4188         has_aperf = ecx & (1 << 0);
4189         if (has_aperf) {
4190                 BIC_PRESENT(BIC_Avg_MHz);
4191                 BIC_PRESENT(BIC_Busy);
4192                 BIC_PRESENT(BIC_Bzy_MHz);
4193         }
4194         do_dts = eax & (1 << 0);
4195         if (do_dts)
4196                 BIC_PRESENT(BIC_CoreTmp);
4197         has_turbo = eax & (1 << 1);
4198         do_ptm = eax & (1 << 6);
4199         if (do_ptm)
4200                 BIC_PRESENT(BIC_PkgTmp);
4201         has_hwp = eax & (1 << 7);
4202         has_hwp_notify = eax & (1 << 8);
4203         has_hwp_activity_window = eax & (1 << 9);
4204         has_hwp_epp = eax & (1 << 10);
4205         has_hwp_pkg = eax & (1 << 11);
4206         has_epb = ecx & (1 << 3);
4207
4208         if (!quiet)
4209                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4210                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4211                         has_aperf ? "" : "No-",
4212                         has_turbo ? "" : "No-",
4213                         do_dts ? "" : "No-",
4214                         do_ptm ? "" : "No-",
4215                         has_hwp ? "" : "No-",
4216                         has_hwp_notify ? "" : "No-",
4217                         has_hwp_activity_window ? "" : "No-",
4218                         has_hwp_epp ? "" : "No-",
4219                         has_hwp_pkg ? "" : "No-",
4220                         has_epb ? "" : "No-");
4221
4222         if (!quiet)
4223                 decode_misc_enable_msr();
4224
4225
4226         if (max_level >= 0x7 && !quiet) {
4227                 int has_sgx;
4228
4229                 ecx = 0;
4230
4231                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4232
4233                 has_sgx = ebx & (1 << 2);
4234                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4235
4236                 if (has_sgx)
4237                         decode_feature_control_msr();
4238         }
4239
4240         if (max_level >= 0x15) {
4241                 unsigned int eax_crystal;
4242                 unsigned int ebx_tsc;
4243
4244                 /*
4245                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4246                  */
4247                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4248                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4249
4250                 if (ebx_tsc != 0) {
4251
4252                         if (!quiet && (ebx != 0))
4253                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4254                                         eax_crystal, ebx_tsc, crystal_hz);
4255
4256                         if (crystal_hz == 0)
4257                                 switch(model) {
4258                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4259                                 case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
4260                                 case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
4261                                 case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
4262                                         crystal_hz = 24000000;  /* 24.0 MHz */
4263                                         break;
4264                                 case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4265                                 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
4266                                         crystal_hz = 25000000;  /* 25.0 MHz */
4267                                         break;
4268                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4269                                 case INTEL_FAM6_ATOM_GEMINI_LAKE:
4270                                         crystal_hz = 19200000;  /* 19.2 MHz */
4271                                         break;
4272                                 default:
4273                                         crystal_hz = 0;
4274                         }
4275
4276                         if (crystal_hz) {
4277                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4278                                 if (!quiet)
4279                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4280                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4281                         }
4282                 }
4283         }
4284         if (max_level >= 0x16) {
4285                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4286
4287                 /*
4288                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4289                  */
4290                 base_mhz = max_mhz = bus_mhz = edx = 0;
4291
4292                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4293                 if (!quiet)
4294                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4295                                 base_mhz, max_mhz, bus_mhz);
4296         }
4297
4298         if (has_aperf)
4299                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4300
4301         BIC_PRESENT(BIC_IRQ);
4302         BIC_PRESENT(BIC_TSC_MHz);
4303
4304         if (probe_nhm_msrs(family, model)) {
4305                 do_nhm_platform_info = 1;
4306                 BIC_PRESENT(BIC_CPU_c1);
4307                 BIC_PRESENT(BIC_CPU_c3);
4308                 BIC_PRESENT(BIC_CPU_c6);
4309                 BIC_PRESENT(BIC_SMI);
4310         }
4311         do_snb_cstates = has_snb_msrs(family, model);
4312
4313         if (do_snb_cstates)
4314                 BIC_PRESENT(BIC_CPU_c7);
4315
4316         do_irtl_snb = has_snb_msrs(family, model);
4317         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4318                 BIC_PRESENT(BIC_Pkgpc2);
4319         if (pkg_cstate_limit >= PCL__3)
4320                 BIC_PRESENT(BIC_Pkgpc3);
4321         if (pkg_cstate_limit >= PCL__6)
4322                 BIC_PRESENT(BIC_Pkgpc6);
4323         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4324                 BIC_PRESENT(BIC_Pkgpc7);
4325         if (has_slv_msrs(family, model)) {
4326                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4327                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4328                 BIC_PRESENT(BIC_Pkgpc6);
4329                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4330                 BIC_PRESENT(BIC_Mod_c6);
4331                 use_c1_residency_msr = 1;
4332         }
4333         if (is_dnv(family, model)) {
4334                 BIC_PRESENT(BIC_CPU_c1);
4335                 BIC_NOT_PRESENT(BIC_CPU_c3);
4336                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4337                 BIC_NOT_PRESENT(BIC_CPU_c7);
4338                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4339                 use_c1_residency_msr = 1;
4340         }
4341         if (is_skx(family, model)) {
4342                 BIC_NOT_PRESENT(BIC_CPU_c3);
4343                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4344                 BIC_NOT_PRESENT(BIC_CPU_c7);
4345                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4346         }
4347         if (is_bdx(family, model)) {
4348                 BIC_NOT_PRESENT(BIC_CPU_c7);
4349                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4350         }
4351         if (has_hsw_msrs(family, model)) {
4352                 BIC_PRESENT(BIC_Pkgpc8);
4353                 BIC_PRESENT(BIC_Pkgpc9);
4354                 BIC_PRESENT(BIC_Pkgpc10);
4355         }
4356         do_irtl_hsw = has_hsw_msrs(family, model);
4357         if (has_skl_msrs(family, model)) {
4358                 BIC_PRESENT(BIC_Totl_c0);
4359                 BIC_PRESENT(BIC_Any_c0);
4360                 BIC_PRESENT(BIC_GFX_c0);
4361                 BIC_PRESENT(BIC_CPUGFX);
4362         }
4363         do_slm_cstates = is_slm(family, model);
4364         do_knl_cstates  = is_knl(family, model);
4365
4366         if (!quiet)
4367                 decode_misc_pwr_mgmt_msr();
4368
4369         if (!quiet && has_slv_msrs(family, model))
4370                 decode_c6_demotion_policy_msr();
4371
4372         rapl_probe(family, model);
4373         perf_limit_reasons_probe(family, model);
4374
4375         if (!quiet)
4376                 dump_cstate_pstate_config_info(family, model);
4377
4378         if (!quiet)
4379                 dump_sysfs_cstate_config();
4380         if (!quiet)
4381                 dump_sysfs_pstate_config();
4382
4383         if (has_skl_msrs(family, model))
4384                 calculate_tsc_tweak();
4385
4386         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4387                 BIC_PRESENT(BIC_GFX_rc6);
4388
4389         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4390                 BIC_PRESENT(BIC_GFXMHz);
4391
4392         if (!quiet)
4393                 decode_misc_feature_control();
4394
4395         return;
4396 }
4397
4398
/*
 * dir_filter()
 *
 * Directory-entry filter for /dev/cpu/: return 1 for names that
 * start with a decimal digit, 0 for everything else
 * (ie. filter out ".", "..", "microcode").
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() is only defined for EOF and values representable as
	 * unsigned char; d_name is plain char, which may be negative.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4410
/*
 * open_dev_cpu_msr()
 *
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
4415
4416 void topology_probe()
4417 {
4418         int i;
4419         int max_core_id = 0;
4420         int max_package_id = 0;
4421         int max_siblings = 0;
4422         struct cpu_topology {
4423                 int core_id;
4424                 int physical_package_id;
4425         } *cpus;
4426
4427         /* Initialize num_cpus, max_cpu_num */
4428         topo.num_cpus = 0;
4429         topo.max_cpu_num = 0;
4430         for_all_proc_cpus(count_cpus);
4431         if (!summary_only && topo.num_cpus > 1)
4432                 BIC_PRESENT(BIC_CPU);
4433
4434         if (debug > 1)
4435                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4436
4437         cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4438         if (cpus == NULL)
4439                 err(1, "calloc cpus");
4440
4441         /*
4442          * Allocate and initialize cpu_present_set
4443          */
4444         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4445         if (cpu_present_set == NULL)
4446                 err(3, "CPU_ALLOC");
4447         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4448         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4449         for_all_proc_cpus(mark_cpu_present);
4450
4451         /*
4452          * Validate that all cpus in cpu_subset are also in cpu_present_set
4453          */
4454         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4455                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4456                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4457                                 err(1, "cpu%d not present", i);
4458         }
4459
4460         /*
4461          * Allocate and initialize cpu_affinity_set
4462          */
4463         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4464         if (cpu_affinity_set == NULL)
4465                 err(3, "CPU_ALLOC");
4466         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4467         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4468
4469
4470         /*
4471          * For online cpus
4472          * find max_core_id, max_package_id
4473          */
4474         for (i = 0; i <= topo.max_cpu_num; ++i) {
4475                 int siblings;
4476
4477                 if (cpu_is_not_present(i)) {
4478                         if (debug > 1)
4479                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4480                         continue;
4481                 }
4482                 cpus[i].core_id = get_core_id(i);
4483                 if (cpus[i].core_id > max_core_id)
4484                         max_core_id = cpus[i].core_id;
4485
4486                 cpus[i].physical_package_id = get_physical_package_id(i);
4487                 if (cpus[i].physical_package_id > max_package_id)
4488                         max_package_id = cpus[i].physical_package_id;
4489
4490                 siblings = get_num_ht_siblings(i);
4491                 if (siblings > max_siblings)
4492                         max_siblings = siblings;
4493                 if (debug > 1)
4494                         fprintf(outf, "cpu %d pkg %d core %d\n",
4495                                 i, cpus[i].physical_package_id, cpus[i].core_id);
4496         }
4497         topo.num_cores_per_pkg = max_core_id + 1;
4498         if (debug > 1)
4499                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4500                         max_core_id, topo.num_cores_per_pkg);
4501         if (!summary_only && topo.num_cores_per_pkg > 1)
4502                 BIC_PRESENT(BIC_Core);
4503
4504         topo.num_packages = max_package_id + 1;
4505         if (debug > 1)
4506                 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4507                         max_package_id, topo.num_packages);
4508         if (!summary_only && topo.num_packages > 1)
4509                 BIC_PRESENT(BIC_Package);
4510
4511         topo.num_threads_per_core = max_siblings;
4512         if (debug > 1)
4513                 fprintf(outf, "max_siblings %d\n", max_siblings);
4514
4515         free(cpus);
4516 }
4517
4518 void
4519 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
4520 {
4521         int i;
4522
4523         *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
4524                 topo.num_packages, sizeof(struct thread_data));
4525         if (*t == NULL)
4526                 goto error;
4527
4528         for (i = 0; i < topo.num_threads_per_core *
4529                 topo.num_cores_per_pkg * topo.num_packages; i++)
4530                 (*t)[i].cpu_id = -1;
4531
4532         *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
4533                 sizeof(struct core_data));
4534         if (*c == NULL)
4535                 goto error;
4536
4537         for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
4538                 (*c)[i].core_id = -1;
4539
4540         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4541         if (*p == NULL)
4542                 goto error;
4543
4544         for (i = 0; i < topo.num_packages; i++)
4545                 (*p)[i].package_id = i;
4546
4547         return;
4548 error:
4549         err(1, "calloc counters");
4550 }
4551 /*
4552  * init_counter()
4553  *
4554  * set cpu_id, core_num, pkg_num
4555  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4556  *
4557  * increment topo.num_cores when 1st core in pkg seen
4558  */
4559 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4560         struct pkg_data *pkg_base, int thread_num, int core_num,
4561         int pkg_num, int cpu_id)
4562 {
4563         struct thread_data *t;
4564         struct core_data *c;
4565         struct pkg_data *p;
4566
4567         t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
4568         c = GET_CORE(core_base, core_num, pkg_num);
4569         p = GET_PKG(pkg_base, pkg_num);
4570
4571         t->cpu_id = cpu_id;
4572         if (thread_num == 0) {
4573                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4574                 if (cpu_is_first_core_in_package(cpu_id))
4575                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4576         }
4577
4578         c->core_id = core_num;
4579         p->package_id = pkg_num;
4580 }
4581
4582
4583 int initialize_counters(int cpu_id)
4584 {
4585         int my_thread_id, my_core_id, my_package_id;
4586
4587         my_package_id = get_physical_package_id(cpu_id);
4588         my_core_id = get_core_id(cpu_id);
4589         my_thread_id = get_cpu_position_in_core(cpu_id);
4590         if (!my_thread_id)
4591                 topo.num_cores++;
4592
4593         init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4594         init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4595         return 0;
4596 }
4597
4598 void allocate_output_buffer()
4599 {
4600         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4601         outp = output_buffer;
4602         if (outp == NULL)
4603                 err(-1, "calloc output buffer");
4604 }
4605 void allocate_fd_percpu(void)
4606 {
4607         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4608         if (fd_percpu == NULL)
4609                 err(-1, "calloc fd_percpu");
4610 }
4611 void allocate_irq_buffers(void)
4612 {
4613         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4614         if (irq_column_2_cpu == NULL)
4615                 err(-1, "calloc %d", topo.num_cpus);
4616
4617         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4618         if (irqs_per_cpu == NULL)
4619                 err(-1, "calloc %d", topo.max_cpu_num + 1);
4620 }
4621 void setup_all_buffers(void)
4622 {
4623         topology_probe();
4624         allocate_irq_buffers();
4625         allocate_fd_percpu();
4626         allocate_counters(&thread_even, &core_even, &package_even);
4627         allocate_counters(&thread_odd, &core_odd, &package_odd);
4628         allocate_output_buffer();
4629         for_all_proc_cpus(initialize_counters);
4630 }
4631
4632 void set_base_cpu(void)
4633 {
4634         base_cpu = sched_getcpu();
4635         if (base_cpu < 0)
4636                 err(-ENODEV, "No valid cpus found");
4637
4638         if (debug > 1)
4639                 fprintf(outf, "base_cpu = %d\n", base_cpu);
4640 }
4641
4642 void turbostat_init()
4643 {
4644         setup_all_buffers();
4645         set_base_cpu();
4646         check_dev_msr();
4647         check_permissions();
4648         process_cpuid();
4649
4650
4651         if (!quiet)
4652                 for_all_cpus(print_hwp, ODD_COUNTERS);
4653
4654         if (!quiet)
4655                 for_all_cpus(print_epb, ODD_COUNTERS);
4656
4657         if (!quiet)
4658                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
4659
4660         if (!quiet)
4661                 for_all_cpus(print_rapl, ODD_COUNTERS);
4662
4663         for_all_cpus(set_temperature_target, ODD_COUNTERS);
4664
4665         if (!quiet)
4666                 for_all_cpus(print_thermal, ODD_COUNTERS);
4667
4668         if (!quiet && do_irtl_snb)
4669                 print_irtl();
4670 }
4671
4672 int fork_it(char **argv)
4673 {
4674         pid_t child_pid;
4675         int status;
4676
4677         snapshot_proc_sysfs_files();
4678         status = for_all_cpus(get_counters, EVEN_COUNTERS);
4679         if (status)
4680                 exit(status);
4681         /* clear affinity side-effect of get_counters() */
4682         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4683         gettimeofday(&tv_even, (struct timezone *)NULL);
4684
4685         child_pid = fork();
4686         if (!child_pid) {
4687                 /* child */
4688                 execvp(argv[0], argv);
4689                 err(errno, "exec %s", argv[0]);
4690         } else {
4691
4692                 /* parent */
4693                 if (child_pid == -1)
4694                         err(1, "fork");
4695
4696                 signal(SIGINT, SIG_IGN);
4697                 signal(SIGQUIT, SIG_IGN);
4698                 if (waitpid(child_pid, &status, 0) == -1)
4699                         err(status, "waitpid");
4700         }
4701         /*
4702          * n.b. fork_it() does not check for errors from for_all_cpus()
4703          * because re-starting is problematic when forking
4704          */
4705         snapshot_proc_sysfs_files();
4706         for_all_cpus(get_counters, ODD_COUNTERS);
4707         gettimeofday(&tv_odd, (struct timezone *)NULL);
4708         timersub(&tv_odd, &tv_even, &tv_delta);
4709         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4710                 fprintf(outf, "%s: Counter reset detected\n", progname);
4711         else {
4712                 compute_average(EVEN_COUNTERS);
4713                 format_all_counters(EVEN_COUNTERS);
4714         }
4715
4716         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4717
4718         flush_output_stderr();
4719
4720         return status;
4721 }
4722
4723 int get_and_dump_counters(void)
4724 {
4725         int status;
4726
4727         snapshot_proc_sysfs_files();
4728         status = for_all_cpus(get_counters, ODD_COUNTERS);
4729         if (status)
4730                 return status;
4731
4732         status = for_all_cpus(dump_counters, ODD_COUNTERS);
4733         if (status)
4734                 return status;
4735
4736         flush_output_stdout();
4737
4738         return status;
4739 }
4740
4741 void print_version() {
4742         fprintf(outf, "turbostat version 17.06.23"
4743                 " - Len Brown <lenb@kernel.org>\n");
4744 }
4745
4746 int add_counter(unsigned int msr_num, char *path, char *name,
4747         unsigned int width, enum counter_scope scope,
4748         enum counter_type type, enum counter_format format, int flags)
4749 {
4750         struct msr_counter *msrp;
4751
4752         msrp = calloc(1, sizeof(struct msr_counter));
4753         if (msrp == NULL) {
4754                 perror("calloc");
4755                 exit(1);
4756         }
4757
4758         msrp->msr_num = msr_num;
4759         strncpy(msrp->name, name, NAME_BYTES);
4760         if (path)
4761                 strncpy(msrp->path, path, PATH_BYTES);
4762         msrp->width = width;
4763         msrp->type = type;
4764         msrp->format = format;
4765         msrp->flags = flags;
4766
4767         switch (scope) {
4768
4769         case SCOPE_CPU:
4770                 msrp->next = sys.tp;
4771                 sys.tp = msrp;
4772                 sys.added_thread_counters++;
4773                 if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
4774                         fprintf(stderr, "exceeded max %d added thread counters\n",
4775                                 MAX_ADDED_COUNTERS);
4776                         exit(-1);
4777                 }
4778                 break;
4779
4780         case SCOPE_CORE:
4781                 msrp->next = sys.cp;
4782                 sys.cp = msrp;
4783                 sys.added_core_counters++;
4784                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
4785                         fprintf(stderr, "exceeded max %d added core counters\n",
4786                                 MAX_ADDED_COUNTERS);
4787                         exit(-1);
4788                 }
4789                 break;
4790
4791         case SCOPE_PACKAGE:
4792                 msrp->next = sys.pp;
4793                 sys.pp = msrp;
4794                 sys.added_package_counters++;
4795                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
4796                         fprintf(stderr, "exceeded max %d added package counters\n",
4797                                 MAX_ADDED_COUNTERS);
4798                         exit(-1);
4799                 }
4800                 break;
4801         }
4802
4803         return 0;
4804 }
4805
4806 void parse_add_command(char *add_command)
4807 {
4808         int msr_num = 0;
4809         char *path = NULL;
4810         char name_buffer[NAME_BYTES] = "";
4811         int width = 64;
4812         int fail = 0;
4813         enum counter_scope scope = SCOPE_CPU;
4814         enum counter_type type = COUNTER_CYCLES;
4815         enum counter_format format = FORMAT_DELTA;
4816
4817         while (add_command) {
4818
4819                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
4820                         goto next;
4821
4822                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
4823                         goto next;
4824
4825                 if (*add_command == '/') {
4826                         path = add_command;
4827                         goto next;
4828                 }
4829
4830                 if (sscanf(add_command, "u%d", &width) == 1) {
4831                         if ((width == 32) || (width == 64))
4832                                 goto next;
4833                         width = 64;
4834                 }
4835                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
4836                         scope = SCOPE_CPU;
4837                         goto next;
4838                 }
4839                 if (!strncmp(add_command, "core", strlen("core"))) {
4840                         scope = SCOPE_CORE;
4841                         goto next;
4842                 }
4843                 if (!strncmp(add_command, "package", strlen("package"))) {
4844                         scope = SCOPE_PACKAGE;
4845                         goto next;
4846                 }
4847                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
4848                         type = COUNTER_CYCLES;
4849                         goto next;
4850                 }
4851                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
4852                         type = COUNTER_SECONDS;
4853                         goto next;
4854                 }
4855                 if (!strncmp(add_command, "usec", strlen("usec"))) {
4856                         type = COUNTER_USEC;
4857                         goto next;
4858                 }
4859                 if (!strncmp(add_command, "raw", strlen("raw"))) {
4860                         format = FORMAT_RAW;
4861                         goto next;
4862                 }
4863                 if (!strncmp(add_command, "delta", strlen("delta"))) {
4864                         format = FORMAT_DELTA;
4865                         goto next;
4866                 }
4867                 if (!strncmp(add_command, "percent", strlen("percent"))) {
4868                         format = FORMAT_PERCENT;
4869                         goto next;
4870                 }
4871
4872                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
4873                         char *eos;
4874
4875                         eos = strchr(name_buffer, ',');
4876                         if (eos)
4877                                 *eos = '\0';
4878                         goto next;
4879                 }
4880
4881 next:
4882                 add_command = strchr(add_command, ',');
4883                 if (add_command) {
4884                         *add_command = '\0';
4885                         add_command++;
4886                 }
4887
4888         }
4889         if ((msr_num == 0) && (path == NULL)) {
4890                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
4891                 fail++;
4892         }
4893
4894         /* generate default column header */
4895         if (*name_buffer == '\0') {
4896                 if (width == 32)
4897                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
4898                 else
4899                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
4900         }
4901
4902         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
4903                 fail++;
4904
4905         if (fail) {
4906                 help();
4907                 exit(1);
4908         }
4909 }
4910
4911 int is_deferred_skip(char *name)
4912 {
4913         int i;
4914
4915         for (i = 0; i < deferred_skip_index; ++i)
4916                 if (!strcmp(name, deferred_skip_names[i]))
4917                         return 1;
4918         return 0;
4919 }
4920
4921 void probe_sysfs(void)
4922 {
4923         char path[64];
4924         char name_buf[16];
4925         FILE *input;
4926         int state;
4927         char *sp;
4928
4929         if (!DO_BIC(BIC_sysfs))
4930                 return;
4931
4932         for (state = 10; state > 0; --state) {
4933
4934                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4935                         base_cpu, state);
4936                 input = fopen(path, "r");
4937                 if (input == NULL)
4938                         continue;
4939                 fgets(name_buf, sizeof(name_buf), input);
4940
4941                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4942                 sp = strchr(name_buf, '-');
4943                 if (!sp)
4944                         sp = strchrnul(name_buf, '\n');
4945                 *sp = '%';
4946                 *(sp + 1) = '\0';
4947
4948                 fclose(input);
4949
4950                 sprintf(path, "cpuidle/state%d/time", state);
4951
4952                 if (is_deferred_skip(name_buf))
4953                         continue;
4954
4955                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
4956                                 FORMAT_PERCENT, SYSFS_PERCPU);
4957         }
4958
4959         for (state = 10; state > 0; --state) {
4960
4961                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4962                         base_cpu, state);
4963                 input = fopen(path, "r");
4964                 if (input == NULL)
4965                         continue;
4966                 fgets(name_buf, sizeof(name_buf), input);
4967                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4968                 sp = strchr(name_buf, '-');
4969                 if (!sp)
4970                         sp = strchrnul(name_buf, '\n');
4971                 *sp = '\0';
4972                 fclose(input);
4973
4974                 sprintf(path, "cpuidle/state%d/usage", state);
4975
4976                 if (is_deferred_skip(name_buf))
4977                         continue;
4978
4979                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
4980                                 FORMAT_DELTA, SYSFS_PERCPU);
4981         }
4982
4983 }
4984
4985
4986 /*
4987  * parse cpuset with following syntax
4988  * 1,2,4..6,8-10 and set bits in cpu_subset
4989  */
4990 void parse_cpu_command(char *optarg)
4991 {
4992         unsigned int start, end;
4993         char *next;
4994
4995         if (!strcmp(optarg, "core")) {
4996                 if (cpu_subset)
4997                         goto error;
4998                 show_core_only++;
4999                 return;
5000         }
5001         if (!strcmp(optarg, "package")) {
5002                 if (cpu_subset)
5003                         goto error;
5004                 show_pkg_only++;
5005                 return;
5006         }
5007         if (show_core_only || show_pkg_only)
5008                 goto error;
5009
5010         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5011         if (cpu_subset == NULL)
5012                 err(3, "CPU_ALLOC");
5013         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5014
5015         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5016
5017         next = optarg;
5018
5019         while (next && *next) {
5020
5021                 if (*next == '-')       /* no negative cpu numbers */
5022                         goto error;
5023
5024                 start = strtoul(next, &next, 10);
5025
5026                 if (start >= CPU_SUBSET_MAXCPUS)
5027                         goto error;
5028                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5029
5030                 if (*next == '\0')
5031                         break;
5032
5033                 if (*next == ',') {
5034                         next += 1;
5035                         continue;
5036                 }
5037
5038                 if (*next == '-') {
5039                         next += 1;      /* start range */
5040                 } else if (*next == '.') {
5041                         next += 1;
5042                         if (*next == '.')
5043                                 next += 1;      /* start range */
5044                         else
5045                                 goto error;
5046                 }
5047
5048                 end = strtoul(next, &next, 10);
5049                 if (end <= start)
5050                         goto error;
5051
5052                 while (++start <= end) {
5053                         if (start >= CPU_SUBSET_MAXCPUS)
5054                                 goto error;
5055                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5056                 }
5057
5058                 if (*next == ',')
5059                         next += 1;
5060                 else if (*next != '\0')
5061                         goto error;
5062         }
5063
5064         return;
5065
5066 error:
5067         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5068         help();
5069         exit(-1);
5070 }
5071
5072
5073 void cmdline(int argc, char **argv)
5074 {
5075         int opt;
5076         int option_index = 0;
5077         static struct option long_options[] = {
5078                 {"add",         required_argument,      0, 'a'},
5079                 {"cpu",         required_argument,      0, 'c'},
5080                 {"Dump",        no_argument,            0, 'D'},
5081                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5082                 {"enable",      required_argument,      0, 'e'},
5083                 {"interval",    required_argument,      0, 'i'},
5084                 {"help",        no_argument,            0, 'h'},
5085                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5086                 {"Joules",      no_argument,            0, 'J'},
5087                 {"list",        no_argument,            0, 'l'},
5088                 {"out",         required_argument,      0, 'o'},
5089                 {"quiet",       no_argument,            0, 'q'},
5090                 {"show",        required_argument,      0, 's'},
5091                 {"Summary",     no_argument,            0, 'S'},
5092                 {"TCC",         required_argument,      0, 'T'},
5093                 {"version",     no_argument,            0, 'v' },
5094                 {0,             0,                      0,  0 }
5095         };
5096
5097         progname = argv[0];
5098
5099         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jo:qST:v",
5100                                 long_options, &option_index)) != -1) {
5101                 switch (opt) {
5102                 case 'a':
5103                         parse_add_command(optarg);
5104                         break;
5105                 case 'c':
5106                         parse_cpu_command(optarg);
5107                         break;
5108                 case 'D':
5109                         dump_only++;
5110                         break;
5111                 case 'e':
5112                         /* --enable specified counter */
5113                         bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5114                         break;
5115                 case 'd':
5116                         debug++;
5117                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5118                         break;
5119                 case 'H':
5120                         /*
5121                          * --hide: do not show those specified
5122                          *  multiple invocations simply clear more bits in enabled mask
5123                          */
5124                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5125                         break;
5126                 case 'h':
5127                 default:
5128                         help();
5129                         exit(1);
5130                 case 'i':
5131                         {
5132                                 double interval = strtod(optarg, NULL);
5133
5134                                 if (interval < 0.001) {
5135                                         fprintf(outf, "interval %f seconds is too small\n",
5136                                                 interval);
5137                                         exit(2);
5138                                 }
5139
5140                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5141                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5142                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5143                         }
5144                         break;
5145                 case 'J':
5146                         rapl_joules++;
5147                         break;
5148                 case 'l':
5149                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5150                         list_header_only++;
5151                         quiet++;
5152                         break;
5153                 case 'o':
5154                         outf = fopen_or_die(optarg, "w");
5155                         break;
5156                 case 'q':
5157                         quiet = 1;
5158                         break;
5159                 case 's':
5160                         /*
5161                          * --show: show only those specified
5162                          *  The 1st invocation will clear and replace the enabled mask
5163                          *  subsequent invocations can add to it.
5164                          */
5165                         if (shown == 0)
5166                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5167                         else
5168                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5169                         shown = 1;
5170                         break;
5171                 case 'S':
5172                         summary_only++;
5173                         break;
5174                 case 'T':
5175                         tcc_activation_temp_override = atoi(optarg);
5176                         break;
5177                 case 'v':
5178                         print_version();
5179                         exit(0);
5180                         break;
5181                 }
5182         }
5183 }
5184
5185 int main(int argc, char **argv)
5186 {
5187         outf = stderr;
5188
5189         cmdline(argc, argv);
5190
5191         if (!quiet)
5192                 print_version();
5193
5194         probe_sysfs();
5195
5196         turbostat_init();
5197
5198         /* dump counters and exit */
5199         if (dump_only)
5200                 return get_and_dump_counters();
5201
5202         /* list header and exit */
5203         if (list_header_only) {
5204                 print_header(",");
5205                 flush_output_stdout();
5206                 return 0;
5207         }
5208
5209         /*
5210          * if any params left, it must be a command to fork
5211          */
5212         if (argc - optind)
5213                 return fork_it(argv + optind);
5214         else
5215                 turbostat_loop();
5216
5217         return 0;
5218 }