Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[sfrench/cifs-2.6.git] / tools / power / x86 / turbostat / turbostat.c
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel and AMD processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47
48 char *proc_stat = "/proc/stat";
49 FILE *outf;
50 int *fd_percpu;
51 struct timeval interval_tv = {5, 0};
52 struct timespec interval_ts = {5, 0};
53 struct timespec one_msec = {0, 1000000};
54 unsigned int num_iterations;
55 unsigned int debug;
56 unsigned int quiet;
57 unsigned int shown;
58 unsigned int sums_need_wide_columns;
59 unsigned int rapl_joules;
60 unsigned int summary_only;
61 unsigned int list_header_only;
62 unsigned int dump_only;
63 unsigned int do_snb_cstates;
64 unsigned int do_knl_cstates;
65 unsigned int do_slm_cstates;
66 unsigned int do_cnl_cstates;
67 unsigned int use_c1_residency_msr;
68 unsigned int has_aperf;
69 unsigned int has_epb;
70 unsigned int do_irtl_snb;
71 unsigned int do_irtl_hsw;
72 unsigned int units = 1000000;   /* MHz etc */
73 unsigned int genuine_intel;
74 unsigned int authentic_amd;
75 unsigned int max_level, max_extended_level;
76 unsigned int has_invariant_tsc;
77 unsigned int do_nhm_platform_info;
78 unsigned int no_MSR_MISC_PWR_MGMT;
79 unsigned int aperf_mperf_multiplier = 1;
80 double bclk;
81 double base_hz;
82 unsigned int has_base_hz;
83 double tsc_tweak = 1.0;
84 unsigned int show_pkg_only;
85 unsigned int show_core_only;
86 char *output_buffer, *outp;
87 unsigned int do_rapl;
88 unsigned int do_dts;
89 unsigned int do_ptm;
90 unsigned long long  gfx_cur_rc6_ms;
91 unsigned long long cpuidle_cur_cpu_lpi_us;
92 unsigned long long cpuidle_cur_sys_lpi_us;
93 unsigned int gfx_cur_mhz;
94 unsigned int tcc_activation_temp;
95 unsigned int tcc_activation_temp_override;
96 double rapl_power_units, rapl_time_units;
97 double rapl_dram_energy_units, rapl_energy_units;
98 double rapl_joule_counter_range;
99 unsigned int do_core_perf_limit_reasons;
100 unsigned int has_automatic_cstate_conversion;
101 unsigned int do_gfx_perf_limit_reasons;
102 unsigned int do_ring_perf_limit_reasons;
103 unsigned int crystal_hz;
104 unsigned long long tsc_hz;
105 int base_cpu;
106 double discover_bclk(unsigned int family, unsigned int model);
107 unsigned int has_hwp;   /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
108                         /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
109 unsigned int has_hwp_notify;            /* IA32_HWP_INTERRUPT */
110 unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
111 unsigned int has_hwp_epp;               /* IA32_HWP_REQUEST[bits 31:24] */
112 unsigned int has_hwp_pkg;               /* IA32_HWP_REQUEST_PKG */
113 unsigned int has_misc_feature_control;
114 unsigned int first_counter_read = 1;
115
116 #define RAPL_PKG                (1 << 0)
117                                         /* 0x610 MSR_PKG_POWER_LIMIT */
118                                         /* 0x611 MSR_PKG_ENERGY_STATUS */
119 #define RAPL_PKG_PERF_STATUS    (1 << 1)
120                                         /* 0x613 MSR_PKG_PERF_STATUS */
121 #define RAPL_PKG_POWER_INFO     (1 << 2)
122                                         /* 0x614 MSR_PKG_POWER_INFO */
123
124 #define RAPL_DRAM               (1 << 3)
125                                         /* 0x618 MSR_DRAM_POWER_LIMIT */
126                                         /* 0x619 MSR_DRAM_ENERGY_STATUS */
127 #define RAPL_DRAM_PERF_STATUS   (1 << 4)
128                                         /* 0x61b MSR_DRAM_PERF_STATUS */
129 #define RAPL_DRAM_POWER_INFO    (1 << 5)
130                                         /* 0x61c MSR_DRAM_POWER_INFO */
131
132 #define RAPL_CORES_POWER_LIMIT  (1 << 6)
133                                         /* 0x638 MSR_PP0_POWER_LIMIT */
134 #define RAPL_CORE_POLICY        (1 << 7)
135                                         /* 0x63a MSR_PP0_POLICY */
136
137 #define RAPL_GFX                (1 << 8)
138                                         /* 0x640 MSR_PP1_POWER_LIMIT */
139                                         /* 0x641 MSR_PP1_ENERGY_STATUS */
140                                         /* 0x642 MSR_PP1_POLICY */
141
142 #define RAPL_CORES_ENERGY_STATUS        (1 << 9)
143                                         /* 0x639 MSR_PP0_ENERGY_STATUS */
144 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
145 #define TJMAX_DEFAULT   100
146
147 #define MAX(a, b) ((a) > (b) ? (a) : (b))
148
149 /*
150  * buffer size used by sscanf() for added column names
151  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
152  */
153 #define NAME_BYTES 20
154 #define PATH_BYTES 128
155
156 int backwards_count;
157 char *progname;
158
159 #define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
160 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
161 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
162 #define MAX_ADDED_COUNTERS 8
163 #define MAX_ADDED_THREAD_COUNTERS 24
164 #define BITMASK_SIZE 32
165
/*
 * Per-thread counter record.
 *
 * Two arrays of these are declared, thread_even and thread_odd; an entry is
 * located with GET_THREAD().  for_all_cpus() tests cpu_id against the present
 * CPU set before handing the record to its callback.
 * NOTE(review): presumably the even/odd arrays hold alternating samples so
 * that deltas can be computed -- confirm against the sampling loop.
 *
 * flags holds the CPU_IS_FIRST_* bits defined inside the struct.
 * counter[] holds one value per added thread counter, in sys.tp list order
 * (dump_counters() walks sys.tp and counter[] in lock-step).
 */
166 struct thread_data {
167         struct timeval tv_begin;
168         struct timeval tv_end;
169         unsigned long long tsc;
170         unsigned long long aperf;
171         unsigned long long mperf;
172         unsigned long long c1;
173         unsigned long long  irq_count;
174         unsigned int smi_count;
175         unsigned int cpu_id;
176         unsigned int apic_id;
177         unsigned int x2apic_id;
178         unsigned int flags;
179 #define CPU_IS_FIRST_THREAD_IN_CORE     0x2
180 #define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
181         unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
182 } *thread_even, *thread_odd;
183
184 struct core_data {
185         unsigned long long c3;
186         unsigned long long c6;
187         unsigned long long c7;
188         unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
189         unsigned int core_temp_c;
190         unsigned int core_id;
191         unsigned long long counter[MAX_ADDED_COUNTERS];
192 } *core_even, *core_odd;
193
194 struct pkg_data {
195         unsigned long long pc2;
196         unsigned long long pc3;
197         unsigned long long pc6;
198         unsigned long long pc7;
199         unsigned long long pc8;
200         unsigned long long pc9;
201         unsigned long long pc10;
202         unsigned long long cpu_lpi;
203         unsigned long long sys_lpi;
204         unsigned long long pkg_wtd_core_c0;
205         unsigned long long pkg_any_core_c0;
206         unsigned long long pkg_any_gfxe_c0;
207         unsigned long long pkg_both_core_gfxe_c0;
208         long long gfx_rc6_ms;
209         unsigned int gfx_mhz;
210         unsigned int package_id;
211         unsigned int energy_pkg;        /* MSR_PKG_ENERGY_STATUS */
212         unsigned int energy_dram;       /* MSR_DRAM_ENERGY_STATUS */
213         unsigned int energy_cores;      /* MSR_PP0_ENERGY_STATUS */
214         unsigned int energy_gfx;        /* MSR_PP1_ENERGY_STATUS */
215         unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
216         unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
217         unsigned int pkg_temp_c;
218         unsigned long long counter[MAX_ADDED_COUNTERS];
219 } *package_even, *package_odd;
220
221 #define ODD_COUNTERS thread_odd, core_odd, package_odd
222 #define EVEN_COUNTERS thread_even, core_even, package_even
223
/*
 * GET_THREAD - address of one thread_data entry inside a flat array.
 *
 * The array is indexed as [pkg][node][core][thread], with the dimensions
 * taken from topo.nodes_per_pkg, topo.cores_per_node and
 * topo.threads_per_core.  No bounds checking is performed.
 */
224 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
225         ((thread_base) +                                                      \
226          ((pkg_no) *                                                          \
227           topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
228          ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
229          ((core_no) * topo.threads_per_core) +                                \
230          (thread_no))
231
/*
 * GET_CORE - address of one core_data entry inside a flat array.
 *
 * The array is indexed as [pkg][node][core], with the dimensions taken from
 * topo.nodes_per_pkg and topo.cores_per_node.  No bounds checking is
 * performed.
 */
232 #define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
233         ((core_base) +                                                  \
234          ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
235          ((node_no) * topo.cores_per_node) +                            \
236          (core_no))
237
238
/*
 * GET_PKG - address of entry pkg_no in the array starting at pkg_base.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. "cond ? a : b") bind as the caller intended.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
240
241 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
242 enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
243 enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
244
245 struct msr_counter {
246         unsigned int msr_num;
247         char name[NAME_BYTES];
248         char path[PATH_BYTES];
249         unsigned int width;
250         enum counter_type type;
251         enum counter_format format;
252         struct msr_counter *next;
253         unsigned int flags;
254 #define FLAGS_HIDE      (1 << 0)
255 #define FLAGS_SHOW      (1 << 1)
256 #define SYSFS_PERCPU    (1 << 1)
257 };
258
259 struct sys_counters {
260         unsigned int added_thread_counters;
261         unsigned int added_core_counters;
262         unsigned int added_package_counters;
263         struct msr_counter *tp;
264         struct msr_counter *cp;
265         struct msr_counter *pp;
266 } sys;
267
268 struct system_summary {
269         struct thread_data threads;
270         struct core_data cores;
271         struct pkg_data packages;
272 } average;
273
274 struct cpu_topology {
275         int physical_package_id;
276         int logical_cpu_id;
277         int physical_node_id;
278         int logical_node_id;    /* 0-based count within the package */
279         int physical_core_id;
280         int thread_id;
281         cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
282 } *cpus;
283
284 struct topo_params {
285         int num_packages;
286         int num_cpus;
287         int num_cores;
288         int max_cpu_num;
289         int max_node_num;
290         int nodes_per_pkg;
291         int cores_per_node;
292         int threads_per_core;
293 } topo;
294
295 struct timeval tv_even, tv_odd, tv_delta;
296
297 int *irq_column_2_cpu;  /* /proc/interrupts column numbers */
298 int *irqs_per_cpu;              /* indexed by cpu_num */
299
300 void setup_all_buffers(void);
301
302 int cpu_is_not_present(int cpu)
303 {
304         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
305 }
306 /*
307  * run func(thread, core, package) in topology order
308  * skip non-present cpus
309  */
310
311 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
312         struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
313 {
314         int retval, pkg_no, core_no, thread_no, node_no;
315
316         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
317                 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
318                         for (node_no = 0; node_no < topo.nodes_per_pkg;
319                              node_no++) {
320                                 for (thread_no = 0; thread_no <
321                                         topo.threads_per_core; ++thread_no) {
322                                         struct thread_data *t;
323                                         struct core_data *c;
324                                         struct pkg_data *p;
325
326                                         t = GET_THREAD(thread_base, thread_no,
327                                                        core_no, node_no,
328                                                        pkg_no);
329
330                                         if (cpu_is_not_present(t->cpu_id))
331                                                 continue;
332
333                                         c = GET_CORE(core_base, core_no,
334                                                      node_no, pkg_no);
335                                         p = GET_PKG(pkg_base, pkg_no);
336
337                                         retval = func(t, c, p);
338                                         if (retval)
339                                                 return retval;
340                                 }
341                         }
342                 }
343         }
344         return 0;
345 }
346
347 int cpu_migrate(int cpu)
348 {
349         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
350         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
351         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
352                 return -1;
353         else
354                 return 0;
355 }
356 int get_msr_fd(int cpu)
357 {
358         char pathname[32];
359         int fd;
360
361         fd = fd_percpu[cpu];
362
363         if (fd)
364                 return fd;
365
366         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
367         fd = open(pathname, O_RDONLY);
368         if (fd < 0)
369                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
370
371         fd_percpu[cpu] = fd;
372
373         return fd;
374 }
375
/*
 * get_msr - read one 64-bit MSR value.
 *
 * @cpu:    CPU whose msr device is read (fd obtained from get_msr_fd())
 * @offset: file offset passed to pread()
 * @msr:    destination for the value
 *
 * Returns 0 on success.  A read that does not return exactly sizeof(*msr)
 * bytes terminates the program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int fd = get_msr_fd(cpu);
        ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
387
388 /*
389  * This list matches the column headers, except
390  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
391  * 2. Core and CPU are moved to the end, we can't have strings that contain them
392  *    matching on them for --show and --hide.
393  */
394 struct msr_counter bic[] = {
395         { 0x0, "usec" },
396         { 0x0, "Time_Of_Day_Seconds" },
397         { 0x0, "Package" },
398         { 0x0, "Node" },
399         { 0x0, "Avg_MHz" },
400         { 0x0, "Busy%" },
401         { 0x0, "Bzy_MHz" },
402         { 0x0, "TSC_MHz" },
403         { 0x0, "IRQ" },
404         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
405         { 0x0, "sysfs" },
406         { 0x0, "CPU%c1" },
407         { 0x0, "CPU%c3" },
408         { 0x0, "CPU%c6" },
409         { 0x0, "CPU%c7" },
410         { 0x0, "ThreadC" },
411         { 0x0, "CoreTmp" },
412         { 0x0, "CoreCnt" },
413         { 0x0, "PkgTmp" },
414         { 0x0, "GFX%rc6" },
415         { 0x0, "GFXMHz" },
416         { 0x0, "Pkg%pc2" },
417         { 0x0, "Pkg%pc3" },
418         { 0x0, "Pkg%pc6" },
419         { 0x0, "Pkg%pc7" },
420         { 0x0, "Pkg%pc8" },
421         { 0x0, "Pkg%pc9" },
422         { 0x0, "Pk%pc10" },
423         { 0x0, "CPU%LPI" },
424         { 0x0, "SYS%LPI" },
425         { 0x0, "PkgWatt" },
426         { 0x0, "CorWatt" },
427         { 0x0, "GFXWatt" },
428         { 0x0, "PkgCnt" },
429         { 0x0, "RAMWatt" },
430         { 0x0, "PKG_%" },
431         { 0x0, "RAM_%" },
432         { 0x0, "Pkg_J" },
433         { 0x0, "Cor_J" },
434         { 0x0, "GFX_J" },
435         { 0x0, "RAM_J" },
436         { 0x0, "Mod%c6" },
437         { 0x0, "Totl%C0" },
438         { 0x0, "Any%C0" },
439         { 0x0, "GFX%C0" },
440         { 0x0, "CPUGFX%" },
441         { 0x0, "Core" },
442         { 0x0, "CPU" },
443         { 0x0, "APIC" },
444         { 0x0, "X2APIC" },
445 };
446
447 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
448 #define BIC_USEC        (1ULL << 0)
449 #define BIC_TOD         (1ULL << 1)
450 #define BIC_Package     (1ULL << 2)
451 #define BIC_Node        (1ULL << 3)
452 #define BIC_Avg_MHz     (1ULL << 4)
453 #define BIC_Busy        (1ULL << 5)
454 #define BIC_Bzy_MHz     (1ULL << 6)
455 #define BIC_TSC_MHz     (1ULL << 7)
456 #define BIC_IRQ         (1ULL << 8)
457 #define BIC_SMI         (1ULL << 9)
458 #define BIC_sysfs       (1ULL << 10)
459 #define BIC_CPU_c1      (1ULL << 11)
460 #define BIC_CPU_c3      (1ULL << 12)
461 #define BIC_CPU_c6      (1ULL << 13)
462 #define BIC_CPU_c7      (1ULL << 14)
463 #define BIC_ThreadC     (1ULL << 15)
464 #define BIC_CoreTmp     (1ULL << 16)
465 #define BIC_CoreCnt     (1ULL << 17)
466 #define BIC_PkgTmp      (1ULL << 18)
467 #define BIC_GFX_rc6     (1ULL << 19)
468 #define BIC_GFXMHz      (1ULL << 20)
469 #define BIC_Pkgpc2      (1ULL << 21)
470 #define BIC_Pkgpc3      (1ULL << 22)
471 #define BIC_Pkgpc6      (1ULL << 23)
472 #define BIC_Pkgpc7      (1ULL << 24)
473 #define BIC_Pkgpc8      (1ULL << 25)
474 #define BIC_Pkgpc9      (1ULL << 26)
475 #define BIC_Pkgpc10     (1ULL << 27)
476 #define BIC_CPU_LPI     (1ULL << 28)
477 #define BIC_SYS_LPI     (1ULL << 29)
478 #define BIC_PkgWatt     (1ULL << 30)
479 #define BIC_CorWatt     (1ULL << 31)
480 #define BIC_GFXWatt     (1ULL << 32)
481 #define BIC_PkgCnt      (1ULL << 33)
482 #define BIC_RAMWatt     (1ULL << 34)
483 #define BIC_PKG__       (1ULL << 35)
484 #define BIC_RAM__       (1ULL << 36)
485 #define BIC_Pkg_J       (1ULL << 37)
486 #define BIC_Cor_J       (1ULL << 38)
487 #define BIC_GFX_J       (1ULL << 39)
488 #define BIC_RAM_J       (1ULL << 40)
489 #define BIC_Mod_c6      (1ULL << 41)
490 #define BIC_Totl_c0     (1ULL << 42)
491 #define BIC_Any_c0      (1ULL << 43)
492 #define BIC_GFX_c0      (1ULL << 44)
493 #define BIC_CPUGFX      (1ULL << 45)
494 #define BIC_Core        (1ULL << 46)
495 #define BIC_CPU         (1ULL << 47)
496 #define BIC_APIC        (1ULL << 48)
497 #define BIC_X2APIC      (1ULL << 49)
498
499 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
500
501 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
502 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
503
/*
 * Helpers for the built-in-counter bit-masks bic_enabled and bic_present.
 *
 * DO_BIC()          non-zero when any of the given bits is both enabled and present
 * ENABLE_BIC()      set the given bits in bic_enabled
 * BIC_PRESENT()     set the given bits in bic_present
 * BIC_NOT_PRESENT() clear the given bits in bic_present
 *
 * Arguments are parenthesized so that a mask built with '|' is applied as a
 * whole rather than being split by operator precedence.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
508
509
510 #define MAX_DEFERRED 16
511 char *deferred_skip_names[MAX_DEFERRED];
512 int deferred_skip_index;
513
514 /*
515  * HIDE_LIST - hide this list of counters, show the rest [default]
516  * SHOW_LIST - show this list of counters, hide the rest
517  */
518 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
519
520 void help(void)
521 {
522         fprintf(outf,
523         "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
524         "\n"
525         "Turbostat forks the specified COMMAND and prints statistics\n"
526         "when COMMAND completes.\n"
527         "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
528         "to print statistics, until interrupted.\n"
529         "  -a, --add    add a counter\n"
530         "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
531         "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
532         "                 {core | package | j,k,l..m,n-p }\n"
533         "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
534         "  -D, --Dump   displays the raw counter values\n"
535         "  -e, --enable [all | column]\n"
536         "               shows all or the specified disabled column\n"
537         "  -H, --hide [column|column,column,...]\n"
538         "               hide the specified column(s)\n"
539         "  -i, --interval sec.subsec\n"
540         "               Override default 5-second measurement interval\n"
541         "  -J, --Joules displays energy in Joules instead of Watts\n"
542         "  -l, --list   list column headers only\n"
543         "  -n, --num_iterations num\n"
544         "               number of the measurement iterations\n"
545         "  -o, --out file\n"
546         "               create or truncate \"file\" for all output\n"
547         "  -q, --quiet  skip decoding system configuration header\n"
548         "  -s, --show [column|column,column,...]\n"
549         "               show only the specified column(s)\n"
550         "  -S, --Summary\n"
551         "               limits output to 1-line system summary per interval\n"
552         "  -T, --TCC temperature\n"
553         "               sets the Thermal Control Circuit temperature in\n"
554         "                 degrees Celsius\n"
555         "  -h, --help   print this help message\n"
556         "  -v, --version        print version information\n"
557         "\n"
558         "For more help, run \"man turbostat\"\n");
559 }
560
561 /*
562  * bic_lookup
563  * for all the strings in comma separate name_list,
564  * set the approprate bit in return value.
565  */
566 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
567 {
568         int i;
569         unsigned long long retval = 0;
570
571         while (name_list) {
572                 char *comma;
573
574                 comma = strchr(name_list, ',');
575
576                 if (comma)
577                         *comma = '\0';
578
579                 if (!strcmp(name_list, "all"))
580                         return ~0;
581
582                 for (i = 0; i < MAX_BIC; ++i) {
583                         if (!strcmp(name_list, bic[i].name)) {
584                                 retval |= (1ULL << i);
585                                 break;
586                         }
587                 }
588                 if (i == MAX_BIC) {
589                         if (mode == SHOW_LIST) {
590                                 fprintf(stderr, "Invalid counter name: %s\n", name_list);
591                                 exit(-1);
592                         }
593                         deferred_skip_names[deferred_skip_index++] = name_list;
594                         if (debug)
595                                 fprintf(stderr, "deferred \"%s\"\n", name_list);
596                         if (deferred_skip_index >= MAX_DEFERRED) {
597                                 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
598                                         MAX_DEFERRED, name_list);
599                                 help();
600                                 exit(1);
601                         }
602                 }
603
604                 name_list = comma;
605                 if (name_list)
606                         name_list++;
607
608         }
609         return retval;
610 }
611
612
613 void print_header(char *delim)
614 {
615         struct msr_counter *mp;
616         int printed = 0;
617
618         if (DO_BIC(BIC_USEC))
619                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
620         if (DO_BIC(BIC_TOD))
621                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
622         if (DO_BIC(BIC_Package))
623                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
624         if (DO_BIC(BIC_Node))
625                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
626         if (DO_BIC(BIC_Core))
627                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
628         if (DO_BIC(BIC_CPU))
629                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
630         if (DO_BIC(BIC_APIC))
631                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
632         if (DO_BIC(BIC_X2APIC))
633                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
634         if (DO_BIC(BIC_Avg_MHz))
635                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
636         if (DO_BIC(BIC_Busy))
637                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
638         if (DO_BIC(BIC_Bzy_MHz))
639                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
640         if (DO_BIC(BIC_TSC_MHz))
641                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
642
643         if (DO_BIC(BIC_IRQ)) {
644                 if (sums_need_wide_columns)
645                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
646                 else
647                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
648         }
649
650         if (DO_BIC(BIC_SMI))
651                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
652
653         for (mp = sys.tp; mp; mp = mp->next) {
654
655                 if (mp->format == FORMAT_RAW) {
656                         if (mp->width == 64)
657                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
658                         else
659                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
660                 } else {
661                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
662                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
663                         else
664                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
665                 }
666         }
667
668         if (DO_BIC(BIC_CPU_c1))
669                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
670         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
671                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
672         if (DO_BIC(BIC_CPU_c6))
673                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
674         if (DO_BIC(BIC_CPU_c7))
675                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
676
677         if (DO_BIC(BIC_Mod_c6))
678                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
679
680         if (DO_BIC(BIC_CoreTmp))
681                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
682
683         for (mp = sys.cp; mp; mp = mp->next) {
684                 if (mp->format == FORMAT_RAW) {
685                         if (mp->width == 64)
686                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
687                         else
688                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
689                 } else {
690                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
691                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
692                         else
693                                 outp += sprintf(outp, "%s%s", delim, mp->name);
694                 }
695         }
696
697         if (DO_BIC(BIC_PkgTmp))
698                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
699
700         if (DO_BIC(BIC_GFX_rc6))
701                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
702
703         if (DO_BIC(BIC_GFXMHz))
704                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
705
706         if (DO_BIC(BIC_Totl_c0))
707                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
708         if (DO_BIC(BIC_Any_c0))
709                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
710         if (DO_BIC(BIC_GFX_c0))
711                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
712         if (DO_BIC(BIC_CPUGFX))
713                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
714
715         if (DO_BIC(BIC_Pkgpc2))
716                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
717         if (DO_BIC(BIC_Pkgpc3))
718                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
719         if (DO_BIC(BIC_Pkgpc6))
720                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
721         if (DO_BIC(BIC_Pkgpc7))
722                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
723         if (DO_BIC(BIC_Pkgpc8))
724                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
725         if (DO_BIC(BIC_Pkgpc9))
726                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
727         if (DO_BIC(BIC_Pkgpc10))
728                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
729         if (DO_BIC(BIC_CPU_LPI))
730                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
731         if (DO_BIC(BIC_SYS_LPI))
732                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
733
734         if (do_rapl && !rapl_joules) {
735                 if (DO_BIC(BIC_PkgWatt))
736                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
737                 if (DO_BIC(BIC_CorWatt))
738                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
739                 if (DO_BIC(BIC_GFXWatt))
740                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
741                 if (DO_BIC(BIC_RAMWatt))
742                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
743                 if (DO_BIC(BIC_PKG__))
744                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
745                 if (DO_BIC(BIC_RAM__))
746                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
747         } else if (do_rapl && rapl_joules) {
748                 if (DO_BIC(BIC_Pkg_J))
749                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
750                 if (DO_BIC(BIC_Cor_J))
751                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
752                 if (DO_BIC(BIC_GFX_J))
753                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
754                 if (DO_BIC(BIC_RAM_J))
755                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
756                 if (DO_BIC(BIC_PKG__))
757                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
758                 if (DO_BIC(BIC_RAM__))
759                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
760         }
761         for (mp = sys.pp; mp; mp = mp->next) {
762                 if (mp->format == FORMAT_RAW) {
763                         if (mp->width == 64)
764                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
765                         else
766                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
767                 } else {
768                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
769                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
770                         else
771                                 outp += sprintf(outp, "%s%s", delim, mp->name);
772                 }
773         }
774
775         outp += sprintf(outp, "\n");
776 }
777
778 int dump_counters(struct thread_data *t, struct core_data *c,
779         struct pkg_data *p)
780 {
781         int i;
782         struct msr_counter *mp;
783
784         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
785
786         if (t) {
787                 outp += sprintf(outp, "CPU: %d flags 0x%x\n",
788                         t->cpu_id, t->flags);
789                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
790                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
791                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
792                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
793
794                 if (DO_BIC(BIC_IRQ))
795                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
796                 if (DO_BIC(BIC_SMI))
797                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
798
799                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
800                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
801                                 i, mp->msr_num, t->counter[i]);
802                 }
803         }
804
805         if (c) {
806                 outp += sprintf(outp, "core: %d\n", c->core_id);
807                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
808                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
809                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
810                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
811
812                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
813                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
814                                 i, mp->msr_num, c->counter[i]);
815                 }
816                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
817         }
818
819         if (p) {
820                 outp += sprintf(outp, "package: %d\n", p->package_id);
821
822                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
823                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
824                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
825                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
826
827                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
828                 if (DO_BIC(BIC_Pkgpc3))
829                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
830                 if (DO_BIC(BIC_Pkgpc6))
831                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
832                 if (DO_BIC(BIC_Pkgpc7))
833                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
834                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
835                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
836                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
837                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
838                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
839                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
840                 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
841                 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
842                 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
843                 outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
844                 outp += sprintf(outp, "Throttle PKG: %0X\n",
845                         p->rapl_pkg_perf_status);
846                 outp += sprintf(outp, "Throttle RAM: %0X\n",
847                         p->rapl_dram_perf_status);
848                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
849
850                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
851                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
852                                 i, mp->msr_num, p->counter[i]);
853                 }
854         }
855
856         outp += sprintf(outp, "\n");
857
858         return 0;
859 }
860
861 /*
862  * column formatting convention & formats
863  */
864 int format_counters(struct thread_data *t, struct core_data *c,
865         struct pkg_data *p)
866 {
867         double interval_float, tsc;
868         char *fmt8;
869         int i;
870         struct msr_counter *mp;
871         char *delim = "\t";
872         int printed = 0;
873
874          /* if showing only 1st thread in core and this isn't one, bail out */
875         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
876                 return 0;
877
878          /* if showing only 1st thread in pkg and this isn't one, bail out */
879         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
880                 return 0;
881
882         /*if not summary line and --cpu is used */
883         if ((t != &average.threads) &&
884                 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
885                 return 0;
886
887         if (DO_BIC(BIC_USEC)) {
888                 /* on each row, print how many usec each timestamp took to gather */
889                 struct timeval tv;
890
891                 timersub(&t->tv_end, &t->tv_begin, &tv);
892                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
893         }
894
895         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
896         if (DO_BIC(BIC_TOD))
897                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
898
899         interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
900
901         tsc = t->tsc * tsc_tweak;
902
903         /* topo columns, print blanks on 1st (average) line */
904         if (t == &average.threads) {
905                 if (DO_BIC(BIC_Package))
906                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
907                 if (DO_BIC(BIC_Node))
908                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
909                 if (DO_BIC(BIC_Core))
910                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
911                 if (DO_BIC(BIC_CPU))
912                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
913                 if (DO_BIC(BIC_APIC))
914                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
915                 if (DO_BIC(BIC_X2APIC))
916                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
917         } else {
918                 if (DO_BIC(BIC_Package)) {
919                         if (p)
920                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
921                         else
922                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
923                 }
924                 if (DO_BIC(BIC_Node)) {
925                         if (t)
926                                 outp += sprintf(outp, "%s%d",
927                                                 (printed++ ? delim : ""),
928                                               cpus[t->cpu_id].physical_node_id);
929                         else
930                                 outp += sprintf(outp, "%s-",
931                                                 (printed++ ? delim : ""));
932                 }
933                 if (DO_BIC(BIC_Core)) {
934                         if (c)
935                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
936                         else
937                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
938                 }
939                 if (DO_BIC(BIC_CPU))
940                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
941                 if (DO_BIC(BIC_APIC))
942                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
943                 if (DO_BIC(BIC_X2APIC))
944                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
945         }
946
947         if (DO_BIC(BIC_Avg_MHz))
948                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
949                         1.0 / units * t->aperf / interval_float);
950
951         if (DO_BIC(BIC_Busy))
952                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
953
954         if (DO_BIC(BIC_Bzy_MHz)) {
955                 if (has_base_hz)
956                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
957                 else
958                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
959                                 tsc / units * t->aperf / t->mperf / interval_float);
960         }
961
962         if (DO_BIC(BIC_TSC_MHz))
963                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
964
965         /* IRQ */
966         if (DO_BIC(BIC_IRQ)) {
967                 if (sums_need_wide_columns)
968                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
969                 else
970                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
971         }
972
973         /* SMI */
974         if (DO_BIC(BIC_SMI))
975                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
976
977         /* Added counters */
978         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
979                 if (mp->format == FORMAT_RAW) {
980                         if (mp->width == 32)
981                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
982                         else
983                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
984                 } else if (mp->format == FORMAT_DELTA) {
985                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
986                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
987                         else
988                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
989                 } else if (mp->format == FORMAT_PERCENT) {
990                         if (mp->type == COUNTER_USEC)
991                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
992                         else
993                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
994                 }
995         }
996
997         /* C1 */
998         if (DO_BIC(BIC_CPU_c1))
999                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1000
1001
1002         /* print per-core data only for 1st thread in core */
1003         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1004                 goto done;
1005
1006         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
1007                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1008         if (DO_BIC(BIC_CPU_c6))
1009                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1010         if (DO_BIC(BIC_CPU_c7))
1011                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1012
1013         /* Mod%c6 */
1014         if (DO_BIC(BIC_Mod_c6))
1015                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1016
1017         if (DO_BIC(BIC_CoreTmp))
1018                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1019
1020         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1021                 if (mp->format == FORMAT_RAW) {
1022                         if (mp->width == 32)
1023                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1024                         else
1025                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1026                 } else if (mp->format == FORMAT_DELTA) {
1027                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1028                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1029                         else
1030                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1031                 } else if (mp->format == FORMAT_PERCENT) {
1032                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1033                 }
1034         }
1035
1036         /* print per-package data only for 1st core in package */
1037         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1038                 goto done;
1039
1040         /* PkgTmp */
1041         if (DO_BIC(BIC_PkgTmp))
1042                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1043
1044         /* GFXrc6 */
1045         if (DO_BIC(BIC_GFX_rc6)) {
1046                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1047                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1048                 } else {
1049                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1050                                 p->gfx_rc6_ms / 10.0 / interval_float);
1051                 }
1052         }
1053
1054         /* GFXMHz */
1055         if (DO_BIC(BIC_GFXMHz))
1056                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1057
1058         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1059         if (DO_BIC(BIC_Totl_c0))
1060                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1061         if (DO_BIC(BIC_Any_c0))
1062                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1063         if (DO_BIC(BIC_GFX_c0))
1064                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1065         if (DO_BIC(BIC_CPUGFX))
1066                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1067
1068         if (DO_BIC(BIC_Pkgpc2))
1069                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1070         if (DO_BIC(BIC_Pkgpc3))
1071                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1072         if (DO_BIC(BIC_Pkgpc6))
1073                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1074         if (DO_BIC(BIC_Pkgpc7))
1075                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1076         if (DO_BIC(BIC_Pkgpc8))
1077                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1078         if (DO_BIC(BIC_Pkgpc9))
1079                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1080         if (DO_BIC(BIC_Pkgpc10))
1081                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1082
1083         if (DO_BIC(BIC_CPU_LPI))
1084                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1085         if (DO_BIC(BIC_SYS_LPI))
1086                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1087
1088         /*
1089          * If measurement interval exceeds minimum RAPL Joule Counter range,
1090          * indicate that results are suspect by printing "**" in fraction place.
1091          */
1092         if (interval_float < rapl_joule_counter_range)
1093                 fmt8 = "%s%.2f";
1094         else
1095                 fmt8 = "%6.0f**";
1096
1097         if (DO_BIC(BIC_PkgWatt))
1098                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1099         if (DO_BIC(BIC_CorWatt))
1100                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1101         if (DO_BIC(BIC_GFXWatt))
1102                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1103         if (DO_BIC(BIC_RAMWatt))
1104                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1105         if (DO_BIC(BIC_Pkg_J))
1106                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1107         if (DO_BIC(BIC_Cor_J))
1108                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1109         if (DO_BIC(BIC_GFX_J))
1110                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1111         if (DO_BIC(BIC_RAM_J))
1112                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1113         if (DO_BIC(BIC_PKG__))
1114                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1115         if (DO_BIC(BIC_RAM__))
1116                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1117
1118         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1119                 if (mp->format == FORMAT_RAW) {
1120                         if (mp->width == 32)
1121                                 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1122                         else
1123                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1124                 } else if (mp->format == FORMAT_DELTA) {
1125                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1126                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1127                         else
1128                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1129                 } else if (mp->format == FORMAT_PERCENT) {
1130                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1131                 }
1132         }
1133
1134 done:
1135         if (*(outp - 1) != '\n')
1136                 outp += sprintf(outp, "\n");
1137
1138         return 0;
1139 }
1140
1141 void flush_output_stdout(void)
1142 {
1143         FILE *filep;
1144
1145         if (outf == stderr)
1146                 filep = stdout;
1147         else
1148                 filep = outf;
1149
1150         fputs(output_buffer, filep);
1151         fflush(filep);
1152
1153         outp = output_buffer;
1154 }
1155 void flush_output_stderr(void)
1156 {
1157         fputs(output_buffer, outf);
1158         fflush(outf);
1159         outp = output_buffer;
1160 }
1161 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1162 {
1163         static int printed;
1164
1165         if (!printed || !summary_only)
1166                 print_header("\t");
1167
1168         format_counters(&average.threads, &average.cores, &average.packages);
1169
1170         printed = 1;
1171
1172         if (summary_only)
1173                 return;
1174
1175         for_all_cpus(format_counters, t, c, p);
1176 }
1177
/*
 * DELTA_WRAP32(new, old) - replace 'old' with the increase from 'old' to
 * 'new', tolerating a single wrap of a 32-bit counter.
 *
 * When new > old the plain difference is stored; otherwise 2^32 is added
 * before subtracting.  'old' must be a modifiable lvalue.  Both arguments
 * are evaluated more than once, so they must have no side effects.
 *
 * The body is wrapped in do { } while (0) so that "DELTA_WRAP32(a, b);"
 * is exactly one statement and is safe as the body of an unbraced
 * if/else.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)
1184
1185 int
1186 delta_package(struct pkg_data *new, struct pkg_data *old)
1187 {
1188         int i;
1189         struct msr_counter *mp;
1190
1191
1192         if (DO_BIC(BIC_Totl_c0))
1193                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1194         if (DO_BIC(BIC_Any_c0))
1195                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1196         if (DO_BIC(BIC_GFX_c0))
1197                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1198         if (DO_BIC(BIC_CPUGFX))
1199                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1200
1201         old->pc2 = new->pc2 - old->pc2;
1202         if (DO_BIC(BIC_Pkgpc3))
1203                 old->pc3 = new->pc3 - old->pc3;
1204         if (DO_BIC(BIC_Pkgpc6))
1205                 old->pc6 = new->pc6 - old->pc6;
1206         if (DO_BIC(BIC_Pkgpc7))
1207                 old->pc7 = new->pc7 - old->pc7;
1208         old->pc8 = new->pc8 - old->pc8;
1209         old->pc9 = new->pc9 - old->pc9;
1210         old->pc10 = new->pc10 - old->pc10;
1211         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1212         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1213         old->pkg_temp_c = new->pkg_temp_c;
1214
1215         /* flag an error when rc6 counter resets/wraps */
1216         if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1217                 old->gfx_rc6_ms = -1;
1218         else
1219                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1220
1221         old->gfx_mhz = new->gfx_mhz;
1222
1223         DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1224         DELTA_WRAP32(new->energy_cores, old->energy_cores);
1225         DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1226         DELTA_WRAP32(new->energy_dram, old->energy_dram);
1227         DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1228         DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1229
1230         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1231                 if (mp->format == FORMAT_RAW)
1232                         old->counter[i] = new->counter[i];
1233                 else
1234                         old->counter[i] = new->counter[i] - old->counter[i];
1235         }
1236
1237         return 0;
1238 }
1239
1240 void
1241 delta_core(struct core_data *new, struct core_data *old)
1242 {
1243         int i;
1244         struct msr_counter *mp;
1245
1246         old->c3 = new->c3 - old->c3;
1247         old->c6 = new->c6 - old->c6;
1248         old->c7 = new->c7 - old->c7;
1249         old->core_temp_c = new->core_temp_c;
1250         old->mc6_us = new->mc6_us - old->mc6_us;
1251
1252         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1253                 if (mp->format == FORMAT_RAW)
1254                         old->counter[i] = new->counter[i];
1255                 else
1256                         old->counter[i] = new->counter[i] - old->counter[i];
1257         }
1258 }
1259
1260 /*
1261  * old = new - old
1262  */
1263 int
1264 delta_thread(struct thread_data *new, struct thread_data *old,
1265         struct core_data *core_delta)
1266 {
1267         int i;
1268         struct msr_counter *mp;
1269
1270         /* we run cpuid just the 1st time, copy the results */
1271         if (DO_BIC(BIC_APIC))
1272                 new->apic_id = old->apic_id;
1273         if (DO_BIC(BIC_X2APIC))
1274                 new->x2apic_id = old->x2apic_id;
1275
1276         /*
1277          * the timestamps from start of measurement interval are in "old"
1278          * the timestamp from end of measurement interval are in "new"
1279          * over-write old w/ new so we can print end of interval values
1280          */
1281
1282         old->tv_begin = new->tv_begin;
1283         old->tv_end = new->tv_end;
1284
1285         old->tsc = new->tsc - old->tsc;
1286
1287         /* check for TSC < 1 Mcycles over interval */
1288         if (old->tsc < (1000 * 1000))
1289                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1290                      "You can disable all c-states by booting with \"idle=poll\"\n"
1291                      "or just the deep ones with \"processor.max_cstate=1\"");
1292
1293         old->c1 = new->c1 - old->c1;
1294
1295         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1296                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1297                         old->aperf = new->aperf - old->aperf;
1298                         old->mperf = new->mperf - old->mperf;
1299                 } else {
1300                         return -1;
1301                 }
1302         }
1303
1304
1305         if (use_c1_residency_msr) {
1306                 /*
1307                  * Some models have a dedicated C1 residency MSR,
1308                  * which should be more accurate than the derivation below.
1309                  */
1310         } else {
1311                 /*
1312                  * As counter collection is not atomic,
1313                  * it is possible for mperf's non-halted cycles + idle states
1314                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1315                  */
1316                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1317                         old->c1 = 0;
1318                 else {
1319                         /* normal case, derive c1 */
1320                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1321                                 - core_delta->c6 - core_delta->c7;
1322                 }
1323         }
1324
1325         if (old->mperf == 0) {
1326                 if (debug > 1)
1327                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1328                 old->mperf = 1; /* divide by 0 protection */
1329         }
1330
1331         if (DO_BIC(BIC_IRQ))
1332                 old->irq_count = new->irq_count - old->irq_count;
1333
1334         if (DO_BIC(BIC_SMI))
1335                 old->smi_count = new->smi_count - old->smi_count;
1336
1337         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1338                 if (mp->format == FORMAT_RAW)
1339                         old->counter[i] = new->counter[i];
1340                 else
1341                         old->counter[i] = new->counter[i] - old->counter[i];
1342         }
1343         return 0;
1344 }
1345
1346 int delta_cpu(struct thread_data *t, struct core_data *c,
1347         struct pkg_data *p, struct thread_data *t2,
1348         struct core_data *c2, struct pkg_data *p2)
1349 {
1350         int retval = 0;
1351
1352         /* calculate core delta only for 1st thread in core */
1353         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1354                 delta_core(c, c2);
1355
1356         /* always calculate thread delta */
1357         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1358         if (retval)
1359                 return retval;
1360
1361         /* calculate package delta only for 1st core in package */
1362         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1363                 retval = delta_package(p, p2);
1364
1365         return retval;
1366 }
1367
1368 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1369 {
1370         int i;
1371         struct msr_counter  *mp;
1372
1373         t->tv_begin.tv_sec = 0;
1374         t->tv_begin.tv_usec = 0;
1375         t->tv_end.tv_sec = 0;
1376         t->tv_end.tv_usec = 0;
1377
1378         t->tsc = 0;
1379         t->aperf = 0;
1380         t->mperf = 0;
1381         t->c1 = 0;
1382
1383         t->irq_count = 0;
1384         t->smi_count = 0;
1385
1386         /* tells format_counters to dump all fields from this set */
1387         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1388
1389         c->c3 = 0;
1390         c->c6 = 0;
1391         c->c7 = 0;
1392         c->mc6_us = 0;
1393         c->core_temp_c = 0;
1394
1395         p->pkg_wtd_core_c0 = 0;
1396         p->pkg_any_core_c0 = 0;
1397         p->pkg_any_gfxe_c0 = 0;
1398         p->pkg_both_core_gfxe_c0 = 0;
1399
1400         p->pc2 = 0;
1401         if (DO_BIC(BIC_Pkgpc3))
1402                 p->pc3 = 0;
1403         if (DO_BIC(BIC_Pkgpc6))
1404                 p->pc6 = 0;
1405         if (DO_BIC(BIC_Pkgpc7))
1406                 p->pc7 = 0;
1407         p->pc8 = 0;
1408         p->pc9 = 0;
1409         p->pc10 = 0;
1410         p->cpu_lpi = 0;
1411         p->sys_lpi = 0;
1412
1413         p->energy_pkg = 0;
1414         p->energy_dram = 0;
1415         p->energy_cores = 0;
1416         p->energy_gfx = 0;
1417         p->rapl_pkg_perf_status = 0;
1418         p->rapl_dram_perf_status = 0;
1419         p->pkg_temp_c = 0;
1420
1421         p->gfx_rc6_ms = 0;
1422         p->gfx_mhz = 0;
1423         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1424                 t->counter[i] = 0;
1425
1426         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1427                 c->counter[i] = 0;
1428
1429         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1430                 p->counter[i] = 0;
1431 }
1432 int sum_counters(struct thread_data *t, struct core_data *c,
1433         struct pkg_data *p)
1434 {
1435         int i;
1436         struct msr_counter *mp;
1437
1438         /* copy un-changing apic_id's */
1439         if (DO_BIC(BIC_APIC))
1440                 average.threads.apic_id = t->apic_id;
1441         if (DO_BIC(BIC_X2APIC))
1442                 average.threads.x2apic_id = t->x2apic_id;
1443
1444         /* remember first tv_begin */
1445         if (average.threads.tv_begin.tv_sec == 0)
1446                 average.threads.tv_begin = t->tv_begin;
1447
1448         /* remember last tv_end */
1449         average.threads.tv_end = t->tv_end;
1450
1451         average.threads.tsc += t->tsc;
1452         average.threads.aperf += t->aperf;
1453         average.threads.mperf += t->mperf;
1454         average.threads.c1 += t->c1;
1455
1456         average.threads.irq_count += t->irq_count;
1457         average.threads.smi_count += t->smi_count;
1458
1459         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1460                 if (mp->format == FORMAT_RAW)
1461                         continue;
1462                 average.threads.counter[i] += t->counter[i];
1463         }
1464
1465         /* sum per-core values only for 1st thread in core */
1466         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1467                 return 0;
1468
1469         average.cores.c3 += c->c3;
1470         average.cores.c6 += c->c6;
1471         average.cores.c7 += c->c7;
1472         average.cores.mc6_us += c->mc6_us;
1473
1474         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1475
1476         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1477                 if (mp->format == FORMAT_RAW)
1478                         continue;
1479                 average.cores.counter[i] += c->counter[i];
1480         }
1481
1482         /* sum per-pkg values only for 1st core in pkg */
1483         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1484                 return 0;
1485
1486         if (DO_BIC(BIC_Totl_c0))
1487                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1488         if (DO_BIC(BIC_Any_c0))
1489                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1490         if (DO_BIC(BIC_GFX_c0))
1491                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1492         if (DO_BIC(BIC_CPUGFX))
1493                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1494
1495         average.packages.pc2 += p->pc2;
1496         if (DO_BIC(BIC_Pkgpc3))
1497                 average.packages.pc3 += p->pc3;
1498         if (DO_BIC(BIC_Pkgpc6))
1499                 average.packages.pc6 += p->pc6;
1500         if (DO_BIC(BIC_Pkgpc7))
1501                 average.packages.pc7 += p->pc7;
1502         average.packages.pc8 += p->pc8;
1503         average.packages.pc9 += p->pc9;
1504         average.packages.pc10 += p->pc10;
1505
1506         average.packages.cpu_lpi = p->cpu_lpi;
1507         average.packages.sys_lpi = p->sys_lpi;
1508
1509         average.packages.energy_pkg += p->energy_pkg;
1510         average.packages.energy_dram += p->energy_dram;
1511         average.packages.energy_cores += p->energy_cores;
1512         average.packages.energy_gfx += p->energy_gfx;
1513
1514         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1515         average.packages.gfx_mhz = p->gfx_mhz;
1516
1517         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1518
1519         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1520         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1521
1522         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1523                 if (mp->format == FORMAT_RAW)
1524                         continue;
1525                 average.packages.counter[i] += p->counter[i];
1526         }
1527         return 0;
1528 }
1529 /*
1530  * sum the counters for all cpus in the system
1531  * compute the weighted average
1532  */
1533 void compute_average(struct thread_data *t, struct core_data *c,
1534         struct pkg_data *p)
1535 {
1536         int i;
1537         struct msr_counter *mp;
1538
1539         clear_counters(&average.threads, &average.cores, &average.packages);
1540
1541         for_all_cpus(sum_counters, t, c, p);
1542
1543         average.threads.tsc /= topo.num_cpus;
1544         average.threads.aperf /= topo.num_cpus;
1545         average.threads.mperf /= topo.num_cpus;
1546         average.threads.c1 /= topo.num_cpus;
1547
1548         if (average.threads.irq_count > 9999999)
1549                 sums_need_wide_columns = 1;
1550
1551         average.cores.c3 /= topo.num_cores;
1552         average.cores.c6 /= topo.num_cores;
1553         average.cores.c7 /= topo.num_cores;
1554         average.cores.mc6_us /= topo.num_cores;
1555
1556         if (DO_BIC(BIC_Totl_c0))
1557                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1558         if (DO_BIC(BIC_Any_c0))
1559                 average.packages.pkg_any_core_c0 /= topo.num_packages;
1560         if (DO_BIC(BIC_GFX_c0))
1561                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1562         if (DO_BIC(BIC_CPUGFX))
1563                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1564
1565         average.packages.pc2 /= topo.num_packages;
1566         if (DO_BIC(BIC_Pkgpc3))
1567                 average.packages.pc3 /= topo.num_packages;
1568         if (DO_BIC(BIC_Pkgpc6))
1569                 average.packages.pc6 /= topo.num_packages;
1570         if (DO_BIC(BIC_Pkgpc7))
1571                 average.packages.pc7 /= topo.num_packages;
1572
1573         average.packages.pc8 /= topo.num_packages;
1574         average.packages.pc9 /= topo.num_packages;
1575         average.packages.pc10 /= topo.num_packages;
1576
1577         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1578                 if (mp->format == FORMAT_RAW)
1579                         continue;
1580                 if (mp->type == COUNTER_ITEMS) {
1581                         if (average.threads.counter[i] > 9999999)
1582                                 sums_need_wide_columns = 1;
1583                         continue;
1584                 }
1585                 average.threads.counter[i] /= topo.num_cpus;
1586         }
1587         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1588                 if (mp->format == FORMAT_RAW)
1589                         continue;
1590                 if (mp->type == COUNTER_ITEMS) {
1591                         if (average.cores.counter[i] > 9999999)
1592                                 sums_need_wide_columns = 1;
1593                 }
1594                 average.cores.counter[i] /= topo.num_cores;
1595         }
1596         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1597                 if (mp->format == FORMAT_RAW)
1598                         continue;
1599                 if (mp->type == COUNTER_ITEMS) {
1600                         if (average.packages.counter[i] > 9999999)
1601                                 sums_need_wide_columns = 1;
1602                 }
1603                 average.packages.counter[i] /= topo.num_packages;
1604         }
1605 }
1606
1607 static unsigned long long rdtsc(void)
1608 {
1609         unsigned int low, high;
1610
1611         asm volatile("rdtsc" : "=a" (low), "=d" (high));
1612
1613         return low | ((unsigned long long)high) << 32;
1614 }
1615
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: if the file cannot be
 * opened, report "<path>: open failed" via err() and exit with status 1.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse one integer from it and return it as
 * the snapshot of that counter.
 *
 * Exits with status 1 (via fopen_or_die() / err()) if the file cannot
 * be opened or does not start with a parsable number.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/*
	 * The destination is unsigned long long, so the conversion must be
	 * %llu: %lld expects a signed pointer and cannot represent values
	 * above LLONG_MAX.
	 */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1648
1649 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1650 {
1651         if (mp->msr_num != 0) {
1652                 if (get_msr(cpu, mp->msr_num, counterp))
1653                         return -1;
1654         } else {
1655                 char path[128 + PATH_BYTES];
1656
1657                 if (mp->flags & SYSFS_PERCPU) {
1658                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1659                                  cpu, mp->path);
1660
1661                         *counterp = snapshot_sysfs_counter(path);
1662                 } else {
1663                         *counterp = snapshot_sysfs_counter(mp->path);
1664                 }
1665         }
1666
1667         return 0;
1668 }
1669
1670 void get_apic_id(struct thread_data *t)
1671 {
1672         unsigned int eax, ebx, ecx, edx;
1673
1674         if (DO_BIC(BIC_APIC)) {
1675                 eax = ebx = ecx = edx = 0;
1676                 __cpuid(1, eax, ebx, ecx, edx);
1677
1678                 t->apic_id = (ebx >> 24) & 0xff;
1679         }
1680
1681         if (!DO_BIC(BIC_X2APIC))
1682                 return;
1683
1684         if (authentic_amd) {
1685                 unsigned int topology_extensions;
1686
1687                 if (max_extended_level < 0x8000001e)
1688                         return;
1689
1690                 eax = ebx = ecx = edx = 0;
1691                 __cpuid(0x80000001, eax, ebx, ecx, edx);
1692                         topology_extensions = ecx & (1 << 22);
1693
1694                 if (topology_extensions == 0)
1695                         return;
1696
1697                 eax = ebx = ecx = edx = 0;
1698                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
1699
1700                 t->x2apic_id = eax;
1701                 return;
1702         }
1703
1704         if (!genuine_intel)
1705                 return;
1706
1707         if (max_level < 0xb)
1708                 return;
1709
1710         ecx = 0;
1711         __cpuid(0xb, eax, ebx, ecx, edx);
1712         t->x2apic_id = edx;
1713
1714         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1715                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1716                                 t->cpu_id, t->apic_id, t->x2apic_id);
1717 }
1718
1719 /*
1720  * get_counters(...)
1721  * migrate to cpu
1722  * acquire and record local counters for that cpu
1723  */
1724 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1725 {
1726         int cpu = t->cpu_id;
1727         unsigned long long msr;
1728         int aperf_mperf_retry_count = 0;
1729         struct msr_counter *mp;
1730         int i;
1731
1732         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1733
1734         if (cpu_migrate(cpu)) {
1735                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1736                 return -1;
1737         }
1738
1739         if (first_counter_read)
1740                 get_apic_id(t);
1741 retry:
1742         t->tsc = rdtsc();       /* we are running on local CPU of interest */
1743
1744         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1745                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1746
1747                 /*
1748                  * The TSC, APERF and MPERF must be read together for
1749                  * APERF/MPERF and MPERF/TSC to give accurate results.
1750                  *
1751                  * Unfortunately, APERF and MPERF are read by
1752                  * individual system call, so delays may occur
1753                  * between them.  If the time to read them
1754                  * varies by a large amount, we re-read them.
1755                  */
1756
1757                 /*
1758                  * This initial dummy APERF read has been seen to
1759                  * reduce jitter in the subsequent reads.
1760                  */
1761
1762                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1763                         return -3;
1764
1765                 t->tsc = rdtsc();       /* re-read close to APERF */
1766
1767                 tsc_before = t->tsc;
1768
1769                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1770                         return -3;
1771
1772                 tsc_between = rdtsc();
1773
1774                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1775                         return -4;
1776
1777                 tsc_after = rdtsc();
1778
1779                 aperf_time = tsc_between - tsc_before;
1780                 mperf_time = tsc_after - tsc_between;
1781
1782                 /*
1783                  * If the system call latency to read APERF and MPERF
1784                  * differ by more than 2x, then try again.
1785                  */
1786                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1787                         aperf_mperf_retry_count++;
1788                         if (aperf_mperf_retry_count < 5)
1789                                 goto retry;
1790                         else
1791                                 warnx("cpu%d jitter %lld %lld",
1792                                         cpu, aperf_time, mperf_time);
1793                 }
1794                 aperf_mperf_retry_count = 0;
1795
1796                 t->aperf = t->aperf * aperf_mperf_multiplier;
1797                 t->mperf = t->mperf * aperf_mperf_multiplier;
1798         }
1799
1800         if (DO_BIC(BIC_IRQ))
1801                 t->irq_count = irqs_per_cpu[cpu];
1802         if (DO_BIC(BIC_SMI)) {
1803                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1804                         return -5;
1805                 t->smi_count = msr & 0xFFFFFFFF;
1806         }
1807         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1808                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1809                         return -6;
1810         }
1811
1812         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1813                 if (get_mp(cpu, mp, &t->counter[i]))
1814                         return -10;
1815         }
1816
1817         /* collect core counters only for 1st thread in core */
1818         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1819                 goto done;
1820
1821         if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1822                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1823                         return -6;
1824         }
1825
1826         if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1827                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1828                         return -7;
1829         } else if (do_knl_cstates) {
1830                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1831                         return -7;
1832         }
1833
1834         if (DO_BIC(BIC_CPU_c7))
1835                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1836                         return -8;
1837
1838         if (DO_BIC(BIC_Mod_c6))
1839                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1840                         return -8;
1841
1842         if (DO_BIC(BIC_CoreTmp)) {
1843                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1844                         return -9;
1845                 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1846         }
1847
1848         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1849                 if (get_mp(cpu, mp, &c->counter[i]))
1850                         return -10;
1851         }
1852
1853         /* collect package counters only for 1st core in package */
1854         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1855                 goto done;
1856
1857         if (DO_BIC(BIC_Totl_c0)) {
1858                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1859                         return -10;
1860         }
1861         if (DO_BIC(BIC_Any_c0)) {
1862                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1863                         return -11;
1864         }
1865         if (DO_BIC(BIC_GFX_c0)) {
1866                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1867                         return -12;
1868         }
1869         if (DO_BIC(BIC_CPUGFX)) {
1870                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1871                         return -13;
1872         }
1873         if (DO_BIC(BIC_Pkgpc3))
1874                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1875                         return -9;
1876         if (DO_BIC(BIC_Pkgpc6)) {
1877                 if (do_slm_cstates) {
1878                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1879                                 return -10;
1880                 } else {
1881                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1882                                 return -10;
1883                 }
1884         }
1885
1886         if (DO_BIC(BIC_Pkgpc2))
1887                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1888                         return -11;
1889         if (DO_BIC(BIC_Pkgpc7))
1890                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1891                         return -12;
1892         if (DO_BIC(BIC_Pkgpc8))
1893                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1894                         return -13;
1895         if (DO_BIC(BIC_Pkgpc9))
1896                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1897                         return -13;
1898         if (DO_BIC(BIC_Pkgpc10))
1899                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1900                         return -13;
1901
1902         if (DO_BIC(BIC_CPU_LPI))
1903                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1904         if (DO_BIC(BIC_SYS_LPI))
1905                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1906
1907         if (do_rapl & RAPL_PKG) {
1908                 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1909                         return -13;
1910                 p->energy_pkg = msr & 0xFFFFFFFF;
1911         }
1912         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1913                 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1914                         return -14;
1915                 p->energy_cores = msr & 0xFFFFFFFF;
1916         }
1917         if (do_rapl & RAPL_DRAM) {
1918                 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1919                         return -15;
1920                 p->energy_dram = msr & 0xFFFFFFFF;
1921         }
1922         if (do_rapl & RAPL_GFX) {
1923                 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1924                         return -16;
1925                 p->energy_gfx = msr & 0xFFFFFFFF;
1926         }
1927         if (do_rapl & RAPL_PKG_PERF_STATUS) {
1928                 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1929                         return -16;
1930                 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1931         }
1932         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1933                 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1934                         return -16;
1935                 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1936         }
1937         if (DO_BIC(BIC_PkgTmp)) {
1938                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1939                         return -17;
1940                 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1941         }
1942
1943         if (DO_BIC(BIC_GFX_rc6))
1944                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1945
1946         if (DO_BIC(BIC_GFXMHz))
1947                 p->gfx_mhz = gfx_cur_mhz;
1948
1949         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1950                 if (get_mp(cpu, mp, &p->counter[i]))
1951                         return -10;
1952         }
1953 done:
1954         gettimeofday(&t->tv_end, (struct timezone *)NULL);
1955
1956         return 0;
1957 }
1958
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value.  Designated initializers tie every
 * string to its index, so PCLUKN maps to "unknown" and PCLRSV to
 * "reserved" (a positional list had these two swapped).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCL_10] = "pc10",
	[PCLUNL] = "unlimited",
};

/* Per-platform tables of 16 entries, each entry a PCL* value. */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1994
1995
1996 static void
1997 calculate_tsc_tweak()
1998 {
1999         tsc_tweak = base_hz / tsc_hz;
2000 }
2001
2002 static void
2003 dump_nhm_platform_info(void)
2004 {
2005         unsigned long long msr;
2006         unsigned int ratio;
2007
2008         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2009
2010         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2011
2012         ratio = (msr >> 40) & 0xFF;
2013         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2014                 ratio, bclk, ratio * bclk);
2015
2016         ratio = (msr >> 8) & 0xFF;
2017         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2018                 ratio, bclk, ratio * bclk);
2019
2020         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2021         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2022                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2023
2024         return;
2025 }
2026
2027 static void
2028 dump_hsw_turbo_ratio_limits(void)
2029 {
2030         unsigned long long msr;
2031         unsigned int ratio;
2032
2033         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2034
2035         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2036
2037         ratio = (msr >> 8) & 0xFF;
2038         if (ratio)
2039                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2040                         ratio, bclk, ratio * bclk);
2041
2042         ratio = (msr >> 0) & 0xFF;
2043         if (ratio)
2044                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2045                         ratio, bclk, ratio * bclk);
2046         return;
2047 }
2048
2049 static void
2050 dump_ivt_turbo_ratio_limits(void)
2051 {
2052         unsigned long long msr;
2053         unsigned int ratio;
2054
2055         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2056
2057         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2058
2059         ratio = (msr >> 56) & 0xFF;
2060         if (ratio)
2061                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2062                         ratio, bclk, ratio * bclk);
2063
2064         ratio = (msr >> 48) & 0xFF;
2065         if (ratio)
2066                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2067                         ratio, bclk, ratio * bclk);
2068
2069         ratio = (msr >> 40) & 0xFF;
2070         if (ratio)
2071                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2072                         ratio, bclk, ratio * bclk);
2073
2074         ratio = (msr >> 32) & 0xFF;
2075         if (ratio)
2076                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2077                         ratio, bclk, ratio * bclk);
2078
2079         ratio = (msr >> 24) & 0xFF;
2080         if (ratio)
2081                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2082                         ratio, bclk, ratio * bclk);
2083
2084         ratio = (msr >> 16) & 0xFF;
2085         if (ratio)
2086                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2087                         ratio, bclk, ratio * bclk);
2088
2089         ratio = (msr >> 8) & 0xFF;
2090         if (ratio)
2091                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2092                         ratio, bclk, ratio * bclk);
2093
2094         ratio = (msr >> 0) & 0xFF;
2095         if (ratio)
2096                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2097                         ratio, bclk, ratio * bclk);
2098         return;
2099 }
2100 int has_turbo_ratio_group_limits(int family, int model)
2101 {
2102
2103         if (!genuine_intel)
2104                 return 0;
2105
2106         switch (model) {
2107         case INTEL_FAM6_ATOM_GOLDMONT:
2108         case INTEL_FAM6_SKYLAKE_X:
2109         case INTEL_FAM6_ATOM_GOLDMONT_X:
2110                 return 1;
2111         }
2112         return 0;
2113 }
2114
2115 static void
2116 dump_turbo_ratio_limits(int family, int model)
2117 {
2118         unsigned long long msr, core_counts;
2119         unsigned int ratio, group_size;
2120
2121         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2122         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2123
2124         if (has_turbo_ratio_group_limits(family, model)) {
2125                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2126                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2127         } else {
2128                 core_counts = 0x0807060504030201;
2129         }
2130
2131         ratio = (msr >> 56) & 0xFF;
2132         group_size = (core_counts >> 56) & 0xFF;
2133         if (ratio)
2134                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2135                         ratio, bclk, ratio * bclk, group_size);
2136
2137         ratio = (msr >> 48) & 0xFF;
2138         group_size = (core_counts >> 48) & 0xFF;
2139         if (ratio)
2140                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2141                         ratio, bclk, ratio * bclk, group_size);
2142
2143         ratio = (msr >> 40) & 0xFF;
2144         group_size = (core_counts >> 40) & 0xFF;
2145         if (ratio)
2146                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2147                         ratio, bclk, ratio * bclk, group_size);
2148
2149         ratio = (msr >> 32) & 0xFF;
2150         group_size = (core_counts >> 32) & 0xFF;
2151         if (ratio)
2152                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2153                         ratio, bclk, ratio * bclk, group_size);
2154
2155         ratio = (msr >> 24) & 0xFF;
2156         group_size = (core_counts >> 24) & 0xFF;
2157         if (ratio)
2158                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2159                         ratio, bclk, ratio * bclk, group_size);
2160
2161         ratio = (msr >> 16) & 0xFF;
2162         group_size = (core_counts >> 16) & 0xFF;
2163         if (ratio)
2164                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2165                         ratio, bclk, ratio * bclk, group_size);
2166
2167         ratio = (msr >> 8) & 0xFF;
2168         group_size = (core_counts >> 8) & 0xFF;
2169         if (ratio)
2170                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2171                         ratio, bclk, ratio * bclk, group_size);
2172
2173         ratio = (msr >> 0) & 0xFF;
2174         group_size = (core_counts >> 0) & 0xFF;
2175         if (ratio)
2176                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2177                         ratio, bclk, ratio * bclk, group_size);
2178         return;
2179 }
2180
2181 static void
2182 dump_atom_turbo_ratio_limits(void)
2183 {
2184         unsigned long long msr;
2185         unsigned int ratio;
2186
2187         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2188         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2189
2190         ratio = (msr >> 0) & 0x3F;
2191         if (ratio)
2192                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2193                         ratio, bclk, ratio * bclk);
2194
2195         ratio = (msr >> 8) & 0x3F;
2196         if (ratio)
2197                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2198                         ratio, bclk, ratio * bclk);
2199
2200         ratio = (msr >> 16) & 0x3F;
2201         if (ratio)
2202                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2203                         ratio, bclk, ratio * bclk);
2204
2205         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2206         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2207
2208         ratio = (msr >> 24) & 0x3F;
2209         if (ratio)
2210                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2211                         ratio, bclk, ratio * bclk);
2212
2213         ratio = (msr >> 16) & 0x3F;
2214         if (ratio)
2215                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2216                         ratio, bclk, ratio * bclk);
2217
2218         ratio = (msr >> 8) & 0x3F;
2219         if (ratio)
2220                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2221                         ratio, bclk, ratio * bclk);
2222
2223         ratio = (msr >> 0) & 0x3F;
2224         if (ratio)
2225                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2226                         ratio, bclk, ratio * bclk);
2227 }
2228
2229 static void
2230 dump_knl_turbo_ratio_limits(void)
2231 {
2232         const unsigned int buckets_no = 7;
2233
2234         unsigned long long msr;
2235         int delta_cores, delta_ratio;
2236         int i, b_nr;
2237         unsigned int cores[buckets_no];
2238         unsigned int ratio[buckets_no];
2239
2240         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2241
2242         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2243                 base_cpu, msr);
2244
2245         /**
2246          * Turbo encoding in KNL is as follows:
2247          * [0] -- Reserved
2248          * [7:1] -- Base value of number of active cores of bucket 1.
2249          * [15:8] -- Base value of freq ratio of bucket 1.
2250          * [20:16] -- +ve delta of number of active cores of bucket 2.
2251          * i.e. active cores of bucket 2 =
2252          * active cores of bucket 1 + delta
2253          * [23:21] -- Negative delta of freq ratio of bucket 2.
2254          * i.e. freq ratio of bucket 2 =
2255          * freq ratio of bucket 1 - delta
2256          * [28:24]-- +ve delta of number of active cores of bucket 3.
2257          * [31:29]-- -ve delta of freq ratio of bucket 3.
2258          * [36:32]-- +ve delta of number of active cores of bucket 4.
2259          * [39:37]-- -ve delta of freq ratio of bucket 4.
2260          * [44:40]-- +ve delta of number of active cores of bucket 5.
2261          * [47:45]-- -ve delta of freq ratio of bucket 5.
2262          * [52:48]-- +ve delta of number of active cores of bucket 6.
2263          * [55:53]-- -ve delta of freq ratio of bucket 6.
2264          * [60:56]-- +ve delta of number of active cores of bucket 7.
2265          * [63:61]-- -ve delta of freq ratio of bucket 7.
2266          */
2267
2268         b_nr = 0;
2269         cores[b_nr] = (msr & 0xFF) >> 1;
2270         ratio[b_nr] = (msr >> 8) & 0xFF;
2271
2272         for (i = 16; i < 64; i += 8) {
2273                 delta_cores = (msr >> i) & 0x1F;
2274                 delta_ratio = (msr >> (i + 5)) & 0x7;
2275
2276                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2277                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2278                 b_nr++;
2279         }
2280
2281         for (i = buckets_no - 1; i >= 0; i--)
2282                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2283                         fprintf(outf,
2284                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2285                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2286 }
2287
2288 static void
2289 dump_nhm_cst_cfg(void)
2290 {
2291         unsigned long long msr;
2292
2293         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2294
2295         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2296
2297         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2298                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2299                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2300                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2301                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2302                 (msr & (1 << 15)) ? "" : "UN",
2303                 (unsigned int)msr & 0xF,
2304                 pkg_cstate_limit_strings[pkg_cstate_limit]);
2305
2306 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2307         if (has_automatic_cstate_conversion) {
2308                 fprintf(outf, ", automatic c-state conversion=%s",
2309                         (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2310         }
2311
2312         fprintf(outf, ")\n");
2313
2314         return;
2315 }
2316
2317 static void
2318 dump_config_tdp(void)
2319 {
2320         unsigned long long msr;
2321
2322         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2323         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2324         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2325
2326         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2327         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2328         if (msr) {
2329                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2330                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2331                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2332                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2333         }
2334         fprintf(outf, ")\n");
2335
2336         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2337         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2338         if (msr) {
2339                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2340                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2341                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2342                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2343         }
2344         fprintf(outf, ")\n");
2345
2346         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2347         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2348         if ((msr) & 0x3)
2349                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2350         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2351         fprintf(outf, ")\n");
2352
2353         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2354         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2355         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2356         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2357         fprintf(outf, ")\n");
2358 }
2359
2360 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2361
2362 void print_irtl(void)
2363 {
2364         unsigned long long msr;
2365
2366         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2367         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2368         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2369                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2370
2371         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2372         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2373         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2374                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2375
2376         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2377         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2378         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2379                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2380
2381         if (!do_irtl_hsw)
2382                 return;
2383
2384         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2385         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2386         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2387                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2388
2389         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2390         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2391         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2392                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2393
2394         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2395         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2396         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2397                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2398
2399 }
2400 void free_fd_percpu(void)
2401 {
2402         int i;
2403
2404         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2405                 if (fd_percpu[i] != 0)
2406                         close(fd_percpu[i]);
2407         }
2408
2409         free(fd_percpu);
2410 }
2411
2412 void free_all_buffers(void)
2413 {
2414         int i;
2415
2416         CPU_FREE(cpu_present_set);
2417         cpu_present_set = NULL;
2418         cpu_present_setsize = 0;
2419
2420         CPU_FREE(cpu_affinity_set);
2421         cpu_affinity_set = NULL;
2422         cpu_affinity_setsize = 0;
2423
2424         free(thread_even);
2425         free(core_even);
2426         free(package_even);
2427
2428         thread_even = NULL;
2429         core_even = NULL;
2430         package_even = NULL;
2431
2432         free(thread_odd);
2433         free(core_odd);
2434         free(package_odd);
2435
2436         thread_odd = NULL;
2437         core_odd = NULL;
2438         package_odd = NULL;
2439
2440         free(output_buffer);
2441         output_buffer = NULL;
2442         outp = NULL;
2443
2444         free_fd_percpu();
2445
2446         free(irq_column_2_cpu);
2447         free(irqs_per_cpu);
2448
2449         for (i = 0; i <= topo.max_cpu_num; ++i) {
2450                 if (cpus[i].put_ids)
2451                         CPU_FREE(cpus[i].put_ids);
2452         }
2453         free(cpus);
2454 }
2455
2456
2457 /*
2458  * Parse a file containing a single int.
2459  */
2460 int parse_int_file(const char *fmt, ...)
2461 {
2462         va_list args;
2463         char path[PATH_MAX];
2464         FILE *filep;
2465         int value;
2466
2467         va_start(args, fmt);
2468         vsnprintf(path, sizeof(path), fmt, args);
2469         va_end(args);
2470         filep = fopen_or_die(path, "r");
2471         if (fscanf(filep, "%d", &value) != 1)
2472                 err(1, "%s: failed to parse number from file", path);
2473         fclose(filep);
2474         return value;
2475 }
2476
/*
 * cpu_is_first_core_in_package(cpu)
 *
 * Return 1 when the leading integer of this cpu's core_siblings_list
 * sysfs file equals cpu, otherwise 0.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2485
/*
 * Return the integer stored in the topology/physical_package_id sysfs
 * file of the given cpu.
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2490
/*
 * Return the integer stored in the topology/core_id sysfs file of the
 * given cpu.
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2495
2496 void set_node_data(void)
2497 {
2498         int pkg, node, lnode, cpu, cpux;
2499         int cpu_count;
2500
2501         /* initialize logical_node_id */
2502         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2503                 cpus[cpu].logical_node_id = -1;
2504
2505         cpu_count = 0;
2506         for (pkg = 0; pkg < topo.num_packages; pkg++) {
2507                 lnode = 0;
2508                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2509                         if (cpus[cpu].physical_package_id != pkg)
2510                                 continue;
2511                         /* find a cpu with an unset logical_node_id */
2512                         if (cpus[cpu].logical_node_id != -1)
2513                                 continue;
2514                         cpus[cpu].logical_node_id = lnode;
2515                         node = cpus[cpu].physical_node_id;
2516                         cpu_count++;
2517                         /*
2518                          * find all matching cpus on this pkg and set
2519                          * the logical_node_id
2520                          */
2521                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2522                                 if ((cpus[cpux].physical_package_id == pkg) &&
2523                                    (cpus[cpux].physical_node_id == node)) {
2524                                         cpus[cpux].logical_node_id = lnode;
2525                                         cpu_count++;
2526                                 }
2527                         }
2528                         lnode++;
2529                         if (lnode > topo.nodes_per_pkg)
2530                                 topo.nodes_per_pkg = lnode;
2531                 }
2532                 if (cpu_count >= topo.max_cpu_num)
2533                         break;
2534         }
2535 }
2536
2537 int get_physical_node_id(struct cpu_topology *thiscpu)
2538 {
2539         char path[80];
2540         FILE *filep;
2541         int i;
2542         int cpu = thiscpu->logical_cpu_id;
2543
2544         for (i = 0; i <= topo.max_cpu_num; i++) {
2545                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2546                         cpu, i);
2547                 filep = fopen(path, "r");
2548                 if (!filep)
2549                         continue;
2550                 fclose(filep);
2551                 return i;
2552         }
2553         return -1;
2554 }
2555
2556 int get_thread_siblings(struct cpu_topology *thiscpu)
2557 {
2558         char path[80], character;
2559         FILE *filep;
2560         unsigned long map;
2561         int so, shift, sib_core;
2562         int cpu = thiscpu->logical_cpu_id;
2563         int offset = topo.max_cpu_num + 1;
2564         size_t size;
2565         int thread_id = 0;
2566
2567         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2568         if (thiscpu->thread_id < 0)
2569                 thiscpu->thread_id = thread_id++;
2570         if (!thiscpu->put_ids)
2571                 return -1;
2572
2573         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2574         CPU_ZERO_S(size, thiscpu->put_ids);
2575
2576         sprintf(path,
2577                 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2578         filep = fopen_or_die(path, "r");
2579         do {
2580                 offset -= BITMASK_SIZE;
2581                 fscanf(filep, "%lx%c", &map, &character);
2582                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2583                         if ((map >> shift) & 0x1) {
2584                                 so = shift + offset;
2585                                 sib_core = get_core_id(so);
2586                                 if (sib_core == thiscpu->physical_core_id) {
2587                                         CPU_SET_S(so, size, thiscpu->put_ids);
2588                                         if ((so != cpu) &&
2589                                             (cpus[so].thread_id < 0))
2590                                                 cpus[so].thread_id =
2591                                                                     thread_id++;
2592                                 }
2593                         }
2594                 }
2595         } while (!strncmp(&character, ",", 1));
2596         fclose(filep);
2597
2598         return CPU_COUNT_S(size, thiscpu->put_ids);
2599 }
2600
2601 /*
2602  * run func(thread, core, package) in topology order
2603  * skip non-present cpus
2604  */
2605
2606 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2607         struct pkg_data *, struct thread_data *, struct core_data *,
2608         struct pkg_data *), struct thread_data *thread_base,
2609         struct core_data *core_base, struct pkg_data *pkg_base,
2610         struct thread_data *thread_base2, struct core_data *core_base2,
2611         struct pkg_data *pkg_base2)
2612 {
2613         int retval, pkg_no, node_no, core_no, thread_no;
2614
2615         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2616                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2617                         for (core_no = 0; core_no < topo.cores_per_node;
2618                              ++core_no) {
2619                                 for (thread_no = 0; thread_no <
2620                                         topo.threads_per_core; ++thread_no) {
2621                                         struct thread_data *t, *t2;
2622                                         struct core_data *c, *c2;
2623                                         struct pkg_data *p, *p2;
2624
2625                                         t = GET_THREAD(thread_base, thread_no,
2626                                                        core_no, node_no,
2627                                                        pkg_no);
2628
2629                                         if (cpu_is_not_present(t->cpu_id))
2630                                                 continue;
2631
2632                                         t2 = GET_THREAD(thread_base2, thread_no,
2633                                                         core_no, node_no,
2634                                                         pkg_no);
2635
2636                                         c = GET_CORE(core_base, core_no,
2637                                                      node_no, pkg_no);
2638                                         c2 = GET_CORE(core_base2, core_no,
2639                                                       node_no,
2640                                                       pkg_no);
2641
2642                                         p = GET_PKG(pkg_base, pkg_no);
2643                                         p2 = GET_PKG(pkg_base2, pkg_no);
2644
2645                                         retval = func(t, c, p, t2, c2, p2);
2646                                         if (retval)
2647                                                 return retval;
2648                                 }
2649                         }
2650                 }
2651         }
2652         return 0;
2653 }
2654
2655 /*
2656  * run func(cpu) on every cpu in /proc/stat
2657  * return max_cpu number
2658  */
2659 int for_all_proc_cpus(int (func)(int))
2660 {
2661         FILE *fp;
2662         int cpu_num;
2663         int retval;
2664
2665         fp = fopen_or_die(proc_stat, "r");
2666
2667         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2668         if (retval != 0)
2669                 err(1, "%s: failed to parse format", proc_stat);
2670
2671         while (1) {
2672                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2673                 if (retval != 1)
2674                         break;
2675
2676                 retval = func(cpu_num);
2677                 if (retval) {
2678                         fclose(fp);
2679                         return(retval);
2680                 }
2681         }
2682         fclose(fp);
2683         return 0;
2684 }
2685
2686 void re_initialize(void)
2687 {
2688         free_all_buffers();
2689         setup_all_buffers();
2690         printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2691 }
2692
2693 void set_max_cpu_num(void)
2694 {
2695         FILE *filep;
2696         unsigned long dummy;
2697
2698         topo.max_cpu_num = 0;
2699         filep = fopen_or_die(
2700                         "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2701                         "r");
2702         while (fscanf(filep, "%lx,", &dummy) == 1)
2703                 topo.max_cpu_num += BITMASK_SIZE;
2704         fclose(filep);
2705         topo.max_cpu_num--; /* 0 based */
2706 }
2707
2708 /*
2709  * count_cpus()
2710  * remember the last one seen, it will be the max
2711  */
2712 int count_cpus(int cpu)
2713 {
2714         topo.num_cpus++;
2715         return 0;
2716 }
2717 int mark_cpu_present(int cpu)
2718 {
2719         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2720         return 0;
2721 }
2722
2723 int init_thread_id(int cpu)
2724 {
2725         cpus[cpu].thread_id = -1;
2726         return 0;
2727 }
2728
2729 /*
2730  * snapshot_proc_interrupts()
2731  *
2732  * read and record summary of /proc/interrupts
2733  *
2734  * return 1 if config change requires a restart, else return 0
2735  */
2736 int snapshot_proc_interrupts(void)
2737 {
2738         static FILE *fp;
2739         int column, retval;
2740
2741         if (fp == NULL)
2742                 fp = fopen_or_die("/proc/interrupts", "r");
2743         else
2744                 rewind(fp);
2745
2746         /* read 1st line of /proc/interrupts to get cpu* name for each column */
2747         for (column = 0; column < topo.num_cpus; ++column) {
2748                 int cpu_number;
2749
2750                 retval = fscanf(fp, " CPU%d", &cpu_number);
2751                 if (retval != 1)
2752                         break;
2753
2754                 if (cpu_number > topo.max_cpu_num) {
2755                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2756                         return 1;
2757                 }
2758
2759                 irq_column_2_cpu[column] = cpu_number;
2760                 irqs_per_cpu[cpu_number] = 0;
2761         }
2762
2763         /* read /proc/interrupt count lines and sum up irqs per cpu */
2764         while (1) {
2765                 int column;
2766                 char buf[64];
2767
2768                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
2769                 if (retval != 1)
2770                         break;
2771
2772                 /* read the count per cpu */
2773                 for (column = 0; column < topo.num_cpus; ++column) {
2774
2775                         int cpu_number, irq_count;
2776
2777                         retval = fscanf(fp, " %d", &irq_count);
2778                         if (retval != 1)
2779                                 break;
2780
2781                         cpu_number = irq_column_2_cpu[column];
2782                         irqs_per_cpu[cpu_number] += irq_count;
2783
2784                 }
2785
2786                 while (getc(fp) != '\n')
2787                         ;       /* flush interrupt description */
2788
2789         }
2790         return 0;
2791 }
2792 /*
2793  * snapshot_gfx_rc6_ms()
2794  *
2795  * record snapshot of
2796  * /sys/class/drm/card0/power/rc6_residency_ms
2797  *
2798  * return 1 if config change requires a restart, else return 0
2799  */
2800 int snapshot_gfx_rc6_ms(void)
2801 {
2802         FILE *fp;
2803         int retval;
2804
2805         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2806
2807         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2808         if (retval != 1)
2809                 err(1, "GFX rc6");
2810
2811         fclose(fp);
2812
2813         return 0;
2814 }
2815 /*
2816  * snapshot_gfx_mhz()
2817  *
2818  * record snapshot of
2819  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2820  *
2821  * return 1 if config change requires a restart, else return 0
2822  */
2823 int snapshot_gfx_mhz(void)
2824 {
2825         static FILE *fp;
2826         int retval;
2827
2828         if (fp == NULL)
2829                 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2830         else {
2831                 rewind(fp);
2832                 fflush(fp);
2833         }
2834
2835         retval = fscanf(fp, "%d", &gfx_cur_mhz);
2836         if (retval != 1)
2837                 err(1, "GFX MHz");
2838
2839         return 0;
2840 }
2841
2842 /*
2843  * snapshot_cpu_lpi()
2844  *
2845  * record snapshot of
2846  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2847  *
2848  * return 1 if config change requires a restart, else return 0
2849  */
2850 int snapshot_cpu_lpi_us(void)
2851 {
2852         FILE *fp;
2853         int retval;
2854
2855         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2856
2857         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2858         if (retval != 1)
2859                 err(1, "CPU LPI");
2860
2861         fclose(fp);
2862
2863         return 0;
2864 }
2865 /*
2866  * snapshot_sys_lpi()
2867  *
2868  * record snapshot of
2869  * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2870  *
2871  * return 1 if config change requires a restart, else return 0
2872  */
2873 int snapshot_sys_lpi_us(void)
2874 {
2875         FILE *fp;
2876         int retval;
2877
2878         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2879
2880         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2881         if (retval != 1)
2882                 err(1, "SYS LPI");
2883
2884         fclose(fp);
2885
2886         return 0;
2887 }
2888 /*
2889  * snapshot /proc and /sys files
2890  *
2891  * return 1 if configuration restart needed, else return 0
2892  */
2893 int snapshot_proc_sysfs_files(void)
2894 {
2895         if (DO_BIC(BIC_IRQ))
2896                 if (snapshot_proc_interrupts())
2897                         return 1;
2898
2899         if (DO_BIC(BIC_GFX_rc6))
2900                 snapshot_gfx_rc6_ms();
2901
2902         if (DO_BIC(BIC_GFXMHz))
2903                 snapshot_gfx_mhz();
2904
2905         if (DO_BIC(BIC_CPU_LPI))
2906                 snapshot_cpu_lpi_us();
2907
2908         if (DO_BIC(BIC_SYS_LPI))
2909                 snapshot_sys_lpi_us();
2910
2911         return 0;
2912 }
2913
2914 int exit_requested;
2915
2916 static void signal_handler (int signal)
2917 {
2918         switch (signal) {
2919         case SIGINT:
2920                 exit_requested = 1;
2921                 if (debug)
2922                         fprintf(stderr, " SIGINT\n");
2923                 break;
2924         case SIGUSR1:
2925                 if (debug > 1)
2926                         fprintf(stderr, "SIGUSR1\n");
2927                 break;
2928         }
2929         /* make sure this manually-invoked interval is at least 1ms long */
2930         nanosleep(&one_msec, NULL);
2931 }
2932
2933 void setup_signal_handler(void)
2934 {
2935         struct sigaction sa;
2936
2937         memset(&sa, 0, sizeof(sa));
2938
2939         sa.sa_handler = &signal_handler;
2940
2941         if (sigaction(SIGINT, &sa, NULL) < 0)
2942                 err(1, "sigaction SIGINT");
2943         if (sigaction(SIGUSR1, &sa, NULL) < 0)
2944                 err(1, "sigaction SIGUSR1");
2945 }
2946
2947 void do_sleep(void)
2948 {
2949         struct timeval select_timeout;
2950         fd_set readfds;
2951         int retval;
2952
2953         FD_ZERO(&readfds);
2954         FD_SET(0, &readfds);
2955
2956         if (!isatty(fileno(stdin))) {
2957                 nanosleep(&interval_ts, NULL);
2958                 return;
2959         }
2960
2961         select_timeout = interval_tv;
2962         retval = select(1, &readfds, NULL, NULL, &select_timeout);
2963
2964         if (retval == 1) {
2965                 switch (getc(stdin)) {
2966                 case 'q':
2967                         exit_requested = 1;
2968                         break;
2969                 }
2970                 /* make sure this manually-invoked interval is at least 1ms long */
2971                 nanosleep(&one_msec, NULL);
2972         }
2973 }
2974
2975
2976 void turbostat_loop()
2977 {
2978         int retval;
2979         int restarted = 0;
2980         int done_iters = 0;
2981
2982         setup_signal_handler();
2983
2984 restart:
2985         restarted++;
2986
2987         snapshot_proc_sysfs_files();
2988         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2989         first_counter_read = 0;
2990         if (retval < -1) {
2991                 exit(retval);
2992         } else if (retval == -1) {
2993                 if (restarted > 1) {
2994                         exit(retval);
2995                 }
2996                 re_initialize();
2997                 goto restart;
2998         }
2999         restarted = 0;
3000         done_iters = 0;
3001         gettimeofday(&tv_even, (struct timezone *)NULL);
3002
3003         while (1) {
3004                 if (for_all_proc_cpus(cpu_is_not_present)) {
3005                         re_initialize();
3006                         goto restart;
3007                 }
3008                 do_sleep();
3009                 if (snapshot_proc_sysfs_files())
3010                         goto restart;
3011                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
3012                 if (retval < -1) {
3013                         exit(retval);
3014                 } else if (retval == -1) {
3015                         re_initialize();
3016                         goto restart;
3017                 }
3018                 gettimeofday(&tv_odd, (struct timezone *)NULL);
3019                 timersub(&tv_odd, &tv_even, &tv_delta);
3020                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3021                         re_initialize();
3022                         goto restart;
3023                 }
3024                 compute_average(EVEN_COUNTERS);
3025                 format_all_counters(EVEN_COUNTERS);
3026                 flush_output_stdout();
3027                 if (exit_requested)
3028                         break;
3029                 if (num_iterations && ++done_iters >= num_iterations)
3030                         break;
3031                 do_sleep();
3032                 if (snapshot_proc_sysfs_files())
3033                         goto restart;
3034                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3035                 if (retval < -1) {
3036                         exit(retval);
3037                 } else if (retval == -1) {
3038                         re_initialize();
3039                         goto restart;
3040                 }
3041                 gettimeofday(&tv_even, (struct timezone *)NULL);
3042                 timersub(&tv_even, &tv_odd, &tv_delta);
3043                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3044                         re_initialize();
3045                         goto restart;
3046                 }
3047                 compute_average(ODD_COUNTERS);
3048                 format_all_counters(ODD_COUNTERS);
3049                 flush_output_stdout();
3050                 if (exit_requested)
3051                         break;
3052                 if (num_iterations && ++done_iters >= num_iterations)
3053                         break;
3054         }
3055 }
3056
3057 void check_dev_msr()
3058 {
3059         struct stat sb;
3060         char pathname[32];
3061
3062         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3063         if (stat(pathname, &sb))
3064                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3065                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3066 }
3067
3068 void check_permissions()
3069 {
3070         struct __user_cap_header_struct cap_header_data;
3071         cap_user_header_t cap_header = &cap_header_data;
3072         struct __user_cap_data_struct cap_data_data;
3073         cap_user_data_t cap_data = &cap_data_data;
3074         extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
3075         int do_exit = 0;
3076         char pathname[32];
3077
3078         /* check for CAP_SYS_RAWIO */
3079         cap_header->pid = getpid();
3080         cap_header->version = _LINUX_CAPABILITY_VERSION;
3081         if (capget(cap_header, cap_data) < 0)
3082                 err(-6, "capget(2) failed");
3083
3084         if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
3085                 do_exit++;
3086                 warnx("capget(CAP_SYS_RAWIO) failed,"
3087                         " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3088         }
3089
3090         /* test file permissions */
3091         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3092         if (euidaccess(pathname, R_OK)) {
3093                 do_exit++;
3094                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3095         }
3096
3097         /* if all else fails, thell them to be root */
3098         if (do_exit)
3099                 if (getuid() != 0)
3100                         warnx("... or simply run as root");
3101
3102         if (do_exit)
3103                 exit(-6);
3104 }
3105
3106 /*
3107  * NHM adds support for additional MSRs:
3108  *
3109  * MSR_SMI_COUNT                   0x00000034
3110  *
3111  * MSR_PLATFORM_INFO               0x000000ce
3112  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3113  *
3114  * MSR_MISC_PWR_MGMT               0x000001aa
3115  *
3116  * MSR_PKG_C3_RESIDENCY            0x000003f8
3117  * MSR_PKG_C6_RESIDENCY            0x000003f9
3118  * MSR_CORE_C3_RESIDENCY           0x000003fc
3119  * MSR_CORE_C6_RESIDENCY           0x000003fd
3120  *
3121  * Side effect:
3122  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3123  * sets has_misc_feature_control
3124  */
3125 int probe_nhm_msrs(unsigned int family, unsigned int model)
3126 {
3127         unsigned long long msr;
3128         unsigned int base_ratio;
3129         int *pkg_cstate_limits;
3130
3131         if (!genuine_intel)
3132                 return 0;
3133
3134         if (family != 6)
3135                 return 0;
3136
3137         bclk = discover_bclk(family, model);
3138
3139         switch (model) {
3140         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3141         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3142                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3143                 break;
3144         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3145         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3146         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3147         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3148                 pkg_cstate_limits = snb_pkg_cstate_limits;
3149                 has_misc_feature_control = 1;
3150                 break;
3151         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3152         case INTEL_FAM6_HASWELL_X:      /* HSX */
3153         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3154         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3155         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3156         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3157         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3158         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3159                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3160                 has_misc_feature_control = 1;
3161                 break;
3162         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3163                 pkg_cstate_limits = skx_pkg_cstate_limits;
3164                 has_misc_feature_control = 1;
3165                 break;
3166         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3167                 no_MSR_MISC_PWR_MGMT = 1;
3168         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3169                 pkg_cstate_limits = slv_pkg_cstate_limits;
3170                 break;
3171         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3172                 pkg_cstate_limits = amt_pkg_cstate_limits;
3173                 no_MSR_MISC_PWR_MGMT = 1;
3174                 break;
3175         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3176                 pkg_cstate_limits = phi_pkg_cstate_limits;
3177                 break;
3178         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3179         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3180         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3181                 pkg_cstate_limits = glm_pkg_cstate_limits;
3182                 break;
3183         default:
3184                 return 0;
3185         }
3186         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3187         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3188
3189         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3190         base_ratio = (msr >> 8) & 0xFF;
3191
3192         base_hz = base_ratio * bclk * 1000000;
3193         has_base_hz = 1;
3194         return 1;
3195 }
3196 /*
3197  * SLV client has support for unique MSRs:
3198  *
3199  * MSR_CC6_DEMOTION_POLICY_CONFIG
3200  * MSR_MC6_DEMOTION_POLICY_CONFIG
3201  */
3202
3203 int has_slv_msrs(unsigned int family, unsigned int model)
3204 {
3205         if (!genuine_intel)
3206                 return 0;
3207
3208         switch (model) {
3209         case INTEL_FAM6_ATOM_SILVERMONT:
3210         case INTEL_FAM6_ATOM_SILVERMONT_MID:
3211         case INTEL_FAM6_ATOM_AIRMONT_MID:
3212                 return 1;
3213         }
3214         return 0;
3215 }
3216 int is_dnv(unsigned int family, unsigned int model)
3217 {
3218
3219         if (!genuine_intel)
3220                 return 0;
3221
3222         switch (model) {
3223         case INTEL_FAM6_ATOM_GOLDMONT_X:
3224                 return 1;
3225         }
3226         return 0;
3227 }
3228 int is_bdx(unsigned int family, unsigned int model)
3229 {
3230
3231         if (!genuine_intel)
3232                 return 0;
3233
3234         switch (model) {
3235         case INTEL_FAM6_BROADWELL_X:
3236                 return 1;
3237         }
3238         return 0;
3239 }
3240 int is_skx(unsigned int family, unsigned int model)
3241 {
3242
3243         if (!genuine_intel)
3244                 return 0;
3245
3246         switch (model) {
3247         case INTEL_FAM6_SKYLAKE_X:
3248                 return 1;
3249         }
3250         return 0;
3251 }
3252
3253 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3254 {
3255         if (has_slv_msrs(family, model))
3256                 return 0;
3257
3258         switch (model) {
3259         /* Nehalem compatible, but do not include turbo-ratio limit support */
3260         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3261         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
3262                 return 0;
3263         default:
3264                 return 1;
3265         }
3266 }
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports a match, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3274 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3275 {
3276         if (!genuine_intel)
3277                 return 0;
3278
3279         if (family != 6)
3280                 return 0;
3281
3282         switch (model) {
3283         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3284         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3285                 return 1;
3286         default:
3287                 return 0;
3288         }
3289 }
3290 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3291 {
3292         if (!genuine_intel)
3293                 return 0;
3294
3295         if (family != 6)
3296                 return 0;
3297
3298         switch (model) {
3299         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
3300                 return 1;
3301         default:
3302                 return 0;
3303         }
3304 }
3305
3306 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3307 {
3308         if (!genuine_intel)
3309                 return 0;
3310
3311         if (family != 6)
3312                 return 0;
3313
3314         switch (model) {
3315         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3316                 return 1;
3317         default:
3318                 return 0;
3319         }
3320 }
3321 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3322 {
3323         if (!genuine_intel)
3324                 return 0;
3325
3326         if (family != 6)
3327                 return 0;
3328
3329         switch (model) {
3330         case INTEL_FAM6_ATOM_GOLDMONT:
3331         case INTEL_FAM6_SKYLAKE_X:
3332                 return 1;
3333         default:
3334                 return 0;
3335         }
3336 }
3337 int has_config_tdp(unsigned int family, unsigned int model)
3338 {
3339         if (!genuine_intel)
3340                 return 0;
3341
3342         if (family != 6)
3343                 return 0;
3344
3345         switch (model) {
3346         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3347         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3348         case INTEL_FAM6_HASWELL_X:      /* HSX */
3349         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3350         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3351         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3352         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3353         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3354         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3355         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3356
3357         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
3358                 return 1;
3359         default:
3360                 return 0;
3361         }
3362 }
3363
3364 static void
3365 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3366 {
3367         if (!do_nhm_platform_info)
3368                 return;
3369
3370         dump_nhm_platform_info();
3371
3372         if (has_hsw_turbo_ratio_limit(family, model))
3373                 dump_hsw_turbo_ratio_limits();
3374
3375         if (has_ivt_turbo_ratio_limit(family, model))
3376                 dump_ivt_turbo_ratio_limits();
3377
3378         if (has_turbo_ratio_limit(family, model))
3379                 dump_turbo_ratio_limits(family, model);
3380
3381         if (has_atom_turbo_ratio_limit(family, model))
3382                 dump_atom_turbo_ratio_limits();
3383
3384         if (has_knl_turbo_ratio_limit(family, model))
3385                 dump_knl_turbo_ratio_limits();
3386
3387         if (has_config_tdp(family, model))
3388                 dump_config_tdp();
3389
3390         dump_nhm_cst_cfg();
3391 }
3392
3393 static void
3394 dump_sysfs_cstate_config(void)
3395 {
3396         char path[64];
3397         char name_buf[16];
3398         char desc[64];
3399         FILE *input;
3400         int state;
3401         char *sp;
3402
3403         if (!DO_BIC(BIC_sysfs))
3404                 return;
3405
3406         for (state = 0; state < 10; ++state) {
3407
3408                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3409                         base_cpu, state);
3410                 input = fopen(path, "r");
3411                 if (input == NULL)
3412                         continue;
3413                 fgets(name_buf, sizeof(name_buf), input);
3414
3415                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3416                 sp = strchr(name_buf, '-');
3417                 if (!sp)
3418                         sp = strchrnul(name_buf, '\n');
3419                 *sp = '\0';
3420
3421                 fclose(input);
3422
3423                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3424                         base_cpu, state);
3425                 input = fopen(path, "r");
3426                 if (input == NULL)
3427                         continue;
3428                 fgets(desc, sizeof(desc), input);
3429
3430                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3431                 fclose(input);
3432         }
3433 }
3434 static void
3435 dump_sysfs_pstate_config(void)
3436 {
3437         char path[64];
3438         char driver_buf[64];
3439         char governor_buf[64];
3440         FILE *input;
3441         int turbo;
3442
3443         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3444                         base_cpu);
3445         input = fopen(path, "r");
3446         if (input == NULL) {
3447                 fprintf(stderr, "NSFOD %s\n", path);
3448                 return;
3449         }
3450         fgets(driver_buf, sizeof(driver_buf), input);
3451         fclose(input);
3452
3453         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3454                         base_cpu);
3455         input = fopen(path, "r");
3456         if (input == NULL) {
3457                 fprintf(stderr, "NSFOD %s\n", path);
3458                 return;
3459         }
3460         fgets(governor_buf, sizeof(governor_buf), input);
3461         fclose(input);
3462
3463         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3464         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3465
3466         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3467         input = fopen(path, "r");
3468         if (input != NULL) {
3469                 fscanf(input, "%d", &turbo);
3470                 fprintf(outf, "cpufreq boost: %d\n", turbo);
3471                 fclose(input);
3472         }
3473
3474         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3475         input = fopen(path, "r");
3476         if (input != NULL) {
3477                 fscanf(input, "%d", &turbo);
3478                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3479                 fclose(input);
3480         }
3481 }
3482
3483
3484 /*
3485  * print_epb()
3486  * Decode the ENERGY_PERF_BIAS MSR
3487  */
3488 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3489 {
3490         unsigned long long msr;
3491         char *epb_string;
3492         int cpu;
3493
3494         if (!has_epb)
3495                 return 0;
3496
3497         cpu = t->cpu_id;
3498
3499         /* EPB is per-package */
3500         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3501                 return 0;
3502
3503         if (cpu_migrate(cpu)) {
3504                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3505                 return -1;
3506         }
3507
3508         if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3509                 return 0;
3510
3511         switch (msr & 0xF) {
3512         case ENERGY_PERF_BIAS_PERFORMANCE:
3513                 epb_string = "performance";
3514                 break;
3515         case ENERGY_PERF_BIAS_NORMAL:
3516                 epb_string = "balanced";
3517                 break;
3518         case ENERGY_PERF_BIAS_POWERSAVE:
3519                 epb_string = "powersave";
3520                 break;
3521         default:
3522                 epb_string = "custom";
3523                 break;
3524         }
3525         fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3526
3527         return 0;
3528 }
3529 /*
3530  * print_hwp()
3531  * Decode the MSR_HWP_CAPABILITIES
3532  */
3533 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3534 {
3535         unsigned long long msr;
3536         int cpu;
3537
3538         if (!has_hwp)
3539                 return 0;
3540
3541         cpu = t->cpu_id;
3542
3543         /* MSR_HWP_CAPABILITIES is per-package */
3544         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3545                 return 0;
3546
3547         if (cpu_migrate(cpu)) {
3548                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3549                 return -1;
3550         }
3551
3552         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3553                 return 0;
3554
3555         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3556                 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3557
3558         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3559         if ((msr & (1 << 0)) == 0)
3560                 return 0;
3561
3562         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3563                 return 0;
3564
3565         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3566                         "(high %d guar %d eff %d low %d)\n",
3567                         cpu, msr,
3568                         (unsigned int)HWP_HIGHEST_PERF(msr),
3569                         (unsigned int)HWP_GUARANTEED_PERF(msr),
3570                         (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3571                         (unsigned int)HWP_LOWEST_PERF(msr));
3572
3573         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3574                 return 0;
3575
3576         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3577                         "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3578                         cpu, msr,
3579                         (unsigned int)(((msr) >> 0) & 0xff),
3580                         (unsigned int)(((msr) >> 8) & 0xff),
3581                         (unsigned int)(((msr) >> 16) & 0xff),
3582                         (unsigned int)(((msr) >> 24) & 0xff),
3583                         (unsigned int)(((msr) >> 32) & 0xff3),
3584                         (unsigned int)(((msr) >> 42) & 0x1));
3585
3586         if (has_hwp_pkg) {
3587                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3588                         return 0;
3589
3590                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3591                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
3592                         cpu, msr,
3593                         (unsigned int)(((msr) >> 0) & 0xff),
3594                         (unsigned int)(((msr) >> 8) & 0xff),
3595                         (unsigned int)(((msr) >> 16) & 0xff),
3596                         (unsigned int)(((msr) >> 24) & 0xff),
3597                         (unsigned int)(((msr) >> 32) & 0xff3));
3598         }
3599         if (has_hwp_notify) {
3600                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3601                         return 0;
3602
3603                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3604                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3605                         cpu, msr,
3606                         ((msr) & 0x1) ? "EN" : "Dis",
3607                         ((msr) & 0x2) ? "EN" : "Dis");
3608         }
3609         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3610                 return 0;
3611
3612         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3613                         "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3614                         cpu, msr,
3615                         ((msr) & 0x1) ? "" : "No-",
3616                         ((msr) & 0x2) ? "" : "No-");
3617
3618         return 0;
3619 }
3620
3621 /*
3622  * print_perf_limit()
3623  */
3624 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3625 {
3626         unsigned long long msr;
3627         int cpu;
3628
3629         cpu = t->cpu_id;
3630
3631         /* per-package */
3632         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3633                 return 0;
3634
3635         if (cpu_migrate(cpu)) {
3636                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3637                 return -1;
3638         }
3639
3640         if (do_core_perf_limit_reasons) {
3641                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3642                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3643                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3644                         (msr & 1 << 15) ? "bit15, " : "",
3645                         (msr & 1 << 14) ? "bit14, " : "",
3646                         (msr & 1 << 13) ? "Transitions, " : "",
3647                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3648                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
3649                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3650                         (msr & 1 << 9) ? "CorePwr, " : "",
3651                         (msr & 1 << 8) ? "Amps, " : "",
3652                         (msr & 1 << 6) ? "VR-Therm, " : "",
3653                         (msr & 1 << 5) ? "Auto-HWP, " : "",
3654                         (msr & 1 << 4) ? "Graphics, " : "",
3655                         (msr & 1 << 2) ? "bit2, " : "",
3656                         (msr & 1 << 1) ? "ThermStatus, " : "",
3657                         (msr & 1 << 0) ? "PROCHOT, " : "");
3658                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3659                         (msr & 1 << 31) ? "bit31, " : "",
3660                         (msr & 1 << 30) ? "bit30, " : "",
3661                         (msr & 1 << 29) ? "Transitions, " : "",
3662                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3663                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
3664                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3665                         (msr & 1 << 25) ? "CorePwr, " : "",
3666                         (msr & 1 << 24) ? "Amps, " : "",
3667                         (msr & 1 << 22) ? "VR-Therm, " : "",
3668                         (msr & 1 << 21) ? "Auto-HWP, " : "",
3669                         (msr & 1 << 20) ? "Graphics, " : "",
3670                         (msr & 1 << 18) ? "bit18, " : "",
3671                         (msr & 1 << 17) ? "ThermStatus, " : "",
3672                         (msr & 1 << 16) ? "PROCHOT, " : "");
3673
3674         }
3675         if (do_gfx_perf_limit_reasons) {
3676                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3677                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3678                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3679                         (msr & 1 << 0) ? "PROCHOT, " : "",
3680                         (msr & 1 << 1) ? "ThermStatus, " : "",
3681                         (msr & 1 << 4) ? "Graphics, " : "",
3682                         (msr & 1 << 6) ? "VR-Therm, " : "",
3683                         (msr & 1 << 8) ? "Amps, " : "",
3684                         (msr & 1 << 9) ? "GFXPwr, " : "",
3685                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3686                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3687                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3688                         (msr & 1 << 16) ? "PROCHOT, " : "",
3689                         (msr & 1 << 17) ? "ThermStatus, " : "",
3690                         (msr & 1 << 20) ? "Graphics, " : "",
3691                         (msr & 1 << 22) ? "VR-Therm, " : "",
3692                         (msr & 1 << 24) ? "Amps, " : "",
3693                         (msr & 1 << 25) ? "GFXPwr, " : "",
3694                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3695                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3696         }
3697         if (do_ring_perf_limit_reasons) {
3698                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3699                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3700                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
3701                         (msr & 1 << 0) ? "PROCHOT, " : "",
3702                         (msr & 1 << 1) ? "ThermStatus, " : "",
3703                         (msr & 1 << 6) ? "VR-Therm, " : "",
3704                         (msr & 1 << 8) ? "Amps, " : "",
3705                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
3706                         (msr & 1 << 11) ? "PkgPwrL2, " : "");
3707                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3708                         (msr & 1 << 16) ? "PROCHOT, " : "",
3709                         (msr & 1 << 17) ? "ThermStatus, " : "",
3710                         (msr & 1 << 22) ? "VR-Therm, " : "",
3711                         (msr & 1 << 24) ? "Amps, " : "",
3712                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
3713                         (msr & 1 << 27) ? "PkgPwrL2, " : "");
3714         }
3715         return 0;
3716 }
3717
3718 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
3719 #define RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
3720
3721 double get_tdp(unsigned int model)
3722 {
3723         unsigned long long msr;
3724
3725         if (do_rapl & RAPL_PKG_POWER_INFO)
3726                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3727                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3728
3729         switch (model) {
3730         case INTEL_FAM6_ATOM_SILVERMONT:
3731         case INTEL_FAM6_ATOM_SILVERMONT_X:
3732                 return 30.0;
3733         default:
3734                 return 135.0;
3735         }
3736 }
3737
3738 /*
3739  * rapl_dram_energy_units_probe()
3740  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3741  */
3742 static double
3743 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3744 {
3745         /* only called for genuine_intel, family 6 */
3746
3747         switch (model) {
3748         case INTEL_FAM6_HASWELL_X:      /* HSX */
3749         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3750         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3751                 return (rapl_dram_energy_units = 15.3 / 1000000);
3752         default:
3753                 return (rapl_energy_units);
3754         }
3755 }
3756
3757
3758 /*
3759  * rapl_probe()
3760  *
3761  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3762  */
3763 void rapl_probe(unsigned int family, unsigned int model)
3764 {
3765         unsigned long long msr;
3766         unsigned int time_unit;
3767         double tdp;
3768
3769         if (!genuine_intel)
3770                 return;
3771
3772         if (family != 6)
3773                 return;
3774
3775         switch (model) {
3776         case INTEL_FAM6_SANDYBRIDGE:
3777         case INTEL_FAM6_IVYBRIDGE:
3778         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3779         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3780         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
3781         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
3782                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3783                 if (rapl_joules) {
3784                         BIC_PRESENT(BIC_Pkg_J);
3785                         BIC_PRESENT(BIC_Cor_J);
3786                         BIC_PRESENT(BIC_GFX_J);
3787                 } else {
3788                         BIC_PRESENT(BIC_PkgWatt);
3789                         BIC_PRESENT(BIC_CorWatt);
3790                         BIC_PRESENT(BIC_GFXWatt);
3791                 }
3792                 break;
3793         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3794         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3795                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3796                 if (rapl_joules)
3797                         BIC_PRESENT(BIC_Pkg_J);
3798                 else
3799                         BIC_PRESENT(BIC_PkgWatt);
3800                 break;
3801         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3802         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
3803                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3804                 BIC_PRESENT(BIC_PKG__);
3805                 BIC_PRESENT(BIC_RAM__);
3806                 if (rapl_joules) {
3807                         BIC_PRESENT(BIC_Pkg_J);
3808                         BIC_PRESENT(BIC_Cor_J);
3809                         BIC_PRESENT(BIC_RAM_J);
3810                         BIC_PRESENT(BIC_GFX_J);
3811                 } else {
3812                         BIC_PRESENT(BIC_PkgWatt);
3813                         BIC_PRESENT(BIC_CorWatt);
3814                         BIC_PRESENT(BIC_RAMWatt);
3815                         BIC_PRESENT(BIC_GFXWatt);
3816                 }
3817                 break;
3818         case INTEL_FAM6_HASWELL_X:      /* HSX */
3819         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3820         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3821         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
3822                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3823                 BIC_PRESENT(BIC_PKG__);
3824                 BIC_PRESENT(BIC_RAM__);
3825                 if (rapl_joules) {
3826                         BIC_PRESENT(BIC_Pkg_J);
3827                         BIC_PRESENT(BIC_RAM_J);
3828                 } else {
3829                         BIC_PRESENT(BIC_PkgWatt);
3830                         BIC_PRESENT(BIC_RAMWatt);
3831                 }
3832                 break;
3833         case INTEL_FAM6_SANDYBRIDGE_X:
3834         case INTEL_FAM6_IVYBRIDGE_X:
3835                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3836                 BIC_PRESENT(BIC_PKG__);
3837                 BIC_PRESENT(BIC_RAM__);
3838                 if (rapl_joules) {
3839                         BIC_PRESENT(BIC_Pkg_J);
3840                         BIC_PRESENT(BIC_Cor_J);
3841                         BIC_PRESENT(BIC_RAM_J);
3842                 } else {
3843                         BIC_PRESENT(BIC_PkgWatt);
3844                         BIC_PRESENT(BIC_CorWatt);
3845                         BIC_PRESENT(BIC_RAMWatt);
3846                 }
3847                 break;
3848         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3849         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
3850                 do_rapl = RAPL_PKG | RAPL_CORES;
3851                 if (rapl_joules) {
3852                         BIC_PRESENT(BIC_Pkg_J);
3853                         BIC_PRESENT(BIC_Cor_J);
3854                 } else {
3855                         BIC_PRESENT(BIC_PkgWatt);
3856                         BIC_PRESENT(BIC_CorWatt);
3857                 }
3858                 break;
3859         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
3860                 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3861                 BIC_PRESENT(BIC_PKG__);
3862                 BIC_PRESENT(BIC_RAM__);
3863                 if (rapl_joules) {
3864                         BIC_PRESENT(BIC_Pkg_J);
3865                         BIC_PRESENT(BIC_Cor_J);
3866                         BIC_PRESENT(BIC_RAM_J);
3867                 } else {
3868                         BIC_PRESENT(BIC_PkgWatt);
3869                         BIC_PRESENT(BIC_CorWatt);
3870                         BIC_PRESENT(BIC_RAMWatt);
3871                 }
3872                 break;
3873         default:
3874                 return;
3875         }
3876
3877         /* units on package 0, verify later other packages match */
3878         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3879                 return;
3880
3881         rapl_power_units = 1.0 / (1 << (msr & 0xF));
3882         if (model == INTEL_FAM6_ATOM_SILVERMONT)
3883                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3884         else
3885                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3886
3887         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3888
3889         time_unit = msr >> 16 & 0xF;
3890         if (time_unit == 0)
3891                 time_unit = 0xA;
3892
3893         rapl_time_units = 1.0 / (1 << (time_unit));
3894
3895         tdp = get_tdp(model);
3896
3897         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3898         if (!quiet)
3899                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3900
3901         return;
3902 }
3903
3904 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3905 {
3906         if (!genuine_intel)
3907                 return;
3908
3909         if (family != 6)
3910                 return;
3911
3912         switch (model) {
3913         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
3914         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
3915                 do_gfx_perf_limit_reasons = 1;
3916         case INTEL_FAM6_HASWELL_X:      /* HSX */
3917                 do_core_perf_limit_reasons = 1;
3918                 do_ring_perf_limit_reasons = 1;
3919         default:
3920                 return;
3921         }
3922 }
3923
3924 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3925 {
3926         if (is_skx(family, model) || is_bdx(family, model))
3927                 has_automatic_cstate_conversion = 1;
3928 }
3929
3930 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3931 {
3932         unsigned long long msr;
3933         unsigned int dts, dts2;
3934         int cpu;
3935
3936         if (!(do_dts || do_ptm))
3937                 return 0;
3938
3939         cpu = t->cpu_id;
3940
3941         /* DTS is per-core, no need to print for each thread */
3942         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3943                 return 0;
3944
3945         if (cpu_migrate(cpu)) {
3946                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3947                 return -1;
3948         }
3949
3950         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3951                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3952                         return 0;
3953
3954                 dts = (msr >> 16) & 0x7F;
3955                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3956                         cpu, msr, tcc_activation_temp - dts);
3957
3958                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3959                         return 0;
3960
3961                 dts = (msr >> 16) & 0x7F;
3962                 dts2 = (msr >> 8) & 0x7F;
3963                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3964                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3965         }
3966
3967
3968         if (do_dts && debug) {
3969                 unsigned int resolution;
3970
3971                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3972                         return 0;
3973
3974                 dts = (msr >> 16) & 0x7F;
3975                 resolution = (msr >> 27) & 0xF;
3976                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3977                         cpu, msr, tcc_activation_temp - dts, resolution);
3978
3979                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3980                         return 0;
3981
3982                 dts = (msr >> 16) & 0x7F;
3983                 dts2 = (msr >> 8) & 0x7F;
3984                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3985                         cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3986         }
3987
3988         return 0;
3989 }
3990
3991 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3992 {
3993         fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3994                 cpu, label,
3995                 ((msr >> 15) & 1) ? "EN" : "DIS",
3996                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
3997                 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3998                 (((msr >> 16) & 1) ? "EN" : "DIS"));
3999
4000         return;
4001 }
4002
4003 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4004 {
4005         unsigned long long msr;
4006         int cpu;
4007
4008         if (!do_rapl)
4009                 return 0;
4010
4011         /* RAPL counters are per package, so print only for 1st thread/package */
4012         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4013                 return 0;
4014
4015         cpu = t->cpu_id;
4016         if (cpu_migrate(cpu)) {
4017                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4018                 return -1;
4019         }
4020
4021         if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4022                 return -1;
4023
4024         fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
4025                 rapl_power_units, rapl_energy_units, rapl_time_units);
4026
4027         if (do_rapl & RAPL_PKG_POWER_INFO) {
4028
4029                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4030                         return -5;
4031
4032
4033                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4034                         cpu, msr,
4035                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4036                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4037                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4038                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4039
4040         }
4041         if (do_rapl & RAPL_PKG) {
4042
4043                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4044                         return -9;
4045
4046                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4047                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4048
4049                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
4050                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4051                         cpu,
4052                         ((msr >> 47) & 1) ? "EN" : "DIS",
4053                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
4054                         (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4055                         ((msr >> 48) & 1) ? "EN" : "DIS");
4056         }
4057
4058         if (do_rapl & RAPL_DRAM_POWER_INFO) {
4059                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4060                         return -6;
4061
4062                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4063                         cpu, msr,
4064                         ((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4065                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4066                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4067                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4068         }
4069         if (do_rapl & RAPL_DRAM) {
4070                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4071                         return -9;
4072                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4073                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4074
4075                 print_power_limit_msr(cpu, msr, "DRAM Limit");
4076         }
4077         if (do_rapl & RAPL_CORE_POLICY) {
4078                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4079                         return -7;
4080
4081                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4082         }
4083         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4084                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4085                         return -9;
4086                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4087                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4088                 print_power_limit_msr(cpu, msr, "Cores Limit");
4089         }
4090         if (do_rapl & RAPL_GFX) {
4091                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4092                         return -8;
4093
4094                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4095
4096                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4097                         return -9;
4098                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4099                                 cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4100                 print_power_limit_msr(cpu, msr, "GFX Limit");
4101         }
4102         return 0;
4103 }
4104
4105 /*
4106  * SNB adds support for additional MSRs:
4107  *
4108  * MSR_PKG_C7_RESIDENCY            0x000003fa
4109  * MSR_CORE_C7_RESIDENCY           0x000003fe
4110  * MSR_PKG_C2_RESIDENCY            0x0000060d
4111  */
4112
4113 int has_snb_msrs(unsigned int family, unsigned int model)
4114 {
4115         if (!genuine_intel)
4116                 return 0;
4117
4118         switch (model) {
4119         case INTEL_FAM6_SANDYBRIDGE:
4120         case INTEL_FAM6_SANDYBRIDGE_X:
4121         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4122         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4123         case INTEL_FAM6_HASWELL_CORE:   /* HSW */
4124         case INTEL_FAM6_HASWELL_X:      /* HSW */
4125         case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
4126         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4127         case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
4128         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4129         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4130         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4131         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4132         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4133         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4134         case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4135                 return 1;
4136         }
4137         return 0;
4138 }
4139
4140 /*
4141  * HSW adds support for additional MSRs:
4142  *
4143  * MSR_PKG_C8_RESIDENCY         0x00000630
4144  * MSR_PKG_C9_RESIDENCY         0x00000631
4145  * MSR_PKG_C10_RESIDENCY        0x00000632
4146  *
4147  * MSR_PKGC8_IRTL               0x00000633
4148  * MSR_PKGC9_IRTL               0x00000634
4149  * MSR_PKGC10_IRTL              0x00000635
4150  *
4151  */
4152 int has_hsw_msrs(unsigned int family, unsigned int model)
4153 {
4154         if (!genuine_intel)
4155                 return 0;
4156
4157         switch (model) {
4158         case INTEL_FAM6_HASWELL_CORE:
4159         case INTEL_FAM6_BROADWELL_CORE: /* BDW */
4160         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4161         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4162         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4163         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4164                 return 1;
4165         }
4166         return 0;
4167 }
4168
4169 /*
4170  * SKL adds support for additional MSRS:
4171  *
4172  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4173  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4174  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4175  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4176  */
4177 int has_skl_msrs(unsigned int family, unsigned int model)
4178 {
4179         if (!genuine_intel)
4180                 return 0;
4181
4182         switch (model) {
4183         case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4184         case INTEL_FAM6_CANNONLAKE_MOBILE:      /* CNL */
4185                 return 1;
4186         }
4187         return 0;
4188 }
4189
4190 int is_slm(unsigned int family, unsigned int model)
4191 {
4192         if (!genuine_intel)
4193                 return 0;
4194         switch (model) {
4195         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4196         case INTEL_FAM6_ATOM_SILVERMONT_X:      /* AVN */
4197                 return 1;
4198         }
4199         return 0;
4200 }
4201
4202 int is_knl(unsigned int family, unsigned int model)
4203 {
4204         if (!genuine_intel)
4205                 return 0;
4206         switch (model) {
4207         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4208                 return 1;
4209         }
4210         return 0;
4211 }
4212
4213 int is_cnl(unsigned int family, unsigned int model)
4214 {
4215         if (!genuine_intel)
4216                 return 0;
4217
4218         switch (model) {
4219         case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4220                 return 1;
4221         }
4222
4223         return 0;
4224 }
4225
/*
 * get_aperf_mperf_multiplier()
 *
 * Returns 1024 when is_knl() accepts this family/model, 1 otherwise.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4232
4233 #define SLM_BCLK_FREQS 5
4234 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4235
4236 double slm_bclk(void)
4237 {
4238         unsigned long long msr = 3;
4239         unsigned int i;
4240         double freq;
4241
4242         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4243                 fprintf(outf, "SLM BCLK: unknown\n");
4244
4245         i = msr & 0xf;
4246         if (i >= SLM_BCLK_FREQS) {
4247                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4248                 i = 3;
4249         }
4250         freq = slm_freq_table[i];
4251
4252         if (!quiet)
4253                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4254
4255         return freq;
4256 }
4257
/*
 * discover_bclk()
 *
 * Returns the bus clock in MHz for this family/model:
 *   100.00       when has_snb_msrs() or is_knl() accepts the model
 *   slm_bclk()   when is_slm() accepts the model
 *   133.33       otherwise
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4267
4268 /*
4269  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4270  * the Thermal Control Circuit (TCC) activates.
4271  * This is usually equal to tjMax.
4272  *
4273  * Older processors do not have this MSR, so there we guess,
4274  * but also allow cmdline over-ride with -T.
4275  *
4276  * Several MSR temperature values are in units of degrees-C
4277  * below this value, including the Digital Thermal Sensor (DTS),
4278  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4279  */
4280 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4281 {
4282         unsigned long long msr;
4283         unsigned int target_c_local;
4284         int cpu;
4285
4286         /* tcc_activation_temp is used only for dts or ptm */
4287         if (!(do_dts || do_ptm))
4288                 return 0;
4289
4290         /* this is a per-package concept */
4291         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4292                 return 0;
4293
4294         cpu = t->cpu_id;
4295         if (cpu_migrate(cpu)) {
4296                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4297                 return -1;
4298         }
4299
4300         if (tcc_activation_temp_override != 0) {
4301                 tcc_activation_temp = tcc_activation_temp_override;
4302                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4303                         cpu, tcc_activation_temp);
4304                 return 0;
4305         }
4306
4307         /* Temperature Target MSR is Nehalem and newer only */
4308         if (!do_nhm_platform_info)
4309                 goto guess;
4310
4311         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4312                 goto guess;
4313
4314         target_c_local = (msr >> 16) & 0xFF;
4315
4316         if (!quiet)
4317                 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4318                         cpu, msr, target_c_local);
4319
4320         if (!target_c_local)
4321                 goto guess;
4322
4323         tcc_activation_temp = target_c_local;
4324
4325         return 0;
4326
4327 guess:
4328         tcc_activation_temp = TJMAX_DEFAULT;
4329         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4330                 cpu, tcc_activation_temp);
4331
4332         return 0;
4333 }
4334
4335 void decode_feature_control_msr(void)
4336 {
4337         unsigned long long msr;
4338
4339         if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4340                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4341                         base_cpu, msr,
4342                         msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4343                         msr & (1 << 18) ? "SGX" : "");
4344 }
4345
4346 void decode_misc_enable_msr(void)
4347 {
4348         unsigned long long msr;
4349
4350         if (!genuine_intel)
4351                 return;
4352
4353         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4354                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4355                         base_cpu, msr,
4356                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4357                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4358                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4359                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4360                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4361 }
4362
4363 void decode_misc_feature_control(void)
4364 {
4365         unsigned long long msr;
4366
4367         if (!has_misc_feature_control)
4368                 return;
4369
4370         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4371                 fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4372                         base_cpu, msr,
4373                         msr & (0 << 0) ? "No-" : "",
4374                         msr & (1 << 0) ? "No-" : "",
4375                         msr & (2 << 0) ? "No-" : "",
4376                         msr & (3 << 0) ? "No-" : "");
4377 }
4378 /*
4379  * Decode MSR_MISC_PWR_MGMT
4380  *
4381  * Decode the bits according to the Nehalem documentation
4382  * bit[0] seems to continue to have same meaning going forward
4383  * bit[1] less so...
4384  */
4385 void decode_misc_pwr_mgmt_msr(void)
4386 {
4387         unsigned long long msr;
4388
4389         if (!do_nhm_platform_info)
4390                 return;
4391
4392         if (no_MSR_MISC_PWR_MGMT)
4393                 return;
4394
4395         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4396                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4397                         base_cpu, msr,
4398                         msr & (1 << 0) ? "DIS" : "EN",
4399                         msr & (1 << 1) ? "EN" : "DIS",
4400                         msr & (1 << 8) ? "EN" : "DIS");
4401 }
4402 /*
4403  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4404  *
4405  * This MSRs are present on Silvermont processors,
4406  * Intel Atom processor E3000 series (Baytrail), and friends.
4407  */
4408 void decode_c6_demotion_policy_msr(void)
4409 {
4410         unsigned long long msr;
4411
4412         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4413                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4414                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4415
4416         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4417                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4418                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4419 }
4420
4421 /*
4422  * When models are the same, for the purpose of turbostat, reuse
4423  */
4424 unsigned int intel_model_duplicates(unsigned int model)
4425 {
4426
4427         switch(model) {
4428         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
4429         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
4430         case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
4431         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
4432         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
4433                 return INTEL_FAM6_NEHALEM;
4434
4435         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
4436         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
4437                 return INTEL_FAM6_NEHALEM_EX;
4438
4439         case INTEL_FAM6_XEON_PHI_KNM:
4440                 return INTEL_FAM6_XEON_PHI_KNL;
4441
4442         case INTEL_FAM6_HASWELL_ULT:
4443                 return INTEL_FAM6_HASWELL_CORE;
4444
4445         case INTEL_FAM6_BROADWELL_X:
4446         case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
4447                 return INTEL_FAM6_BROADWELL_X;
4448
4449         case INTEL_FAM6_SKYLAKE_MOBILE:
4450         case INTEL_FAM6_SKYLAKE_DESKTOP:
4451         case INTEL_FAM6_KABYLAKE_MOBILE:
4452         case INTEL_FAM6_KABYLAKE_DESKTOP:
4453                 return INTEL_FAM6_SKYLAKE_MOBILE;
4454         }
4455         return model;
4456 }
4457 void process_cpuid()
4458 {
4459         unsigned int eax, ebx, ecx, edx;
4460         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4461         unsigned int has_turbo;
4462
4463         eax = ebx = ecx = edx = 0;
4464
4465         __cpuid(0, max_level, ebx, ecx, edx);
4466
4467         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
4468                 genuine_intel = 1;
4469         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
4470                 authentic_amd = 1;
4471
4472         if (!quiet)
4473                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4474                         (char *)&ebx, (char *)&edx, (char *)&ecx);
4475
4476         __cpuid(1, fms, ebx, ecx, edx);
4477         family = (fms >> 8) & 0xf;
4478         model = (fms >> 4) & 0xf;
4479         stepping = fms & 0xf;
4480         if (family == 0xf)
4481                 family += (fms >> 20) & 0xff;
4482         if (family >= 6)
4483                 model += ((fms >> 16) & 0xf) << 4;
4484         ecx_flags = ecx;
4485         edx_flags = edx;
4486
4487         /*
4488          * check max extended function levels of CPUID.
4489          * This is needed to check for invariant TSC.
4490          * This check is valid for both Intel and AMD.
4491          */
4492         ebx = ecx = edx = 0;
4493         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4494
4495         if (!quiet) {
4496                 fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4497                         max_level, max_extended_level, family, model, stepping, family, model, stepping);
4498                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4499                         ecx_flags & (1 << 0) ? "SSE3" : "-",
4500                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
4501                         ecx_flags & (1 << 6) ? "SMX" : "-",
4502                         ecx_flags & (1 << 7) ? "EIST" : "-",
4503                         ecx_flags & (1 << 8) ? "TM2" : "-",
4504                         edx_flags & (1 << 4) ? "TSC" : "-",
4505                         edx_flags & (1 << 5) ? "MSR" : "-",
4506                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
4507                         edx_flags & (1 << 28) ? "HT" : "-",
4508                         edx_flags & (1 << 29) ? "TM" : "-");
4509         }
4510         if (genuine_intel)
4511                 model = intel_model_duplicates(model);
4512
4513         if (!(edx_flags & (1 << 5)))
4514                 errx(1, "CPUID: no MSR");
4515
4516         if (max_extended_level >= 0x80000007) {
4517
4518                 /*
4519                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4520                  * this check is valid for both Intel and AMD
4521                  */
4522                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4523                 has_invariant_tsc = edx & (1 << 8);
4524         }
4525
4526         /*
4527          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4528          * this check is valid for both Intel and AMD
4529          */
4530
4531         __cpuid(0x6, eax, ebx, ecx, edx);
4532         has_aperf = ecx & (1 << 0);
4533         if (has_aperf) {
4534                 BIC_PRESENT(BIC_Avg_MHz);
4535                 BIC_PRESENT(BIC_Busy);
4536                 BIC_PRESENT(BIC_Bzy_MHz);
4537         }
4538         do_dts = eax & (1 << 0);
4539         if (do_dts)
4540                 BIC_PRESENT(BIC_CoreTmp);
4541         has_turbo = eax & (1 << 1);
4542         do_ptm = eax & (1 << 6);
4543         if (do_ptm)
4544                 BIC_PRESENT(BIC_PkgTmp);
4545         has_hwp = eax & (1 << 7);
4546         has_hwp_notify = eax & (1 << 8);
4547         has_hwp_activity_window = eax & (1 << 9);
4548         has_hwp_epp = eax & (1 << 10);
4549         has_hwp_pkg = eax & (1 << 11);
4550         has_epb = ecx & (1 << 3);
4551
4552         if (!quiet)
4553                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4554                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4555                         has_aperf ? "" : "No-",
4556                         has_turbo ? "" : "No-",
4557                         do_dts ? "" : "No-",
4558                         do_ptm ? "" : "No-",
4559                         has_hwp ? "" : "No-",
4560                         has_hwp_notify ? "" : "No-",
4561                         has_hwp_activity_window ? "" : "No-",
4562                         has_hwp_epp ? "" : "No-",
4563                         has_hwp_pkg ? "" : "No-",
4564                         has_epb ? "" : "No-");
4565
4566         if (!quiet)
4567                 decode_misc_enable_msr();
4568
4569
4570         if (max_level >= 0x7 && !quiet) {
4571                 int has_sgx;
4572
4573                 ecx = 0;
4574
4575                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4576
4577                 has_sgx = ebx & (1 << 2);
4578                 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4579
4580                 if (has_sgx)
4581                         decode_feature_control_msr();
4582         }
4583
4584         if (max_level >= 0x15) {
4585                 unsigned int eax_crystal;
4586                 unsigned int ebx_tsc;
4587
4588                 /*
4589                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4590                  */
4591                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4592                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4593
4594                 if (ebx_tsc != 0) {
4595
4596                         if (!quiet && (ebx != 0))
4597                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4598                                         eax_crystal, ebx_tsc, crystal_hz);
4599
4600                         if (crystal_hz == 0)
4601                                 switch(model) {
4602                                 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
4603                                         crystal_hz = 24000000;  /* 24.0 MHz */
4604                                         break;
4605                                 case INTEL_FAM6_ATOM_GOLDMONT_X:        /* DNV */
4606                                         crystal_hz = 25000000;  /* 25.0 MHz */
4607                                         break;
4608                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4609                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4610                                         crystal_hz = 19200000;  /* 19.2 MHz */
4611                                         break;
4612                                 default:
4613                                         crystal_hz = 0;
4614                         }
4615
4616                         if (crystal_hz) {
4617                                 tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4618                                 if (!quiet)
4619                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4620                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4621                         }
4622                 }
4623         }
4624         if (max_level >= 0x16) {
4625                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
4626
4627                 /*
4628                  * CPUID 16H Base MHz, Max MHz, Bus MHz
4629                  */
4630                 base_mhz = max_mhz = bus_mhz = edx = 0;
4631
4632                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4633                 if (!quiet)
4634                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4635                                 base_mhz, max_mhz, bus_mhz);
4636         }
4637
4638         if (has_aperf)
4639                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4640
4641         BIC_PRESENT(BIC_IRQ);
4642         BIC_PRESENT(BIC_TSC_MHz);
4643
4644         if (probe_nhm_msrs(family, model)) {
4645                 do_nhm_platform_info = 1;
4646                 BIC_PRESENT(BIC_CPU_c1);
4647                 BIC_PRESENT(BIC_CPU_c3);
4648                 BIC_PRESENT(BIC_CPU_c6);
4649                 BIC_PRESENT(BIC_SMI);
4650         }
4651         do_snb_cstates = has_snb_msrs(family, model);
4652
4653         if (do_snb_cstates)
4654                 BIC_PRESENT(BIC_CPU_c7);
4655
4656         do_irtl_snb = has_snb_msrs(family, model);
4657         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4658                 BIC_PRESENT(BIC_Pkgpc2);
4659         if (pkg_cstate_limit >= PCL__3)
4660                 BIC_PRESENT(BIC_Pkgpc3);
4661         if (pkg_cstate_limit >= PCL__6)
4662                 BIC_PRESENT(BIC_Pkgpc6);
4663         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4664                 BIC_PRESENT(BIC_Pkgpc7);
4665         if (has_slv_msrs(family, model)) {
4666                 BIC_NOT_PRESENT(BIC_Pkgpc2);
4667                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4668                 BIC_PRESENT(BIC_Pkgpc6);
4669                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4670                 BIC_PRESENT(BIC_Mod_c6);
4671                 use_c1_residency_msr = 1;
4672         }
4673         if (is_dnv(family, model)) {
4674                 BIC_PRESENT(BIC_CPU_c1);
4675                 BIC_NOT_PRESENT(BIC_CPU_c3);
4676                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4677                 BIC_NOT_PRESENT(BIC_CPU_c7);
4678                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4679                 use_c1_residency_msr = 1;
4680         }
4681         if (is_skx(family, model)) {
4682                 BIC_NOT_PRESENT(BIC_CPU_c3);
4683                 BIC_NOT_PRESENT(BIC_Pkgpc3);
4684                 BIC_NOT_PRESENT(BIC_CPU_c7);
4685                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4686         }
4687         if (is_bdx(family, model)) {
4688                 BIC_NOT_PRESENT(BIC_CPU_c7);
4689                 BIC_NOT_PRESENT(BIC_Pkgpc7);
4690         }
4691         if (has_hsw_msrs(family, model)) {
4692                 BIC_PRESENT(BIC_Pkgpc8);
4693                 BIC_PRESENT(BIC_Pkgpc9);
4694                 BIC_PRESENT(BIC_Pkgpc10);
4695         }
4696         do_irtl_hsw = has_hsw_msrs(family, model);
4697         if (has_skl_msrs(family, model)) {
4698                 BIC_PRESENT(BIC_Totl_c0);
4699                 BIC_PRESENT(BIC_Any_c0);
4700                 BIC_PRESENT(BIC_GFX_c0);
4701                 BIC_PRESENT(BIC_CPUGFX);
4702         }
4703         do_slm_cstates = is_slm(family, model);
4704         do_knl_cstates  = is_knl(family, model);
4705         do_cnl_cstates = is_cnl(family, model);
4706
4707         if (!quiet)
4708                 decode_misc_pwr_mgmt_msr();
4709
4710         if (!quiet && has_slv_msrs(family, model))
4711                 decode_c6_demotion_policy_msr();
4712
4713         rapl_probe(family, model);
4714         perf_limit_reasons_probe(family, model);
4715         automatic_cstate_conversion_probe(family, model);
4716
4717         if (!quiet)
4718                 dump_cstate_pstate_config_info(family, model);
4719
4720         if (!quiet)
4721                 dump_sysfs_cstate_config();
4722         if (!quiet)
4723                 dump_sysfs_pstate_config();
4724
4725         if (has_skl_msrs(family, model))
4726                 calculate_tsc_tweak();
4727
4728         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4729                 BIC_PRESENT(BIC_GFX_rc6);
4730
4731         if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4732                 BIC_PRESENT(BIC_GFXMHz);
4733
4734         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4735                 BIC_PRESENT(BIC_CPU_LPI);
4736         else
4737                 BIC_NOT_PRESENT(BIC_CPU_LPI);
4738
4739         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4740                 BIC_PRESENT(BIC_SYS_LPI);
4741         else
4742                 BIC_NOT_PRESENT(BIC_SYS_LPI);
4743
4744         if (!quiet)
4745                 decode_misc_feature_control();
4746
4747         return;
4748 }
4749
/*
 * dir_filter()
 *
 * Directory-entry filter for /dev/cpu/: return success for names that
 * start with a decimal digit, ie. filter out ".", "..", "microcode".
 *
 * Returns 1 to keep the entry, 0 to drop it.
 */
int dir_filter(const struct dirent *dirp)
{
        /*
         * <ctype.h> classifiers are only defined for values representable
         * as unsigned char (or EOF); plain char may be negative for
         * non-ASCII names, so convert before the call.
         */
        return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4761
/*
 * open_dev_cpu_msr()
 *
 * Stub: the argument is ignored and success (0) is always reported.
 */
int open_dev_cpu_msr(int dummy1)
{
        (void)dummy1;   /* intentionally unused */

        return 0;
}
4766
/*
 * topology_probe()
 *
 * Populate the global topology state:
 *  - count CPUs into topo.num_cpus (after set_max_cpu_num()),
 *  - allocate cpus[] plus cpu_present_set and cpu_affinity_set,
 *  - check that every cpu in cpu_subset is also in cpu_present_set,
 *  - for each present cpu record package, node, core and thread ids and
 *    derive topo.cores_per_node, topo.num_packages, topo.num_cores,
 *    topo.max_node_num and topo.threads_per_core.
 *
 * The BIC_CPU/BIC_Core/BIC_Package/BIC_Node columns are marked present when
 * summary_only is not set and the corresponding count exceeds one.
 *
 * Terminates through err() when an allocation fails or when a cpu in
 * cpu_subset is not present.  With debug set, the per-cpu topology is
 * printed to outf before returning.
 */
void topology_probe()
{
        int i;
        int max_core_id = 0;
        int max_package_id = 0;
        int max_siblings = 0;

        /* Initialize num_cpus, max_cpu_num */
        set_max_cpu_num();
        topo.num_cpus = 0;
        for_all_proc_cpus(count_cpus);
        if (!summary_only && topo.num_cpus > 1)
                BIC_PRESENT(BIC_CPU);

        if (debug > 1)
                fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

        /* one zeroed cpu_topology entry per possible cpu number, 0..max_cpu_num */
        cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
        if (cpus == NULL)
                err(1, "calloc cpus");

        /*
         * Allocate and initialize cpu_present_set
         */
        cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
        if (cpu_present_set == NULL)
                err(3, "CPU_ALLOC");
        cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
        CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
        for_all_proc_cpus(mark_cpu_present);

        /*
         * Validate that all cpus in cpu_subset are also in cpu_present_set
         */
        for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
                if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
                        if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
                                err(1, "cpu%d not present", i);
        }

        /*
         * Allocate and initialize cpu_affinity_set
         */
        cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
        if (cpu_affinity_set == NULL)
                err(3, "CPU_ALLOC");
        cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

        for_all_proc_cpus(init_thread_id);

        /*
         * For online cpus
         * find max_core_id, max_package_id
         */
        for (i = 0; i <= topo.max_cpu_num; ++i) {
                int siblings;

                if (cpu_is_not_present(i)) {
                        if (debug > 1)
                                fprintf(outf, "cpu%d NOT PRESENT\n", i);
                        continue;
                }

                cpus[i].logical_cpu_id = i;

                /* get package information */
                cpus[i].physical_package_id = get_physical_package_id(i);
                if (cpus[i].physical_package_id > max_package_id)
                        max_package_id = cpus[i].physical_package_id;

                /* get numa node information */
                cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
                if (cpus[i].physical_node_id > topo.max_node_num)
                        topo.max_node_num = cpus[i].physical_node_id;

                /* get core information */
                cpus[i].physical_core_id = get_core_id(i);
                if (cpus[i].physical_core_id > max_core_id)
                        max_core_id = cpus[i].physical_core_id;

                /* get thread information */
                siblings = get_thread_siblings(&cpus[i]);
                if (siblings > max_siblings)
                        max_siblings = siblings;
                /* each cpu whose thread_id is 0 counts as one core */
                if (cpus[i].thread_id == 0)
                        topo.num_cores++;
        }

        /* sized from the largest core id seen, not from a count of cores */
        topo.cores_per_node = max_core_id + 1;
        /* NOTE(review): message says "per package" but the value is topo.cores_per_node */
        if (debug > 1)
                fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
                        max_core_id, topo.cores_per_node);
        if (!summary_only && topo.cores_per_node > 1)
                BIC_PRESENT(BIC_Core);

        /* likewise sized from the largest package id seen */
        topo.num_packages = max_package_id + 1;
        if (debug > 1)
                fprintf(outf, "max_package_id %d, sizing for %d packages\n",
                        max_package_id, topo.num_packages);
        if (!summary_only && topo.num_packages > 1)
                BIC_PRESENT(BIC_Package);

        /* topo.nodes_per_pkg is read right after this call; presumably set_node_data() fills it in -- confirm */
        set_node_data();
        if (debug > 1)
                fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
        if (!summary_only && topo.nodes_per_pkg > 1)
                BIC_PRESENT(BIC_Node);

        topo.threads_per_core = max_siblings;
        if (debug > 1)
                fprintf(outf, "max_siblings %d\n", max_siblings);

        /* everything below is debug output only */
        if (debug < 1)
                return;

        for (i = 0; i <= topo.max_cpu_num; ++i) {
                if (cpu_is_not_present(i))
                        continue;
                fprintf(outf,
                        "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
                        i, cpus[i].physical_package_id,
                        cpus[i].physical_node_id,
                        cpus[i].logical_node_id,
                        cpus[i].physical_core_id,
                        cpus[i].thread_id);
        }

}
4896
4897 void
4898 allocate_counters(struct thread_data **t, struct core_data **c,
4899                   struct pkg_data **p)
4900 {
4901         int i;
4902         int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
4903                         topo.num_packages;
4904         int num_threads = topo.threads_per_core * num_cores;
4905
4906         *t = calloc(num_threads, sizeof(struct thread_data));
4907         if (*t == NULL)
4908                 goto error;
4909
4910         for (i = 0; i < num_threads; i++)
4911                 (*t)[i].cpu_id = -1;
4912
4913         *c = calloc(num_cores, sizeof(struct core_data));
4914         if (*c == NULL)
4915                 goto error;
4916
4917         for (i = 0; i < num_cores; i++)
4918                 (*c)[i].core_id = -1;
4919
4920         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
4921         if (*p == NULL)
4922                 goto error;
4923
4924         for (i = 0; i < topo.num_packages; i++)
4925                 (*p)[i].package_id = i;
4926
4927         return;
4928 error:
4929         err(1, "calloc counters");
4930 }
4931 /*
4932  * init_counter()
4933  *
4934  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4935  */
4936 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4937         struct pkg_data *pkg_base, int cpu_id)
4938 {
4939         int pkg_id = cpus[cpu_id].physical_package_id;
4940         int node_id = cpus[cpu_id].logical_node_id;
4941         int core_id = cpus[cpu_id].physical_core_id;
4942         int thread_id = cpus[cpu_id].thread_id;
4943         struct thread_data *t;
4944         struct core_data *c;
4945         struct pkg_data *p;
4946
4947
4948         /* Workaround for systems where physical_node_id==-1
4949          * and logical_node_id==(-1 - topo.num_cpus)
4950          */
4951         if (node_id < 0)
4952                 node_id = 0;
4953
4954         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4955         c = GET_CORE(core_base, core_id, node_id, pkg_id);
4956         p = GET_PKG(pkg_base, pkg_id);
4957
4958         t->cpu_id = cpu_id;
4959         if (thread_id == 0) {
4960                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4961                 if (cpu_is_first_core_in_package(cpu_id))
4962                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4963         }
4964
4965         c->core_id = core_id;
4966         p->package_id = pkg_id;
4967 }
4968
4969
/*
 * initialize_counters()
 *
 * Run init_counter() for cpu_id on both the EVEN_COUNTERS and ODD_COUNTERS
 * sets.  Always returns 0.
 */
int initialize_counters(int cpu_id)
{
        init_counter(EVEN_COUNTERS, cpu_id);
        init_counter(ODD_COUNTERS, cpu_id);
        return 0;
}
4976
4977 void allocate_output_buffer()
4978 {
4979         output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4980         outp = output_buffer;
4981         if (outp == NULL)
4982                 err(-1, "calloc output buffer");
4983 }
4984 void allocate_fd_percpu(void)
4985 {
4986         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4987         if (fd_percpu == NULL)
4988                 err(-1, "calloc fd_percpu");
4989 }
4990 void allocate_irq_buffers(void)
4991 {
4992         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4993         if (irq_column_2_cpu == NULL)
4994                 err(-1, "calloc %d", topo.num_cpus);
4995
4996         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4997         if (irqs_per_cpu == NULL)
4998                 err(-1, "calloc %d", topo.max_cpu_num + 1);
4999 }
/*
 * setup_all_buffers()
 *
 * Probe the topology, then allocate the irq buffers, the per-cpu fd array,
 * both (even and odd) counter sets and the output buffer, and finally run
 * initialize_counters() across the cpus via for_all_proc_cpus().
 *
 * topology_probe() runs first because the allocators below size their
 * arrays from the topo fields it fills in.
 */
void setup_all_buffers(void)
{
        topology_probe();
        allocate_irq_buffers();
        allocate_fd_percpu();
        allocate_counters(&thread_even, &core_even, &package_even);
        allocate_counters(&thread_odd, &core_odd, &package_odd);
        allocate_output_buffer();
        for_all_proc_cpus(initialize_counters);
}
5010
5011 void set_base_cpu(void)
5012 {
5013         base_cpu = sched_getcpu();
5014         if (base_cpu < 0)
5015                 err(-ENODEV, "No valid cpus found");
5016
5017         if (debug > 1)
5018                 fprintf(outf, "base_cpu = %d\n", base_cpu);
5019 }
5020
5021 void turbostat_init()
5022 {
5023         setup_all_buffers();
5024         set_base_cpu();
5025         check_dev_msr();
5026         check_permissions();
5027         process_cpuid();
5028
5029
5030         if (!quiet)
5031                 for_all_cpus(print_hwp, ODD_COUNTERS);
5032
5033         if (!quiet)
5034                 for_all_cpus(print_epb, ODD_COUNTERS);
5035
5036         if (!quiet)
5037                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
5038
5039         if (!quiet)
5040                 for_all_cpus(print_rapl, ODD_COUNTERS);
5041
5042         for_all_cpus(set_temperature_target, ODD_COUNTERS);
5043
5044         if (!quiet)
5045                 for_all_cpus(print_thermal, ODD_COUNTERS);
5046
5047         if (!quiet && do_irtl_snb)
5048                 print_irtl();
5049 }
5050
5051 int fork_it(char **argv)
5052 {
5053         pid_t child_pid;
5054         int status;
5055
5056         snapshot_proc_sysfs_files();
5057         status = for_all_cpus(get_counters, EVEN_COUNTERS);
5058         first_counter_read = 0;
5059         if (status)
5060                 exit(status);
5061         /* clear affinity side-effect of get_counters() */
5062         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5063         gettimeofday(&tv_even, (struct timezone *)NULL);
5064
5065         child_pid = fork();
5066         if (!child_pid) {
5067                 /* child */
5068                 execvp(argv[0], argv);
5069                 err(errno, "exec %s", argv[0]);
5070         } else {
5071
5072                 /* parent */
5073                 if (child_pid == -1)
5074                         err(1, "fork");
5075
5076                 signal(SIGINT, SIG_IGN);
5077                 signal(SIGQUIT, SIG_IGN);
5078                 if (waitpid(child_pid, &status, 0) == -1)
5079                         err(status, "waitpid");
5080         }
5081         /*
5082          * n.b. fork_it() does not check for errors from for_all_cpus()
5083          * because re-starting is problematic when forking
5084          */
5085         snapshot_proc_sysfs_files();
5086         for_all_cpus(get_counters, ODD_COUNTERS);
5087         gettimeofday(&tv_odd, (struct timezone *)NULL);
5088         timersub(&tv_odd, &tv_even, &tv_delta);
5089         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5090                 fprintf(outf, "%s: Counter reset detected\n", progname);
5091         else {
5092                 compute_average(EVEN_COUNTERS);
5093                 format_all_counters(EVEN_COUNTERS);
5094         }
5095
5096         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5097
5098         flush_output_stderr();
5099
5100         return status;
5101 }
5102
5103 int get_and_dump_counters(void)
5104 {
5105         int status;
5106
5107         snapshot_proc_sysfs_files();
5108         status = for_all_cpus(get_counters, ODD_COUNTERS);
5109         if (status)
5110                 return status;
5111
5112         status = for_all_cpus(dump_counters, ODD_COUNTERS);
5113         if (status)
5114                 return status;
5115
5116         flush_output_stdout();
5117
5118         return status;
5119 }
5120
5121 void print_version() {
5122         fprintf(outf, "turbostat version 18.07.27"
5123                 " - Len Brown <lenb@kernel.org>\n");
5124 }
5125
5126 int add_counter(unsigned int msr_num, char *path, char *name,
5127         unsigned int width, enum counter_scope scope,
5128         enum counter_type type, enum counter_format format, int flags)
5129 {
5130         struct msr_counter *msrp;
5131
5132         msrp = calloc(1, sizeof(struct msr_counter));
5133         if (msrp == NULL) {
5134                 perror("calloc");
5135                 exit(1);
5136         }
5137
5138         msrp->msr_num = msr_num;
5139         strncpy(msrp->name, name, NAME_BYTES);
5140         if (path)
5141                 strncpy(msrp->path, path, PATH_BYTES);
5142         msrp->width = width;
5143         msrp->type = type;
5144         msrp->format = format;
5145         msrp->flags = flags;
5146
5147         switch (scope) {
5148
5149         case SCOPE_CPU:
5150                 msrp->next = sys.tp;
5151                 sys.tp = msrp;
5152                 sys.added_thread_counters++;
5153                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5154                         fprintf(stderr, "exceeded max %d added thread counters\n",
5155                                 MAX_ADDED_COUNTERS);
5156                         exit(-1);
5157                 }
5158                 break;
5159
5160         case SCOPE_CORE:
5161                 msrp->next = sys.cp;
5162                 sys.cp = msrp;
5163                 sys.added_core_counters++;
5164                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5165                         fprintf(stderr, "exceeded max %d added core counters\n",
5166                                 MAX_ADDED_COUNTERS);
5167                         exit(-1);
5168                 }
5169                 break;
5170
5171         case SCOPE_PACKAGE:
5172                 msrp->next = sys.pp;
5173                 sys.pp = msrp;
5174                 sys.added_package_counters++;
5175                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5176                         fprintf(stderr, "exceeded max %d added package counters\n",
5177                                 MAX_ADDED_COUNTERS);
5178                         exit(-1);
5179                 }
5180                 break;
5181         }
5182
5183         return 0;
5184 }
5185
5186 void parse_add_command(char *add_command)
5187 {
5188         int msr_num = 0;
5189         char *path = NULL;
5190         char name_buffer[NAME_BYTES] = "";
5191         int width = 64;
5192         int fail = 0;
5193         enum counter_scope scope = SCOPE_CPU;
5194         enum counter_type type = COUNTER_CYCLES;
5195         enum counter_format format = FORMAT_DELTA;
5196
5197         while (add_command) {
5198
5199                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5200                         goto next;
5201
5202                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
5203                         goto next;
5204
5205                 if (*add_command == '/') {
5206                         path = add_command;
5207                         goto next;
5208                 }
5209
5210                 if (sscanf(add_command, "u%d", &width) == 1) {
5211                         if ((width == 32) || (width == 64))
5212                                 goto next;
5213                         width = 64;
5214                 }
5215                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5216                         scope = SCOPE_CPU;
5217                         goto next;
5218                 }
5219                 if (!strncmp(add_command, "core", strlen("core"))) {
5220                         scope = SCOPE_CORE;
5221                         goto next;
5222                 }
5223                 if (!strncmp(add_command, "package", strlen("package"))) {
5224                         scope = SCOPE_PACKAGE;
5225                         goto next;
5226                 }
5227                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5228                         type = COUNTER_CYCLES;
5229                         goto next;
5230                 }
5231                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5232                         type = COUNTER_SECONDS;
5233                         goto next;
5234                 }
5235                 if (!strncmp(add_command, "usec", strlen("usec"))) {
5236                         type = COUNTER_USEC;
5237                         goto next;
5238                 }
5239                 if (!strncmp(add_command, "raw", strlen("raw"))) {
5240                         format = FORMAT_RAW;
5241                         goto next;
5242                 }
5243                 if (!strncmp(add_command, "delta", strlen("delta"))) {
5244                         format = FORMAT_DELTA;
5245                         goto next;
5246                 }
5247                 if (!strncmp(add_command, "percent", strlen("percent"))) {
5248                         format = FORMAT_PERCENT;
5249                         goto next;
5250                 }
5251
5252                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
5253                         char *eos;
5254
5255                         eos = strchr(name_buffer, ',');
5256                         if (eos)
5257                                 *eos = '\0';
5258                         goto next;
5259                 }
5260
5261 next:
5262                 add_command = strchr(add_command, ',');
5263                 if (add_command) {
5264                         *add_command = '\0';
5265                         add_command++;
5266                 }
5267
5268         }
5269         if ((msr_num == 0) && (path == NULL)) {
5270                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5271                 fail++;
5272         }
5273
5274         /* generate default column header */
5275         if (*name_buffer == '\0') {
5276                 if (width == 32)
5277                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5278                 else
5279                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5280         }
5281
5282         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5283                 fail++;
5284
5285         if (fail) {
5286                 help();
5287                 exit(1);
5288         }
5289 }
5290
5291 int is_deferred_skip(char *name)
5292 {
5293         int i;
5294
5295         for (i = 0; i < deferred_skip_index; ++i)
5296                 if (!strcmp(name, deferred_skip_names[i]))
5297                         return 1;
5298         return 0;
5299 }
5300
5301 void probe_sysfs(void)
5302 {
5303         char path[64];
5304         char name_buf[16];
5305         FILE *input;
5306         int state;
5307         char *sp;
5308
5309         if (!DO_BIC(BIC_sysfs))
5310                 return;
5311
5312         for (state = 10; state >= 0; --state) {
5313
5314                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5315                         base_cpu, state);
5316                 input = fopen(path, "r");
5317                 if (input == NULL)
5318                         continue;
5319                 fgets(name_buf, sizeof(name_buf), input);
5320
5321                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5322                 sp = strchr(name_buf, '-');
5323                 if (!sp)
5324                         sp = strchrnul(name_buf, '\n');
5325                 *sp = '%';
5326                 *(sp + 1) = '\0';
5327
5328                 fclose(input);
5329
5330                 sprintf(path, "cpuidle/state%d/time", state);
5331
5332                 if (is_deferred_skip(name_buf))
5333                         continue;
5334
5335                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5336                                 FORMAT_PERCENT, SYSFS_PERCPU);
5337         }
5338
5339         for (state = 10; state >= 0; --state) {
5340
5341                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5342                         base_cpu, state);
5343                 input = fopen(path, "r");
5344                 if (input == NULL)
5345                         continue;
5346                 fgets(name_buf, sizeof(name_buf), input);
5347                  /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5348                 sp = strchr(name_buf, '-');
5349                 if (!sp)
5350                         sp = strchrnul(name_buf, '\n');
5351                 *sp = '\0';
5352                 fclose(input);
5353
5354                 sprintf(path, "cpuidle/state%d/usage", state);
5355
5356                 if (is_deferred_skip(name_buf))
5357                         continue;
5358
5359                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5360                                 FORMAT_DELTA, SYSFS_PERCPU);
5361         }
5362
5363 }
5364
5365
5366 /*
5367  * parse cpuset with following syntax
5368  * 1,2,4..6,8-10 and set bits in cpu_subset
5369  */
5370 void parse_cpu_command(char *optarg)
5371 {
5372         unsigned int start, end;
5373         char *next;
5374
5375         if (!strcmp(optarg, "core")) {
5376                 if (cpu_subset)
5377                         goto error;
5378                 show_core_only++;
5379                 return;
5380         }
5381         if (!strcmp(optarg, "package")) {
5382                 if (cpu_subset)
5383                         goto error;
5384                 show_pkg_only++;
5385                 return;
5386         }
5387         if (show_core_only || show_pkg_only)
5388                 goto error;
5389
5390         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5391         if (cpu_subset == NULL)
5392                 err(3, "CPU_ALLOC");
5393         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5394
5395         CPU_ZERO_S(cpu_subset_size, cpu_subset);
5396
5397         next = optarg;
5398
5399         while (next && *next) {
5400
5401                 if (*next == '-')       /* no negative cpu numbers */
5402                         goto error;
5403
5404                 start = strtoul(next, &next, 10);
5405
5406                 if (start >= CPU_SUBSET_MAXCPUS)
5407                         goto error;
5408                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
5409
5410                 if (*next == '\0')
5411                         break;
5412
5413                 if (*next == ',') {
5414                         next += 1;
5415                         continue;
5416                 }
5417
5418                 if (*next == '-') {
5419                         next += 1;      /* start range */
5420                 } else if (*next == '.') {
5421                         next += 1;
5422                         if (*next == '.')
5423                                 next += 1;      /* start range */
5424                         else
5425                                 goto error;
5426                 }
5427
5428                 end = strtoul(next, &next, 10);
5429                 if (end <= start)
5430                         goto error;
5431
5432                 while (++start <= end) {
5433                         if (start >= CPU_SUBSET_MAXCPUS)
5434                                 goto error;
5435                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
5436                 }
5437
5438                 if (*next == ',')
5439                         next += 1;
5440                 else if (*next != '\0')
5441                         goto error;
5442         }
5443
5444         return;
5445
5446 error:
5447         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5448         help();
5449         exit(-1);
5450 }
5451
5452
5453 void cmdline(int argc, char **argv)
5454 {
5455         int opt;
5456         int option_index = 0;
5457         static struct option long_options[] = {
5458                 {"add",         required_argument,      0, 'a'},
5459                 {"cpu",         required_argument,      0, 'c'},
5460                 {"Dump",        no_argument,            0, 'D'},
5461                 {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
5462                 {"enable",      required_argument,      0, 'e'},
5463                 {"interval",    required_argument,      0, 'i'},
5464                 {"num_iterations",      required_argument,      0, 'n'},
5465                 {"help",        no_argument,            0, 'h'},
5466                 {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
5467                 {"Joules",      no_argument,            0, 'J'},
5468                 {"list",        no_argument,            0, 'l'},
5469                 {"out",         required_argument,      0, 'o'},
5470                 {"quiet",       no_argument,            0, 'q'},
5471                 {"show",        required_argument,      0, 's'},
5472                 {"Summary",     no_argument,            0, 'S'},
5473                 {"TCC",         required_argument,      0, 'T'},
5474                 {"version",     no_argument,            0, 'v' },
5475                 {0,             0,                      0,  0 }
5476         };
5477
5478         progname = argv[0];
5479
5480         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5481                                 long_options, &option_index)) != -1) {
5482                 switch (opt) {
5483                 case 'a':
5484                         parse_add_command(optarg);
5485                         break;
5486                 case 'c':
5487                         parse_cpu_command(optarg);
5488                         break;
5489                 case 'D':
5490                         dump_only++;
5491                         break;
5492                 case 'e':
5493                         /* --enable specified counter */
5494                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5495                         break;
5496                 case 'd':
5497                         debug++;
5498                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5499                         break;
5500                 case 'H':
5501                         /*
5502                          * --hide: do not show those specified
5503                          *  multiple invocations simply clear more bits in enabled mask
5504                          */
5505                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5506                         break;
5507                 case 'h':
5508                 default:
5509                         help();
5510                         exit(1);
5511                 case 'i':
5512                         {
5513                                 double interval = strtod(optarg, NULL);
5514
5515                                 if (interval < 0.001) {
5516                                         fprintf(outf, "interval %f seconds is too small\n",
5517                                                 interval);
5518                                         exit(2);
5519                                 }
5520
5521                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5522                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5523                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5524                         }
5525                         break;
5526                 case 'J':
5527                         rapl_joules++;
5528                         break;
5529                 case 'l':
5530                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5531                         list_header_only++;
5532                         quiet++;
5533                         break;
5534                 case 'o':
5535                         outf = fopen_or_die(optarg, "w");
5536                         break;
5537                 case 'q':
5538                         quiet = 1;
5539                         break;
5540                 case 'n':
5541                         num_iterations = strtod(optarg, NULL);
5542
5543                         if (num_iterations <= 0) {
5544                                 fprintf(outf, "iterations %d should be positive number\n",
5545                                         num_iterations);
5546                                 exit(2);
5547                         }
5548                         break;
5549                 case 's':
5550                         /*
5551                          * --show: show only those specified
5552                          *  The 1st invocation will clear and replace the enabled mask
5553                          *  subsequent invocations can add to it.
5554                          */
5555                         if (shown == 0)
5556                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5557                         else
5558                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5559                         shown = 1;
5560                         break;
5561                 case 'S':
5562                         summary_only++;
5563                         break;
5564                 case 'T':
5565                         tcc_activation_temp_override = atoi(optarg);
5566                         break;
5567                 case 'v':
5568                         print_version();
5569                         exit(0);
5570                         break;
5571                 }
5572         }
5573 }
5574
5575 int main(int argc, char **argv)
5576 {
5577         outf = stderr;
5578         cmdline(argc, argv);
5579
5580         if (!quiet)
5581                 print_version();
5582
5583         probe_sysfs();
5584
5585         turbostat_init();
5586
5587         /* dump counters and exit */
5588         if (dump_only)
5589                 return get_and_dump_counters();
5590
5591         /* list header and exit */
5592         if (list_header_only) {
5593                 print_header(",");
5594                 flush_output_stdout();
5595                 return 0;
5596         }
5597
5598         /*
5599          * if any params left, it must be a command to fork
5600          */
5601         if (argc - optind)
5602                 return fork_it(argv + optind);
5603         else
5604                 turbostat_loop();
5605
5606         return 0;
5607 }