tools/power turbostat: initial Gemini Lake SOC support
[jlayton/linux.git] / tools / power / x86 / turbostat / turbostat.c
index 12a6eea2d6ae60090b094365e730e20cf646536c..819d67fbb6ca7571eef62b4d03d8541814734723 100644 (file)
@@ -21,6 +21,7 @@
 
 #define _GNU_SOURCE
 #include MSRHEADER
+#include INTEL_FAMILY_HEADER
 #include <stdarg.h>
 #include <stdio.h>
 #include <err.h>
@@ -48,17 +49,12 @@ FILE *outf;
 int *fd_percpu;
 struct timespec interval_ts = {5, 0};
 unsigned int debug;
+unsigned int quiet;
 unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int dump_only;
-unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
-unsigned int do_pc2;
-unsigned int do_pc3;
-unsigned int do_pc6;
-unsigned int do_pc7;
-unsigned int do_c8_c9_c10;
 unsigned int do_skl_residency;
 unsigned int do_slm_cstates;
 unsigned int use_c1_residency_msr;
@@ -70,29 +66,19 @@ unsigned int units = 1000000;       /* MHz etc */
 unsigned int genuine_intel;
 unsigned int has_invariant_tsc;
 unsigned int do_nhm_platform_info;
-unsigned int extra_msr_offset32;
-unsigned int extra_msr_offset64;
-unsigned int extra_delta_offset32;
-unsigned int extra_delta_offset64;
+unsigned int no_MSR_MISC_PWR_MGMT;
 unsigned int aperf_mperf_multiplier = 1;
-int do_irq = 1;
-int do_smi;
 double bclk;
 double base_hz;
 unsigned int has_base_hz;
 double tsc_tweak = 1.0;
-unsigned int show_pkg;
-unsigned int show_core;
-unsigned int show_cpu;
 unsigned int show_pkg_only;
 unsigned int show_core_only;
 char *output_buffer, *outp;
 unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
-unsigned int do_gfx_rc6_ms;
 unsigned long long  gfx_cur_rc6_ms;
-unsigned int do_gfx_mhz;
 unsigned int gfx_cur_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
@@ -112,6 +98,7 @@ unsigned int has_hwp_notify;         /* IA32_HWP_INTERRUPT */
 unsigned int has_hwp_activity_window;  /* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;              /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;              /* IA32_HWP_REQUEST_PKG */
+unsigned int has_misc_feature_control;
 
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -129,9 +116,8 @@ unsigned int has_hwp_pkg;           /* IA32_HWP_REQUEST_PKG */
 #define RAPL_DRAM_POWER_INFO   (1 << 5)
                                        /* 0x61c MSR_DRAM_POWER_INFO */
 
-#define RAPL_CORES             (1 << 6)
+#define RAPL_CORES_POWER_LIMIT (1 << 6)
                                        /* 0x638 MSR_PP0_POWER_LIMIT */
-                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
 #define RAPL_CORE_POLICY       (1 << 7)
                                        /* 0x63a MSR_PP0_POLICY */
 
@@ -139,39 +125,49 @@ unsigned int has_hwp_pkg;         /* IA32_HWP_REQUEST_PKG */
                                        /* 0x640 MSR_PP1_POWER_LIMIT */
                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
                                        /* 0x642 MSR_PP1_POLICY */
+
+#define RAPL_CORES_ENERGY_STATUS       (1 << 9)
+                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
+#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
 #define        TJMAX_DEFAULT   100
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
+ */
+#define        NAME_BYTES 20
+
 int backwards_count;
 char *progname;
 
 cpu_set_t *cpu_present_set, *cpu_affinity_set;
 size_t cpu_present_setsize, cpu_affinity_setsize;
+#define MAX_ADDED_COUNTERS 16
 
 struct thread_data {
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
-       unsigned long long extra_msr64;
-       unsigned long long extra_delta64;
-       unsigned long long extra_msr32;
-       unsigned long long extra_delta32;
        unsigned int irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;
        unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
+       unsigned long long counter[MAX_ADDED_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
+       unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
        unsigned int core_id;
+       unsigned long long counter[MAX_ADDED_COUNTERS];
 } *core_even, *core_odd;
 
 struct pkg_data {
@@ -196,7 +192,7 @@ struct pkg_data {
        unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
        unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
-
+       unsigned long long counter[MAX_ADDED_COUNTERS];
 } *package_even, *package_odd;
 
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -210,11 +206,36 @@ struct pkg_data {
        (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
+/* NOTE(review): presumably selects the per-thread, per-core or per-package counter list -- confirm at the point of use */
+enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
+/* NOTE(review): presumably the unit of the counted quantity; not consumed in the code visible here */
+enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS};
+/*
+ * How an added counter is rendered by format_counters():
+ * FORMAT_RAW as hex, FORMAT_DELTA as a decimal count,
+ * FORMAT_PERCENT as 100 * counter / tsc.
+ */
+enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
+
+/*
+ * Describes one counter column.  Used both for the built-in name table
+ * bic[] and for the linked lists of added counters in struct sys_counters.
+ * Field order matters: bic[] initializes entries positionally.
+ */
+struct msr_counter {
+       unsigned int msr_num;   /* printed as "msr0x%x" by dump_counters() */
+       char name[NAME_BYTES];  /* column header text */
+       unsigned int width;     /* compared against 32/64 to size FORMAT_RAW columns */
+       enum counter_type type;
+       enum counter_format format;     /* selects how the value is printed */
+       struct msr_counter *next;       /* next list entry; list walks stop at NULL */
+       unsigned int flags;     /* FLAGS_* bits below; NOTE(review): not read in the code visible here */
+#define        FLAGS_HIDE      (1 << 0)
+#define        FLAGS_SHOW      (1 << 1)
+};
+
+/*
+ * Registry of added counters.
+ * tp, cp and pp head the thread, core and package lists; the i-th entry
+ * of each list is printed from counter[i] of struct thread_data,
+ * struct core_data and struct pkg_data respectively.
+ * NOTE(review): the added_*_counters fields presumably hold the list
+ * lengths (bounded by MAX_ADDED_COUNTERS) -- confirm where they are updated.
+ */
+struct sys_counters {
+       unsigned int added_thread_counters;
+       unsigned int added_core_counters;
+       unsigned int added_package_counters;
+       struct msr_counter *tp; /* per-thread added counters */
+       struct msr_counter *cp; /* per-core added counters */
+       struct msr_counter *pp; /* per-package added counters */
+} sys;
+
 struct system_summary {
        struct thread_data threads;
        struct core_data cores;
        struct pkg_data packages;
-} sum, average;
+} average;
 
 
 struct topo_params {
@@ -308,69 +329,209 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 
        if (retval != sizeof *msr)
-               err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
+               err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
 
        return 0;
 }
 
 /*
- * Example Format w/ field column widths:
- *
- *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
+ * Each string in this array is compared in --show and --hide cmdline.
+ * Thus, strings that are proper sub-sets must follow their more specific peers.
+ */
+struct msr_counter bic[] = {
+       /*
+        * Entry N of this table corresponds to bit (1ULL << N):
+        * bic_lookup() builds its mask from the table index, so the order
+        * here must stay in step with the BIC_* bit defines.
+        */
+       { 0x0, "Package" },
+       { 0x0, "Avg_MHz" },
+       { 0x0, "Bzy_MHz" },
+       { 0x0, "TSC_MHz" },
+       { 0x0, "IRQ" },
+       { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL},       /* width, type, format, next given positionally */
+       { 0x0, "Busy%" },
+       { 0x0, "CPU%c1" },
+       { 0x0, "CPU%c3" },
+       { 0x0, "CPU%c6" },
+       { 0x0, "CPU%c7" },
+       { 0x0, "ThreadC" },
+       { 0x0, "CoreTmp" },
+       { 0x0, "CoreCnt" },
+       { 0x0, "PkgTmp" },
+       { 0x0, "GFX%rc6" },
+       { 0x0, "GFXMHz" },
+       { 0x0, "Pkg%pc2" },
+       { 0x0, "Pkg%pc3" },
+       { 0x0, "Pkg%pc6" },
+       { 0x0, "Pkg%pc7" },
+       { 0x0, "Pkg%pc8" },
+       { 0x0, "Pkg%pc9" },
+       { 0x0, "Pkg%pc10" },
+       { 0x0, "PkgWatt" },
+       { 0x0, "CorWatt" },
+       { 0x0, "GFXWatt" },
+       { 0x0, "PkgCnt" },
+       { 0x0, "RAMWatt" },
+       { 0x0, "PKG_%" },
+       { 0x0, "RAM_%" },
+       { 0x0, "Pkg_J" },
+       { 0x0, "Cor_J" },
+       { 0x0, "GFX_J" },
+       { 0x0, "RAM_J" },
+       { 0x0, "Core" },
+       { 0x0, "CPU" },
+       { 0x0, "Mod%c6" },
+};
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define        BIC_Package     (1ULL << 0)
+#define        BIC_Avg_MHz     (1ULL << 1)
+#define        BIC_Bzy_MHz     (1ULL << 2)
+#define        BIC_TSC_MHz     (1ULL << 3)
+#define        BIC_IRQ         (1ULL << 4)
+#define        BIC_SMI         (1ULL << 5)
+#define        BIC_Busy        (1ULL << 6)
+#define        BIC_CPU_c1      (1ULL << 7)
+#define        BIC_CPU_c3      (1ULL << 8)
+#define        BIC_CPU_c6      (1ULL << 9)
+#define        BIC_CPU_c7      (1ULL << 10)
+#define        BIC_ThreadC     (1ULL << 11)
+#define        BIC_CoreTmp     (1ULL << 12)
+#define        BIC_CoreCnt     (1ULL << 13)
+#define        BIC_PkgTmp      (1ULL << 14)
+#define        BIC_GFX_rc6     (1ULL << 15)
+#define        BIC_GFXMHz      (1ULL << 16)
+#define        BIC_Pkgpc2      (1ULL << 17)
+#define        BIC_Pkgpc3      (1ULL << 18)
+#define        BIC_Pkgpc6      (1ULL << 19)
+#define        BIC_Pkgpc7      (1ULL << 20)
+#define        BIC_Pkgpc8      (1ULL << 21)
+#define        BIC_Pkgpc9      (1ULL << 22)
+#define        BIC_Pkgpc10     (1ULL << 23)
+#define        BIC_PkgWatt     (1ULL << 24)
+#define        BIC_CorWatt     (1ULL << 25)
+#define        BIC_GFXWatt     (1ULL << 26)
+#define        BIC_PkgCnt      (1ULL << 27)
+#define        BIC_RAMWatt     (1ULL << 28)
+#define        BIC_PKG__       (1ULL << 29)
+#define        BIC_RAM__       (1ULL << 30)
+#define        BIC_Pkg_J       (1ULL << 31)
+#define        BIC_Cor_J       (1ULL << 32)
+#define        BIC_GFX_J       (1ULL << 33)
+#define        BIC_RAM_J       (1ULL << 34)
+#define        BIC_Core        (1ULL << 35)
+#define        BIC_CPU         (1ULL << 36)
+#define        BIC_Mod_c6      (1ULL << 37)
+
+unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
+unsigned long long bic_present;
+
+/*
+ * DO_BIC(): non-zero when the given counter bit(s) are set in both
+ * bic_enabled and bic_present.
+ * BIC_PRESENT() / BIC_NOT_PRESENT(): set / clear bit(s) in bic_present.
+ * Arguments are parenthesized so a compound mask such as (A | B) is
+ * applied as a whole instead of being split by operator precedence.
+ */
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
+
+/*
+ * bic_lookup
+ * for each name in the comma-separated name_list,
+ * set the appropriate bit in the return value.
  */
+unsigned long long bic_lookup(char *name_list)
+{
+       unsigned long long mask = 0;
+       char *token = name_list;
+
+       /* walk the list one name at a time; a NULL cursor means no names remain */
+       while (token != NULL) {
+               char *sep = strchr(token, ',');
+               char *following = NULL;
+               unsigned int idx = 0;
+
+               /* terminate the current name in place and note where the next one starts */
+               if (sep != NULL) {
+                       *sep = '\0';
+                       following = sep + 1;
+               }
+
+               /* exact-match search of the built-in counter table */
+               while (idx < MAX_BIC && strcmp(token, bic[idx].name) != 0)
+                       idx++;
+
+               if (idx == MAX_BIC) {
+                       fprintf(stderr, "Invalid counter name: %s\n", token);
+                       exit(-1);
+               }
+
+               /* table index doubles as the bit position */
+               mask |= 1ULL << idx;
+               token = following;
+       }
+
+       return mask;
+}
 
 void print_header(void)
 {
-       if (show_pkg)
+       struct msr_counter *mp;
+
+       if (DO_BIC(BIC_Package))
                outp += sprintf(outp, "\tPackage");
-       if (show_core)
+       if (DO_BIC(BIC_Core))
                outp += sprintf(outp, "\tCore");
-       if (show_cpu)
+       if (DO_BIC(BIC_CPU))
                outp += sprintf(outp, "\tCPU");
-       if (has_aperf)
+       if (DO_BIC(BIC_Avg_MHz))
                outp += sprintf(outp, "\tAvg_MHz");
-       if (has_aperf)
+       if (DO_BIC(BIC_Busy))
                outp += sprintf(outp, "\tBusy%%");
-       if (has_aperf)
+       if (DO_BIC(BIC_Bzy_MHz))
                outp += sprintf(outp, "\tBzy_MHz");
-       outp += sprintf(outp, "\tTSC_MHz");
-
-       if (extra_delta_offset32)
-               outp += sprintf(outp, "\tcount 0x%03X", extra_delta_offset32);
-       if (extra_delta_offset64)
-               outp += sprintf(outp, "\tCOUNT 0x%03X", extra_delta_offset64);
-       if (extra_msr_offset32)
-               outp += sprintf(outp, "\tMSR 0x%03X", extra_msr_offset32);
-       if (extra_msr_offset64)
-               outp += sprintf(outp, "\tMSR 0x%03X", extra_msr_offset64);
-
-       if (!debug)
-               goto done;
+       if (DO_BIC(BIC_TSC_MHz))
+               outp += sprintf(outp, "\tTSC_MHz");
 
-       if (do_irq)
+       if (DO_BIC(BIC_IRQ))
                outp += sprintf(outp, "\tIRQ");
-       if (do_smi)
+       if (DO_BIC(BIC_SMI))
                outp += sprintf(outp, "\tSMI");
 
-       if (do_nhm_cstates)
+       if (DO_BIC(BIC_CPU_c1))
                outp += sprintf(outp, "\tCPU%%c1");
-       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
+
+       for (mp = sys.tp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
+
+       if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
                outp += sprintf(outp, "\tCPU%%c3");
-       if (do_nhm_cstates)
+       if (DO_BIC(BIC_CPU_c6))
                outp += sprintf(outp, "\tCPU%%c6");
-       if (do_snb_cstates)
+       if (DO_BIC(BIC_CPU_c7))
                outp += sprintf(outp, "\tCPU%%c7");
 
-       if (do_dts)
+       if (DO_BIC(BIC_Mod_c6))
+               outp += sprintf(outp, "\tMod%%c6");
+
+       if (DO_BIC(BIC_CoreTmp))
                outp += sprintf(outp, "\tCoreTmp");
-       if (do_ptm)
+
+       for (mp = sys.cp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
+
+       if (DO_BIC(BIC_PkgTmp))
                outp += sprintf(outp, "\tPkgTmp");
 
-       if (do_gfx_rc6_ms)
+       if (DO_BIC(BIC_GFX_rc6))
                outp += sprintf(outp, "\tGFX%%rc6");
 
-       if (do_gfx_mhz)
+       if (DO_BIC(BIC_GFXMHz))
                outp += sprintf(outp, "\tGFXMHz");
 
        if (do_skl_residency) {
@@ -380,54 +541,68 @@ void print_header(void)
                outp += sprintf(outp, "\tCPUGFX%%");
        }
 
-       if (do_pc2)
+       if (DO_BIC(BIC_Pkgpc2))
                outp += sprintf(outp, "\tPkg%%pc2");
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                outp += sprintf(outp, "\tPkg%%pc3");
-       if (do_pc6)
+       if (DO_BIC(BIC_Pkgpc6))
                outp += sprintf(outp, "\tPkg%%pc6");
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                outp += sprintf(outp, "\tPkg%%pc7");
-       if (do_c8_c9_c10) {
+       if (DO_BIC(BIC_Pkgpc8))
                outp += sprintf(outp, "\tPkg%%pc8");
+       if (DO_BIC(BIC_Pkgpc9))
                outp += sprintf(outp, "\tPkg%%pc9");
+       if (DO_BIC(BIC_Pkgpc10))
                outp += sprintf(outp, "\tPk%%pc10");
-       }
 
        if (do_rapl && !rapl_joules) {
-               if (do_rapl & RAPL_PKG)
+               if (DO_BIC(BIC_PkgWatt))
                        outp += sprintf(outp, "\tPkgWatt");
-               if (do_rapl & RAPL_CORES)
+               if (DO_BIC(BIC_CorWatt))
                        outp += sprintf(outp, "\tCorWatt");
-               if (do_rapl & RAPL_GFX)
+               if (DO_BIC(BIC_GFXWatt))
                        outp += sprintf(outp, "\tGFXWatt");
-               if (do_rapl & RAPL_DRAM)
+               if (DO_BIC(BIC_RAMWatt))
                        outp += sprintf(outp, "\tRAMWatt");
-               if (do_rapl & RAPL_PKG_PERF_STATUS)
+               if (DO_BIC(BIC_PKG__))
                        outp += sprintf(outp, "\tPKG_%%");
-               if (do_rapl & RAPL_DRAM_PERF_STATUS)
+               if (DO_BIC(BIC_RAM__))
                        outp += sprintf(outp, "\tRAM_%%");
        } else if (do_rapl && rapl_joules) {
-               if (do_rapl & RAPL_PKG)
+               if (DO_BIC(BIC_Pkg_J))
                        outp += sprintf(outp, "\tPkg_J");
-               if (do_rapl & RAPL_CORES)
+               if (DO_BIC(BIC_Cor_J))
                        outp += sprintf(outp, "\tCor_J");
-               if (do_rapl & RAPL_GFX)
+               if (DO_BIC(BIC_GFX_J))
                        outp += sprintf(outp, "\tGFX_J");
-               if (do_rapl & RAPL_DRAM)
+               if (DO_BIC(BIC_RAM_J))
                        outp += sprintf(outp, "\tRAM_J");
-               if (do_rapl & RAPL_PKG_PERF_STATUS)
+               if (DO_BIC(BIC_PKG__))
                        outp += sprintf(outp, "\tPKG_%%");
-               if (do_rapl & RAPL_DRAM_PERF_STATUS)
+               if (DO_BIC(BIC_RAM__))
                        outp += sprintf(outp, "\tRAM_%%");
        }
-    done:
+       for (mp = sys.pp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
+
        outp += sprintf(outp, "\n");
 }
 
 int dump_counters(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
 
        if (t) {
@@ -437,18 +612,16 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
                outp += sprintf(outp, "c1: %016llX\n", t->c1);
-               outp += sprintf(outp, "msr0x%x: %08llX\n",
-                       extra_delta_offset32, t->extra_delta32);
-               outp += sprintf(outp, "msr0x%x: %016llX\n",
-                       extra_delta_offset64, t->extra_delta64);
-               outp += sprintf(outp, "msr0x%x: %08llX\n",
-                       extra_msr_offset32, t->extra_msr32);
-               outp += sprintf(outp, "msr0x%x: %016llX\n",
-                       extra_msr_offset64, t->extra_msr64);
-               if (do_irq)
+
+               if (DO_BIC(BIC_IRQ))
                        outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
-               if (do_smi)
+               if (DO_BIC(BIC_SMI))
                        outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
+
+               for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, t->counter[i]);
+               }
        }
 
        if (c) {
@@ -457,6 +630,12 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
                outp += sprintf(outp, "c7: %016llX\n", c->c7);
                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+
+               for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, c->counter[i]);
+               }
+               outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
        }
 
        if (p) {
@@ -468,11 +647,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
 
                outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
-               if (do_pc3)
+               if (DO_BIC(BIC_Pkgpc3))
                        outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
-               if (do_pc6)
+               if (DO_BIC(BIC_Pkgpc6))
                        outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
-               if (do_pc7)
+               if (DO_BIC(BIC_Pkgpc7))
                        outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
                outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
                outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
@@ -486,6 +665,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "Throttle RAM: %0X\n",
                        p->rapl_dram_perf_status);
                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
+
+               for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, p->counter[i]);
+               }
        }
 
        outp += sprintf(outp, "\n");
@@ -499,8 +683,10 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 int format_counters(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
-       double interval_float;
+       double interval_float, tsc;
        char *fmt8;
+       int i;
+       struct msr_counter *mp;
 
         /* if showing only 1st thread in core and this isn't one, bail out */
        if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -512,105 +698,118 @@ int format_counters(struct thread_data *t, struct core_data *c,
 
        interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 
+       tsc = t->tsc * tsc_tweak;
+
        /* topo columns, print blanks on 1st (average) line */
        if (t == &average.threads) {
-               if (show_pkg)
+               if (DO_BIC(BIC_Package))
                        outp += sprintf(outp, "\t-");
-               if (show_core)
+               if (DO_BIC(BIC_Core))
                        outp += sprintf(outp, "\t-");
-               if (show_cpu)
+               if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "\t-");
        } else {
-               if (show_pkg) {
+               if (DO_BIC(BIC_Package)) {
                        if (p)
                                outp += sprintf(outp, "\t%d", p->package_id);
                        else
                                outp += sprintf(outp, "\t-");
                }
-               if (show_core) {
+               if (DO_BIC(BIC_Core)) {
                        if (c)
                                outp += sprintf(outp, "\t%d", c->core_id);
                        else
                                outp += sprintf(outp, "\t-");
                }
-               if (show_cpu)
+               if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "\t%d", t->cpu_id);
        }
 
-       /* Avg_MHz */
-       if (has_aperf)
+       if (DO_BIC(BIC_Avg_MHz))
                outp += sprintf(outp, "\t%.0f",
                        1.0 / units * t->aperf / interval_float);
 
-       /* Busy% */
-       if (has_aperf)
-               outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
+       if (DO_BIC(BIC_Busy))
+               outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc);
 
-       /* Bzy_MHz */
-       if (has_aperf) {
+       if (DO_BIC(BIC_Bzy_MHz)) {
                if (has_base_hz)
                        outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
                else
                        outp += sprintf(outp, "\t%.0f",
-                               1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
+                               tsc / units * t->aperf / t->mperf / interval_float);
        }
 
-       /* TSC_MHz */
-       outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
-
-       /* delta */
-       if (extra_delta_offset32)
-               outp += sprintf(outp, "\t%11llu", t->extra_delta32);
-
-       /* DELTA */
-       if (extra_delta_offset64)
-               outp += sprintf(outp, "\t%11llu", t->extra_delta64);
-       /* msr */
-       if (extra_msr_offset32)
-               outp += sprintf(outp, "\t0x%08llx", t->extra_msr32);
-
-       /* MSR */
-       if (extra_msr_offset64)
-               outp += sprintf(outp, "\t0x%016llx", t->extra_msr64);
-
-       if (!debug)
-               goto done;
+       if (DO_BIC(BIC_TSC_MHz))
+               outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
 
        /* IRQ */
-       if (do_irq)
+       if (DO_BIC(BIC_IRQ))
                outp += sprintf(outp, "\t%d", t->irq_count);
 
        /* SMI */
-       if (do_smi)
+       if (DO_BIC(BIC_SMI))
                outp += sprintf(outp, "\t%d", t->smi_count);
 
-       if (do_nhm_cstates)
-               outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
+       /* C1 */
+       if (DO_BIC(BIC_CPU_c1))
+               outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc);
+
+       /* Added counters */
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%lld", t->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc);
+               }
+       }
 
        /* print per-core data only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                goto done;
 
-       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
-               outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
-       if (do_nhm_cstates)
-               outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
-       if (do_snb_cstates)
-               outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
+       if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc);
+       if (DO_BIC(BIC_CPU_c6))
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc);
+       if (DO_BIC(BIC_CPU_c7))
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc);
 
-       if (do_dts)
+       /* Mod%c6 */
+       if (DO_BIC(BIC_Mod_c6))
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc);
+
+       if (DO_BIC(BIC_CoreTmp))
                outp += sprintf(outp, "\t%d", c->core_temp_c);
 
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%lld", c->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc);
+               }
+       }
+
        /* print per-package data only for 1st core in package */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                goto done;
 
        /* PkgTmp */
-       if (do_ptm)
+       if (DO_BIC(BIC_PkgTmp))
                outp += sprintf(outp, "\t%d", p->pkg_temp_c);
 
        /* GFXrc6 */
-       if (do_gfx_rc6_ms) {
+       if (DO_BIC(BIC_GFX_rc6)) {
                if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
                        outp += sprintf(outp, "\t**.**");
                } else {
@@ -620,31 +819,32 @@ int format_counters(struct thread_data *t, struct core_data *c,
        }
 
        /* GFXMHz */
-       if (do_gfx_mhz)
+       if (DO_BIC(BIC_GFXMHz))
                outp += sprintf(outp, "\t%d", p->gfx_mhz);
 
        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
        if (do_skl_residency) {
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
-       }
-
-       if (do_pc2)
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
-       if (do_pc3)
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
-       if (do_pc6)
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
-       if (do_pc7)
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
-       if (do_c8_c9_c10) {
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
-               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc);
        }
 
+       if (DO_BIC(BIC_Pkgpc2))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc);
+       if (DO_BIC(BIC_Pkgpc3))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc);
+       if (DO_BIC(BIC_Pkgpc6))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc);
+       if (DO_BIC(BIC_Pkgpc7))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc);
+       if (DO_BIC(BIC_Pkgpc8))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc);
+       if (DO_BIC(BIC_Pkgpc9))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc);
+       if (DO_BIC(BIC_Pkgpc10))
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc);
+
        /*
         * If measurement interval exceeds minimum RAPL Joule Counter range,
         * indicate that results are suspect by printing "**" in fraction place.
@@ -654,37 +854,40 @@ int format_counters(struct thread_data *t, struct core_data *c,
        else
                fmt8 = "%6.0f**";
 
-       if (do_rapl && !rapl_joules) {
-               if (do_rapl & RAPL_PKG)
-                       outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_CORES)
-                       outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_GFX)
-                       outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_DRAM)
-                       outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
-               if (do_rapl & RAPL_PKG_PERF_STATUS)
-                       outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
-               if (do_rapl & RAPL_DRAM_PERF_STATUS)
-                       outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
-       } else if (do_rapl && rapl_joules) {
-               if (do_rapl & RAPL_PKG)
-                       outp += sprintf(outp, fmt8,
-                                       p->energy_pkg * rapl_energy_units);
-               if (do_rapl & RAPL_CORES)
-                       outp += sprintf(outp, fmt8,
-                                       p->energy_cores * rapl_energy_units);
-               if (do_rapl & RAPL_GFX)
-                       outp += sprintf(outp, fmt8,
-                                       p->energy_gfx * rapl_energy_units);
-               if (do_rapl & RAPL_DRAM)
-                       outp += sprintf(outp, fmt8,
-                                       p->energy_dram * rapl_dram_energy_units);
-               if (do_rapl & RAPL_PKG_PERF_STATUS)
-                       outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
-               if (do_rapl & RAPL_DRAM_PERF_STATUS)
-                       outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+       if (DO_BIC(BIC_PkgWatt))
+               outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
+       if (DO_BIC(BIC_CorWatt))
+               outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
+       if (DO_BIC(BIC_GFXWatt))
+               outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
+       if (DO_BIC(BIC_RAMWatt))
+               outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
+       if (DO_BIC(BIC_Pkg_J))
+               outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units);
+       if (DO_BIC(BIC_Cor_J))
+               outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units);
+       if (DO_BIC(BIC_GFX_J))
+               outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units);
+       if (DO_BIC(BIC_RAM_J))
+               outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units);
+       if (DO_BIC(BIC_PKG__))
+               outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+       if (DO_BIC(BIC_RAM__))
+               outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%lld", p->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc);
+               }
        }
+
 done:
        outp += sprintf(outp, "\n");
 
@@ -740,6 +943,8 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
 int
 delta_package(struct pkg_data *new, struct pkg_data *old)
 {
+       int i;
+       struct msr_counter *mp;
 
        if (do_skl_residency) {
                old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
@@ -748,11 +953,11 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
                old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
        }
        old->pc2 = new->pc2 - old->pc2;
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                old->pc3 = new->pc3 - old->pc3;
-       if (do_pc6)
+       if (DO_BIC(BIC_Pkgpc6))
                old->pc6 = new->pc6 - old->pc6;
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                old->pc7 = new->pc7 - old->pc7;
        old->pc8 = new->pc8 - old->pc8;
        old->pc9 = new->pc9 - old->pc9;
@@ -774,16 +979,34 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
        DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
        DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
 
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       old->counter[i] = new->counter[i];
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];
+       }
+
        return 0;
 }
 
 void
 delta_core(struct core_data *new, struct core_data *old)
 {
+       int i;  /* slot index into counter[]; advances in lockstep with mp */
+       struct msr_counter *mp;  /* walks the sys.cp list, one node per counter[] slot */
+
         old->c3 = new->c3 - old->c3;
         old->c6 = new->c6 - old->c6;
         old->c7 = new->c7 - old->c7;
         old->core_temp_c = new->core_temp_c;
+       old->mc6_us = new->mc6_us - old->mc6_us;  /* differenced like c3/c6/c7; result is left in old */
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {  /* fold every sys.cp counter into old, slot by slot */
+               if (mp->format == FORMAT_RAW)  /* FORMAT_RAW entries are not differenced */
+                       old->counter[i] = new->counter[i];  /* old simply takes the latest value */
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];  /* every other format: old becomes new - old */
+       }
 }
 
 /*
@@ -793,6 +1016,9 @@ int
 delta_thread(struct thread_data *new, struct thread_data *old,
        struct core_data *core_delta)
 {
+       int i;
+       struct msr_counter *mp;
+
        old->tsc = new->tsc - old->tsc;
 
        /* check for TSC < 1 Mcycles over interval */
@@ -803,7 +1029,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 
        old->c1 = new->c1 - old->c1;
 
-       if (has_aperf) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
                if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
                        old->aperf = new->aperf - old->aperf;
                        old->mperf = new->mperf - old->mperf;
@@ -828,7 +1054,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                        old->c1 = 0;
                else {
                        /* normal case, derive c1 */
-                       old->c1 = old->tsc - old->mperf - core_delta->c3
+                       old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
                                - core_delta->c6 - core_delta->c7;
                }
        }
@@ -839,23 +1065,18 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                old->mperf = 1; /* divide by 0 protection */
        }
 
-       old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
-       old->extra_delta32 &= 0xFFFFFFFF;
-
-       old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
-
-       /*
-        * Extra MSR is just a snapshot, simply copy latest w/o subtracting
-        */
-       old->extra_msr32 = new->extra_msr32;
-       old->extra_msr64 = new->extra_msr64;
-
-       if (do_irq)
+       if (DO_BIC(BIC_IRQ))
                old->irq_count = new->irq_count - old->irq_count;
 
-       if (do_smi)
+       if (DO_BIC(BIC_SMI))
                old->smi_count = new->smi_count - old->smi_count;
 
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       old->counter[i] = new->counter[i];
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];
+       }
        return 0;
 }
 
@@ -883,14 +1104,14 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
 
 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
+       int i;
+       struct msr_counter  *mp;
+
        t->tsc = 0;
        t->aperf = 0;
        t->mperf = 0;
        t->c1 = 0;
 
-       t->extra_delta32 = 0;
-       t->extra_delta64 = 0;
-
        t->irq_count = 0;
        t->smi_count = 0;
 
@@ -900,6 +1121,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        c->c3 = 0;
        c->c6 = 0;
        c->c7 = 0;
+       c->mc6_us = 0;
        c->core_temp_c = 0;
 
        p->pkg_wtd_core_c0 = 0;
@@ -908,11 +1130,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        p->pkg_both_core_gfxe_c0 = 0;
 
        p->pc2 = 0;
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                p->pc3 = 0;
-       if (do_pc6)
+       if (DO_BIC(BIC_Pkgpc6))
                p->pc6 = 0;
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                p->pc7 = 0;
        p->pc8 = 0;
        p->pc9 = 0;
@@ -928,21 +1150,35 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        p->gfx_rc6_ms = 0;
        p->gfx_mhz = 0;
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
+               t->counter[i] = 0;
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
+               c->counter[i] = 0;
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
+               p->counter[i] = 0;
 }
 int sum_counters(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        average.threads.tsc += t->tsc;
        average.threads.aperf += t->aperf;
        average.threads.mperf += t->mperf;
        average.threads.c1 += t->c1;
 
-       average.threads.extra_delta32 += t->extra_delta32;
-       average.threads.extra_delta64 += t->extra_delta64;
-
        average.threads.irq_count += t->irq_count;
        average.threads.smi_count += t->smi_count;
 
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.threads.counter[i] += t->counter[i];
+       }
+
        /* sum per-core values only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
@@ -950,9 +1186,16 @@ int sum_counters(struct thread_data *t, struct core_data *c,
        average.cores.c3 += c->c3;
        average.cores.c6 += c->c6;
        average.cores.c7 += c->c7;
+       average.cores.mc6_us += c->mc6_us;
 
        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
 
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.cores.counter[i] += c->counter[i];
+       }
+
        /* sum per-pkg values only for 1st core in pkg */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                return 0;
@@ -965,11 +1208,11 @@ int sum_counters(struct thread_data *t, struct core_data *c,
        }
 
        average.packages.pc2 += p->pc2;
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                average.packages.pc3 += p->pc3;
-       if (do_pc6)
+       if (DO_BIC(BIC_Pkgpc6))
                average.packages.pc6 += p->pc6;
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                average.packages.pc7 += p->pc7;
        average.packages.pc8 += p->pc8;
        average.packages.pc9 += p->pc9;
@@ -987,6 +1230,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 
        average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.packages.counter[i] += p->counter[i];
+       }
        return 0;
 }
 /*
@@ -996,6 +1245,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 void compute_average(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        clear_counters(&average.threads, &average.cores, &average.packages);
 
        for_all_cpus(sum_counters, t, c, p);
@@ -1005,14 +1257,10 @@ void compute_average(struct thread_data *t, struct core_data *c,
        average.threads.mperf /= topo.num_cpus;
        average.threads.c1 /= topo.num_cpus;
 
-       average.threads.extra_delta32 /= topo.num_cpus;
-       average.threads.extra_delta32 &= 0xFFFFFFFF;
-
-       average.threads.extra_delta64 /= topo.num_cpus;
-
        average.cores.c3 /= topo.num_cores;
        average.cores.c6 /= topo.num_cores;
        average.cores.c7 /= topo.num_cores;
+       average.cores.mc6_us /= topo.num_cores;
 
        if (do_skl_residency) {
                average.packages.pkg_wtd_core_c0 /= topo.num_packages;
@@ -1022,16 +1270,32 @@ void compute_average(struct thread_data *t, struct core_data *c,
        }
 
        average.packages.pc2 /= topo.num_packages;
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                average.packages.pc3 /= topo.num_packages;
-       if (do_pc6)
+       if (DO_BIC(BIC_Pkgpc6))
                average.packages.pc6 /= topo.num_packages;
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                average.packages.pc7 /= topo.num_packages;
 
        average.packages.pc8 /= topo.num_packages;
        average.packages.pc9 /= topo.num_packages;
        average.packages.pc10 /= topo.num_packages;
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.threads.counter[i] /= topo.num_cpus;
+       }
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.cores.counter[i] /= topo.num_cores;
+       }
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.packages.counter[i] /= topo.num_packages;
+       }
 }
 
 static unsigned long long rdtsc(void)
@@ -1053,6 +1317,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        int cpu = t->cpu_id;
        unsigned long long msr;
        int aperf_mperf_retry_count = 0;
+       struct msr_counter *mp;
+       int i;
 
        if (cpu_migrate(cpu)) {
                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
@@ -1062,7 +1328,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 retry:
        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 
-       if (has_aperf) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
                unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
 
                /*
@@ -1118,48 +1384,34 @@ retry:
                t->mperf = t->mperf * aperf_mperf_multiplier;
        }
 
-       if (do_irq)
+       if (DO_BIC(BIC_IRQ))
                t->irq_count = irqs_per_cpu[cpu];
-       if (do_smi) {
+       if (DO_BIC(BIC_SMI)) {
                if (get_msr(cpu, MSR_SMI_COUNT, &msr))
                        return -5;
                t->smi_count = msr & 0xFFFFFFFF;
        }
-       if (extra_delta_offset32) {
-               if (get_msr(cpu, extra_delta_offset32, &msr))
-                       return -5;
-               t->extra_delta32 = msr & 0xFFFFFFFF;
+       if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+                       return -6;
        }
 
-       if (extra_delta_offset64)
-               if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
-                       return -5;
-
-       if (extra_msr_offset32) {
-               if (get_msr(cpu, extra_msr_offset32, &msr))
-                       return -5;
-               t->extra_msr32 = msr & 0xFFFFFFFF;
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &t->counter[i]))
+                       return -10;
        }
 
-       if (extra_msr_offset64)
-               if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
-                       return -5;
-
-       if (use_c1_residency_msr) {
-               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
-                       return -6;
-       }
 
        /* collect core counters only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
-       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
+       if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
                        return -6;
        }
 
-       if (do_nhm_cstates && !do_knl_cstates) {
+       if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
        } else if (do_knl_cstates) {
@@ -1167,16 +1419,24 @@ retry:
                        return -7;
        }
 
-       if (do_snb_cstates)
+       if (DO_BIC(BIC_CPU_c7))
                if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
                        return -8;
 
-       if (do_dts) {
+       if (DO_BIC(BIC_Mod_c6))
+               if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+                       return -8;
+
+       if (DO_BIC(BIC_CoreTmp)) {
                if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
                        return -9;
                c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
        }
 
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &c->counter[i]))
+                       return -10;
+       }
 
        /* collect package counters only for 1st core in package */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
@@ -1192,32 +1452,41 @@ retry:
                if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
                        return -13;
        }
-       if (do_pc3)
+       if (DO_BIC(BIC_Pkgpc3))
                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
                        return -9;
-       if (do_pc6)
-               if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
-                       return -10;
-       if (do_pc2)
+       if (DO_BIC(BIC_Pkgpc6)) {
+               if (do_slm_cstates) {
+                       if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
+                               return -10;
+               } else {
+                       if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
+                               return -10;
+               }
+       }
+
+       if (DO_BIC(BIC_Pkgpc2))
                if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
                        return -11;
-       if (do_pc7)
+       if (DO_BIC(BIC_Pkgpc7))
                if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
                        return -12;
-       if (do_c8_c9_c10) {
+       if (DO_BIC(BIC_Pkgpc8))
                if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
                        return -13;
+       if (DO_BIC(BIC_Pkgpc9))
                if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
                        return -13;
+       if (DO_BIC(BIC_Pkgpc10))
                if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
                        return -13;
-       }
+
        if (do_rapl & RAPL_PKG) {
                if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
                        return -13;
                p->energy_pkg = msr & 0xFFFFFFFF;
        }
-       if (do_rapl & RAPL_CORES) {
+       if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
                if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
                        return -14;
                p->energy_cores = msr & 0xFFFFFFFF;
@@ -1242,18 +1511,23 @@ retry:
                        return -16;
                p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
        }
-       if (do_ptm) {
+       if (DO_BIC(BIC_PkgTmp)) {
                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
                        return -17;
                p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
        }
 
-       if (do_gfx_rc6_ms)
+       if (DO_BIC(BIC_GFX_rc6))
                p->gfx_rc6_ms = gfx_cur_rc6_ms;
 
-       if (do_gfx_mhz)
+       if (DO_BIC(BIC_GFXMHz))
                p->gfx_mhz = gfx_cur_mhz;
 
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &p->counter[i]))
+                       return -10;
+       }
+
        return 0;
 }
 
@@ -1286,10 +1560,11 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
 int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};  /* entries [14] and [15] repeat PCL__6 and PCL__7 from [6] and [7] */
+int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};  /* entry [0] is PCLUNL, not PCL__0 */
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};  /* PCLUNL sits at entry [8]; [4]..[7] and [9]..[15] are PCLRSV */
 
 
 static void
@@ -1309,11 +1584,11 @@ dump_nhm_platform_info(void)
        fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
 
        ratio = (msr >> 40) & 0xFF;
-       fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
+       fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
                ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 8) & 0xFF;
-       fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
+       fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
                ratio, bclk, ratio * bclk);
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -1335,12 +1610,12 @@ dump_hsw_turbo_ratio_limits(void)
 
        ratio = (msr >> 8) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 0) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
                        ratio, bclk, ratio * bclk);
        return;
 }
@@ -1357,42 +1632,42 @@ dump_ivt_turbo_ratio_limits(void)
 
        ratio = (msr >> 56) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 48) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 40) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 32) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 24) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 16) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 8) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 0) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
                        ratio, bclk, ratio * bclk);
        return;
 }
@@ -1409,46 +1684,94 @@ dump_nhm_turbo_ratio_limits(void)
 
        ratio = (msr >> 56) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 8 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 48) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 7 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 40) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 6 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 32) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 5 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 24) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 16) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 8) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
                        ratio, bclk, ratio * bclk);
 
        ratio = (msr >> 0) & 0xFF;
        if (ratio)
-               fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active cores\n",
                        ratio, bclk, ratio * bclk);
        return;
 }
 
+static void
+dump_atom_turbo_ratio_limits(void)  /* prints MSR_ATOM_CORE_RATIOS and MSR_ATOM_CORE_TURBO_RATIOS, raw and decoded, to outf */
+{
+       unsigned long long msr;  /* raw value filled in by get_msr() */
+       unsigned int ratio;  /* one 6-bit field of msr; printed alone and multiplied by bclk */
+
+       get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);  /* return value is not checked here */
+       fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);  /* only the low 32 bits are shown */
+
+       ratio = (msr >> 0) & 0x3F;  /* bits 5:0 */
+       if (ratio)  /* a zero field is skipped; the same rule applies to every field below */
+               fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
+                       ratio, bclk, ratio * bclk);
+
+       ratio = (msr >> 8) & 0x3F;  /* bits 13:8 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
+                       ratio, bclk, ratio * bclk);
+
+       ratio = (msr >> 16) & 0x3F;  /* bits 21:16 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
+                       ratio, bclk, ratio * bclk);
+
+       get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);  /* msr is reused for the second register */
+       fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
+
+       ratio = (msr >> 24) & 0x3F;  /* bits 29:24; fields are printed from 4 active cores down to 1 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
+                       ratio, bclk, ratio * bclk);
+
+       ratio = (msr >> 16) & 0x3F;  /* bits 21:16 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
+                       ratio, bclk, ratio * bclk);
+
+       ratio = (msr >> 8) & 0x3F;  /* bits 13:8 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
+                       ratio, bclk, ratio * bclk);
+
+       ratio = (msr >> 0) & 0x3F;  /* bits 5:0 */
+       if (ratio)
+               fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
+                       ratio, bclk, ratio * bclk);
+}
+
 static void
 dump_knl_turbo_ratio_limits(void)
 {
@@ -1504,7 +1827,7 @@ dump_knl_turbo_ratio_limits(void)
        for (i = buckets_no - 1; i >= 0; i--)
                if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
                        fprintf(outf,
-                               "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+                               "%d * %.1f = %.1f MHz max turbo %d active cores\n",
                                ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
 
@@ -1513,12 +1836,12 @@ dump_nhm_cst_cfg(void)
 {
        unsigned long long msr;
 
-       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
 
-       fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
+       fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
 
        fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
                (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
@@ -2003,10 +2326,10 @@ int snapshot_proc_sysfs_files(void)
        if (snapshot_proc_interrupts())
                return 1;
 
-       if (do_gfx_rc6_ms)
+       if (DO_BIC(BIC_GFX_rc6))
                snapshot_gfx_rc6_ms();
 
-       if (do_gfx_mhz)
+       if (DO_BIC(BIC_GFXMHz))
                snapshot_gfx_mhz();
 
        return 0;
@@ -2135,7 +2458,9 @@ void check_permissions()
  * MSR_SMI_COUNT                   0x00000034
  *
  * MSR_PLATFORM_INFO               0x000000ce
- * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
+ * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
+ *
+ * MSR_MISC_PWR_MGMT               0x000001aa
  *
  * MSR_PKG_C3_RESIDENCY            0x000003f8
  * MSR_PKG_C6_RESIDENCY            0x000003f9
@@ -2143,7 +2468,8 @@ void check_permissions()
  * MSR_CORE_C6_RESIDENCY           0x000003fd
  *
  * Side effect:
- * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
+ * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
+ * sets has_misc_feature_control and no_MSR_MISC_PWR_MGMT, depending on model
  */
 int probe_nhm_msrs(unsigned int family, unsigned int model)
 {
@@ -2160,53 +2486,63 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        bclk = discover_bclk(family, model);
 
        switch (model) {
-       case 0x1A:      /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
-       case 0x1E:      /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+       case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
        case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
-       case 0x25:      /* Westmere Client - Clarkdale, Arrandale */
-       case 0x2C:      /* Westmere EP - Gulftown */
-       case 0x2E:      /* Nehalem-EX Xeon - Beckton */
-       case 0x2F:      /* Westmere-EX Xeon - Eagleton */
+       case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
+       case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
+       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
+       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
                pkg_cstate_limits = nhm_pkg_cstate_limits;
                break;
-       case 0x2A:      /* SNB */
-       case 0x2D:      /* SNB Xeon */
-       case 0x3A:      /* IVB */
-       case 0x3E:      /* IVB Xeon */
+       case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
+       case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
                pkg_cstate_limits = snb_pkg_cstate_limits;
+               has_misc_feature_control = 1;
                break;
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSX */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                pkg_cstate_limits = hsw_pkg_cstate_limits;
+               has_misc_feature_control = 1;
                break;
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+               pkg_cstate_limits = skx_pkg_cstate_limits;
+               has_misc_feature_control = 1;
+               break;
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+               no_MSR_MISC_PWR_MGMT = 1;
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
                pkg_cstate_limits = slv_pkg_cstate_limits;
                break;
-       case 0x4C:      /* AMT */
+       case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
                pkg_cstate_limits = amt_pkg_cstate_limits;
+               no_MSR_MISC_PWR_MGMT = 1;
                break;
-       case 0x57:      /* PHI */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
+       case INTEL_FAM6_XEON_PHI_KNM:
                pkg_cstate_limits = phi_pkg_cstate_limits;
                break;
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                pkg_cstate_limits = bxt_pkg_cstate_limits;
                break;
        default:
                return 0;
        }
-       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
        pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -2216,18 +2552,50 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        has_base_hz = 1;
        return 1;
 }
+/*
+ * SLV client has support for unique MSRs:
+ *
+ * MSR_CC6_DEMOTION_POLICY_CONFIG
+ * MSR_MC6_DEMOTION_POLICY_CONFIG
+ */
+
+int has_slv_msrs(unsigned int family, unsigned int model)
+{
+       /* the models below only count on genuine Intel parts; family is not consulted */
+       if (genuine_intel) {
+               switch (model) {
+               case INTEL_FAM6_ATOM_SILVERMONT1:
+               case INTEL_FAM6_ATOM_MERRIFIELD:
+               case INTEL_FAM6_ATOM_MOOREFIELD:
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
+       if (has_slv_msrs(family, model))
+               return 0;
+
        switch (model) {
        /* Nehalem compatible, but do not include turbo-ratio limit support */
-       case 0x2E:      /* Nehalem-EX Xeon - Beckton */
-       case 0x2F:      /* Westmere-EX Xeon - Eagleton */
-       case 0x57:      /* PHI - Knights Landing (different MSR definition) */
+       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
+       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 0;
        default:
                return 1;
        }
 }
+int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+       /* the Atom turbo-ratio dump is enabled for the same models has_slv_msrs() accepts */
+       int slv_client = has_slv_msrs(family, model);
+
+       return slv_client ? 1 : 0;
+}
 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
@@ -2237,8 +2605,8 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3E:      /* IVB Xeon */
-       case 0x3F:      /* HSW Xeon */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
+       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
                return 1;
        default:
                return 0;
@@ -2253,7 +2621,7 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3F:      /* HSW Xeon */
+       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
                return 1;
        default:
                return 0;
@@ -2269,7 +2637,8 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x57:      /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        default:
                return 0;
@@ -2284,22 +2653,23 @@ int has_config_tdp(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3A:      /* IVB */
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSX */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
-
-       case 0x57:      /* Knights Landing */
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+
+       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        default:
                return 0;
@@ -2323,6 +2693,9 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
        if (has_nhm_turbo_ratio_limit(family, model))
                dump_nhm_turbo_ratio_limits();
 
+       if (has_atom_turbo_ratio_limit(family, model))
+               dump_atom_turbo_ratio_limits();
+
        if (has_knl_turbo_ratio_limit(family, model))
                dump_knl_turbo_ratio_limits();
 
@@ -2579,8 +2952,8 @@ double get_tdp(unsigned int model)
                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
        switch (model) {
-       case 0x37:
-       case 0x4D:
+       case INTEL_FAM6_ATOM_SILVERMONT1:
+       case INTEL_FAM6_ATOM_SILVERMONT2:
                return 30.0;
        default:
                return 135.0;
@@ -2597,10 +2970,11 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
        /* only called for genuine_intel, family 6 */
 
        switch (model) {
-       case 0x3F:      /* HSX */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
                return (rapl_energy_units);
@@ -2626,38 +3000,105 @@ void rapl_probe(unsigned int family, unsigned int model)
                return;
 
        switch (model) {
-       case 0x2A:
-       case 0x3A:
-       case 0x3C:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_IVYBRIDGE:
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+                       BIC_PRESENT(BIC_GFX_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+                       BIC_PRESENT(BIC_GFXWatt);
+               }
                break;
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
                do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
+               if (rapl_joules)
+                       BIC_PRESENT(BIC_Pkg_J);
+               else
+                       BIC_PRESENT(BIC_PkgWatt);
                break;
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               BIC_PRESENT(BIC_PKG__);
+               BIC_PRESENT(BIC_RAM__);
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+                       BIC_PRESENT(BIC_RAM_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+                       BIC_PRESENT(BIC_RAMWatt);
+               }
                break;
-       case 0x3F:      /* HSX */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x55:      /* SKX */
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               BIC_PRESENT(BIC_PKG__);
+               BIC_PRESENT(BIC_RAM__);
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_RAM_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_RAMWatt);
+               }
                break;
-       case 0x2D:
-       case 0x3E:
+       case INTEL_FAM6_SANDYBRIDGE_X:
+       case INTEL_FAM6_IVYBRIDGE_X:
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               BIC_PRESENT(BIC_PKG__);
+               BIC_PRESENT(BIC_RAM__);
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+                       BIC_PRESENT(BIC_RAM_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+                       BIC_PRESENT(BIC_RAMWatt);
+               }
                break;
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
-               do_rapl = RAPL_PKG | RAPL_CORES ;
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
+               do_rapl = RAPL_PKG | RAPL_CORES;
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+               }
+               break;
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
+               do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+               BIC_PRESENT(BIC_PKG__);
+               BIC_PRESENT(BIC_RAM__);
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+                       BIC_PRESENT(BIC_RAM_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+                       BIC_PRESENT(BIC_RAMWatt);
+               }
                break;
        default:
                return;
@@ -2668,7 +3109,7 @@ void rapl_probe(unsigned int family, unsigned int model)
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       if (model == 0x37)
+       if (model == INTEL_FAM6_ATOM_SILVERMONT1)
                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
        else
                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
@@ -2684,7 +3125,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        tdp = get_tdp(model);
 
        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
-       if (debug)
+       if (!quiet)
                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 
        return;
@@ -2699,11 +3140,11 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
                return;
 
        switch (model) {
-       case 0x3C:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
                do_gfx_perf_limit_reasons = 1;
-       case 0x3F:      /* HSX */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
                do_core_perf_limit_reasons = 1;
                do_ring_perf_limit_reasons = 1;
        default:
@@ -2723,7 +3164,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        cpu = t->cpu_id;
 
        /* DTS is per-core, no need to print for each thread */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 
+       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -2809,11 +3250,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
                return -1;
 
-       if (debug) {
-               fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
-                       "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
-                       rapl_power_units, rapl_energy_units, rapl_time_units);
-       }
+       fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
+               rapl_power_units, rapl_energy_units, rapl_time_units);
+
        if (do_rapl & RAPL_PKG_POWER_INFO) {
 
                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
@@ -2834,7 +3273,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        return -9;
 
                fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
-                       cpu, msr, (msr >> 63) & 1 ? "": "UN");
+                       cpu, msr, (msr >> 63) & 1 ? "" : "UN");
 
                print_power_limit_msr(cpu, msr, "PKG Limit #1");
                fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
@@ -2860,41 +3299,34 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
                        return -9;
                fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
-                               cpu, msr, (msr >> 31) & 1 ? "": "UN");
+                               cpu, msr, (msr >> 31) & 1 ? "" : "UN");
 
                print_power_limit_msr(cpu, msr, "DRAM Limit");
        }
        if (do_rapl & RAPL_CORE_POLICY) {
-               if (debug) {
-                       if (get_msr(cpu, MSR_PP0_POLICY, &msr))
-                               return -7;
+               if (get_msr(cpu, MSR_PP0_POLICY, &msr))
+                       return -7;
 
-                       fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
-               }
+               fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
        }
-       if (do_rapl & RAPL_CORES) {
-               if (debug) {
-
-                       if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
-                               return -9;
-                       fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
-                                       cpu, msr, (msr >> 31) & 1 ? "": "UN");
-                       print_power_limit_msr(cpu, msr, "Cores Limit");
-               }
+       if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+               if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
+                       return -9;
+               fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+                               cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+               print_power_limit_msr(cpu, msr, "Cores Limit");
        }
        if (do_rapl & RAPL_GFX) {
-               if (debug) {
-                       if (get_msr(cpu, MSR_PP1_POLICY, &msr))
-                               return -8;
+               if (get_msr(cpu, MSR_PP1_POLICY, &msr))
+                       return -8;
 
-                       fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+               fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
 
-                       if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
-                               return -9;
-                       fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
-                                       cpu, msr, (msr >> 31) & 1 ? "": "UN");
-                       print_power_limit_msr(cpu, msr, "GFX Limit");
-               }
+               if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
+                       return -9;
+               fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+                               cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+               print_power_limit_msr(cpu, msr, "GFX Limit");
        }
        return 0;
 }
@@ -2913,24 +3345,26 @@ int has_snb_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x2A:
-       case 0x2D:
-       case 0x3A:      /* IVB */
-       case 0x3E:      /* IVB Xeon */
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_SANDYBRIDGE_X:
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                return 1;
        }
        return 0;
@@ -2954,13 +3388,14 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x45:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
                return 1;
        }
        return 0;
@@ -2980,24 +3415,22 @@ int has_skl_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                return 1;
        }
        return 0;
 }
 
-
-
 int is_slm(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
                return 0;
        switch (model) {
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
                return 1;
        }
        return 0;
@@ -3008,7 +3441,8 @@ int is_knl(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
        switch (model) {
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        }
        return 0;
@@ -3040,7 +3474,8 @@ double slm_bclk(void)
        }
        freq = slm_freq_table[i];
 
-       fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+       if (!quiet)
+               fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
 
        return freq;
 }
@@ -3103,7 +3538,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 
        target_c_local = (msr >> 16) & 0xFF;
 
-       if (debug)
+       if (!quiet)
                fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
                        cpu, msr, target_c_local);
 
@@ -3138,13 +3573,30 @@ void decode_misc_enable_msr(void)
        unsigned long long msr;
 
        if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
-               fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
+               fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
                        base_cpu, msr,
-                       msr & (1 << 3) ? "TCC" : "",
-                       msr & (1 << 16) ? "EIST" : "",
-                       msr & (1 << 18) ? "MONITOR" : "");
+                       msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
+                       msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
+                       msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
+                       msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
+                       msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
 }
 
+void decode_misc_feature_control(void)
+{
+       unsigned long long msr;
+
+       if (!has_misc_feature_control)
+               return;
+       /* Dump MSR_MISC_FEATURE_CONTROL: bits 0..3 each DISABLE one prefetcher, hence "No-" when set */
+       if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
+               fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
+                       base_cpu, msr,
+                       msr & (1 << 0) ? "No-" : "",    /* bit 0: L2-Prefetch */
+                       msr & (1 << 1) ? "No-" : "",    /* bit 1: L2-Prefetch-pair */
+                       msr & (1 << 2) ? "No-" : "",    /* bit 2: L1-Prefetch */
+                       msr & (1 << 3) ? "No-" : "");   /* bit 3: L1-IP-Prefetch */
+}
 /*
  * Decode MSR_MISC_PWR_MGMT
  *
@@ -3159,17 +3611,40 @@ void decode_misc_pwr_mgmt_msr(void)
        if (!do_nhm_platform_info)
                return;
 
+       if (no_MSR_MISC_PWR_MGMT)
+               return;
+
        if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
-               fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
+               fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
                        base_cpu, msr,
                        msr & (1 << 0) ? "DIS" : "EN",
-                       msr & (1 << 1) ? "EN" : "DIS");
+                       msr & (1 << 1) ? "EN" : "DIS",
+                       msr & (1 << 8) ? "EN" : "DIS");
+}
+/*
+ * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
+ *
+ * These MSRs are present on Silvermont processors,
+ * Intel Atom processor E3000 series (Baytrail), and friends.
+ */
+void decode_c6_demotion_policy_msr(void)
+{
+       unsigned long long val; /* raw MSR value; only bit 0 is decoded */
+
+       if (get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &val) == 0)
+               fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
+                       base_cpu, val, (val & 1) ? "EN" : "DIS");
+       /* a nonzero get_msr() result just skips that line; the MC6 MSR is read regardless */
+       if (get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &val) == 0)
+               fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
+                       base_cpu, val, (val & 1) ? "EN" : "DIS");
+}
 
 void process_cpuid()
 {
        unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
        unsigned int fms, family, model, stepping;
+       unsigned int has_turbo;
 
        eax = ebx = ecx = edx = 0;
 
@@ -3178,7 +3653,7 @@ void process_cpuid()
        if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
                genuine_intel = 1;
 
-       if (debug)
+       if (!quiet)
                fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
                        (char *)&ebx, (char *)&edx, (char *)&ecx);
 
@@ -3189,7 +3664,7 @@ void process_cpuid()
        if (family == 6 || family == 0xf)
                model += ((fms >> 16) & 0xf) << 4;
 
-       if (debug) {
+       if (!quiet) {
                fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
                        max_level, family, model, stepping, family, model, stepping);
                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
@@ -3232,8 +3707,18 @@ void process_cpuid()
 
        __cpuid(0x6, eax, ebx, ecx, edx);
        has_aperf = ecx & (1 << 0);
+       if (has_aperf) {
+               BIC_PRESENT(BIC_Avg_MHz);
+               BIC_PRESENT(BIC_Busy);
+               BIC_PRESENT(BIC_Bzy_MHz);
+       }
        do_dts = eax & (1 << 0);
+       if (do_dts)
+               BIC_PRESENT(BIC_CoreTmp);
+       has_turbo = eax & (1 << 1);
        do_ptm = eax & (1 << 6);
+       if (do_ptm)
+               BIC_PRESENT(BIC_PkgTmp);
        has_hwp = eax & (1 << 7);
        has_hwp_notify = eax & (1 << 8);
        has_hwp_activity_window = eax & (1 << 9);
@@ -3241,10 +3726,11 @@ void process_cpuid()
        has_hwp_pkg = eax & (1 << 11);
        has_epb = ecx & (1 << 3);
 
-       if (debug)
-               fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
+       if (!quiet)
+               fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
                        "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
                        has_aperf ? "" : "No-",
+                       has_turbo ? "" : "No-",
                        do_dts ? "" : "No-",
                        do_ptm ? "" : "No-",
                        has_hwp ? "" : "No-",
@@ -3254,10 +3740,11 @@ void process_cpuid()
                        has_hwp_pkg ? "" : "No-",
                        has_epb ? "" : "No-");
 
-       if (debug)
+       if (!quiet)
                decode_misc_enable_msr();
 
-       if (max_level >= 0x7 && debug) {
+
+       if (max_level >= 0x7 && !quiet) {
                int has_sgx;
 
                ecx = 0;
@@ -3283,22 +3770,24 @@ void process_cpuid()
 
                if (ebx_tsc != 0) {
 
-                       if (debug && (ebx != 0))
+                       if (!quiet && (ebx != 0))
                                fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
                                        eax_crystal, ebx_tsc, crystal_hz);
 
                        if (crystal_hz == 0)
                                switch(model) {
-                               case 0x4E:      /* SKL */
-                               case 0x5E:      /* SKL */
-                               case 0x8E:      /* KBL */
-                               case 0x9E:      /* KBL */
+                               case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+                               case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+                               case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+                               case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                                        crystal_hz = 24000000;  /* 24.0 MHz */
                                        break;
-                               case 0x55:      /* SKX */
+                               case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+                               case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                                        crystal_hz = 25000000;  /* 25.0 MHz */
                                        break;
-                               case 0x5C:      /* BXT */
+                               case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+                               case INTEL_FAM6_ATOM_GEMINI_LAKE:
                                        crystal_hz = 19200000;  /* 19.2 MHz */
                                        break;
                                default:
@@ -3307,7 +3796,7 @@ void process_cpuid()
 
                        if (crystal_hz) {
                                tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
-                               if (debug)
+                               if (!quiet)
                                        fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
                                                tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
                        }
@@ -3322,7 +3811,7 @@ void process_cpuid()
                base_mhz = max_mhz = bus_mhz = edx = 0;
 
                __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
-               if (debug)
+               if (!quiet)
                        fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
                                base_mhz, max_mhz, bus_mhz);
        }
@@ -3330,34 +3819,71 @@ void process_cpuid()
        if (has_aperf)
                aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
 
-       do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
+       BIC_PRESENT(BIC_IRQ);
+       BIC_PRESENT(BIC_TSC_MHz);
+
+       if (probe_nhm_msrs(family, model)) {
+               do_nhm_platform_info = 1;
+               BIC_PRESENT(BIC_CPU_c1);
+               BIC_PRESENT(BIC_CPU_c3);
+               BIC_PRESENT(BIC_CPU_c6);
+               BIC_PRESENT(BIC_SMI);
+       }
        do_snb_cstates = has_snb_msrs(family, model);
+
+       if (do_snb_cstates)
+               BIC_PRESENT(BIC_CPU_c7);
+
        do_irtl_snb = has_snb_msrs(family, model);
-       do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
-       do_pc3 = (pkg_cstate_limit >= PCL__3);
-       do_pc6 = (pkg_cstate_limit >= PCL__6);
-       do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
-       do_c8_c9_c10 = has_hsw_msrs(family, model);
+       if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
+               BIC_PRESENT(BIC_Pkgpc2);
+       if (pkg_cstate_limit >= PCL__3)
+               BIC_PRESENT(BIC_Pkgpc3);
+       if (pkg_cstate_limit >= PCL__6)
+               BIC_PRESENT(BIC_Pkgpc6);
+       if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
+               BIC_PRESENT(BIC_Pkgpc7);
+       if (has_slv_msrs(family, model)) {
+               BIC_NOT_PRESENT(BIC_Pkgpc2);
+               BIC_NOT_PRESENT(BIC_Pkgpc3);
+               BIC_PRESENT(BIC_Pkgpc6);
+               BIC_NOT_PRESENT(BIC_Pkgpc7);
+               BIC_PRESENT(BIC_Mod_c6);
+               use_c1_residency_msr = 1;
+       }
+       if (has_hsw_msrs(family, model)) {
+               BIC_PRESENT(BIC_Pkgpc8);
+               BIC_PRESENT(BIC_Pkgpc9);
+               BIC_PRESENT(BIC_Pkgpc10);
+       }
        do_irtl_hsw = has_hsw_msrs(family, model);
        do_skl_residency = has_skl_msrs(family, model);
        do_slm_cstates = is_slm(family, model);
        do_knl_cstates  = is_knl(family, model);
 
-       if (debug)
+       if (!quiet)
                decode_misc_pwr_mgmt_msr();
 
+       if (!quiet && has_slv_msrs(family, model))
+               decode_c6_demotion_policy_msr();
+
        rapl_probe(family, model);
        perf_limit_reasons_probe(family, model);
 
-       if (debug)
+       if (!quiet)
                dump_cstate_pstate_config_info(family, model);
 
        if (has_skl_msrs(family, model))
                calculate_tsc_tweak();
 
-       do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
+       if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+               BIC_PRESENT(BIC_GFX_rc6);
 
-       do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
+       if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+               BIC_PRESENT(BIC_GFXMHz);
+
+       if (!quiet)
+               decode_misc_feature_control();
 
        return;
 }
@@ -3371,14 +3897,12 @@ void help()
        "when COMMAND completes.\n"
        "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
        "to print statistics, until interrupted.\n"
-       "--debug        run in \"debug\" mode\n"
+       "--add          add a counter\n"
+       "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+       "--quiet        skip decoding system configuration header\n"
        "--interval sec Override default 5-second measurement interval\n"
        "--help         print this help message\n"
-       "--counter msr  print 32-bit counter at address \"msr\"\n"
-       "--Counter msr  print 64-bit Counter at address \"msr\"\n"
        "--out file     create or truncate \"file\" for all output\n"
-       "--msr msr      print 32-bit value at address \"msr\"\n"
-       "--MSR msr      print 64-bit Value at address \"msr\"\n"
        "--version      print version information\n"
        "\n"
        "For more help, run \"man turbostat\"\n");
@@ -3418,7 +3942,7 @@ void topology_probe()
        topo.max_cpu_num = 0;
        for_all_proc_cpus(count_cpus);
        if (!summary_only && topo.num_cpus > 1)
-               show_cpu = 1;
+               BIC_PRESENT(BIC_CPU);
 
        if (debug > 1)
                fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
@@ -3478,15 +4002,15 @@ void topology_probe()
        if (debug > 1)
                fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
                        max_core_id, topo.num_cores_per_pkg);
-       if (debug && !summary_only && topo.num_cores_per_pkg > 1)
-               show_core = 1;
+       if (!summary_only && topo.num_cores_per_pkg > 1)
+               BIC_PRESENT(BIC_Core);
 
        topo.num_packages = max_package_id + 1;
        if (debug > 1)
                fprintf(outf, "max_package_id %d, sizing for %d packages\n",
                        max_package_id, topo.num_packages);
        if (debug && !summary_only && topo.num_packages > 1)
-               show_pkg = 1;
+               BIC_PRESENT(BIC_Package);
 
        topo.num_threads_per_core = max_siblings;
        if (debug > 1)
@@ -3628,24 +4152,24 @@ void turbostat_init()
        process_cpuid();
 
 
-       if (debug)
+       if (!quiet)
                for_all_cpus(print_hwp, ODD_COUNTERS);
 
-       if (debug)
+       if (!quiet)
                for_all_cpus(print_epb, ODD_COUNTERS);
 
-       if (debug)
+       if (!quiet)
                for_all_cpus(print_perf_limit, ODD_COUNTERS);
 
-       if (debug)
+       if (!quiet)
                for_all_cpus(print_rapl, ODD_COUNTERS);
 
        for_all_cpus(set_temperature_target, ODD_COUNTERS);
 
-       if (debug)
+       if (!quiet)
                for_all_cpus(print_thermal, ODD_COUNTERS);
 
-       if (debug && do_irtl_snb)
+       if (!quiet && do_irtl_snb)
                print_irtl();
 }
 
@@ -3715,27 +4239,222 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 4.14 22 Apr 2016"
+       fprintf(outf, "turbostat version 4.17 10 Jan 2017"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
+/*
+ * add_counter() - register one user-defined MSR counter
+ *
+ * Allocates a zeroed struct msr_counter, fills it in from the arguments and
+ * pushes it onto the head of the list that matches @scope:
+ * sys.tp for SCOPE_CPU, sys.cp for SCOPE_CORE, sys.pp for SCOPE_PACKAGE.
+ * The matching sys.added_*_counters count is incremented.
+ *
+ * @name is truncated to NAME_BYTES - 1 characters so that the stored
+ * column header is always NUL-terminated.
+ *
+ * Returns 0 on success, -1 if @scope is not one of the scopes above.
+ * Exits the process if the allocation fails, or if more than
+ * MAX_ADDED_COUNTERS counters have been added for one scope.
+ */
+int add_counter(unsigned int msr_num, char *name, unsigned int width,
+       enum counter_scope scope, enum counter_type type,
+       enum counter_format format)
+{
+       struct msr_counter *msrp;
+
+       msrp = calloc(1, sizeof(*msrp));
+       if (msrp == NULL) {
+               perror("calloc");
+               exit(1);
+       }
+
+       msrp->msr_num = msr_num;
+       /*
+        * calloc() zero-filled the struct, so copying at most NAME_BYTES - 1
+        * bytes always leaves a terminating NUL behind the name.
+        * NOTE(review): assumes msrp->name holds at least NAME_BYTES bytes,
+        * as the original copy length implied.
+        */
+       strncpy(msrp->name, name, NAME_BYTES - 1);
+       msrp->width = width;
+       msrp->type = type;
+       msrp->format = format;
+
+       switch (scope) {
+
+       case SCOPE_CPU:
+               msrp->next = sys.tp;
+               sys.tp = msrp;
+               sys.added_thread_counters++;
+               if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
+                       fprintf(stderr, "exceeded max %d added thread counters\n",
+                               MAX_ADDED_COUNTERS);
+                       exit(-1);
+               }
+               break;
+
+       case SCOPE_CORE:
+               msrp->next = sys.cp;
+               sys.cp = msrp;
+               sys.added_core_counters++;
+               if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
+                       fprintf(stderr, "exceeded max %d added core counters\n",
+                               MAX_ADDED_COUNTERS);
+                       exit(-1);
+               }
+               break;
+
+       case SCOPE_PACKAGE:
+               msrp->next = sys.pp;
+               sys.pp = msrp;
+               sys.added_package_counters++;
+               if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
+                       fprintf(stderr, "exceeded max %d added package counters\n",
+                               MAX_ADDED_COUNTERS);
+                       exit(-1);
+               }
+               break;
+
+       default:
+               /* unknown scope: nothing was linked, so free and report failure */
+               free(msrp);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * parse_add_command() - parse one "--add" argument and register the counter
+ *
+ * @add_command is a comma-separated list of fields, examined left to right:
+ *
+ *   msrDDD | msr0xXXX         MSR number, decimal or hex (required, non-zero)
+ *   u32 | u64                 counter width (default u64)
+ *   cpu | core | package      scope (default cpu)
+ *   cycles | seconds          type (default cycles)
+ *   raw | delta | percent     format (default delta)
+ *   anything else             column name, at most 18 characters
+ *
+ * Keywords are matched as prefixes, so a field that merely starts with a
+ * keyword (e.g. "coreX") is taken as that keyword, not as a column name.
+ * When the same kind of field is given more than once, the last one wins.
+ * If no name is given, one is generated from the format, width and MSR number.
+ *
+ * On a rejected command, prints help() and exits with status 1.
+ */
+void parse_add_command(char *add_command)
+{
+       /*
+        * Unsigned, matching both the %x/%u conversions below and the
+        * msr_num parameter of add_counter(); a signed int would turn an
+        * MSR address >= 0x80000000 into a negative default column name.
+        */
+       unsigned int msr_num = 0;
+       char name_buffer[NAME_BYTES] = "";
+       int width = 64;
+       int fail = 0;
+       enum counter_scope scope = SCOPE_CPU;
+       enum counter_type type = COUNTER_CYCLES;
+       enum counter_format format = FORMAT_DELTA;
+
+       while (add_command) {
+
+               /* the hex form must be tried first: "msr0x10" also matches "msr%u" as 0 */
+               if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
+                       goto next;
+
+               if (sscanf(add_command, "msr%u", &msr_num) == 1)
+                       goto next;
+
+               if (sscanf(add_command, "u%d", &width) == 1) {
+                       if ((width == 32) || (width == 64))
+                               goto next;
+                       /* unsupported width: restore the default and keep parsing this field */
+                       width = 64;
+               }
+               if (!strncmp(add_command, "cpu", strlen("cpu"))) {
+                       scope = SCOPE_CPU;
+                       goto next;
+               }
+               if (!strncmp(add_command, "core", strlen("core"))) {
+                       scope = SCOPE_CORE;
+                       goto next;
+               }
+               if (!strncmp(add_command, "package", strlen("package"))) {
+                       scope = SCOPE_PACKAGE;
+                       goto next;
+               }
+               if (!strncmp(add_command, "cycles", strlen("cycles"))) {
+                       type = COUNTER_CYCLES;
+                       goto next;
+               }
+               if (!strncmp(add_command, "seconds", strlen("seconds"))) {
+                       type = COUNTER_SECONDS;
+                       goto next;
+               }
+               if (!strncmp(add_command, "raw", strlen("raw"))) {
+                       format = FORMAT_RAW;
+                       goto next;
+               }
+               if (!strncmp(add_command, "delta", strlen("delta"))) {
+                       format = FORMAT_DELTA;
+                       goto next;
+               }
+               if (!strncmp(add_command, "percent", strlen("percent"))) {
+                       format = FORMAT_PERCENT;
+                       goto next;
+               }
+
+               if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
+                       char *eos;
+
+                       /* %s stops at white space, not at ',': cut the name at the field end */
+                       eos = strchr(name_buffer, ',');
+                       if (eos)
+                               *eos = '\0';
+                       goto next;
+               }
+
+next:
+               /* advance to the character after the next ',' or finish the loop */
+               add_command = strchr(add_command, ',');
+               if (add_command)
+                       add_command++;
+
+       }
+       if (msr_num == 0) {
+               fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
+               fail++;
+       }
+
+       /* generate default column header */
+       if (*name_buffer == '\0') {
+               if (format == FORMAT_RAW) {
+                       if (width == 32)
+                               snprintf(name_buffer, sizeof(name_buffer), "msr%u", msr_num);
+                       else
+                               snprintf(name_buffer, sizeof(name_buffer), "MSR%u", msr_num);
+               } else if (format == FORMAT_DELTA) {
+                       if (width == 32)
+                               snprintf(name_buffer, sizeof(name_buffer), "cnt%u", msr_num);
+                       else
+                               snprintf(name_buffer, sizeof(name_buffer), "CNT%u", msr_num);
+               } else if (format == FORMAT_PERCENT) {
+                       if (width == 32)
+                               snprintf(name_buffer, sizeof(name_buffer), "msr%u%%", msr_num);
+                       else
+                               snprintf(name_buffer, sizeof(name_buffer), "MSR%u%%", msr_num);
+               }
+       }
+
+       /* do not register a counter for a command that was already rejected */
+       if (!fail && add_counter(msr_num, name_buffer, width, scope, type, format))
+               fail++;
+
+       if (fail) {
+               help();
+               exit(1);
+       }
+}
+/*
+ * Column selection modes used by --show / --hide:
+ *   SHOW_LIST - show this list of counters, hide the rest
+ *   HIDE_LIST - hide this list of counters, show the rest [default]
+ */
+enum show_hide_mode {
+       SHOW_LIST,
+       HIDE_LIST
+} global_show_hide_mode = HIDE_LIST;
+
+int shown;     /* becomes 1 once a SHOW_LIST request has been processed */
+
+/*
+ * parse_show_hide() - apply one --show or --hide list to bic_enabled
+ *
+ * optarg is handed to bic_lookup(); the mask it returns is combined
+ * with bic_enabled according to new_mode.
+ */
+void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
+{
+       if (new_mode != SHOW_LIST) {
+               /*
+                * --hide: clear the listed bits in the enabled mask;
+                *  repeated invocations simply clear more bits.
+                */
+               bic_enabled &= ~bic_lookup(optarg);
+               return;
+       }
+
+       /*
+        * --show: the first invocation replaces the enabled mask,
+        *  later invocations add to it.
+        */
+       if (shown)
+               bic_enabled |= bic_lookup(optarg);
+       else
+               bic_enabled = bic_lookup(optarg);
+
+       shown = 1;
+}
+
 void cmdline(int argc, char **argv)
 {
        int opt;
        int option_index = 0;
        static struct option long_options[] = {
-               {"Counter",     required_argument,      0, 'C'},
-               {"counter",     required_argument,      0, 'c'},
+               {"add",         required_argument,      0, 'a'},
                {"Dump",        no_argument,            0, 'D'},
-               {"debug",       no_argument,            0, 'd'},
+               {"debug",       no_argument,            0, 'd'},        /* internal, not documented */
                {"interval",    required_argument,      0, 'i'},
                {"help",        no_argument,            0, 'h'},
+               {"hide",        required_argument,      0, 'H'},        // meh, -h taken by --help
                {"Joules",      no_argument,            0, 'J'},
-               {"MSR",         required_argument,      0, 'M'},
-               {"msr",         required_argument,      0, 'm'},
                {"out",         required_argument,      0, 'o'},
                {"Package",     no_argument,            0, 'p'},
                {"processor",   no_argument,            0, 'p'},
+               {"quiet",       no_argument,            0, 'q'},
+               {"show",        required_argument,      0, 's'},
                {"Summary",     no_argument,            0, 'S'},
                {"TCC",         required_argument,      0, 'T'},
                {"version",     no_argument,            0, 'v' },
@@ -3744,14 +4463,11 @@ void cmdline(int argc, char **argv)
 
        progname = argv[0];
 
-       while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
+       while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v",
                                long_options, &option_index)) != -1) {
                switch (opt) {
-               case 'C':
-                       sscanf(optarg, "%x", &extra_delta_offset64);
-                       break;
-               case 'c':
-                       sscanf(optarg, "%x", &extra_delta_offset32);
+               case 'a':
+                       parse_add_command(optarg);
                        break;
                case 'D':
                        dump_only++;
@@ -3759,6 +4475,9 @@ void cmdline(int argc, char **argv)
                case 'd':
                        debug++;
                        break;
+               case 'H':
+                       parse_show_hide(optarg, HIDE_LIST);
+                       break;
                case 'h':
                default:
                        help();
@@ -3780,12 +4499,6 @@ void cmdline(int argc, char **argv)
                case 'J':
                        rapl_joules++;
                        break;
-               case 'M':
-                       sscanf(optarg, "%x", &extra_msr_offset64);
-                       break;
-               case 'm':
-                       sscanf(optarg, "%x", &extra_msr_offset32);
-                       break;
                case 'o':
                        outf = fopen_or_die(optarg, "w");
                        break;
@@ -3795,6 +4508,12 @@ void cmdline(int argc, char **argv)
                case 'p':
                        show_core_only++;
                        break;
+               case 'q':
+                       quiet = 1;
+                       break;
+               case 's':
+                       parse_show_hide(optarg, SHOW_LIST);
+                       break;
                case 'S':
                        summary_only++;
                        break;
@@ -3815,7 +4534,7 @@ int main(int argc, char **argv)
 
        cmdline(argc, argv);
 
-       if (debug)
+       if (!quiet)
                print_version();
 
        turbostat_init();