tools/power turbostat: version 18.07.27
[sfrench/cifs-2.6.git] / tools / power / x86 / turbostat / turbostat.c
index 6bf4268137975503ed844ce4bb6ad0adf5047aec..980bd9d20646bd1ef7d64f5a624c6f62fe8bacf4 100644 (file)
@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window;       /* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;              /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;              /* IA32_HWP_REQUEST_PKG */
 unsigned int has_misc_feature_control;
+unsigned int first_counter_read = 1;
 
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -170,6 +171,8 @@ struct thread_data {
        unsigned long long  irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;
+       unsigned int apic_id;
+       unsigned int x2apic_id;
        unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
@@ -216,12 +219,21 @@ struct pkg_data {
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
 #define EVEN_COUNTERS thread_even, core_even, package_even
 
-#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
-       (thread_base + (pkg_no) * topo.num_cores_per_pkg * \
-               topo.num_threads_per_core + \
-               (core_no) * topo.num_threads_per_core + (thread_no))
-#define GET_CORE(core_base, core_no, pkg_no) \
-       (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
+#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)         \
+       ((thread_base) +                                                      \
+        ((pkg_no) *                                                          \
+         topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
+        ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
+        ((core_no) * topo.threads_per_core) +                                \
+        (thread_no))
+
+#define GET_CORE(core_base, core_no, node_no, pkg_no)                  \
+       ((core_base) +                                                  \
+        ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
+        ((node_no) * topo.cores_per_node) +                            \
+        (core_no))
+
+
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
@@ -260,8 +272,10 @@ struct system_summary {
 struct cpu_topology {
        int physical_package_id;
        int logical_cpu_id;
-       int node_id;
+       int physical_node_id;
+       int logical_node_id;    /* 0-based count within the package */
        int physical_core_id;
+       int thread_id;
        cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
 } *cpus;
 
@@ -270,8 +284,10 @@ struct topo_params {
        int num_cpus;
        int num_cores;
        int max_cpu_num;
-       int num_cores_per_pkg;
-       int num_threads_per_core;
+       int max_node_num;
+       int nodes_per_pkg;
+       int cores_per_node;
+       int threads_per_core;
 } topo;
 
 struct timeval tv_even, tv_odd, tv_delta;
@@ -293,27 +309,33 @@ int cpu_is_not_present(int cpu)
 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
        struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 {
-       int retval, pkg_no, core_no, thread_no;
+       int retval, pkg_no, core_no, thread_no, node_no;
 
        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-               for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-                       for (thread_no = 0; thread_no <
-                               topo.num_threads_per_core; ++thread_no) {
-                               struct thread_data *t;
-                               struct core_data *c;
-                               struct pkg_data *p;
-
-                               t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
-
-                               if (cpu_is_not_present(t->cpu_id))
-                                       continue;
-
-                               c = GET_CORE(core_base, core_no, pkg_no);
-                               p = GET_PKG(pkg_base, pkg_no);
-
-                               retval = func(t, c, p);
-                               if (retval)
-                                       return retval;
+               for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
+                       for (node_no = 0; node_no < topo.nodes_per_pkg;
+                            node_no++) {
+                               for (thread_no = 0; thread_no <
+                                       topo.threads_per_core; ++thread_no) {
+                                       struct thread_data *t;
+                                       struct core_data *c;
+                                       struct pkg_data *p;
+
+                                       t = GET_THREAD(thread_base, thread_no,
+                                                      core_no, node_no,
+                                                      pkg_no);
+
+                                       if (cpu_is_not_present(t->cpu_id))
+                                               continue;
+
+                                       c = GET_CORE(core_base, core_no,
+                                                    node_no, pkg_no);
+                                       p = GET_PKG(pkg_base, pkg_no);
+
+                                       retval = func(t, c, p);
+                                       if (retval)
+                                               return retval;
+                               }
                        }
                }
        }
@@ -362,19 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 }
 
 /*
- * Each string in this array is compared in --show and --hide cmdline.
- * Thus, strings that are proper sub-sets must follow their more specific peers.
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ *    matching on them for --show and --hide.
  */
 struct msr_counter bic[] = {
        { 0x0, "usec" },
        { 0x0, "Time_Of_Day_Seconds" },
        { 0x0, "Package" },
+       { 0x0, "Node" },
        { 0x0, "Avg_MHz" },
+       { 0x0, "Busy%" },
        { 0x0, "Bzy_MHz" },
        { 0x0, "TSC_MHz" },
        { 0x0, "IRQ" },
        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
-       { 0x0, "Busy%" },
+       { 0x0, "sysfs" },
        { 0x0, "CPU%c1" },
        { 0x0, "CPU%c3" },
        { 0x0, "CPU%c6" },
@@ -405,72 +431,73 @@ struct msr_counter bic[] = {
        { 0x0, "Cor_J" },
        { 0x0, "GFX_J" },
        { 0x0, "RAM_J" },
-       { 0x0, "Core" },
-       { 0x0, "CPU" },
        { 0x0, "Mod%c6" },
-       { 0x0, "sysfs" },
        { 0x0, "Totl%C0" },
        { 0x0, "Any%C0" },
        { 0x0, "GFX%C0" },
        { 0x0, "CPUGFX%" },
-       { 0x0, "Node%" },
+       { 0x0, "Core" },
+       { 0x0, "CPU" },
+       { 0x0, "APIC" },
+       { 0x0, "X2APIC" },
 };
 
-
-
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 #define        BIC_USEC        (1ULL << 0)
 #define        BIC_TOD         (1ULL << 1)
 #define        BIC_Package     (1ULL << 2)
-#define        BIC_Avg_MHz     (1ULL << 3)
-#define        BIC_Bzy_MHz     (1ULL << 4)
-#define        BIC_TSC_MHz     (1ULL << 5)
-#define        BIC_IRQ         (1ULL << 6)
-#define        BIC_SMI         (1ULL << 7)
-#define        BIC_Busy        (1ULL << 8)
-#define        BIC_CPU_c1      (1ULL << 9)
-#define        BIC_CPU_c3      (1ULL << 10)
-#define        BIC_CPU_c6      (1ULL << 11)
-#define        BIC_CPU_c7      (1ULL << 12)
-#define        BIC_ThreadC     (1ULL << 13)
-#define        BIC_CoreTmp     (1ULL << 14)
-#define        BIC_CoreCnt     (1ULL << 15)
-#define        BIC_PkgTmp      (1ULL << 16)
-#define        BIC_GFX_rc6     (1ULL << 17)
-#define        BIC_GFXMHz      (1ULL << 18)
-#define        BIC_Pkgpc2      (1ULL << 19)
-#define        BIC_Pkgpc3      (1ULL << 20)
-#define        BIC_Pkgpc6      (1ULL << 21)
-#define        BIC_Pkgpc7      (1ULL << 22)
-#define        BIC_Pkgpc8      (1ULL << 23)
-#define        BIC_Pkgpc9      (1ULL << 24)
-#define        BIC_Pkgpc10     (1ULL << 25)
-#define BIC_CPU_LPI    (1ULL << 26)
-#define BIC_SYS_LPI    (1ULL << 27)
-#define        BIC_PkgWatt     (1ULL << 26)
-#define        BIC_CorWatt     (1ULL << 27)
-#define        BIC_GFXWatt     (1ULL << 28)
-#define        BIC_PkgCnt      (1ULL << 29)
-#define        BIC_RAMWatt     (1ULL << 30)
-#define        BIC_PKG__       (1ULL << 31)
-#define        BIC_RAM__       (1ULL << 32)
-#define        BIC_Pkg_J       (1ULL << 33)
-#define        BIC_Cor_J       (1ULL << 34)
-#define        BIC_GFX_J       (1ULL << 35)
-#define        BIC_RAM_J       (1ULL << 36)
-#define        BIC_Core        (1ULL << 37)
-#define        BIC_CPU         (1ULL << 38)
-#define        BIC_Mod_c6      (1ULL << 39)
-#define        BIC_sysfs       (1ULL << 40)
-#define        BIC_Totl_c0     (1ULL << 41)
-#define        BIC_Any_c0      (1ULL << 42)
-#define        BIC_GFX_c0      (1ULL << 43)
-#define        BIC_CPUGFX      (1ULL << 44)
-
-#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD)
+#define        BIC_Node        (1ULL << 3)
+#define        BIC_Avg_MHz     (1ULL << 4)
+#define        BIC_Busy        (1ULL << 5)
+#define        BIC_Bzy_MHz     (1ULL << 6)
+#define        BIC_TSC_MHz     (1ULL << 7)
+#define        BIC_IRQ         (1ULL << 8)
+#define        BIC_SMI         (1ULL << 9)
+#define        BIC_sysfs       (1ULL << 10)
+#define        BIC_CPU_c1      (1ULL << 11)
+#define        BIC_CPU_c3      (1ULL << 12)
+#define        BIC_CPU_c6      (1ULL << 13)
+#define        BIC_CPU_c7      (1ULL << 14)
+#define        BIC_ThreadC     (1ULL << 15)
+#define        BIC_CoreTmp     (1ULL << 16)
+#define        BIC_CoreCnt     (1ULL << 17)
+#define        BIC_PkgTmp      (1ULL << 18)
+#define        BIC_GFX_rc6     (1ULL << 19)
+#define        BIC_GFXMHz      (1ULL << 20)
+#define        BIC_Pkgpc2      (1ULL << 21)
+#define        BIC_Pkgpc3      (1ULL << 22)
+#define        BIC_Pkgpc6      (1ULL << 23)
+#define        BIC_Pkgpc7      (1ULL << 24)
+#define        BIC_Pkgpc8      (1ULL << 25)
+#define        BIC_Pkgpc9      (1ULL << 26)
+#define        BIC_Pkgpc10     (1ULL << 27)
+#define BIC_CPU_LPI    (1ULL << 28)
+#define BIC_SYS_LPI    (1ULL << 29)
+#define        BIC_PkgWatt     (1ULL << 30)
+#define        BIC_CorWatt     (1ULL << 31)
+#define        BIC_GFXWatt     (1ULL << 32)
+#define        BIC_PkgCnt      (1ULL << 33)
+#define        BIC_RAMWatt     (1ULL << 34)
+#define        BIC_PKG__       (1ULL << 35)
+#define        BIC_RAM__       (1ULL << 36)
+#define        BIC_Pkg_J       (1ULL << 37)
+#define        BIC_Cor_J       (1ULL << 38)
+#define        BIC_GFX_J       (1ULL << 39)
+#define        BIC_RAM_J       (1ULL << 40)
+#define        BIC_Mod_c6      (1ULL << 41)
+#define        BIC_Totl_c0     (1ULL << 42)
+#define        BIC_Any_c0      (1ULL << 43)
+#define        BIC_GFX_c0      (1ULL << 44)
+#define        BIC_CPUGFX      (1ULL << 45)
+#define        BIC_Core        (1ULL << 46)
+#define        BIC_CPU         (1ULL << 47)
+#define        BIC_APIC        (1ULL << 48)
+#define        BIC_X2APIC      (1ULL << 49)
+
+#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
 
 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
@@ -497,17 +524,34 @@ void help(void)
        "when COMMAND completes.\n"
        "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
        "to print statistics, until interrupted.\n"
-       "--add          add a counter\n"
-       "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
-       "--cpu  cpu-set limit output to summary plus cpu-set:\n"
-       "               {core | package | j,k,l..m,n-p }\n"
-       "--quiet        skip decoding system configuration header\n"
-       "--interval sec.subsec  Override default 5-second measurement interval\n"
-       "--help         print this help message\n"
-       "--list         list column headers only\n"
-       "--num_iterations num   number of the measurement iterations\n"
-       "--out file     create or truncate \"file\" for all output\n"
-       "--version      print version information\n"
+       "  -a, --add    add a counter\n"
+       "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+       "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
+       "                 {core | package | j,k,l..m,n-p }\n"
+       "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
+       "  -D, --Dump   displays the raw counter values\n"
+       "  -e, --enable [all | column]\n"
+       "               shows all or the specified disabled column\n"
+       "  -H, --hide [column|column,column,...]\n"
+       "               hide the specified column(s)\n"
+       "  -i, --interval sec.subsec\n"
+       "               Override default 5-second measurement interval\n"
+       "  -J, --Joules displays energy in Joules instead of Watts\n"
+       "  -l, --list   list column headers only\n"
+       "  -n, --num_iterations num\n"
+       "               number of the measurement iterations\n"
+       "  -o, --out file\n"
+       "               create or truncate \"file\" for all output\n"
+       "  -q, --quiet  skip decoding system configuration header\n"
+       "  -s, --show [column|column,column,...]\n"
+       "               show only the specified column(s)\n"
+       "  -S, --Summary\n"
+       "               limits output to 1-line system summary per interval\n"
+       "  -T, --TCC temperature\n"
+       "               sets the Thermal Control Circuit temperature in\n"
+       "                 degrees Celsius\n"
+       "  -h, --help   print this help message\n"
+       "  -v, --version        print version information\n"
        "\n"
        "For more help, run \"man turbostat\"\n");
 }
@@ -575,10 +619,16 @@ void print_header(char *delim)
                outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Package))
                outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+       if (DO_BIC(BIC_Node))
+               outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Core))
                outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
        if (DO_BIC(BIC_CPU))
                outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+       if (DO_BIC(BIC_APIC))
+               outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
+       if (DO_BIC(BIC_X2APIC))
+               outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Avg_MHz))
                outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Busy))
@@ -852,10 +902,16 @@ int format_counters(struct thread_data *t, struct core_data *c,
        if (t == &average.threads) {
                if (DO_BIC(BIC_Package))
                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+               if (DO_BIC(BIC_Node))
+                       outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
                if (DO_BIC(BIC_Core))
                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
                if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+               if (DO_BIC(BIC_APIC))
+                       outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+               if (DO_BIC(BIC_X2APIC))
+                       outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
        } else {
                if (DO_BIC(BIC_Package)) {
                        if (p)
@@ -863,6 +919,15 @@ int format_counters(struct thread_data *t, struct core_data *c,
                        else
                                outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
                }
+               if (DO_BIC(BIC_Node)) {
+                       if (t)
+                               outp += sprintf(outp, "%s%d",
+                                               (printed++ ? delim : ""),
+                                             cpus[t->cpu_id].physical_node_id);
+                       else
+                               outp += sprintf(outp, "%s-",
+                                               (printed++ ? delim : ""));
+               }
                if (DO_BIC(BIC_Core)) {
                        if (c)
                                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
@@ -871,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
                }
                if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
+               if (DO_BIC(BIC_APIC))
+                       outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
+               if (DO_BIC(BIC_X2APIC))
+                       outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
        }
 
        if (DO_BIC(BIC_Avg_MHz))
@@ -1094,9 +1163,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
        if (!printed || !summary_only)
                print_header("\t");
 
-       if (topo.num_cpus > 1)
-               format_counters(&average.threads, &average.cores,
-                       &average.packages);
+       format_counters(&average.threads, &average.cores, &average.packages);
 
        printed = 1;
 
@@ -1198,6 +1265,12 @@ delta_thread(struct thread_data *new, struct thread_data *old,
        int i;
        struct msr_counter *mp;
 
+       /* we run cpuid just the 1st time, copy the results */
+       if (DO_BIC(BIC_APIC))
+               new->apic_id = old->apic_id;
+       if (DO_BIC(BIC_X2APIC))
+               new->x2apic_id = old->x2apic_id;
+
        /*
         * the timestamps from start of measurement interval are in "old"
         * the timestamp from end of measurement interval are in "new"
@@ -1360,6 +1433,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
        int i;
        struct msr_counter *mp;
 
+       /* copy un-changing apic_id's */
+       if (DO_BIC(BIC_APIC))
+               average.threads.apic_id = t->apic_id;
+       if (DO_BIC(BIC_X2APIC))
+               average.threads.x2apic_id = t->x2apic_id;
+
        /* remember first tv_begin */
        if (average.threads.tv_begin.tv_sec == 0)
                average.threads.tv_begin = t->tv_begin;
@@ -1586,6 +1665,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
        return 0;
 }
 
+void get_apic_id(struct thread_data *t)
+{
+       unsigned int eax, ebx, ecx, edx, max_level;
+
+       eax = ebx = ecx = edx = 0;
+
+       if (!genuine_intel)
+               return;
+
+       __cpuid(0, max_level, ebx, ecx, edx);
+
+       __cpuid(1, eax, ebx, ecx, edx);
+       t->apic_id = (ebx >> 24) & 0xf;
+
+       if (max_level < 0xb)
+               return;
+
+       if (!DO_BIC(BIC_X2APIC))
+               return;
+
+       ecx = 0;
+       __cpuid(0xb, eax, ebx, ecx, edx);
+       t->x2apic_id = edx;
+
+       if (debug && (t->apic_id != t->x2apic_id))
+               fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
+}
+
 /*
  * get_counters(...)
  * migrate to cpu
@@ -1599,7 +1706,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        struct msr_counter *mp;
        int i;
 
-
        gettimeofday(&t->tv_begin, (struct timezone *)NULL);
 
        if (cpu_migrate(cpu)) {
@@ -1607,6 +1713,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                return -1;
        }
 
+       if (first_counter_read)
+               get_apic_id(t);
 retry:
        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 
@@ -2342,44 +2450,6 @@ int parse_int_file(const char *fmt, ...)
        return value;
 }
 
-/*
- * get_cpu_position_in_core(cpu)
- * return the position of the CPU among its HT siblings in the core
- * return -1 if the sibling is not in list
- */
-int get_cpu_position_in_core(int cpu)
-{
-       char path[64];
-       FILE *filep;
-       int this_cpu;
-       char character;
-       int i;
-
-       sprintf(path,
-               "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
-               cpu);
-       filep = fopen(path, "r");
-       if (filep == NULL) {
-               perror(path);
-               exit(1);
-       }
-
-       for (i = 0; i < topo.num_threads_per_core; i++) {
-               fscanf(filep, "%d", &this_cpu);
-               if (this_cpu == cpu) {
-                       fclose(filep);
-                       return i;
-               }
-
-               /* Account for no separator after last thread*/
-               if (i != (topo.num_threads_per_core - 1))
-                       fscanf(filep, "%c", &character);
-       }
-
-       fclose(filep);
-       return -1;
-}
-
 /*
  * cpu_is_first_core_in_package(cpu)
  * return 1 if given CPU is 1st core in package
@@ -2399,7 +2469,48 @@ int get_core_id(int cpu)
        return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
 }
 
-int get_node_id(struct cpu_topology *thiscpu)
+void set_node_data(void)
+{
+       int pkg, node, lnode, cpu, cpux;
+       int cpu_count;
+
+       /* initialize logical_node_id */
+       for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
+               cpus[cpu].logical_node_id = -1;
+
+       cpu_count = 0;
+       for (pkg = 0; pkg < topo.num_packages; pkg++) {
+               lnode = 0;
+               for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
+                       if (cpus[cpu].physical_package_id != pkg)
+                               continue;
+                       /* find a cpu with an unset logical_node_id */
+                       if (cpus[cpu].logical_node_id != -1)
+                               continue;
+                       cpus[cpu].logical_node_id = lnode;
+                       node = cpus[cpu].physical_node_id;
+                       cpu_count++;
+                       /*
+                        * find all matching cpus on this pkg and set
+                        * the logical_node_id
+                        */
+                       for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
+                               if ((cpus[cpux].physical_package_id == pkg) &&
+                                  (cpus[cpux].physical_node_id == node)) {
+                                       cpus[cpux].logical_node_id = lnode;
+                                       cpu_count++;
+                               }
+                       }
+                       lnode++;
+                       if (lnode > topo.nodes_per_pkg)
+                               topo.nodes_per_pkg = lnode;
+               }
+               if (cpu_count >= topo.max_cpu_num)
+                       break;
+       }
+}
+
+int get_physical_node_id(struct cpu_topology *thiscpu)
 {
        char path[80];
        FILE *filep;
@@ -2423,12 +2534,15 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
        char path[80], character;
        FILE *filep;
        unsigned long map;
-       int shift, sib_core;
+       int so, shift, sib_core;
        int cpu = thiscpu->logical_cpu_id;
        int offset = topo.max_cpu_num + 1;
        size_t size;
+       int thread_id = 0;
 
        thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (thiscpu->thread_id < 0)
+               thiscpu->thread_id = thread_id++;
        if (!thiscpu->put_ids)
                return -1;
 
@@ -2443,10 +2557,15 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
                fscanf(filep, "%lx%c", &map, &character);
                for (shift = 0; shift < BITMASK_SIZE; shift++) {
                        if ((map >> shift) & 0x1) {
-                               sib_core = get_core_id(shift + offset);
-                               if (sib_core == thiscpu->physical_core_id)
-                                       CPU_SET_S(shift + offset, size,
-                                               thiscpu->put_ids);
+                               so = shift + offset;
+                               sib_core = get_core_id(so);
+                               if (sib_core == thiscpu->physical_core_id) {
+                                       CPU_SET_S(so, size, thiscpu->put_ids);
+                                       if ((so != cpu) &&
+                                           (cpus[so].thread_id < 0))
+                                               cpus[so].thread_id =
+                                                                   thread_id++;
+                               }
                        }
                }
        } while (!strncmp(&character, ",", 1));
@@ -2467,32 +2586,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
        struct thread_data *thread_base2, struct core_data *core_base2,
        struct pkg_data *pkg_base2)
 {
-       int retval, pkg_no, core_no, thread_no;
+       int retval, pkg_no, node_no, core_no, thread_no;
 
        for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-               for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-                       for (thread_no = 0; thread_no <
-                               topo.num_threads_per_core; ++thread_no) {
-                               struct thread_data *t, *t2;
-                               struct core_data *c, *c2;
-                               struct pkg_data *p, *p2;
-
-                               t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
-
-                               if (cpu_is_not_present(t->cpu_id))
-                                       continue;
-
-                               t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
-
-                               c = GET_CORE(core_base, core_no, pkg_no);
-                               c2 = GET_CORE(core_base2, core_no, pkg_no);
-
-                               p = GET_PKG(pkg_base, pkg_no);
-                               p2 = GET_PKG(pkg_base2, pkg_no);
-
-                               retval = func(t, c, p, t2, c2, p2);
-                               if (retval)
-                                       return retval;
+               for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
+                       for (core_no = 0; core_no < topo.cores_per_node;
+                            ++core_no) {
+                               for (thread_no = 0; thread_no <
+                                       topo.threads_per_core; ++thread_no) {
+                                       struct thread_data *t, *t2;
+                                       struct core_data *c, *c2;
+                                       struct pkg_data *p, *p2;
+
+                                       t = GET_THREAD(thread_base, thread_no,
+                                                      core_no, node_no,
+                                                      pkg_no);
+
+                                       if (cpu_is_not_present(t->cpu_id))
+                                               continue;
+
+                                       t2 = GET_THREAD(thread_base2, thread_no,
+                                                       core_no, node_no,
+                                                       pkg_no);
+
+                                       c = GET_CORE(core_base, core_no,
+                                                    node_no, pkg_no);
+                                       c2 = GET_CORE(core_base2, core_no,
+                                                     node_no,
+                                                     pkg_no);
+
+                                       p = GET_PKG(pkg_base, pkg_no);
+                                       p2 = GET_PKG(pkg_base2, pkg_no);
+
+                                       retval = func(t, c, p, t2, c2, p2);
+                                       if (retval)
+                                               return retval;
+                               }
                        }
                }
        }
@@ -2567,6 +2696,12 @@ int mark_cpu_present(int cpu)
        return 0;
 }
 
+int init_thread_id(int cpu)
+{
+       cpus[cpu].thread_id = -1;
+       return 0;
+}
+
 /*
  * snapshot_proc_interrupts()
  *
@@ -2813,6 +2948,7 @@ void do_sleep(void)
        }
 }
 
+
 void turbostat_loop()
 {
        int retval;
@@ -2826,6 +2962,7 @@ restart:
 
        snapshot_proc_sysfs_files();
        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+       first_counter_read = 0;
        if (retval < -1) {
                exit(retval);
        } else if (retval == -1) {
@@ -4320,13 +4457,15 @@ void process_cpuid()
        family = (fms >> 8) & 0xf;
        model = (fms >> 4) & 0xf;
        stepping = fms & 0xf;
-       if (family == 6 || family == 0xf)
+       if (family == 0xf)
+               family += (fms >> 20) & 0xff;
+       if (family >= 6)
                model += ((fms >> 16) & 0xf) << 4;
 
        if (!quiet) {
                fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
                        max_level, family, model, stepping, family, model, stepping);
-               fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
+               fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx & (1 << 0) ? "SSE3" : "-",
                        ecx & (1 << 3) ? "MONITOR" : "-",
                        ecx & (1 << 6) ? "SMX" : "-",
@@ -4335,6 +4474,7 @@ void process_cpuid()
                        edx & (1 << 4) ? "TSC" : "-",
                        edx & (1 << 5) ? "MSR" : "-",
                        edx & (1 << 22) ? "ACPI-TM" : "-",
+                       edx & (1 << 28) ? "HT" : "-",
                        edx & (1 << 29) ? "TM" : "-");
        }
 
@@ -4586,7 +4726,6 @@ void process_cpuid()
        return;
 }
 
-
 /*
  * in /dev/cpu/ return success for names that are numbers
  * ie. filter out ".", "..", "microcode".
@@ -4653,6 +4792,7 @@ void topology_probe()
        cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 
+       for_all_proc_cpus(init_thread_id);
 
        /*
         * For online cpus
@@ -4675,7 +4815,9 @@ void topology_probe()
                        max_package_id = cpus[i].physical_package_id;
 
                /* get numa node information */
-               cpus[i].node_id = get_node_id(&cpus[i]);
+               cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
+               if (cpus[i].physical_node_id > topo.max_node_num)
+                       topo.max_node_num = cpus[i].physical_node_id;
 
                /* get core information */
                cpus[i].physical_core_id = get_core_id(i);
@@ -4686,18 +4828,15 @@ void topology_probe()
                siblings = get_thread_siblings(&cpus[i]);
                if (siblings > max_siblings)
                        max_siblings = siblings;
-
-               if (debug > 1)
-                       fprintf(outf, "cpu %d pkg %d node %d core %d\n",
-                               i, cpus[i].physical_package_id,
-                               cpus[i].node_id,
-                               cpus[i].physical_core_id);
+               if (cpus[i].thread_id == 0)
+                       topo.num_cores++;
        }
-       topo.num_cores_per_pkg = max_core_id + 1;
+
+       topo.cores_per_node = max_core_id + 1;
        if (debug > 1)
                fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
-                       max_core_id, topo.num_cores_per_pkg);
-       if (!summary_only && topo.num_cores_per_pkg > 1)
+                       max_core_id, topo.cores_per_node);
+       if (!summary_only && topo.cores_per_node > 1)
                BIC_PRESENT(BIC_Core);
 
        topo.num_packages = max_package_id + 1;
@@ -4707,31 +4846,52 @@ void topology_probe()
        if (!summary_only && topo.num_packages > 1)
                BIC_PRESENT(BIC_Package);
 
-       topo.num_threads_per_core = max_siblings;
+       set_node_data();
+       if (debug > 1)
+               fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
+       if (!summary_only && topo.nodes_per_pkg > 1)
+               BIC_PRESENT(BIC_Node);
+
+       topo.threads_per_core = max_siblings;
        if (debug > 1)
                fprintf(outf, "max_siblings %d\n", max_siblings);
+
+       if (debug < 1)
+               return;
+
+       for (i = 0; i <= topo.max_cpu_num; ++i) {
+               fprintf(outf,
+                       "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
+                       i, cpus[i].physical_package_id,
+                       cpus[i].physical_node_id,
+                       cpus[i].logical_node_id,
+                       cpus[i].physical_core_id,
+                       cpus[i].thread_id);
+       }
+
 }
 
 void
-allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
+allocate_counters(struct thread_data **t, struct core_data **c,
+                 struct pkg_data **p)
 {
        int i;
+       int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
+                       topo.num_packages;
+       int num_threads = topo.threads_per_core * num_cores;
 
-       *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
-               topo.num_packages, sizeof(struct thread_data));
+       *t = calloc(num_threads, sizeof(struct thread_data));
        if (*t == NULL)
                goto error;
 
-       for (i = 0; i < topo.num_threads_per_core *
-               topo.num_cores_per_pkg * topo.num_packages; i++)
+       for (i = 0; i < num_threads; i++)
                (*t)[i].cpu_id = -1;
 
-       *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
-               sizeof(struct core_data));
+       *c = calloc(num_cores, sizeof(struct core_data));
        if (*c == NULL)
                goto error;
 
-       for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
+       for (i = 0; i < num_cores; i++)
                (*c)[i].core_id = -1;
 
        *p = calloc(topo.num_packages, sizeof(struct pkg_data));
@@ -4748,47 +4908,46 @@ error:
 /*
  * init_counter()
  *
- * set cpu_id, core_num, pkg_num
  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
- *
- * increment topo.num_cores when 1st core in pkg seen
  */
 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
-       struct pkg_data *pkg_base, int thread_num, int core_num,
-       int pkg_num, int cpu_id)
+       struct pkg_data *pkg_base, int cpu_id)
 {
+       int pkg_id = cpus[cpu_id].physical_package_id;
+       int node_id = cpus[cpu_id].logical_node_id;
+       int core_id = cpus[cpu_id].physical_core_id;
+       int thread_id = cpus[cpu_id].thread_id;
        struct thread_data *t;
        struct core_data *c;
        struct pkg_data *p;
 
-       t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
-       c = GET_CORE(core_base, core_num, pkg_num);
-       p = GET_PKG(pkg_base, pkg_num);
+
+       /* Workaround for systems where physical_node_id==-1
+        * and logical_node_id==(-1 - topo.num_cpus)
+        */
+       if (node_id < 0)
+               node_id = 0;
+
+       t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
+       c = GET_CORE(core_base, core_id, node_id, pkg_id);
+       p = GET_PKG(pkg_base, pkg_id);
 
        t->cpu_id = cpu_id;
-       if (thread_num == 0) {
+       if (thread_id == 0) {
                t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
                if (cpu_is_first_core_in_package(cpu_id))
                        t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
        }
 
-       c->core_id = core_num;
-       p->package_id = pkg_num;
+       c->core_id = core_id;
+       p->package_id = pkg_id;
 }
 
 
 int initialize_counters(int cpu_id)
 {
-       int my_thread_id, my_core_id, my_package_id;
-
-       my_package_id = get_physical_package_id(cpu_id);
-       my_core_id = get_core_id(cpu_id);
-       my_thread_id = get_cpu_position_in_core(cpu_id);
-       if (!my_thread_id)
-               topo.num_cores++;
-
-       init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-       init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
+       init_counter(EVEN_COUNTERS, cpu_id);
+       init_counter(ODD_COUNTERS, cpu_id);
        return 0;
 }
 
@@ -4873,6 +5032,7 @@ int fork_it(char **argv)
 
        snapshot_proc_sysfs_files();
        status = for_all_cpus(get_counters, EVEN_COUNTERS);
+       first_counter_read = 0;
        if (status)
                exit(status);
        /* clear affinity side-effect of get_counters() */
@@ -4936,7 +5096,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 17.06.23"
+       fprintf(outf, "turbostat version 18.07.27"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -5308,7 +5468,7 @@ void cmdline(int argc, char **argv)
                        break;
                case 'e':
                        /* --enable specified counter */
-                       bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+                       bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
                        break;
                case 'd':
                        debug++;
@@ -5392,7 +5552,6 @@ void cmdline(int argc, char **argv)
 int main(int argc, char **argv)
 {
        outf = stderr;
-
        cmdline(argc, argv);
 
        if (!quiet)