Merge branch 'perf/urgent' into perf/core, to pick up latest fixes and refresh the...
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 955457a30197e8be224f0304ec83f14365b3fba9..8a6bbacd17dcfb077e2a910b5b532c1d24a4a623 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
        X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
        X86_BR_CALL_STACK       = 1 << 16,/* call stack */
        X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
+
+       X86_BR_TYPE_SAVE        = 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -514,6 +517,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
                cpuc->lbr_entries[i].in_tx      = 0;
                cpuc->lbr_entries[i].abort      = 0;
                cpuc->lbr_entries[i].cycles     = 0;
+               cpuc->lbr_entries[i].type       = 0;
                cpuc->lbr_entries[i].reserved   = 0;
        }
        cpuc->lbr_stack.nr = i;
@@ -600,6 +604,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                cpuc->lbr_entries[out].in_tx     = in_tx;
                cpuc->lbr_entries[out].abort     = abort;
                cpuc->lbr_entries[out].cycles    = cycles;
+               cpuc->lbr_entries[out].type      = 0;
                cpuc->lbr_entries[out].reserved  = 0;
                out++;
        }
@@ -677,6 +682,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+               mask |= X86_BR_TYPE_SAVE;
+
        /*
         * stash actual user request into reg, it may
         * be used by fixup code for some CPU
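
With the hunk above in place, a user-space request for PERF_SAMPLE_BRANCH_TYPE_SAVE is folded into the software LBR filter mask as X86_BR_TYPE_SAVE. For orientation, here is a minimal sketch of how the flag might be requested from user space; open_branch_type_event() is an illustrative helper (not part of this patch) and it assumes a uapi header new enough to define PERF_SAMPLE_BRANCH_TYPE_SAVE. The perf tool exposes the same request as the save_type branch filter, e.g. "perf record -j any,save_type".

/*
 * Illustrative sketch only (not part of this patch): requesting branch
 * samples with the saved branch type from user space. Assumes a uapi
 * header that defines PERF_SAMPLE_BRANCH_TYPE_SAVE.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

static int open_branch_type_event(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* sample all branches and ask the kernel to record each branch's type */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_TYPE_SAVE;

	/* group_fd = -1, flags = 0 */
	return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}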
@@ -930,6 +939,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
        return ret;
 }
 
+#define X86_BR_TYPE_MAP_MAX    16
+
+static int branch_map[X86_BR_TYPE_MAP_MAX] = {
+       PERF_BR_CALL,           /* X86_BR_CALL */
+       PERF_BR_RET,            /* X86_BR_RET */
+       PERF_BR_SYSCALL,        /* X86_BR_SYSCALL */
+       PERF_BR_SYSRET,         /* X86_BR_SYSRET */
+       PERF_BR_UNKNOWN,        /* X86_BR_INT */
+       PERF_BR_UNKNOWN,        /* X86_BR_IRET */
+       PERF_BR_COND,           /* X86_BR_JCC */
+       PERF_BR_UNCOND,         /* X86_BR_JMP */
+       PERF_BR_UNKNOWN,        /* X86_BR_IRQ */
+       PERF_BR_IND_CALL,       /* X86_BR_IND_CALL */
+       PERF_BR_UNKNOWN,        /* X86_BR_ABORT */
+       PERF_BR_UNKNOWN,        /* X86_BR_IN_TX */
+       PERF_BR_UNKNOWN,        /* X86_BR_NO_TX */
+       PERF_BR_CALL,           /* X86_BR_ZERO_CALL */
+       PERF_BR_UNKNOWN,        /* X86_BR_CALL_STACK */
+       PERF_BR_IND,            /* X86_BR_IND_JMP */
+};
+
+static int
+common_branch_type(int type)
+{
+       int i;
+
+       type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+
+       if (type) {
+               i = __ffs(type);
+               if (i < X86_BR_TYPE_MAP_MAX)
+                       return branch_map[i];
+       }
+
+       return PERF_BR_UNKNOWN;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
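
The lookup added above relies on the X86_BR_* bit layout: the two privilege bits (X86_BR_USER, X86_BR_KERNEL) occupy bits 0 and 1, so after the right shift by 2 the lowest set bit found by __ffs() indexes straight into branch_map[]. For example, a user-mode conditional branch classified as X86_BR_USER | X86_BR_JCC (bits 0 and 8) becomes bit 6 after the shift and maps to PERF_BR_COND. A small user-space re-creation of that lookup, for illustration only, follows; __builtin_ctz() stands in for the kernel's __ffs(), and only the few X86_BR_* values needed for the demo are copied from the enum at the top of lbr.c.

/* Illustration only: user-space re-creation of common_branch_type(). */
#include <stdio.h>
#include <linux/perf_event.h>	/* PERF_BR_* (assumes a uapi with branch types) */

#define X86_BR_USER	(1 << 0)	/* branch target is user */
#define X86_BR_KERNEL	(1 << 1)	/* branch target is kernel */
#define X86_BR_CALL	(1 << 2)	/* call */
#define X86_BR_JCC	(1 << 8)	/* conditional */

/* sparse demo map; unset slots read as 0 == PERF_BR_UNKNOWN */
static const int demo_map[] = {
	[0] = PERF_BR_CALL,	/* X86_BR_CALL shifted down to bit 0 */
	[6] = PERF_BR_COND,	/* X86_BR_JCC  shifted down to bit 6 */
};

static int demo_branch_type(int type)
{
	type >>= 2;	/* drop the X86_BR_USER/X86_BR_KERNEL bits */
	if (type)
		return demo_map[__builtin_ctz(type)];
	return PERF_BR_UNKNOWN;
}

int main(void)
{
	/* prints 1: a user-mode conditional branch resolves to PERF_BR_COND */
	printf("%d\n", demo_branch_type(X86_BR_USER | X86_BR_JCC) == PERF_BR_COND);
	return 0;
}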
@@ -946,7 +992,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
        bool compress = false;
 
        /* if sampling all branches, then nothing to filter */
-       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+       if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+           ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                return;
 
        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -967,6 +1014,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
                        cpuc->lbr_entries[i].from = 0;
                        compress = true;
                }
+
+               if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+                       cpuc->lbr_entries[i].type = common_branch_type(type);
        }
 
        if (!compress)
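
The widened early-return check above is what lets the type reach the sample: with plain "all branches" sampling the filter loop used to be skipped entirely, but when X86_BR_TYPE_SAVE is also set the loop still runs so each entry's type field is filled in from common_branch_type(). Once the data reaches user space in struct perf_branch_entry, a consumer might print it along these lines; print_branch_stack() and br_type_name[] are illustrative helpers (decoding of the mmap ring buffer and PERF_RECORD_SAMPLE layout is omitted), and the PERF_BR_* names assume the uapi enum that accompanies this series.

/*
 * Illustrative consumer-side sketch (not kernel or perf-tool code): printing
 * the saved type from decoded branch-stack entries. Ring-buffer parsing is
 * omitted; 'entries' and 'nr' are assumed to come from such a decoder.
 */
#include <inttypes.h>
#include <stdio.h>
#include <linux/perf_event.h>

static const char * const br_type_name[PERF_BR_MAX] = {
	[PERF_BR_UNKNOWN]	= "unknown",
	[PERF_BR_COND]		= "cond",
	[PERF_BR_UNCOND]	= "uncond",
	[PERF_BR_IND]		= "ind",
	[PERF_BR_CALL]		= "call",
	[PERF_BR_IND_CALL]	= "ind_call",
	[PERF_BR_RET]		= "ret",
	[PERF_BR_SYSCALL]	= "syscall",
	[PERF_BR_SYSRET]	= "sysret",
};

static void print_branch_stack(const struct perf_branch_entry *entries, __u64 nr)
{
	for (__u64 i = 0; i < nr; i++) {
		unsigned int t = entries[i].type;

		printf("0x%" PRIx64 " -> 0x%" PRIx64 " [%s]\n",
		       (uint64_t)entries[i].from, (uint64_t)entries[i].to,
		       (t < PERF_BR_MAX && br_type_name[t]) ? br_type_name[t] : "unknown");
	}
}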