arch/x86/kernel/cpu/perf_event_amd.c

   1 #ifdef CONFIG_CPU_SUP_AMD
   2
   /*
    * Held by amd_pmu_cpu_starting() and amd_pmu_cpu_dead() while they
    * look up, share, refcount and free the per-CPU amd_nb structures.
    */
   3 static DEFINE_RAW_SPINLOCK(amd_nb_lock);
   4
   5 static __initconst const u64 amd_hw_cache_event_ids
   6                                 [PERF_COUNT_HW_CACHE_MAX]
   7                                 [PERF_COUNT_HW_CACHE_OP_MAX]
   8                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
   9 {
  10  [ C(L1D) ] = {
  11         [ C(OP_READ) ] = {
  12                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
  13                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
  14         },
  15         [ C(OP_WRITE) ] = {
  16                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
  17                 [ C(RESULT_MISS)   ] = 0,
  18         },
  19         [ C(OP_PREFETCH) ] = {
  20                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
  21                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
  22         },
  23  },
  24  [ C(L1I ) ] = {
  25         [ C(OP_READ) ] = {
  26                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
  27                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
  28         },
  29         [ C(OP_WRITE) ] = {
  30                 [ C(RESULT_ACCESS) ] = -1,
  31                 [ C(RESULT_MISS)   ] = -1,
  32         },
  33         [ C(OP_PREFETCH) ] = {
  34                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
  35                 [ C(RESULT_MISS)   ] = 0,
  36         },
  37  },
  38  [ C(LL  ) ] = {
  39         [ C(OP_READ) ] = {
  40                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
  41                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
  42         },
  43         [ C(OP_WRITE) ] = {
  44                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
  45                 [ C(RESULT_MISS)   ] = 0,
  46         },
  47         [ C(OP_PREFETCH) ] = {
  48                 [ C(RESULT_ACCESS) ] = 0,
  49                 [ C(RESULT_MISS)   ] = 0,
  50         },
  51  },
  52  [ C(DTLB) ] = {
  53         [ C(OP_READ) ] = {
  54                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
  55                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
  56         },
  57         [ C(OP_WRITE) ] = {
  58                 [ C(RESULT_ACCESS) ] = 0,
  59                 [ C(RESULT_MISS)   ] = 0,
  60         },
  61         [ C(OP_PREFETCH) ] = {
  62                 [ C(RESULT_ACCESS) ] = 0,
  63                 [ C(RESULT_MISS)   ] = 0,
  64         },
  65  },
  66  [ C(ITLB) ] = {
  67         [ C(OP_READ) ] = {
  68                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes        */
  69                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
  70         },
  71         [ C(OP_WRITE) ] = {
  72                 [ C(RESULT_ACCESS) ] = -1,
  73                 [ C(RESULT_MISS)   ] = -1,
  74         },
  75         [ C(OP_PREFETCH) ] = {
  76                 [ C(RESULT_ACCESS) ] = -1,
  77                 [ C(RESULT_MISS)   ] = -1,
  78         },
  79  },
  80  [ C(BPU ) ] = {
  81         [ C(OP_READ) ] = {
  82                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
  83                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
  84         },
  85         [ C(OP_WRITE) ] = {
  86                 [ C(RESULT_ACCESS) ] = -1,
  87                 [ C(RESULT_MISS)   ] = -1,
  88         },
  89         [ C(OP_PREFETCH) ] = {
  90                 [ C(RESULT_ACCESS) ] = -1,
  91                 [ C(RESULT_MISS)   ] = -1,
  92         },
  93  },
  94 };
  95
  96 /*
  97  * AMD Performance Monitor K7 and later.
  98  */
  99 static const u64 amd_perfmon_event_map[] =
 100 {
 101   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
 102   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
 103   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
 104   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
 105   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
 106   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
 107 };
 108
 109 static u64 amd_pmu_event_map(int hw_event)
 110 {
 111         return amd_perfmon_event_map[hw_event];
 112 }
 113
 114 static int amd_pmu_hw_config(struct perf_event *event)
 115 {
 116         int ret = x86_pmu_hw_config(event);
 117
 118         if (ret)
 119                 return ret;
 120
 121         if (event->attr.type != PERF_TYPE_RAW)
 122                 return 0;
 123
 124         event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
 125
 126         return 0;
 127 }
 128
 129 /*
 130  * AMD64 events are detected based on their event codes.
 131  */
 132 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 133 {
 134         return (hwc->config & 0xe0) == 0xe0;
 135 }
 136
 137 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 138 {
 139         struct amd_nb *nb = cpuc->amd_nb;
 140
 141         return nb && nb->nb_id != -1;
 142 }
 143
 144 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 145                                       struct perf_event *event)
 146 {
 147         struct hw_perf_event *hwc = &event->hw;
 148         struct amd_nb *nb = cpuc->amd_nb;
 149         int i;
 150
 151         /*
 152          * only care about NB events
 153          */
 154         if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 155                 return;
 156
 157         /*
 158          * need to scan whole list because event may not have
 159          * been assigned during scheduling
 160          *
 161          * no race condition possible because event can only
 162          * be removed on one CPU at a time AND PMU is disabled
 163          * when we come here
 164          */
 165         for (i = 0; i < x86_pmu.num_counters; i++) {
 166                 if (nb->owners[i] == event) {
 167                         cmpxchg(nb->owners+i, event, NULL);
 168                         break;
 169                 }
 170         }
 171 }
 172
 173  /*
 174   * AMD64 NorthBridge events need special treatment because
 175   * counter access needs to be synchronized across all cores
 176   * of a package. Refer to BKDG section 3.12
 177   *
 178   * NB events are events measuring L3 cache, Hypertransport
 179   * traffic. They are identified by an event code >= 0xe00.
 180   * They measure events on the NorthBride which is shared
 181   * by all cores on a package. NB events are counted on a
 182   * shared set of counters. When a NB event is programmed
 183   * in a counter, the data actually comes from a shared
 184   * counter. Thus, access to those counters needs to be
 185   * synchronized.
 186   *
 187   * We implement the synchronization such that no two cores
 188   * can be measuring NB events using the same counters. Thus,
 189   * we maintain a per-NB allocation table. The available slot
 190   * is propagated using the event_constraint structure.
 191   *
 192   * We provide only one choice for each NB event based on
 193   * the fact that only NB events have restrictions. Consequently,
 194   * if a counter is available, there is a guarantee the NB event
 195   * will be assigned to it. If no slot is available, an empty
 196   * constraint is returned and scheduling will eventually fail
 197   * for this event.
 198   *
 199   * Note that all cores attached the same NB compete for the same
 200   * counters to host NB events, this is why we use atomic ops. Some
 201   * multi-chip CPUs may have more than one NB.
 202   *
 203   * Given that resources are allocated (cmpxchg), they must be
 204   * eventually freed for others to use. This is accomplished by
 205   * calling amd_put_event_constraints().
 206   *
 207   * Non NB events are not impacted by this restriction.
 208   */
 209 static struct event_constraint *
 210 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 211 {
 212         struct hw_perf_event *hwc = &event->hw;
 213         struct amd_nb *nb = cpuc->amd_nb;
 214         struct perf_event *old = NULL;
 215         int max = x86_pmu.num_counters;
 216         int i, j, k = -1;
 217
 218         /*
 219          * if not NB event or no NB, then no constraints
 220          */
 221         if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 222                 return &unconstrained;
 223
 224         /*
 225          * detect if already present, if so reuse
 226          *
 227          * cannot merge with actual allocation
 228          * because of possible holes
 229          *
 230          * event can already be present yet not assigned (in hwc->idx)
 231          * because of successive calls to x86_schedule_events() from
 232          * hw_perf_group_sched_in() without hw_perf_enable()
 233          */
 234         for (i = 0; i < max; i++) {
 235                 /*
 236                  * keep track of first free slot
 237                  */
 238                 if (k == -1 && !nb->owners[i])
 239                         k = i;
 240
 241                 /* already present, reuse */
 242                 if (nb->owners[i] == event)
 243                         goto done;
 244         }
 245         /*
 246          * not present, so grab a new slot
 247          * starting either at:
 248          */
 249         if (hwc->idx != -1) {
 250                 /* previous assignment */
 251                 i = hwc->idx;
 252         } else if (k != -1) {
 253                 /* start from free slot found */
 254                 i = k;
 255         } else {
 256                 /*
 257                  * event not found, no slot found in
 258                  * first pass, try again from the
 259                  * beginning
 260                  */
 261                 i = 0;
 262         }
 263         j = i;
 264         do {
 265                 old = cmpxchg(nb->owners+i, NULL, event);
 266                 if (!old)
 267                         break;
 268                 if (++i == max)
 269                         i = 0;
 270         } while (i != j);
 271 done:
 272         if (!old)
 273                 return &nb->event_constraints[i];
 274
 275         return &emptyconstraint;
 276 }
 277
 278 static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 279 {
 280         struct amd_nb *nb;
 281         int i;
 282
 283         nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
 284         if (!nb)
 285                 return NULL;
 286
 287         memset(nb, 0, sizeof(*nb));
 288         nb->nb_id = nb_id;
 289
 290         /*
 291          * initialize all possible NB constraints
 292          */
 293         for (i = 0; i < x86_pmu.num_counters; i++) {
 294                 __set_bit(i, nb->event_constraints[i].idxmsk);
 295                 nb->event_constraints[i].weight = 1;
 296         }
 297         return nb;
 298 }
 299
 300 static int amd_pmu_cpu_prepare(int cpu)
 301 {
 302         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 303
 304         WARN_ON_ONCE(cpuc->amd_nb);
 305
 306         if (boot_cpu_data.x86_max_cores < 2)
 307                 return NOTIFY_OK;
 308
 309         cpuc->amd_nb = amd_alloc_nb(cpu, -1);
 310         if (!cpuc->amd_nb)
 311                 return NOTIFY_BAD;
 312
 313         return NOTIFY_OK;
 314 }
 315
 316 static void amd_pmu_cpu_starting(int cpu)
 317 {
 318         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 319         struct amd_nb *nb;
 320         int i, nb_id;
 321
 322         if (boot_cpu_data.x86_max_cores < 2)
 323                 return;
 324
 325         nb_id = amd_get_nb_id(cpu);
 326         WARN_ON_ONCE(nb_id == BAD_APICID);
 327
 328         raw_spin_lock(&amd_nb_lock);
 329
 330         for_each_online_cpu(i) {
 331                 nb = per_cpu(cpu_hw_events, i).amd_nb;
 332                 if (WARN_ON_ONCE(!nb))
 333                         continue;
 334
 335                 if (nb->nb_id == nb_id) {
 336                         kfree(cpuc->amd_nb);
 337                         cpuc->amd_nb = nb;
 338                         break;
 339                 }
 340         }
 341
 342         cpuc->amd_nb->nb_id = nb_id;
 343         cpuc->amd_nb->refcnt++;
 344
 345         raw_spin_unlock(&amd_nb_lock);
 346 }
 347
 348 static void amd_pmu_cpu_dead(int cpu)
 349 {
 350         struct cpu_hw_events *cpuhw;
 351
 352         if (boot_cpu_data.x86_max_cores < 2)
 353                 return;
 354
 355         cpuhw = &per_cpu(cpu_hw_events, cpu);
 356
 357         raw_spin_lock(&amd_nb_lock);
 358
 359         if (cpuhw->amd_nb) {
 360                 struct amd_nb *nb = cpuhw->amd_nb;
 361
 362                 if (nb->nb_id == -1 || --nb->refcnt == 0)
 363                         kfree(nb);
 364
 365                 cpuhw->amd_nb = NULL;
 366         }
 367
 368         raw_spin_unlock(&amd_nb_lock);
 369 }
 370
 371 static __initconst const struct x86_pmu amd_pmu = {
 372         .name                   = "AMD",
 373         .handle_irq             = x86_pmu_handle_irq,
 374         .disable_all            = x86_pmu_disable_all,
 375         .enable_all             = x86_pmu_enable_all,
 376         .enable                 = x86_pmu_enable_event,
 377         .disable                = x86_pmu_disable_event,
 378         .hw_config              = amd_pmu_hw_config,
 379         .schedule_events        = x86_schedule_events,
 380         .eventsel               = MSR_K7_EVNTSEL0,
 381         .perfctr                = MSR_K7_PERFCTR0,
 382         .event_map              = amd_pmu_event_map,
 383         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
 384         .num_counters           = 4,
 385         .cntval_bits            = 48,
 386         .cntval_mask            = (1ULL << 48) - 1,
 387         .apic                   = 1,
 388         /* use highest bit to detect overflow */
 389         .max_period             = (1ULL << 47) - 1,
 390         .get_event_constraints  = amd_get_event_constraints,
 391         .put_event_constraints  = amd_put_event_constraints,
 392
 393         .cpu_prepare            = amd_pmu_cpu_prepare,
 394         .cpu_starting           = amd_pmu_cpu_starting,
 395         .cpu_dead               = amd_pmu_cpu_dead,
 396 };
 397
 398 static __init int amd_pmu_init(void)
 399 {
 400         /* Performance-monitoring supported from K7 and later: */
 401         if (boot_cpu_data.x86 < 6)
 402                 return -ENODEV;
 403
 404         x86_pmu = amd_pmu;
 405
 406         /* Events are common for all AMDs */
 407         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 408                sizeof(hw_cache_event_ids));
 409
 410         return 0;
 411 }
 412
 413 #else /* CONFIG_CPU_SUP_AMD */
 414
 415 static int amd_pmu_init(void)
 416 {
 417         return 0;
 418 }
 419
 420 #endif