/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
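/*
 * With FREQUENCY at 200Hz the clamp above is effectively a no-op and the
 * sampling timer fires roughly every 5ms (NSEC_PER_SEC / 200 = 5,000,000ns);
 * the 10us floor only matters if FREQUENCY were ever raised dramatically.
 */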
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}
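/*
 * Engine events pack the sample type, engine class and engine instance into
 * the low bits of the perf config value (see __I915_PMU_ENGINE() in the uapi
 * header); everything at or above __I915_PMU_OTHER(0) is a global, non-engine
 * event such as frequency, interrupts or RC6 residency.
 */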
static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}
static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available we do not need
	 * the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal the sampling timer to stop if only engine events are enabled
	 * and the GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}
static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		i915->pmu.timer_last = ktime_get();
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}
void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}
static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}
static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
	if (!wakeref)
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);
		if (val)
			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
				   period_ns);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		if (val & RING_WAIT)
			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
				   period_ns);

		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
				   period_ns);
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv, wakeref);
}
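/*
 * Note the sampling strategy above: engine busyness is inferred from the
 * seqno comparison (a request has been submitted but not yet completed),
 * while the WAIT and SEMA states are read from the RING_CTL register, which
 * requires forcewake to be held across the MMIO read.
 */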
static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}
static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake) {
			intel_wakeref_t wakeref;

			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
				val = intel_get_cagf(dev_priv,
						     I915_READ_NOTRACE(GEN6_RPSTAT1));
		}

		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(dev_priv, val),
				period_ns / 1000);
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(dev_priv,
					       dev_priv->gt_pm.rps.cur_freq),
				period_ns / 1000);
	}
}
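/*
 * Assuming the multiplier passed above is the elapsed period in microseconds,
 * the frequency samples accumulate MHz * usec; the divide by USEC_PER_SEC in
 * __i915_pmu_event_read() then yields MHz * seconds, which perf reports as an
 * average frequency over the measured interval.
 */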
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
	i915->pmu.timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(i915, period_ns);
	frequency_sample(i915, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}
static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}
static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}
static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}
static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
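/*
 * Once registered, these show up as an uncore PMU named "i915" and can be
 * opened with standard perf tooling, for example something like:
 *
 *   perf stat -e i915/rcs0-busy/ -e i915/actual-frequency/ -a sleep 1
 *
 * (the event names are generated in create_event_attributes() below).
 */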
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}
static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	intel_wakeref_t wakeref;
	unsigned long flags;
	u64 val;

	wakeref = intel_runtime_pm_get_if_in_use(i915);
	if (wakeref) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915, wakeref);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		/*
		 * Even though the branch above saw intel_runtime_pm_get_if_in_use
		 * fail to get the runtime PM reference, we cannot assume we are
		 * in runtime suspend since we can either: a) race with coming
		 * out of it before we took the power.lock, or b) there are
		 * other states than suspended which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the last
		 * known RC6 value.
		 */
		if (pm_runtime_status_suspended(kdev)) {
			val = pm_runtime_suspended_time(kdev);

			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_time_last = val;

			val -= i915->pmu.suspended_time_last;
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
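/*
 * The cmpxchg loop above follows the usual perf counter update pattern:
 * re-read the current value and retry if another reader updated prev_count
 * in the meantime, so each delta is accounted exactly once in event->count.
 */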
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}
static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};
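/*
 * These attribute groups are what perf exposes under
 * /sys/bus/event_source/devices/i915/: "format" describes how the config
 * field is laid out, "events" lists the named counters generated at runtime,
 * and "cpumask" tells tools which single CPU they should open events on.
 */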
#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}
static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}
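/*
 * The names produced above follow the "<counter>" and "<engine>-<counter>"
 * pattern, e.g. "actual-frequency", "rc6-residency", "rcs0-busy" or
 * "vcs0-sema", each optionally paired with a "<name>.unit" attribute that
 * perf uses to annotate the value (MHz or ns).
 */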
static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}
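/*
 * Like other uncore PMUs, i915 events are system wide rather than per task,
 * so a single "designated reader" CPU (advertised via the cpumask attribute)
 * owns them; if that CPU goes offline the perf context is migrated to a
 * sibling, where available, so that already opened events keep counting.
 */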
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}
void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}
void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}