perf/core: Fix cgroup events tracking

author Chengming Zhou <zhouchengming@bytedance.com>

Wed, 7 Dec 2022 12:40:23 +0000 (20:40 +0800)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 27 Dec 2022 11:44:00 +0000 (12:44 +0100)
author Chengming Zhou <zhouchengming@bytedance.com>
Wed, 7 Dec 2022 12:40:23 +0000 (20:40 +0800)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 27 Dec 2022 11:44:00 +0000 (12:44 +0100)
diff --git a/kernel/events/core.c b/kernel/events/core.c

index 4bd2434251f01c48f381e17bce7584993b555232..37c0f04d7a00553e272aeaf9681dc282b118655a 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -380,7 +380,6 @@ enum event_type_t {
  
  /*
   * perf_sched_events : >0 events exist
- * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
   */
  
  static void perf_sched_delayed(struct work_struct *work);
@@ -389,7 +388,6 @@ static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
  static DEFINE_MUTEX(perf_sched_mutex);
  static atomic_t perf_sched_count;
  
-static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
  static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
  
  static atomic_t nr_mmap_events __read_mostly;
@@ -844,9 +842,16 @@ static void perf_cgroup_switch(struct task_struct *task)
         struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
         struct perf_cgroup *cgrp;
  
-       cgrp = perf_cgroup_from_task(task, NULL);
+       /*
+        * cpuctx->cgrp is set when the first cgroup event is enabled,
+        * and is cleared when the last cgroup event is disabled.
+        */
+       if (READ_ONCE(cpuctx->cgrp) == NULL)
+               return;
  
         WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+
+       cgrp = perf_cgroup_from_task(task, NULL);
         if (READ_ONCE(cpuctx->cgrp) == cgrp)
                 return;
  
@@ -3631,8 +3636,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
          * to check if we have to switch out PMU state.
          * cgroup event are system-wide mode only
          */
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_switch(next);
+       perf_cgroup_switch(next);
  }
  
  static bool perf_less_group_idx(const void *l, const void *r)
@@ -4974,15 +4978,6 @@ static void unaccount_pmu_sb_event(struct perf_event *event)
                 detach_sb_event(event);
  }
  
-static void unaccount_event_cpu(struct perf_event *event, int cpu)
-{
-       if (event->parent)
-               return;
-
-       if (is_cgroup_event(event))
-               atomic_dec(&per_cpu(perf_cgroup_events, cpu));
-}
-
  #ifdef CONFIG_NO_HZ_FULL
  static DEFINE_SPINLOCK(nr_freq_lock);
  #endif
@@ -5048,8 +5043,6 @@ static void unaccount_event(struct perf_event *event)
                         schedule_delayed_work(&perf_sched_work, HZ);
         }
  
-       unaccount_event_cpu(event, event->cpu);
-
         unaccount_pmu_sb_event(event);
  }
  
@@ -11679,15 +11672,6 @@ static void account_pmu_sb_event(struct perf_event *event)
                 attach_sb_event(event);
  }
  
-static void account_event_cpu(struct perf_event *event, int cpu)
-{
-       if (event->parent)
-               return;
-
-       if (is_cgroup_event(event))
-               atomic_inc(&per_cpu(perf_cgroup_events, cpu));
-}
-
  /* Freq events need the tick to stay alive (see perf_event_task_tick). */
  static void account_freq_event_nohz(void)
  {
@@ -11775,8 +11759,6 @@ static void account_event(struct perf_event *event)
         }
  enabled:
  
-       account_event_cpu(event, event->cpu);
-
         account_pmu_sb_event(event);
  }
  
@@ -12822,13 +12804,11 @@ static void __perf_pmu_remove(struct perf_event_context *ctx,
  
         perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) {
                 perf_remove_from_context(event, 0);
-               unaccount_event_cpu(event, cpu);
                 put_pmu_ctx(event->pmu_ctx);
                 list_add(&event->migrate_entry, events);
  
                 for_each_sibling_event(sibling, event) {
                         perf_remove_from_context(sibling, 0);
-                       unaccount_event_cpu(sibling, cpu);
                         put_pmu_ctx(sibling->pmu_ctx);
                         list_add(&sibling->migrate_entry, events);
                 }
@@ -12847,7 +12827,6 @@ static void __perf_pmu_install_event(struct pmu *pmu,
  
         if (event->state >= PERF_EVENT_STATE_OFF)
                 event->state = PERF_EVENT_STATE_INACTIVE;
-       account_event_cpu(event, cpu);
         perf_install_in_context(ctx, event, cpu);
  }
  
@@ -13742,8 +13721,7 @@ static int __perf_cgroup_move(void *info)
         struct task_struct *task = info;
  
         preempt_disable();
-       if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-               perf_cgroup_switch(task);
+       perf_cgroup_switch(task);
         preempt_enable();
  
         return 0;
author	Chengming Zhou <zhouchengming@bytedance.com>
	Wed, 7 Dec 2022 12:40:23 +0000 (20:40 +0800)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 27 Dec 2022 11:44:00 +0000 (12:44 +0100)