Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ed50b0943213fcaee079d378c7e6c82f3739e1ec..5fa58e4cffac3a7f2c7144fda8aeb72e6fc05a98 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -608,7 +608,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
        if (!f.file)
                return -EBADF;
 
-       css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+       css = css_tryget_online_from_dir(f.file->f_dentry,
+                                        &perf_event_cgrp_subsys);
        if (IS_ERR(css)) {
                ret = PTR_ERR(css);
                goto out;
@@ -2973,6 +2974,22 @@ out:
        local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+       struct perf_event_context *ctx;
+       int ctxn;
+
+       rcu_read_lock();
+       for_each_task_context_nr(ctxn) {
+               ctx = current->perf_event_ctxp[ctxn];
+               if (!ctx)
+                       continue;
+
+               perf_event_enable_on_exec(ctx);
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -3195,7 +3212,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+                              struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3259,8 +3277,6 @@ static void _free_event(struct perf_event *event)
        unaccount_event(event);
 
        if (event->rb) {
-               struct ring_buffer *rb;
-
                /*
                 * Can happen when we close an event with re-directed output.
                 *
@@ -3268,12 +3284,7 @@ static void _free_event(struct perf_event *event)
                 * over us; possibly making our ring_buffer_put() the last.
                 */
                mutex_lock(&event->mmap_mutex);
-               rb = event->rb;
-               if (rb) {
-                       rcu_assign_pointer(event->rb, NULL);
-                       ring_buffer_detach(event, rb);
-                       ring_buffer_put(rb); /* could be last */
-               }
+               ring_buffer_attach(event, NULL);
                mutex_unlock(&event->mmap_mutex);
        }
 
@@ -3870,28 +3881,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
                               struct ring_buffer *rb)
 {
+       struct ring_buffer *old_rb = NULL;
        unsigned long flags;
 
-       if (!list_empty(&event->rb_entry))
-               return;
+       if (event->rb) {
+               /*
+                * Should be impossible; we set this when removing
+                * event->rb_entry and wait/clear when adding event->rb_entry.
+                */
+               WARN_ON_ONCE(event->rcu_pending);
 
-       spin_lock_irqsave(&rb->event_lock, flags);
-       if (list_empty(&event->rb_entry))
-               list_add(&event->rb_entry, &rb->event_list);
-       spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+               old_rb = event->rb;
+               event->rcu_batches = get_state_synchronize_rcu();
+               event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-       unsigned long flags;
+               spin_lock_irqsave(&old_rb->event_lock, flags);
+               list_del_rcu(&event->rb_entry);
+               spin_unlock_irqrestore(&old_rb->event_lock, flags);
+       }
 
-       if (list_empty(&event->rb_entry))
-               return;
+       if (event->rcu_pending && rb) {
+               cond_synchronize_rcu(event->rcu_batches);
+               event->rcu_pending = 0;
+       }
 
-       spin_lock_irqsave(&rb->event_lock, flags);
-       list_del_init(&event->rb_entry);
-       wake_up_all(&event->waitq);
-       spin_unlock_irqrestore(&rb->event_lock, flags);
+       if (rb) {
+               spin_lock_irqsave(&rb->event_lock, flags);
+               list_add_rcu(&event->rb_entry, &rb->event_list);
+               spin_unlock_irqrestore(&rb->event_lock, flags);
+       }
+
+       rcu_assign_pointer(event->rb, rb);
+
+       if (old_rb) {
+               ring_buffer_put(old_rb);
+               /*
+                * Since we detached before setting the new rb (so that we
+                * could attach the new rb), we could have missed a wakeup.
+                * Provide it now.
+                */
+               wake_up_all(&event->waitq);
+       }
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3960,7 +3990,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;
 
-       struct ring_buffer *rb = event->rb;
+       struct ring_buffer *rb = ring_buffer_get(event);
        struct user_struct *mmap_user = rb->mmap_user;
        int mmap_locked = rb->mmap_locked;
        unsigned long size = perf_data_size(rb);
@@ -3968,18 +3998,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        atomic_dec(&rb->mmap_count);
 
        if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-               return;
+               goto out_put;
 
-       /* Detach current event from the buffer. */
-       rcu_assign_pointer(event->rb, NULL);
-       ring_buffer_detach(event, rb);
+       ring_buffer_attach(event, NULL);
        mutex_unlock(&event->mmap_mutex);
 
        /* If there's still other mmap()s of this buffer, we're done. */
-       if (atomic_read(&rb->mmap_count)) {
-               ring_buffer_put(rb); /* can't be last */
-               return;
-       }
+       if (atomic_read(&rb->mmap_count))
+               goto out_put;
 
        /*
         * No other mmap()s, detach from all other events that might redirect
@@ -4009,11 +4035,9 @@ again:
                 * still restart the iteration to make sure we're not now
                 * iterating the wrong list.
                 */
-               if (event->rb == rb) {
-                       rcu_assign_pointer(event->rb, NULL);
-                       ring_buffer_detach(event, rb);
-                       ring_buffer_put(rb); /* can't be last, we still have one */
-               }
+               if (event->rb == rb)
+                       ring_buffer_attach(event, NULL);
+
                mutex_unlock(&event->mmap_mutex);
                put_event(event);
 
@@ -4038,6 +4062,7 @@ again:
        vma->vm_mm->pinned_vm -= mmap_locked;
        free_uid(mmap_user);
 
+out_put:
        ring_buffer_put(rb); /* could be last */
 }
 
@@ -4155,7 +4180,6 @@ again:
        vma->vm_mm->pinned_vm += extra;
 
        ring_buffer_attach(event, rb);
-       rcu_assign_pointer(event->rb, rb);
 
        perf_event_init_userpage(event);
        perf_event_update_userpage(event);
@@ -5067,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
                       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
        struct perf_comm_event comm_event;
-       struct perf_event_context *ctx;
-       int ctxn;
-
-       rcu_read_lock();
-       for_each_task_context_nr(ctxn) {
-               ctx = task->perf_event_ctxp[ctxn];
-               if (!ctx)
-                       continue;
-
-               perf_event_enable_on_exec(ctx);
-       }
-       rcu_read_unlock();
 
        if (!atomic_read(&nr_comm_events))
                return;
@@ -5093,7 +5105,7 @@ void perf_event_comm(struct task_struct *task)
                .event_id  = {
                        .header = {
                                .type = PERF_RECORD_COMM,
-                               .misc = 0,
+                               .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
                                /* .size */
                        },
                        /* .pid */
@@ -5439,6 +5451,9 @@ struct swevent_htable {
 
        /* Recursion avoidance in each contexts */
        int                             recursion[PERF_NR_CONTEXTS];
+
+       /* Keeps track of cpu being initialized/exited */
+       bool                            online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5685,8 +5700,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        hwc->state = !(flags & PERF_EF_START);
 
        head = find_swevent_head(swhash, event);
-       if (WARN_ON_ONCE(!head))
+       if (!head) {
+               /*
+                * We can race with cpu hotplug code. Do not
+                * WARN if the cpu just got unplugged.
+                */
+               WARN_ON_ONCE(swhash->online);
                return -EINVAL;
+       }
 
        hlist_add_head_rcu(&event->hlist_entry, head);
 
@@ -6956,7 +6977,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct ring_buffer *rb = NULL, *old_rb = NULL;
+       struct ring_buffer *rb = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -6984,8 +7005,6 @@ set:
        if (atomic_read(&event->mmap_count))
                goto unlock;
 
-       old_rb = event->rb;
-
        if (output_event) {
                /* get the rb we want to redirect to */
                rb = ring_buffer_get(output_event);
@@ -6993,23 +7012,7 @@ set:
                        goto unlock;
        }
 
-       if (old_rb)
-               ring_buffer_detach(event, old_rb);
-
-       if (rb)
-               ring_buffer_attach(event, rb);
-
-       rcu_assign_pointer(event->rb, rb);
-
-       if (old_rb) {
-               ring_buffer_put(old_rb);
-               /*
-                * Since we detached before setting the new rb, so that we
-                * could attach the new rb, we could have missed a wakeup.
-                * Provide it now.
-                */
-               wake_up_all(&event->waitq);
-       }
+       ring_buffer_attach(event, rb);
 
        ret = 0;
 unlock:
@@ -7060,6 +7063,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (attr.freq) {
                if (attr.sample_freq > sysctl_perf_event_sample_rate)
                        return -EINVAL;
+       } else {
+               if (attr.sample_period & (1ULL << 63))
+                       return -EINVAL;
        }
 
        /*
@@ -7120,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
+       if (is_sampling_event(event)) {
+               if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+                       err = -ENOTSUPP;
+                       goto err_alloc;
+               }
+       }
+
        account_event(event);
 
        /*
@@ -7431,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event;
+       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx;
        unsigned long flags;
 
@@ -7485,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        mutex_lock(&child_ctx->mutex);
 
-       list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+       list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
                __perf_event_exit_task(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
@@ -7865,6 +7878,7 @@ static void perf_event_init_cpu(int cpu)
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = true;
        if (swhash->hlist_refcount > 0) {
                struct swevent_hlist *hlist;
 
@@ -7922,6 +7936,7 @@ static void perf_event_exit_cpu(int cpu)
        perf_event_exit_cpu_context(cpu);
 
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = false;
        swevent_hlist_release(swhash);
        mutex_unlock(&swhash->hlist_mutex);
 }