Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ed50b0943213fcaee079d378c7e6c82f3739e1ec..5fa58e4cffac3a7f2c7144fda8aeb72e6fc05a98 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -608,7 +608,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
        if (!f.file)
                return -EBADF;
 
-       css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+       css = css_tryget_online_from_dir(f.file->f_dentry,
+                                        &perf_event_cgrp_subsys);
        if (IS_ERR(css)) {
                ret = PTR_ERR(css);
                goto out;
@@ -2973,6 +2974,22 @@ out:
        local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+       struct perf_event_context *ctx;
+       int ctxn;
+
+       rcu_read_lock();
+       for_each_task_context_nr(ctxn) {
+               ctx = current->perf_event_ctxp[ctxn];
+               if (!ctx)
+                       continue;
+
+               perf_event_enable_on_exec(ctx);
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -3195,7 +3212,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+                              struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3259,8 +3277,6 @@ static void _free_event(struct perf_event *event)
        unaccount_event(event);
 
        if (event->rb) {
-               struct ring_buffer *rb;
-
                /*
                 * Can happen when we close an event with re-directed output.
                 *
@@ -3268,12 +3284,7 @@ static void _free_event(struct perf_event *event)
                 * over us; possibly making our ring_buffer_put() the last.
                 */
                mutex_lock(&event->mmap_mutex);
-               rb = event->rb;
-               if (rb) {
-                       rcu_assign_pointer(event->rb, NULL);
-                       ring_buffer_detach(event, rb);
-                       ring_buffer_put(rb); /* could be last */
-               }
+               ring_buffer_attach(event, NULL);
                mutex_unlock(&event->mmap_mutex);
        }
 
@@ -3870,28 +3881,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
                               struct ring_buffer *rb)
 {
+       struct ring_buffer *old_rb = NULL;
        unsigned long flags;
 
-       if (!list_empty(&event->rb_entry))
-               return;
+       if (event->rb) {
+               /*
+                * Should be impossible; we set this when removing
+                * event->rb_entry and wait/clear when adding event->rb_entry.
+                */
+               WARN_ON_ONCE(event->rcu_pending);
 
-       spin_lock_irqsave(&rb->event_lock, flags);
-       if (list_empty(&event->rb_entry))
-               list_add(&event->rb_entry, &rb->event_list);
-       spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+               old_rb = event->rb;
+               event->rcu_batches = get_state_synchronize_rcu();
+               event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-       unsigned long flags;
+               spin_lock_irqsave(&old_rb->event_lock, flags);
+               list_del_rcu(&event->rb_entry);
+               spin_unlock_irqrestore(&old_rb->event_lock, flags);
+       }
 
-       if (list_empty(&event->rb_entry))
-               return;
+       if (event->rcu_pending && rb) {
+               cond_synchronize_rcu(event->rcu_batches);
+               event->rcu_pending = 0;
+       }
 
-       spin_lock_irqsave(&rb->event_lock, flags);
-       list_del_init(&event->rb_entry);
-       wake_up_all(&event->waitq);
-       spin_unlock_irqrestore(&rb->event_lock, flags);
+       if (rb) {
+               spin_lock_irqsave(&rb->event_lock, flags);
+               list_add_rcu(&event->rb_entry, &rb->event_list);
+               spin_unlock_irqrestore(&rb->event_lock, flags);
+       }
+
+       rcu_assign_pointer(event->rb, rb);
+
+       if (old_rb) {
+               ring_buffer_put(old_rb);
+               /*
+                * Since we detached before setting the new rb (so that we
+                * could attach the new rb), we could have missed a wakeup.
+                * Provide it now.
+                */
+               wake_up_all(&event->waitq);
+       }
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3960,7 +3990,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;
 
-       struct ring_buffer *rb = event->rb;
+       struct ring_buffer *rb = ring_buffer_get(event);
        struct user_struct *mmap_user = rb->mmap_user;
        int mmap_locked = rb->mmap_locked;
        unsigned long size = perf_data_size(rb);
@@ -3968,18 +3998,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        atomic_dec(&rb->mmap_count);
 
        if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-               return;
+               goto out_put;
 
-       /* Detach current event from the buffer. */
-       rcu_assign_pointer(event->rb, NULL);
-       ring_buffer_detach(event, rb);
+       ring_buffer_attach(event, NULL);
        mutex_unlock(&event->mmap_mutex);
 
        /* If there's still other mmap()s of this buffer, we're done. */
-       if (atomic_read(&rb->mmap_count)) {
-               ring_buffer_put(rb); /* can't be last */
-               return;
-       }
+       if (atomic_read(&rb->mmap_count))
+               goto out_put;
 
        /*
         * No other mmap()s, detach from all other events that might redirect
@@ -4009,11 +4035,9 @@ again:
                 * still restart the iteration to make sure we're not now
                 * iterating the wrong list.
                 */
-               if (event->rb == rb) {
-                       rcu_assign_pointer(event->rb, NULL);
-                       ring_buffer_detach(event, rb);
-                       ring_buffer_put(rb); /* can't be last, we still have one */
-               }
+               if (event->rb == rb)
+                       ring_buffer_attach(event, NULL);
+
                mutex_unlock(&event->mmap_mutex);
                put_event(event);
 
@@ -4038,6 +4062,7 @@ again:
        vma->vm_mm->pinned_vm -= mmap_locked;
        free_uid(mmap_user);
 
+out_put:
        ring_buffer_put(rb); /* could be last */
 }
 
@@ -4155,7 +4180,6 @@ again:
        vma->vm_mm->pinned_vm += extra;
 
        ring_buffer_attach(event, rb);
-       rcu_assign_pointer(event->rb, rb);
 
        perf_event_init_userpage(event);
        perf_event_update_userpage(event);
@@ -5067,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
                       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
        struct perf_comm_event comm_event;
-       struct perf_event_context *ctx;
-       int ctxn;
-
-       rcu_read_lock();
-       for_each_task_context_nr(ctxn) {
-               ctx = task->perf_event_ctxp[ctxn];
-               if (!ctx)
-                       continue;
-
-               perf_event_enable_on_exec(ctx);
-       }
-       rcu_read_unlock();
 
        if (!atomic_read(&nr_comm_events))
                return;
@@ -5093,7 +5105,7 @@ void perf_event_comm(struct task_struct *task)
                .event_id  = {
                        .header = {
                                .type = PERF_RECORD_COMM,
-                               .misc = 0,
+                               .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
                                /* .size */
                        },
                        /* .pid */
@@ -5439,6 +5451,9 @@ struct swevent_htable {
 
        /* Recursion avoidance in each contexts */
        int                             recursion[PERF_NR_CONTEXTS];
+
+       /* Keeps track of cpu being initialized/exited */
+       bool                            online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5685,8 +5700,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        hwc->state = !(flags & PERF_EF_START);
 
        head = find_swevent_head(swhash, event);
-       if (WARN_ON_ONCE(!head))
+       if (!head) {
+               /*
+                * We can race with cpu hotplug code. Do not
+                * WARN if the cpu just got unplugged.
+                */
+               WARN_ON_ONCE(swhash->online);
                return -EINVAL;
+       }
 
        hlist_add_head_rcu(&event->hlist_entry, head);
 
@@ -6956,7 +6977,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct ring_buffer *rb = NULL, *old_rb = NULL;
+       struct ring_buffer *rb = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -6984,8 +7005,6 @@ set:
        if (atomic_read(&event->mmap_count))
                goto unlock;
 
-       old_rb = event->rb;
-
        if (output_event) {
                /* get the rb we want to redirect to */
                rb = ring_buffer_get(output_event);
@@ -6993,23 +7012,7 @@ set:
                        goto unlock;
        }
 
-       if (old_rb)
-               ring_buffer_detach(event, old_rb);
-
-       if (rb)
-               ring_buffer_attach(event, rb);
-
-       rcu_assign_pointer(event->rb, rb);
-
-       if (old_rb) {
-               ring_buffer_put(old_rb);
-               /*
-                * Since we detached before setting the new rb, so that we
-                * could attach the new rb, we could have missed a wakeup.
-                * Provide it now.
-                */
-               wake_up_all(&event->waitq);
-       }
+       ring_buffer_attach(event, rb);
 
        ret = 0;
 unlock:
@@ -7060,6 +7063,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (attr.freq) {
                if (attr.sample_freq > sysctl_perf_event_sample_rate)
                        return -EINVAL;
+       } else {
+               if (attr.sample_period & (1ULL << 63))
+                       return -EINVAL;
        }
 
        /*
@@ -7120,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
+       if (is_sampling_event(event)) {
+               if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+                       err = -ENOTSUPP;
+                       goto err_alloc;
+               }
+       }
+
        account_event(event);
 
        /*
@@ -7431,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-       struct perf_event *child_event;
+       struct perf_event *child_event, *next;
        struct perf_event_context *child_ctx;
        unsigned long flags;
 
@@ -7485,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         */
        mutex_lock(&child_ctx->mutex);
 
-       list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
+       list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
                __perf_event_exit_task(child_event, child_ctx, child);
 
        mutex_unlock(&child_ctx->mutex);
@@ -7865,6 +7878,7 @@ static void perf_event_init_cpu(int cpu)
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = true;
        if (swhash->hlist_refcount > 0) {
                struct swevent_hlist *hlist;
 
@@ -7922,6 +7936,7 @@ static void perf_event_exit_cpu(int cpu)
        perf_event_exit_cpu_context(cpu);
 
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = false;
        swevent_hlist_release(swhash);
        mutex_unlock(&swhash->hlist_mutex);
 }