Merge tag 'trace-v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux...
index a0defe156b57109c18b5e8a6fbcc639d9c409536..2a7c6fd934e9cb391b5ddf589748c572810ad6d1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1263,10 +1263,17 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
 
 int tracing_alloc_snapshot_instance(struct trace_array *tr)
 {
+       int order;
        int ret;
 
        if (!tr->allocated_snapshot) {
 
+               /* Make the snapshot buffer have the same order as the main buffer */
+               order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
+               ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
+               if (ret < 0)
+                       return ret;
+
                /* allocate spare buffer */
                ret = resize_buffer_duplicate_size(&tr->max_buffer,
                                   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
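
The hunk above keeps the snapshot (max) buffer's sub-buffer order in lockstep
with the main buffer before the spare is sized; if the two orders diverged, a
later buffer swap by one of the latency tracers would hand out sub-buffers of
the wrong geometry. A minimal sketch of the invariant being maintained — the
helper name is illustrative, not part of the kernel, and CONFIG_TRACER_MAX_TRACE
is assumed:

	/* Illustrative only: the invariant the snapshot allocation relies on. */
	static inline bool snapshot_order_matches(struct trace_array *tr)
	{
		return ring_buffer_subbuf_order_get(tr->array_buffer.buffer) ==
		       ring_buffer_subbuf_order_get(tr->max_buffer.buffer);
	}
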
@@ -1286,6 +1293,7 @@ static void free_snapshot(struct trace_array *tr)
         * The max_tr ring buffer has some state (e.g. ring->clock) and
         * we want to preserve it.
         */
+       ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
        ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
        set_buffer_entries(&tr->max_buffer, 1);
        tracing_reset_online_cpus(&tr->max_buffer);
@@ -3767,7 +3775,7 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
 
        /* OK if part of the temp seq buffer */
        if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
-           (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
+           (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
                return true;
 
        /* Core rodata can not be freed */
@@ -5032,7 +5040,7 @@ static int tracing_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-static int tracing_release_generic_tr(struct inode *inode, struct file *file)
+int tracing_release_generic_tr(struct inode *inode, struct file *file)
 {
        struct trace_array *tr = inode->i_private;
 
@@ -6946,8 +6954,8 @@ waitagain:
                goto out;
        }
 
-       if (cnt >= PAGE_SIZE)
-               cnt = PAGE_SIZE - 1;
+       if (cnt >= TRACE_SEQ_BUFFER_SIZE)
+               cnt = TRACE_SEQ_BUFFER_SIZE - 1;
 
        /* reset all but tr, trace, and overruns */
        trace_iterator_reset(iter);
@@ -7292,8 +7300,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        enum event_trigger_type tt = ETT_NONE;
        struct trace_buffer *buffer;
        struct print_entry *entry;
+       int meta_size;
        ssize_t written;
-       int size;
+       size_t size;
        int len;
 
 /* Used in tracing_mark_raw_write() as well */
@@ -7306,23 +7315,44 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        if (!(tr->trace_flags & TRACE_ITER_MARKERS))
                return -EINVAL;
 
-       if (cnt > TRACE_BUF_SIZE)
-               cnt = TRACE_BUF_SIZE;
-
-       BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
+       if ((ssize_t)cnt < 0)
+               return -EINVAL;
 
-       size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
+       meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
+ again:
+       size = cnt + meta_size;
 
        /* If less than "<faulted>", then make sure we can still add that */
        if (cnt < FAULTED_SIZE)
                size += FAULTED_SIZE - cnt;
 
+       if (size > TRACE_SEQ_BUFFER_SIZE) {
+               cnt -= size - TRACE_SEQ_BUFFER_SIZE;
+               goto again;
+       }
+
        buffer = tr->array_buffer.buffer;
        event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
                                            tracing_gen_ctx());
-       if (unlikely(!event))
+       if (unlikely(!event)) {
+               /*
+                * If the size was greater than what was allowed, then
+                * make it smaller and try again.
+                */
+               if (size > ring_buffer_max_event_size(buffer)) {
+                       /* A cnt below FAULTED_SIZE should never produce a size above max */
+                       if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
+                               return -EBADF;
+                       cnt = ring_buffer_max_event_size(buffer) - meta_size;
+                       /* The above should only happen once */
+                       if (WARN_ON_ONCE(cnt + meta_size == size))
+                               return -EBADF;
+                       goto again;
+               }
+
                /* Ring buffer disabled, return as if not open for write */
                return -EBADF;
+       }
 
        entry = ring_buffer_event_data(event);
        entry->ip = _THIS_IP_;
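
The retry above replaces the old hard TRACE_BUF_SIZE cap: the write is first
clamped to TRACE_SEQ_BUFFER_SIZE, and if the reservation still fails because
the event exceeds the ring buffer's per-event maximum (which now varies with
the sub-buffer order), cnt is shrunk to fit and the reservation is retried at
most once more. A sketch of the shrink step in isolation — clamp_marker_len()
is a hypothetical name, not a kernel function:

	/* Hypothetical helper mirroring the shrink performed above. */
	static size_t clamp_marker_len(size_t cnt, size_t meta_size,
				       size_t max_event)
	{
		size_t size = cnt + meta_size;

		if (size > max_event)
			cnt -= size - max_event; /* drop payload bytes only */
		return cnt;
	}
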
@@ -7357,9 +7387,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        return written;
 }
 
-/* Limit it for now to 3K (including tag) */
-#define RAW_DATA_MAX_SIZE (1024*3)
-
 static ssize_t
 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
                                        size_t cnt, loff_t *fpos)
@@ -7381,19 +7408,18 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
                return -EINVAL;
 
        /* The marker must at least have a tag id */
-       if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
+       if (cnt < sizeof(unsigned int))
                return -EINVAL;
 
-       if (cnt > TRACE_BUF_SIZE)
-               cnt = TRACE_BUF_SIZE;
-
-       BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
-
        size = sizeof(*entry) + cnt;
        if (cnt < FAULT_SIZE_ID)
                size += FAULT_SIZE_ID - cnt;
 
        buffer = tr->array_buffer.buffer;
+
+       if (size > ring_buffer_max_event_size(buffer))
+               return -EINVAL;
+
        event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
                                            tracing_gen_ctx());
        if (!event)
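
With the fixed RAW_DATA_MAX_SIZE cap gone, a raw marker is bounded only by
ring_buffer_max_event_size(), so larger sub-buffers admit larger blobs. A
hedged user-space sketch of the record layout trace_marker_raw expects — a
leading tag id followed by payload — assuming tracefs is mounted at
/sys/kernel/tracing:

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		struct {
			unsigned int id;	/* tag id, mandatory */
			char payload[8];
		} rec = { .id = 1, .payload = "rawdata" };
		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

		if (fd < 0)
			return 1;
		/* An oversized write now fails with -EINVAL instead of
		 * being silently truncated. */
		write(fd, &rec, sizeof(rec));
		close(fd);
		return 0;
	}
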
@@ -7578,6 +7604,7 @@ struct ftrace_buffer_info {
        struct trace_iterator   iter;
        void                    *spare;
        unsigned int            spare_cpu;
+       unsigned int            spare_size;
        unsigned int            read;
 };
 
@@ -8282,6 +8309,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 {
        struct ftrace_buffer_info *info = filp->private_data;
        struct trace_iterator *iter = &info->iter;
+       void *trace_data;
+       int page_size;
        ssize_t ret = 0;
        ssize_t size;
 
@@ -8293,6 +8322,17 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
                return -EBUSY;
 #endif
 
+       page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
+
+       /* Make sure the spare matches the current sub buffer size */
+       if (info->spare) {
+               if (page_size != info->spare_size) {
+                       ring_buffer_free_read_page(iter->array_buffer->buffer,
+                                                  info->spare_cpu, info->spare);
+                       info->spare = NULL;
+               }
+       }
+
        if (!info->spare) {
                info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
                                                          iter->cpu_file);
@@ -8301,19 +8341,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
                        info->spare = NULL;
                } else {
                        info->spare_cpu = iter->cpu_file;
+                       info->spare_size = page_size;
                }
        }
        if (!info->spare)
                return ret;
 
        /* Do we have previous read data to read? */
-       if (info->read < PAGE_SIZE)
+       if (info->read < page_size)
                goto read;
 
  again:
        trace_access_lock(iter->cpu_file);
        ret = ring_buffer_read_page(iter->array_buffer->buffer,
-                                   &info->spare,
+                                   info->spare,
                                    count,
                                    iter->cpu_file, 0);
        trace_access_unlock(iter->cpu_file);
@@ -8334,11 +8375,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 
        info->read = 0;
  read:
-       size = PAGE_SIZE - info->read;
+       size = page_size - info->read;
        if (size > count)
                size = count;
-
-       ret = copy_to_user(ubuf, info->spare + info->read, size);
+       trace_data = ring_buffer_read_page_data(info->spare);
+       ret = copy_to_user(ubuf, trace_data + info->read, size);
        if (ret == size)
                return -EFAULT;
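
The read path no longer copies straight out of info->spare: the spare has
become an opaque handle that ring_buffer_read_page() takes by value, and the
payload is reached through ring_buffer_read_page_data(). A minimal sketch of
the consumer pattern (the helper name is illustrative, kernel context
assumed):

	/* Illustrative only: read one sub-buffer through the opaque handle. */
	static void *read_one_subbuf(struct trace_buffer *buffer, int cpu,
				     void *spare, size_t len)
	{
		if (ring_buffer_read_page(buffer, spare, len, cpu, 0) < 0)
			return NULL;
		/* The payload sits behind a small header in the handle. */
		return ring_buffer_read_page_data(spare);
	}
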
 
@@ -8449,6 +8490,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
+       int page_size;
        int entries, i;
        ssize_t ret = 0;
 
@@ -8457,13 +8499,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                return -EBUSY;
 #endif
 
-       if (*ppos & (PAGE_SIZE - 1))
+       page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
+       if (*ppos & (page_size - 1))
                return -EINVAL;
 
-       if (len & (PAGE_SIZE - 1)) {
-               if (len < PAGE_SIZE)
+       if (len & (page_size - 1)) {
+               if (len < page_size)
                        return -EINVAL;
-               len &= PAGE_MASK;
+               len &= (~(page_size - 1));
        }
 
        if (splice_grow_spd(pipe, &spd))
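
Because the sub-buffer size is no longer necessarily PAGE_SIZE, the splice
path above masks len against the runtime page_size instead of PAGE_MASK. A
quick illustration of the power-of-two trim with assumed values (the helper
name is illustrative):

	/* E.g. with 8 KiB sub-buffers, page_size - 1 = 0x1fff, so a len of
	 * 20480 becomes 20480 & ~0x1fff = 16384: two whole sub-buffers. */
	static inline size_t trim_to_subbuf(size_t len, size_t page_size)
	{
		return len & ~(page_size - 1);
	}
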
@@ -8473,7 +8516,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
        trace_access_lock(iter->cpu_file);
        entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
 
-       for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
+       for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
                struct page *page;
                int r;
 
@@ -8494,7 +8537,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                }
                ref->cpu = iter->cpu_file;
 
-               r = ring_buffer_read_page(ref->buffer, &ref->page,
+               r = ring_buffer_read_page(ref->buffer, ref->page,
                                          len, iter->cpu_file, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer, ref->cpu,
@@ -8503,14 +8546,14 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                        break;
                }
 
-               page = virt_to_page(ref->page);
+               page = virt_to_page(ring_buffer_read_page_data(ref->page));
 
                spd.pages[i] = page;
-               spd.partial[i].len = PAGE_SIZE;
+               spd.partial[i].len = page_size;
                spd.partial[i].offset = 0;
                spd.partial[i].private = (unsigned long)ref;
                spd.nr_pages++;
-               *ppos += PAGE_SIZE;
+               *ppos += page_size;
 
                entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
        }
@@ -9354,6 +9397,103 @@ static const struct file_operations buffer_percent_fops = {
        .llseek         = default_llseek,
 };
 
+static ssize_t
+buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+       struct trace_array *tr = filp->private_data;
+       size_t size;
+       char buf[64];
+       int order;
+       int r;
+
+       order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
+       size = (PAGE_SIZE << order) / 1024;
+
+       r = sprintf(buf, "%zd\n", size);
+
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
+                        size_t cnt, loff_t *ppos)
+{
+       struct trace_array *tr = filp->private_data;
+       unsigned long val;
+       int old_order;
+       int order;
+       int pages;
+       int ret;
+
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
+               return ret;
+
+       val *= 1024; /* value passed in is in KB */
+
+       pages = DIV_ROUND_UP(val, PAGE_SIZE);
+       order = fls(pages - 1);
+
+       /* limit between 1 and 128 system pages */
+       if (order < 0 || order > 7)
+               return -EINVAL;
+
+       /* Do not allow tracing while changing the order of the ring buffer */
+       tracing_stop_tr(tr);
+
+       old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
+       if (old_order == order)
+               goto out;
+
+       ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
+       if (ret)
+               goto out;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+
+       if (!tr->allocated_snapshot)
+               goto out_max;
+
+       ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
+       if (ret) {
+               /* Put back the old order */
+               cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
+               if (WARN_ON_ONCE(cnt)) {
+                       /*
+                        * AARGH! We are left with different orders!
+                        * The max buffer is our "snapshot" buffer.
+                        * When a tracer needs a snapshot (one of the
+                        * latency tracers), it swaps the max buffer
+                        * with the saved snapshot. We succeeded in updating
+                        * the order of the main buffer, but failed to update
+                        * the order of the max buffer. And when we tried to
+                        * reset the main buffer to the original order, we
+                        * failed there too. This is very unlikely to
+                        * happen, but if it does, warn and kill all
+                        * tracing.
+                        */
+                       tracing_disabled = 1;
+               }
+               goto out;
+       }
+ out_max:
+#endif
+       (*ppos)++;
+ out:
+       if (ret)
+               cnt = ret;
+       tracing_start_tr(tr);
+       return cnt;
+}
+
+static const struct file_operations buffer_subbuf_size_fops = {
+       .open           = tracing_open_generic_tr,
+       .read           = buffer_subbuf_size_read,
+       .write          = buffer_subbuf_size_write,
+       .release        = tracing_release_generic_tr,
+       .llseek         = default_llseek,
+};
+
 static struct dentry *trace_instance_dir;
 
 static void
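
The write handler rounds a requested kilobyte value up to a power-of-two
number of system pages: pages = DIV_ROUND_UP(val, PAGE_SIZE), then
order = fls(pages - 1). With 4 KiB pages, writing 4 yields order 0, 8 yields
order 1, and 12 rounds up to order 2 (16 KiB). A hedged sketch of that
mapping — kb_to_order() is an illustrative name, kernel context assumed:

	/* Illustrative only: the KB-to-order rounding used by the handler. */
	static int kb_to_order(unsigned long kb)
	{
		unsigned long pages = DIV_ROUND_UP(kb * 1024, PAGE_SIZE);

		return fls(pages - 1);	/* 4 -> 0, 8 -> 1, 12 -> 2 */
	}
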
@@ -9504,7 +9644,8 @@ static int trace_array_create_dir(struct trace_array *tr)
        return ret;
 }
 
-static struct trace_array *trace_array_create(const char *name)
+static struct trace_array *
+trace_array_create_systems(const char *name, const char *systems)
 {
        struct trace_array *tr;
        int ret;
@@ -9524,6 +9665,12 @@ static struct trace_array *trace_array_create(const char *name)
        if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
                goto out_free_tr;
 
+       if (systems) {
+               tr->system_names = kstrdup_const(systems, GFP_KERNEL);
+               if (!tr->system_names)
+                       goto out_free_tr;
+       }
+
        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
 
        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9570,12 +9717,18 @@ static struct trace_array *trace_array_create(const char *name)
        free_trace_buffers(tr);
        free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
+       kfree_const(tr->system_names);
        kfree(tr->name);
        kfree(tr);
 
        return ERR_PTR(ret);
 }
 
+static struct trace_array *trace_array_create(const char *name)
+{
+       return trace_array_create_systems(name, NULL);
+}
+
 static int instance_mkdir(const char *name)
 {
        struct trace_array *tr;
@@ -9601,6 +9754,7 @@ out_unlock:
 /**
  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
  * @name: The name of the trace array to be looked up/created.
+ * @systems: A list of systems to create event directories for (NULL for all)
  *
  * Returns pointer to trace array with given name.
  * NULL, if it cannot be created.
@@ -9614,7 +9768,7 @@ out_unlock:
  * trace_array_put() is called, user space can not delete it.
  *
  */
-struct trace_array *trace_array_get_by_name(const char *name)
+struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
 {
        struct trace_array *tr;
 
@@ -9626,7 +9780,7 @@ struct trace_array *trace_array_get_by_name(const char *name)
                        goto out_unlock;
        }
 
-       tr = trace_array_create(name);
+       tr = trace_array_create_systems(name, systems);
 
        if (IS_ERR(tr))
                tr = NULL;
@@ -9673,6 +9827,7 @@ static int __remove_instance(struct trace_array *tr)
 
        free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
+       kfree_const(tr->system_names);
        kfree(tr->name);
        kfree(tr);
 
@@ -9805,6 +9960,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
        trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
                        tr, &buffer_percent_fops);
 
+       trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
+                         tr, &buffer_subbuf_size_fops);
+
        create_trace_options_dir(tr);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
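
With the control file registered per instance, the feature can be exercised
entirely from user space. A hedged sketch, assuming tracefs is mounted at
/sys/kernel/tracing:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[64];
		ssize_t n;
		int fd = open("/sys/kernel/tracing/buffer_subbuf_size_kb",
			      O_RDWR);

		if (fd < 0)
			return 1;
		/* Request 8 KiB sub-buffers; the kernel rounds the value up
		 * to a power-of-two number of system pages. */
		write(fd, "8", 1);
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("buffer_subbuf_size_kb: %s", buf);
		}
		close(fd);
		return 0;
	}
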
@@ -10391,7 +10549,7 @@ __init static void enable_instances(void)
                if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
                        do_allocate_snapshot(tok);
 
-               tr = trace_array_get_by_name(tok);
+               tr = trace_array_get_by_name(tok, NULL);
                if (!tr) {
                        pr_warn("Failed to create instance buffer %s\n", curr_str);
                        continue;
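
The new systems parameter lets a kernel module restrict which event
directories are created for its instance; the caller updated above passes
NULL to keep the old create-everything behavior. A hedged module sketch —
instance and system names are illustrative:

	#include <linux/module.h>
	#include <linux/trace.h>

	static struct trace_array *tr;

	static int __init demo_init(void)
	{
		/* Only the "sched" and "irq" event directories are created
		 * for this instance; NULL would create all of them. */
		tr = trace_array_get_by_name("demo_instance", "sched,irq");
		return tr ? 0 : -ENOMEM;
	}

	static void __exit demo_exit(void)
	{
		trace_array_put(tr);
		trace_array_destroy(tr);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");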