perf tools: Add support for PERF_RECORD_TEXT_POKE
author: Adrian Hunter <adrian.hunter@intel.com>
Tue, 12 May 2020 12:19:18 +0000 (15:19 +0300)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 10 Jul 2020 11:20:01 +0000 (08:20 -0300)
Add processing for PERF_RECORD_TEXT_POKE events. When a text poke event
is processed, the kernel dso data cache is updated with the poked
bytes.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: x86@kernel.org
Link: http://lore.kernel.org/lkml/20200512121922.8997-12-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
14 files changed:
tools/include/uapi/linux/perf_event.h
tools/lib/perf/include/perf/event.h
tools/perf/builtin-record.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/perf_api_probe.c
tools/perf/util/perf_api_probe.h
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/record.h
tools/perf/util/session.c
tools/perf/util/tool.h

index 7b2d6fc9e6ed7ba5e245e066517c89f2fc00dd6b..e5bee6c17b86818b0e058545f61340cf91f385e9 100644 (file)
@@ -383,7 +383,8 @@ struct perf_event_attr {
                                bpf_event      :  1, /* include bpf events */
                                aux_output     :  1, /* generate AUX records instead of events */
                                cgroup         :  1, /* include cgroup events */
-                               __reserved_1   : 31;
+                               text_poke      :  1, /* include text poke events */
+                               __reserved_1   : 30;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -1024,6 +1025,24 @@ enum perf_event_type {
         */
        PERF_RECORD_CGROUP                      = 19,
 
+       /*
+        * Records changes to kernel text i.e. self-modified code. 'old_len' is
+        * the number of old bytes, 'new_len' is the number of new bytes. Either
+        * 'old_len' or 'new_len' may be zero to indicate, for example, the
+        * addition or removal of a trampoline. 'bytes' contains the old bytes
+        * followed immediately by the new bytes.
+        *
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             addr;
+        *      u16                             old_len;
+        *      u16                             new_len;
+        *      u8                              bytes[];
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_TEXT_POKE                   = 20,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
index 69b44d2cc0f5001cb3098870555608c17c9f72b2..842028858d66e953566123f40c6a09b6e2756183 100644 (file)
@@ -111,6 +111,14 @@ struct perf_record_cgroup {
        char                     path[PATH_MAX];
 };
 
+struct perf_record_text_poke_event { /* PERF_RECORD_TEXT_POKE payload */
+       struct perf_event_header header;
+       __u64                   addr; /* address of the poked text */
+       __u16                   old_len; /* number of old bytes */
+       __u16                   new_len; /* number of new bytes */
+       __u8                    bytes[]; /* old bytes followed immediately by new bytes */
+};
+
 struct perf_record_sample {
        struct perf_event_header header;
        __u64                    array[];
@@ -367,6 +375,7 @@ union perf_event {
        struct perf_record_sample               sample;
        struct perf_record_bpf_event            bpf;
        struct perf_record_ksymbol              ksymbol;
+       struct perf_record_text_poke_event      text_poke;
        struct perf_record_header_attr          attr;
        struct perf_record_event_update         event_update;
        struct perf_record_header_event_type    event_type;
index bf1f18366726e4272bffb8980cd3ea8cf1e591ae..b6bdccd875bc04814320f335f412c7b6a10e6d9c 100644 (file)
@@ -765,6 +765,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
 
 #endif
 
+static int record__config_text_poke(struct evlist *evlist)
+{ /* Ensure some event in evlist has attr.text_poke set; returns 0 or the parse_events() error */
+       struct evsel *evsel;
+       int err;
+
+       /* Nothing to do if text poke is already configured */
+       evlist__for_each_entry(evlist, evsel) {
+               if (evsel->core.attr.text_poke)
+                       return 0;
+       }
+
+       err = parse_events(evlist, "dummy:u", NULL); /* new "dummy:u" event, configured below */
+       if (err)
+               return err;
+
+       evsel = evlist__last(evlist); /* presumably the event parse_events() just added */
+
+       evsel->core.attr.freq = 0; /* sample_period below is a period, not a frequency */
+       evsel->core.attr.sample_period = 1;
+       evsel->core.attr.text_poke = 1; /* include text poke events */
+       evsel->core.attr.ksymbol = 1; /* evsel__config() leaves ksymbol alone when opts->text_poke is set */
+
+       evsel->core.system_wide = true;
+       evsel->no_aux_samples = true;
+       evsel->immediate = true; /* presumably "enable immediately", as the evsel__config() comment requires */
+
+       /* Text poke must be collected on all CPUs */
+       perf_cpu_map__put(evsel->core.own_cpus); /* drop the old maps before replacing them */
+       evsel->core.own_cpus = perf_cpu_map__new(NULL); /* NOTE(review): presumably "all CPUs"; result is not checked */
+       perf_cpu_map__put(evsel->core.cpus);
+       evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); /* presumably takes a second reference on the same map */
+
+       evsel__set_sample_bit(evsel, TIME); /* presumably so the records carry timestamps - TODO confirm */
+
+       return 0;
+}
+
 static bool record__kcore_readable(struct machine *machine)
 {
        char kcore[PATH_MAX];
@@ -2766,6 +2803,14 @@ int cmd_record(int argc, const char **argv)
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;
 
+       if (rec->opts.text_poke) {
+               err = record__config_text_poke(rec->evlist);
+               if (err) {
+                       pr_err("record__config_text_poke failed, error %d\n", err);
+                       goto out;
+               }
+       }
+
        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
index f581550a3015d88b8ccde34fd428c3da6287f455..dde2f01477134ba1ba34c379ee4a55e8cef63c78 100644 (file)
@@ -31,6 +31,7 @@
 #include "stat.h"
 #include "session.h"
 #include "bpf-event.h"
+#include "print_binary.h"
 #include "tool.h"
 #include "../perf.h"
 
@@ -55,6 +56,7 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_KSYMBOL]                   = "KSYMBOL",
        [PERF_RECORD_BPF_EVENT]                 = "BPF_EVENT",
        [PERF_RECORD_CGROUP]                    = "CGROUP",
+       [PERF_RECORD_TEXT_POKE]                 = "TEXT_POKE",
        [PERF_RECORD_HEADER_ATTR]               = "ATTR",
        [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
@@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
        return machine__process_bpf(machine, event, sample);
 }
 
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+                                 union perf_event *event,
+                                 struct perf_sample *sample,
+                                 struct machine *machine)
+{ /* tool->text_poke handler installed by perf_tool__fill_defaults(); 'tool' itself is unused */
+       return machine__process_text_poke(machine, event, sample); /* result is passed through unchanged */
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
        return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
@@ -413,6 +423,40 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
                       event->bpf.type, event->bpf.flags, event->bpf.id);
 }
 
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+                            void *extra, FILE *fp)
+{ /* Callback handed to binary__fprintf(); returns what fprintf() returned, or 0 for ops it ignores */
+       bool old = *(bool *)extra; /* extra points to a bool: true labels the line "Old", false "New" */
+
+       switch ((int)op) {
+       case BINARY_PRINT_LINE_BEGIN: /* indented line label */
+               return fprintf(fp, "            %s bytes:", old ? "Old" : "New");
+       case BINARY_PRINT_NUM_DATA: /* val as at least two hex digits */
+               return fprintf(fp, " %02x", val);
+       case BINARY_PRINT_LINE_END:
+               return fprintf(fp, "\n");
+       default: /* every other op prints nothing */
+               return 0;
+       }
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, FILE *fp)
+{ /* Print a text poke event to fp: address and lengths, then old bytes, then new bytes */
+       struct perf_record_text_poke_event *tp = &event->text_poke;
+       size_t ret; /* running sum of the print calls' return values */
+       bool old; /* read by text_poke_printer() through the 'extra' pointer */
+
+       ret = fprintf(fp, " %" PRI_lx64 " old len %u new len %u\n",
+                     tp->addr, tp->old_len, tp->new_len);
+       old = true;
+       ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer, /* 16: presumably bytes per output line */
+                              &old, fp);
+       old = false;
+       ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16, /* new bytes start right after the old ones */
+                              text_poke_printer, &old, fp);
+       return ret;
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
        size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -457,6 +501,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
        case PERF_RECORD_BPF_EVENT:
                ret += perf_event__fprintf_bpf(event, fp);
                break;
+       case PERF_RECORD_TEXT_POKE:
+               ret += perf_event__fprintf_text_poke(event, fp);
+               break;
        default:
                ret += fprintf(fp, "\n");
        }
index 6ae01c3c2ffa7e1d1947c4366bc2af02cc712e5c..3c4eb94f93de4a7e499a5536eb3fcb9eb046fdc4 100644 (file)
@@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool,
                            union perf_event *event,
                            struct perf_sample *sample,
                            struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+                                 union perf_event *event,
+                                 struct perf_sample *sample,
+                                 struct machine *machine);
 int perf_event__process(struct perf_tool *tool,
                        union perf_event *event,
                        struct perf_sample *sample,
@@ -385,6 +389,7 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 int kallsyms__get_function_start(const char *kallsyms_filename,
index ef802f6d40c17a27e8560fad8ccb9fb65fd30ebc..9aa51a65593dcea7503438c71a21458e74a60bab 100644 (file)
@@ -1064,7 +1064,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
        attr->mmap  = track;
        attr->mmap2 = track && !perf_missing_features.mmap2;
        attr->comm  = track;
-       attr->ksymbol = track && !perf_missing_features.ksymbol;
+       /*
+        * ksymbol is tracked separately with text poke because it needs to be
+        * system wide and enabled immediately.
+        */
+       if (!opts->text_poke)
+               attr->ksymbol = track && !perf_missing_features.ksymbol;
        attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
 
        if (opts->record_namespaces)
index d5384807372b505921983a6bd1d5961c0ff268ce..1e9fe6b4d8db41bcb67ac5eca699d4178273328a 100644 (file)
@@ -808,6 +808,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
        return machine__process_ksymbol_register(machine, event, sample);
 }
 
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+                              struct perf_sample *sample __maybe_unused)
+{ /* Write a text poke event's new bytes into the kernel dso data cache; always returns 0 */
+       struct map *map = maps__find(&machine->kmaps, event->text_poke.addr); /* may be NULL, checked below */
+       u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+       if (dump_trace) /* dump the event even if it is ignored below */
+               perf_event__fprintf_text_poke(event, stdout);
+
+       if (!event->text_poke.new_len) /* no new bytes, so nothing to write */
+               return 0;
+
+       if (cpumode != PERF_RECORD_MISC_KERNEL) { /* only kernel cpumode is handled */
+               pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+               return 0;
+       }
+
+       if (map && map->dso) {
+               u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len; /* NOTE(review): lengths are not checked against header.size here */
+               int ret;
+
+               /*
+                * Kernel maps might be changed when loading symbols so loading
+                * must be done prior to using kernel maps.
+                */
+               map__load(map); /* return value is ignored */
+               ret = dso__data_write_cache_addr(map->dso, map, machine,
+                                                event->text_poke.addr,
+                                                new_bytes,
+                                                event->text_poke.new_len);
+               if (ret != event->text_poke.new_len) /* anything but a full write is only logged */
+                       pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+                                event->text_poke.addr);
+       } else { /* no map, or map without a dso: logged, not an error */
+               pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+                        event->text_poke.addr);
+       }
+
+       return 0;
+}
+
 static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
                                              const char *filename)
 {
@@ -1930,6 +1971,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
                ret = machine__process_ksymbol(machine, event, sample); break;
        case PERF_RECORD_BPF_EVENT:
                ret = machine__process_bpf(machine, event, sample); break;
+       case PERF_RECORD_TEXT_POKE:
+               ret = machine__process_text_poke(machine, event, sample); break;
        default:
                ret = -1;
                break;
index fa1be9ea00fa88a7b1150448958e35191254272e..062c36a8433cec835a53313009d91aab640ec7ec 100644 (file)
@@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even
 int machine__process_ksymbol(struct machine *machine,
                             union perf_event *event,
                             struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+                              union perf_event *event,
+                              struct perf_sample *sample);
 int machine__process_event(struct machine *machine, union perf_event *event,
                                struct perf_sample *sample);
 
index 1337965673d7069d5bc50cc3389d51eaa2e37aaa..3840d02f0f7b0aaa0d8730565eebd910943fc3ec 100644 (file)
@@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel)
        evsel->core.attr.context_switch = 1;
 }
 
+static void perf_probe_text_poke(struct evsel *evsel)
+{ /* Probe callback passed to perf_probe_api() by perf_can_record_text_poke_events() */
+       evsel->core.attr.text_poke = 1; /* request the attr bit being probed */
+}
+
 bool perf_can_sample_identifier(void)
 {
        return perf_probe_api(perf_probe_sample_identifier);
@@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void)
        return perf_probe_api(perf_probe_context_switch);
 }
 
+bool perf_can_record_text_poke_events(void)
+{ /* Returns perf_probe_api()'s verdict for an event with attr.text_poke set */
+       return perf_probe_api(perf_probe_text_poke);
+}
+
 bool perf_can_record_cpu_wide(void)
 {
        struct perf_event_attr attr = {
index 706c3c6426e2fb68043884eeabd690669651fdb6..d5506a983a94a4be830f4cc056a0d63aa65ec55d 100644 (file)
@@ -9,6 +9,7 @@ bool perf_can_aux_sample(void);
 bool perf_can_comm_exec(void);
 bool perf_can_record_cpu_wide(void);
 bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
 bool perf_can_sample_identifier(void);
 
 #endif // __PERF_API_PROBE_H
index b94fa07f5d3264fbcc7798fb20899044d96c2b93..e67a227c0ce7e6b20312972eb2f1b78a63a3bb2b 100644 (file)
@@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(aux_watermark, p_unsigned);
        PRINT_ATTRf(sample_max_stack, p_unsigned);
        PRINT_ATTRf(aux_sample_size, p_unsigned);
+       PRINT_ATTRf(text_poke, p_unsigned);
 
        return ret;
 }
index 39d1de4b2a3698be5279b77b9cf02fbd13cf5c3f..84dbbc3f02049a38326636b4f49c1eaa65e08219 100644 (file)
@@ -48,6 +48,7 @@ struct record_opts {
        bool          sample_id;
        bool          no_bpf_event;
        bool          kcore;
+       bool          text_poke;
        unsigned int  freq;
        unsigned int  mmap_pages;
        unsigned int  auxtrace_mmap_pages;
index 396424fcaadfe29914fae0c6ec5ebee25d2c04cb..ffbc9d35a383f717d254c680aabe26ea4e7dc0fd 100644 (file)
@@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->ksymbol = perf_event__process_ksymbol;
        if (tool->bpf == NULL)
                tool->bpf = perf_event__process_bpf;
+       if (tool->text_poke == NULL)
+               tool->text_poke = perf_event__process_text_poke;
        if (tool->read == NULL)
                tool->read = process_event_sample_stub;
        if (tool->throttle == NULL)
@@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
                swap_sample_id_all(event, &event->context_switch + 1);
 }
 
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{ /* Byte-swap a PERF_RECORD_TEXT_POKE event in place */
+       event->text_poke.addr    = bswap_64(event->text_poke.addr);
+       event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+       event->text_poke.new_len = bswap_16(event->text_poke.new_len); /* bytes[] is left untouched */
+
+       if (sample_id_all) { /* locate and swap the sample_id data after the bytes */
+               size_t len = sizeof(event->text_poke.old_len) + /* measured from old_len: the two u16 fields ... */
+                            sizeof(event->text_poke.new_len) +
+                            event->text_poke.old_len + /* ... plus old and new bytes, using the lengths swapped above */
+                            event->text_poke.new_len;
+               void *data = &event->text_poke.old_len;
+
+               data += PERF_ALIGN(len, sizeof(u64)); /* advance by len padded to a multiple of sizeof(u64), presumably rounded up */
+               swap_sample_id_all(event, data);
+       }
+}
+
 static void perf_event__throttle_swap(union perf_event *event,
                                      bool sample_id_all)
 {
@@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_SWITCH]              = perf_event__switch_swap,
        [PERF_RECORD_SWITCH_CPU_WIDE]     = perf_event__switch_swap,
        [PERF_RECORD_NAMESPACES]          = perf_event__namespaces_swap,
+       [PERF_RECORD_TEXT_POKE]           = perf_event__text_poke_swap,
        [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines,
                return tool->ksymbol(tool, event, sample, machine);
        case PERF_RECORD_BPF_EVENT:
                return tool->bpf(tool, event, sample, machine);
+       case PERF_RECORD_TEXT_POKE:
+               return tool->text_poke(tool, event, sample, machine);
        default:
                ++evlist->stats.nr_unknown_events;
                return -1;
index 3fb67bd31e4a062993a0aecb0786d8cb17da9d6e..bbbc0dcd461ff453b66791cf78f019aad4da5139 100644 (file)
@@ -57,7 +57,8 @@ struct perf_tool {
                        throttle,
                        unthrottle,
                        ksymbol,
-                       bpf;
+                       bpf,
+                       text_poke;
 
        event_attr_op   attr;
        event_attr_op   event_update;