1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_pt.c: Intel Processor Trace support
4  * Copyright (c) 2013-2015, Intel Corporation.
5  */
6
7 #include <inttypes.h>
8 #include <stdio.h>
9 #include <stdbool.h>
10 #include <errno.h>
11 #include <linux/kernel.h>
12 #include <linux/types.h>
13
14 #include "../perf.h"
15 #include "session.h"
16 #include "machine.h"
17 #include "memswap.h"
18 #include "sort.h"
19 #include "tool.h"
20 #include "event.h"
21 #include "evlist.h"
22 #include "evsel.h"
23 #include "map.h"
24 #include "color.h"
25 #include "util.h"
26 #include "thread.h"
27 #include "thread-stack.h"
28 #include "symbol.h"
29 #include "callchain.h"
30 #include "dso.h"
31 #include "debug.h"
32 #include "auxtrace.h"
33 #include "tsc.h"
34 #include "intel-pt.h"
35 #include "config.h"
36 #include "time-utils.h"
37
38 #include "intel-pt-decoder/intel-pt-log.h"
39 #include "intel-pt-decoder/intel-pt-decoder.h"
40 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
41 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
42
43 #define MAX_TIMESTAMP (~0ULL)
44
45 struct range {
46         u64 start;
47         u64 end;
48 };
49
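/*
 * Per-session Intel PT decoding state: one instance covers all the auxtrace
 * data of a perf session, including the queues, synthesized event
 * configuration and timestamp conversion parameters.
 */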
50 struct intel_pt {
51         struct auxtrace auxtrace;
52         struct auxtrace_queues queues;
53         struct auxtrace_heap heap;
54         u32 auxtrace_type;
55         struct perf_session *session;
56         struct machine *machine;
57         struct perf_evsel *switch_evsel;
58         struct thread *unknown_thread;
59         bool timeless_decoding;
60         bool sampling_mode;
61         bool snapshot_mode;
62         bool per_cpu_mmaps;
63         bool have_tsc;
64         bool data_queued;
65         bool est_tsc;
66         bool sync_switch;
67         bool mispred_all;
68         int have_sched_switch;
69         u32 pmu_type;
70         u64 kernel_start;
71         u64 switch_ip;
72         u64 ptss_ip;
73
74         struct perf_tsc_conversion tc;
75         bool cap_user_time_zero;
76
77         struct itrace_synth_opts synth_opts;
78
79         bool sample_instructions;
80         u64 instructions_sample_type;
81         u64 instructions_id;
82
83         bool sample_branches;
84         u32 branches_filter;
85         u64 branches_sample_type;
86         u64 branches_id;
87
88         bool sample_transactions;
89         u64 transactions_sample_type;
90         u64 transactions_id;
91
92         bool sample_ptwrites;
93         u64 ptwrites_sample_type;
94         u64 ptwrites_id;
95
96         bool sample_pwr_events;
97         u64 pwr_events_sample_type;
98         u64 mwait_id;
99         u64 pwre_id;
100         u64 exstop_id;
101         u64 pwrx_id;
102         u64 cbr_id;
103
104         u64 tsc_bit;
105         u64 mtc_bit;
106         u64 mtc_freq_bits;
107         u32 tsc_ctc_ratio_n;
108         u32 tsc_ctc_ratio_d;
109         u64 cyc_bit;
110         u64 noretcomp_bit;
111         unsigned max_non_turbo_ratio;
112         unsigned cbr2khz;
113
114         unsigned long num_events;
115
116         char *filter;
117         struct addr_filters filts;
118
119         struct range *time_ranges;
120         unsigned int range_cnt;
121 };
122
123 enum switch_state {
124         INTEL_PT_SS_NOT_TRACING,
125         INTEL_PT_SS_UNKNOWN,
126         INTEL_PT_SS_TRACING,
127         INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
128         INTEL_PT_SS_EXPECTING_SWITCH_IP,
129 };
130
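/*
 * Per-queue decode state. There is one queue per CPU (per-cpu mmaps) or per
 * thread, each with its own decoder instance and current buffer position.
 */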
131 struct intel_pt_queue {
132         struct intel_pt *pt;
133         unsigned int queue_nr;
134         struct auxtrace_buffer *buffer;
135         struct auxtrace_buffer *old_buffer;
136         void *decoder;
137         const struct intel_pt_state *state;
138         struct ip_callchain *chain;
139         struct branch_stack *last_branch;
140         struct branch_stack *last_branch_rb;
141         size_t last_branch_pos;
142         union perf_event *event_buf;
143         bool on_heap;
144         bool stop;
145         bool step_through_buffers;
146         bool use_buffer_pid_tid;
147         bool sync_switch;
148         pid_t pid, tid;
149         int cpu;
150         int switch_state;
151         pid_t next_tid;
152         struct thread *thread;
153         bool exclude_kernel;
154         bool have_sample;
155         u64 time;
156         u64 timestamp;
157         u64 sel_timestamp;
158         bool sel_start;
159         unsigned int sel_idx;
160         u32 flags;
161         u16 insn_len;
162         u64 last_insn_cnt;
163         u64 ipc_insn_cnt;
164         u64 ipc_cyc_cnt;
165         u64 last_in_insn_cnt;
166         u64 last_in_cyc_cnt;
167         u64 last_br_insn_cnt;
168         u64 last_br_cyc_cnt;
169         char insn[INTEL_PT_INSN_BUF_SZ];
170 };
171
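/*
 * Hex dump raw trace data one packet per line, adding a decoded description
 * where the packet is valid.
 */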
172 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
173                           unsigned char *buf, size_t len)
174 {
175         struct intel_pt_pkt packet;
176         size_t pos = 0;
177         int ret, pkt_len, i;
178         char desc[INTEL_PT_PKT_DESC_MAX];
179         const char *color = PERF_COLOR_BLUE;
180
181         color_fprintf(stdout, color,
182                       ". ... Intel Processor Trace data: size %zu bytes\n",
183                       len);
184
185         while (len) {
186                 ret = intel_pt_get_packet(buf, len, &packet);
187                 if (ret > 0)
188                         pkt_len = ret;
189                 else
190                         pkt_len = 1;
191                 printf(".");
192                 color_fprintf(stdout, color, "  %08zx: ", pos);
193                 for (i = 0; i < pkt_len; i++)
194                         color_fprintf(stdout, color, " %02x", buf[i]);
195                 for (; i < 16; i++)
196                         color_fprintf(stdout, color, "   ");
197                 if (ret > 0) {
198                         ret = intel_pt_pkt_desc(&packet, desc,
199                                                 INTEL_PT_PKT_DESC_MAX);
200                         if (ret > 0)
201                                 color_fprintf(stdout, color, " %s\n", desc);
202                 } else {
203                         color_fprintf(stdout, color, " Bad packet!\n");
204                 }
205                 pos += pkt_len;
206                 buf += pkt_len;
207                 len -= pkt_len;
208         }
209 }
210
211 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
212                                 size_t len)
213 {
214         printf(".\n");
215         intel_pt_dump(pt, buf, len);
216 }
217
218 static void intel_pt_log_event(union perf_event *event)
219 {
220         FILE *f = intel_pt_log_fp();
221
222         if (!intel_pt_enable_logging || !f)
223                 return;
224
225         perf_event__fprintf(event, f);
226 }
227
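/*
 * In snapshot or sampling mode consecutive buffers can overlap. Find where
 * buffer 'b' really starts relative to 'a' and adjust its usable data/size.
 */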
228 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
229                                    struct auxtrace_buffer *b)
230 {
231         bool consecutive = false;
232         void *start;
233
234         start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
235                                       pt->have_tsc, &consecutive);
236         if (!start)
237                 return -EINVAL;
238         b->use_size = b->data + b->size - start;
239         b->use_data = start;
240         if (b->use_size && consecutive)
241                 b->consecutive = true;
242         return 0;
243 }
244
245 static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
246                                struct auxtrace_buffer *buffer,
247                                struct auxtrace_buffer *old_buffer,
248                                struct intel_pt_buffer *b)
249 {
250         bool might_overlap;
251
252         if (!buffer->data) {
253                 int fd = perf_data__fd(ptq->pt->session->data);
254
255                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
256                 if (!buffer->data)
257                         return -ENOMEM;
258         }
259
260         might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
261         if (might_overlap && !buffer->consecutive && old_buffer &&
262             intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
263                 return -ENOMEM;
264
265         if (buffer->use_data) {
266                 b->len = buffer->use_size;
267                 b->buf = buffer->use_data;
268         } else {
269                 b->len = buffer->size;
270                 b->buf = buffer->data;
271         }
272         b->ref_timestamp = buffer->reference;
273
274         if (!old_buffer || (might_overlap && !buffer->consecutive)) {
275                 b->consecutive = false;
276                 b->trace_nr = buffer->buffer_nr + 1;
277         } else {
278                 b->consecutive = true;
279         }
280
281         return 0;
282 }
283
284 /* Do not drop buffers with references - see intel_pt_get_trace() */
285 static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
286                                            struct auxtrace_buffer *buffer)
287 {
288         if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
289                 return;
290
291         auxtrace_buffer__drop_data(buffer);
292 }
293
294 /* Must be serialized with respect to intel_pt_get_trace() */
295 static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
296                               void *cb_data)
297 {
298         struct intel_pt_queue *ptq = data;
299         struct auxtrace_buffer *buffer = ptq->buffer;
300         struct auxtrace_buffer *old_buffer = ptq->old_buffer;
301         struct auxtrace_queue *queue;
302         int err = 0;
303
304         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
305
306         while (1) {
307                 struct intel_pt_buffer b = { .len = 0 };
308
309                 buffer = auxtrace_buffer__next(queue, buffer);
310                 if (!buffer)
311                         break;
312
313                 err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
314                 if (err)
315                         break;
316
317                 if (b.len) {
318                         intel_pt_lookahead_drop_buffer(ptq, old_buffer);
319                         old_buffer = buffer;
320                 } else {
321                         intel_pt_lookahead_drop_buffer(ptq, buffer);
322                         continue;
323                 }
324
325                 err = cb(&b, cb_data);
326                 if (err)
327                         break;
328         }
329
330         if (buffer != old_buffer)
331                 intel_pt_lookahead_drop_buffer(ptq, buffer);
332         intel_pt_lookahead_drop_buffer(ptq, old_buffer);
333
334         return err;
335 }
336
337 /*
338  * This function assumes data is processed sequentially only.
339  * Must be serialized with respect to intel_pt_lookahead()
340  */
341 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
342 {
343         struct intel_pt_queue *ptq = data;
344         struct auxtrace_buffer *buffer = ptq->buffer;
345         struct auxtrace_buffer *old_buffer = ptq->old_buffer;
346         struct auxtrace_queue *queue;
347         int err;
348
349         if (ptq->stop) {
350                 b->len = 0;
351                 return 0;
352         }
353
354         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
355
356         buffer = auxtrace_buffer__next(queue, buffer);
357         if (!buffer) {
358                 if (old_buffer)
359                         auxtrace_buffer__drop_data(old_buffer);
360                 b->len = 0;
361                 return 0;
362         }
363
364         ptq->buffer = buffer;
365
366         err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
367         if (err)
368                 return err;
369
370         if (ptq->step_through_buffers)
371                 ptq->stop = true;
372
373         if (b->len) {
374                 if (old_buffer)
375                         auxtrace_buffer__drop_data(old_buffer);
376                 ptq->old_buffer = buffer;
377         } else {
378                 auxtrace_buffer__drop_data(buffer);
379                 return intel_pt_get_trace(b, data);
380         }
381
382         return 0;
383 }
384
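/*
 * Cache of decoded instruction information, keyed by DSO file offset, used
 * to avoid repeatedly decoding the same code when walking instructions.
 */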
385 struct intel_pt_cache_entry {
386         struct auxtrace_cache_entry     entry;
387         u64                             insn_cnt;
388         u64                             byte_cnt;
389         enum intel_pt_insn_op           op;
390         enum intel_pt_insn_branch       branch;
391         int                             length;
392         int32_t                         rel;
393         char                            insn[INTEL_PT_INSN_BUF_SZ];
394 };
395
396 static int intel_pt_config_div(const char *var, const char *value, void *data)
397 {
398         int *d = data;
399         long val;
400
401         if (!strcmp(var, "intel-pt.cache-divisor")) {
402                 val = strtol(value, NULL, 0);
403                 if (val > 0 && val <= INT_MAX)
404                         *d = val;
405         }
406
407         return 0;
408 }
409
410 static int intel_pt_cache_divisor(void)
411 {
412         static int d;
413
414         if (d)
415                 return d;
416
417         perf_config(intel_pt_config_div, &d);
418
419         if (!d)
420                 d = 64;
421
422         return d;
423 }
424
425 static unsigned int intel_pt_cache_size(struct dso *dso,
426                                         struct machine *machine)
427 {
428         off_t size;
429
430         size = dso__data_size(dso, machine);
431         size /= intel_pt_cache_divisor();
432         if (size < 1000)
433                 return 10;
434         if (size > (1 << 21))
435                 return 21;
436         return 32 - __builtin_clz(size);
437 }
438
439 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
440                                              struct machine *machine)
441 {
442         struct auxtrace_cache *c;
443         unsigned int bits;
444
445         if (dso->auxtrace_cache)
446                 return dso->auxtrace_cache;
447
448         bits = intel_pt_cache_size(dso, machine);
449
450         /* Ignoring cache creation failure */
451         c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
452
453         dso->auxtrace_cache = c;
454
455         return c;
456 }
457
458 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
459                               u64 offset, u64 insn_cnt, u64 byte_cnt,
460                               struct intel_pt_insn *intel_pt_insn)
461 {
462         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
463         struct intel_pt_cache_entry *e;
464         int err;
465
466         if (!c)
467                 return -ENOMEM;
468
469         e = auxtrace_cache__alloc_entry(c);
470         if (!e)
471                 return -ENOMEM;
472
473         e->insn_cnt = insn_cnt;
474         e->byte_cnt = byte_cnt;
475         e->op = intel_pt_insn->op;
476         e->branch = intel_pt_insn->branch;
477         e->length = intel_pt_insn->length;
478         e->rel = intel_pt_insn->rel;
479         memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
480
481         err = auxtrace_cache__add(c, offset, &e->entry);
482         if (err)
483                 auxtrace_cache__free_entry(c, e);
484
485         return err;
486 }
487
488 static struct intel_pt_cache_entry *
489 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
490 {
491         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
492
493         if (!c)
494                 return NULL;
495
496         return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
497 }
498
499 static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
500 {
501         return ip >= pt->kernel_start ?
502                PERF_RECORD_MISC_KERNEL :
503                PERF_RECORD_MISC_USER;
504 }
505
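/*
 * Decoder callback: starting at *ip, read instructions from the DSO image
 * and step through them until a branch is found, 'to_ip' is reached, or
 * 'max_insn_cnt' instructions have been walked. Walked blocks are cached
 * per DSO offset (see intel_pt_cache_add()).
 */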
506 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
507                                    uint64_t *insn_cnt_ptr, uint64_t *ip,
508                                    uint64_t to_ip, uint64_t max_insn_cnt,
509                                    void *data)
510 {
511         struct intel_pt_queue *ptq = data;
512         struct machine *machine = ptq->pt->machine;
513         struct thread *thread;
514         struct addr_location al;
515         unsigned char buf[INTEL_PT_INSN_BUF_SZ];
516         ssize_t len;
517         int x86_64;
518         u8 cpumode;
519         u64 offset, start_offset, start_ip;
520         u64 insn_cnt = 0;
521         bool one_map = true;
522
523         intel_pt_insn->length = 0;
524
525         if (to_ip && *ip == to_ip)
526                 goto out_no_cache;
527
528         cpumode = intel_pt_cpumode(ptq->pt, *ip);
529
530         thread = ptq->thread;
531         if (!thread) {
532                 if (cpumode != PERF_RECORD_MISC_KERNEL)
533                         return -EINVAL;
534                 thread = ptq->pt->unknown_thread;
535         }
536
537         while (1) {
538                 if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
539                         return -EINVAL;
540
541                 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
542                     dso__data_status_seen(al.map->dso,
543                                           DSO_DATA_STATUS_SEEN_ITRACE))
544                         return -ENOENT;
545
546                 offset = al.map->map_ip(al.map, *ip);
547
548                 if (!to_ip && one_map) {
549                         struct intel_pt_cache_entry *e;
550
551                         e = intel_pt_cache_lookup(al.map->dso, machine, offset);
552                         if (e &&
553                             (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
554                                 *insn_cnt_ptr = e->insn_cnt;
555                                 *ip += e->byte_cnt;
556                                 intel_pt_insn->op = e->op;
557                                 intel_pt_insn->branch = e->branch;
558                                 intel_pt_insn->length = e->length;
559                                 intel_pt_insn->rel = e->rel;
560                                 memcpy(intel_pt_insn->buf, e->insn,
561                                        INTEL_PT_INSN_BUF_SZ);
562                                 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
563                                 return 0;
564                         }
565                 }
566
567                 start_offset = offset;
568                 start_ip = *ip;
569
570                 /* Load maps to ensure dso->is_64_bit has been updated */
571                 map__load(al.map);
572
573                 x86_64 = al.map->dso->is_64_bit;
574
575                 while (1) {
576                         len = dso__data_read_offset(al.map->dso, machine,
577                                                     offset, buf,
578                                                     INTEL_PT_INSN_BUF_SZ);
579                         if (len <= 0)
580                                 return -EINVAL;
581
582                         if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
583                                 return -EINVAL;
584
585                         intel_pt_log_insn(intel_pt_insn, *ip);
586
587                         insn_cnt += 1;
588
589                         if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
590                                 goto out;
591
592                         if (max_insn_cnt && insn_cnt >= max_insn_cnt)
593                                 goto out_no_cache;
594
595                         *ip += intel_pt_insn->length;
596
597                         if (to_ip && *ip == to_ip)
598                                 goto out_no_cache;
599
600                         if (*ip >= al.map->end)
601                                 break;
602
603                         offset += intel_pt_insn->length;
604                 }
605                 one_map = false;
606         }
607 out:
608         *insn_cnt_ptr = insn_cnt;
609
610         if (!one_map)
611                 goto out_no_cache;
612
613         /*
614          * Didn't look it up in the 'to_ip' case, so do it now to prevent duplicate
615          * entries.
616          */
617         if (to_ip) {
618                 struct intel_pt_cache_entry *e;
619
620                 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
621                 if (e)
622                         return 0;
623         }
624
625         /* Ignore cache errors */
626         intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
627                            *ip - start_ip, intel_pt_insn);
628
629         return 0;
630
631 out_no_cache:
632         *insn_cnt_ptr = insn_cnt;
633         return 0;
634 }
635
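/*
 * Check a TIP.PGD target against the user-supplied address filters: return
 * true if tracing is expected to have stopped there, i.e. a stop filter was
 * hit, or filters exist but none matched.
 */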
636 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
637                                   uint64_t offset, const char *filename)
638 {
639         struct addr_filter *filt;
640         bool have_filter   = false;
641         bool hit_tracestop = false;
642         bool hit_filter    = false;
643
644         list_for_each_entry(filt, &pt->filts.head, list) {
645                 if (filt->start)
646                         have_filter = true;
647
648                 if ((filename && !filt->filename) ||
649                     (!filename && filt->filename) ||
650                     (filename && strcmp(filename, filt->filename)))
651                         continue;
652
653                 if (!(offset >= filt->addr && offset < filt->addr + filt->size))
654                         continue;
655
656                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
657                              ip, offset, filename ? filename : "[kernel]",
658                              filt->start ? "filter" : "stop",
659                              filt->addr, filt->size);
660
661                 if (filt->start)
662                         hit_filter = true;
663                 else
664                         hit_tracestop = true;
665         }
666
667         if (!hit_tracestop && !hit_filter)
668                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
669                              ip, offset, filename ? filename : "[kernel]");
670
671         return hit_tracestop || (have_filter && !hit_filter);
672 }
673
674 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
675 {
676         struct intel_pt_queue *ptq = data;
677         struct thread *thread;
678         struct addr_location al;
679         u8 cpumode;
680         u64 offset;
681
682         if (ip >= ptq->pt->kernel_start)
683                 return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
684
685         cpumode = PERF_RECORD_MISC_USER;
686
687         thread = ptq->thread;
688         if (!thread)
689                 return -EINVAL;
690
691         if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
692                 return -EINVAL;
693
694         offset = al.map->map_ip(al.map, ip);
695
696         return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
697                                      al.map->dso->long_name);
698 }
699
700 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
701 {
702         return __intel_pt_pgd_ip(ip, data) > 0;
703 }
704
705 static bool intel_pt_get_config(struct intel_pt *pt,
706                                 struct perf_event_attr *attr, u64 *config)
707 {
708         if (attr->type == pt->pmu_type) {
709                 if (config)
710                         *config = attr->config;
711                 return true;
712         }
713
714         return false;
715 }
716
717 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
718 {
719         struct perf_evsel *evsel;
720
721         evlist__for_each_entry(pt->session->evlist, evsel) {
722                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
723                     !evsel->attr.exclude_kernel)
724                         return false;
725         }
726         return true;
727 }
728
729 static bool intel_pt_return_compression(struct intel_pt *pt)
730 {
731         struct perf_evsel *evsel;
732         u64 config;
733
734         if (!pt->noretcomp_bit)
735                 return true;
736
737         evlist__for_each_entry(pt->session->evlist, evsel) {
738                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
739                     (config & pt->noretcomp_bit))
740                         return false;
741         }
742         return true;
743 }
744
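/*
 * Return false only if an Intel PT event was configured with branch tracing
 * (config bit 13, BranchEn) explicitly disabled.
 */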
745 static bool intel_pt_branch_enable(struct intel_pt *pt)
746 {
747         struct perf_evsel *evsel;
748         u64 config;
749
750         evlist__for_each_entry(pt->session->evlist, evsel) {
751                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
752                     (config & 1) && !(config & 0x2000))
753                         return false;
754         }
755         return true;
756 }
757
758 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
759 {
760         struct perf_evsel *evsel;
761         unsigned int shift;
762         u64 config;
763
764         if (!pt->mtc_freq_bits)
765                 return 0;
766
767         for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
768                 config >>= 1;
769
770         evlist__for_each_entry(pt->session->evlist, evsel) {
771                 if (intel_pt_get_config(pt, &evsel->attr, &config))
772                         return (config & pt->mtc_freq_bits) >> shift;
773         }
774         return 0;
775 }
776
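/*
 * "Timeless" decoding means decoding without using timestamps: used when TSC
 * packets were not enabled or perf time conversion is unavailable, in which
 * case queues are processed serially rather than in timestamp order.
 */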
777 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
778 {
779         struct perf_evsel *evsel;
780         bool timeless_decoding = true;
781         u64 config;
782
783         if (!pt->tsc_bit || !pt->cap_user_time_zero)
784                 return true;
785
786         evlist__for_each_entry(pt->session->evlist, evsel) {
787                 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
788                         return true;
789                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
790                         if (config & pt->tsc_bit)
791                                 timeless_decoding = false;
792                         else
793                                 return true;
794                 }
795         }
796         return timeless_decoding;
797 }
798
799 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
800 {
801         struct perf_evsel *evsel;
802
803         evlist__for_each_entry(pt->session->evlist, evsel) {
804                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
805                     !evsel->attr.exclude_kernel)
806                         return true;
807         }
808         return false;
809 }
810
811 static bool intel_pt_have_tsc(struct intel_pt *pt)
812 {
813         struct perf_evsel *evsel;
814         bool have_tsc = false;
815         u64 config;
816
817         if (!pt->tsc_bit)
818                 return false;
819
820         evlist__for_each_entry(pt->session->evlist, evsel) {
821                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
822                         if (config & pt->tsc_bit)
823                                 have_tsc = true;
824                         else
825                                 return false;
826                 }
827         }
828         return have_tsc;
829 }
830
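/*
 * Convert nanoseconds to TSC ticks using the perf time conversion
 * parameters. The division is split into quotient and remainder to avoid
 * overflow when shifting.
 */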
831 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
832 {
833         u64 quot, rem;
834
835         quot = ns / pt->tc.time_mult;
836         rem  = ns % pt->tc.time_mult;
837         return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
838                 pt->tc.time_mult;
839 }
840
841 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
842                                                    unsigned int queue_nr)
843 {
844         struct intel_pt_params params = { .get_trace = 0, };
845         struct perf_env *env = pt->machine->env;
846         struct intel_pt_queue *ptq;
847
848         ptq = zalloc(sizeof(struct intel_pt_queue));
849         if (!ptq)
850                 return NULL;
851
852         if (pt->synth_opts.callchain) {
853                 size_t sz = sizeof(struct ip_callchain);
854
855                 /* Add 1 to callchain_sz for callchain context */
856                 sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
857                 ptq->chain = zalloc(sz);
858                 if (!ptq->chain)
859                         goto out_free;
860         }
861
862         if (pt->synth_opts.last_branch) {
863                 size_t sz = sizeof(struct branch_stack);
864
865                 sz += pt->synth_opts.last_branch_sz *
866                       sizeof(struct branch_entry);
867                 ptq->last_branch = zalloc(sz);
868                 if (!ptq->last_branch)
869                         goto out_free;
870                 ptq->last_branch_rb = zalloc(sz);
871                 if (!ptq->last_branch_rb)
872                         goto out_free;
873         }
874
875         ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
876         if (!ptq->event_buf)
877                 goto out_free;
878
879         ptq->pt = pt;
880         ptq->queue_nr = queue_nr;
881         ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
882         ptq->pid = -1;
883         ptq->tid = -1;
884         ptq->cpu = -1;
885         ptq->next_tid = -1;
886
887         params.get_trace = intel_pt_get_trace;
888         params.walk_insn = intel_pt_walk_next_insn;
889         params.lookahead = intel_pt_lookahead;
890         params.data = ptq;
891         params.return_compression = intel_pt_return_compression(pt);
892         params.branch_enable = intel_pt_branch_enable(pt);
893         params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
894         params.mtc_period = intel_pt_mtc_period(pt);
895         params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
896         params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
897
898         if (pt->filts.cnt > 0)
899                 params.pgd_ip = intel_pt_pgd_ip;
900
901         if (pt->synth_opts.instructions) {
902                 if (pt->synth_opts.period) {
903                         switch (pt->synth_opts.period_type) {
904                         case PERF_ITRACE_PERIOD_INSTRUCTIONS:
905                                 params.period_type =
906                                                 INTEL_PT_PERIOD_INSTRUCTIONS;
907                                 params.period = pt->synth_opts.period;
908                                 break;
909                         case PERF_ITRACE_PERIOD_TICKS:
910                                 params.period_type = INTEL_PT_PERIOD_TICKS;
911                                 params.period = pt->synth_opts.period;
912                                 break;
913                         case PERF_ITRACE_PERIOD_NANOSECS:
914                                 params.period_type = INTEL_PT_PERIOD_TICKS;
915                                 params.period = intel_pt_ns_to_ticks(pt,
916                                                         pt->synth_opts.period);
917                                 break;
918                         default:
919                                 break;
920                         }
921                 }
922
923                 if (!params.period) {
924                         params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
925                         params.period = 1;
926                 }
927         }
928
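        /*
         * Quirk: on this CPU model FUP packets can carry the next linear IP
         * (NLIP) rather than the current IP, so tell the decoder.
         */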
929         if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
930                 params.flags |= INTEL_PT_FUP_WITH_NLIP;
931
932         ptq->decoder = intel_pt_decoder_new(&params);
933         if (!ptq->decoder)
934                 goto out_free;
935
936         return ptq;
937
938 out_free:
939         zfree(&ptq->event_buf);
940         zfree(&ptq->last_branch);
941         zfree(&ptq->last_branch_rb);
942         zfree(&ptq->chain);
943         free(ptq);
944         return NULL;
945 }
946
947 static void intel_pt_free_queue(void *priv)
948 {
949         struct intel_pt_queue *ptq = priv;
950
951         if (!ptq)
952                 return;
953         thread__zput(ptq->thread);
954         intel_pt_decoder_free(ptq->decoder);
955         zfree(&ptq->event_buf);
956         zfree(&ptq->last_branch);
957         zfree(&ptq->last_branch_rb);
958         zfree(&ptq->chain);
959         free(ptq);
960 }
961
962 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
963                                      struct auxtrace_queue *queue)
964 {
965         struct intel_pt_queue *ptq = queue->priv;
966
967         if (queue->tid == -1 || pt->have_sched_switch) {
968                 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
969                 thread__zput(ptq->thread);
970         }
971
972         if (!ptq->thread && ptq->tid != -1)
973                 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
974
975         if (ptq->thread) {
976                 ptq->pid = ptq->thread->pid_;
977                 if (queue->cpu == -1)
978                         ptq->cpu = ptq->thread->cpu;
979         }
980 }
981
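/*
 * Translate the decoder state into perf branch sample flags (call, return,
 * interrupt, TX abort, trace begin/end, etc.) for the current sample.
 */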
982 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
983 {
984         if (ptq->state->flags & INTEL_PT_ABORT_TX) {
985                 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
986         } else if (ptq->state->flags & INTEL_PT_ASYNC) {
987                 if (ptq->state->to_ip)
988                         ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
989                                      PERF_IP_FLAG_ASYNC |
990                                      PERF_IP_FLAG_INTERRUPT;
991                 else
992                         ptq->flags = PERF_IP_FLAG_BRANCH |
993                                      PERF_IP_FLAG_TRACE_END;
994                 ptq->insn_len = 0;
995         } else {
996                 if (ptq->state->from_ip)
997                         ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
998                 else
999                         ptq->flags = PERF_IP_FLAG_BRANCH |
1000                                      PERF_IP_FLAG_TRACE_BEGIN;
1001                 if (ptq->state->flags & INTEL_PT_IN_TX)
1002                         ptq->flags |= PERF_IP_FLAG_IN_TX;
1003                 ptq->insn_len = ptq->state->insn_len;
1004                 memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
1005         }
1006
1007         if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
1008                 ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
1009         if (ptq->state->type & INTEL_PT_TRACE_END)
1010                 ptq->flags |= PERF_IP_FLAG_TRACE_END;
1011 }
1012
1013 static void intel_pt_setup_time_range(struct intel_pt *pt,
1014                                       struct intel_pt_queue *ptq)
1015 {
1016         if (!pt->range_cnt)
1017                 return;
1018
1019         ptq->sel_timestamp = pt->time_ranges[0].start;
1020         ptq->sel_idx = 0;
1021
1022         if (ptq->sel_timestamp) {
1023                 ptq->sel_start = true;
1024         } else {
1025                 ptq->sel_timestamp = pt->time_ranges[0].end;
1026                 ptq->sel_start = false;
1027         }
1028 }
1029
1030 static int intel_pt_setup_queue(struct intel_pt *pt,
1031                                 struct auxtrace_queue *queue,
1032                                 unsigned int queue_nr)
1033 {
1034         struct intel_pt_queue *ptq = queue->priv;
1035
1036         if (list_empty(&queue->head))
1037                 return 0;
1038
1039         if (!ptq) {
1040                 ptq = intel_pt_alloc_queue(pt, queue_nr);
1041                 if (!ptq)
1042                         return -ENOMEM;
1043                 queue->priv = ptq;
1044
1045                 if (queue->cpu != -1)
1046                         ptq->cpu = queue->cpu;
1047                 ptq->tid = queue->tid;
1048
1049                 if (pt->sampling_mode && !pt->snapshot_mode &&
1050                     pt->timeless_decoding)
1051                         ptq->step_through_buffers = true;
1052
1053                 ptq->sync_switch = pt->sync_switch;
1054
1055                 intel_pt_setup_time_range(pt, ptq);
1056         }
1057
1058         if (!ptq->on_heap &&
1059             (!ptq->sync_switch ||
1060              ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
1061                 const struct intel_pt_state *state;
1062                 int ret;
1063
1064                 if (pt->timeless_decoding)
1065                         return 0;
1066
1067                 intel_pt_log("queue %u getting timestamp\n", queue_nr);
1068                 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1069                              queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1070
1071                 if (ptq->sel_start && ptq->sel_timestamp) {
1072                         ret = intel_pt_fast_forward(ptq->decoder,
1073                                                     ptq->sel_timestamp);
1074                         if (ret)
1075                                 return ret;
1076                 }
1077
1078                 while (1) {
1079                         state = intel_pt_decode(ptq->decoder);
1080                         if (state->err) {
1081                                 if (state->err == INTEL_PT_ERR_NODATA) {
1082                                         intel_pt_log("queue %u has no timestamp\n",
1083                                                      queue_nr);
1084                                         return 0;
1085                                 }
1086                                 continue;
1087                         }
1088                         if (state->timestamp)
1089                                 break;
1090                 }
1091
1092                 ptq->timestamp = state->timestamp;
1093                 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
1094                              queue_nr, ptq->timestamp);
1095                 ptq->state = state;
1096                 ptq->have_sample = true;
1097                 if (ptq->sel_start && ptq->sel_timestamp &&
1098                     ptq->timestamp < ptq->sel_timestamp)
1099                         ptq->have_sample = false;
1100                 intel_pt_sample_flags(ptq);
1101                 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
1102                 if (ret)
1103                         return ret;
1104                 ptq->on_heap = true;
1105         }
1106
1107         return 0;
1108 }
1109
1110 static int intel_pt_setup_queues(struct intel_pt *pt)
1111 {
1112         unsigned int i;
1113         int ret;
1114
1115         for (i = 0; i < pt->queues.nr_queues; i++) {
1116                 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
1117                 if (ret)
1118                         return ret;
1119         }
1120         return 0;
1121 }
1122
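/*
 * Copy the last-branch ring buffer into the branch stack attached to a
 * sample, starting at the most recent entry and wrapping around.
 */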
1123 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
1124 {
1125         struct branch_stack *bs_src = ptq->last_branch_rb;
1126         struct branch_stack *bs_dst = ptq->last_branch;
1127         size_t nr = 0;
1128
1129         bs_dst->nr = bs_src->nr;
1130
1131         if (!bs_src->nr)
1132                 return;
1133
1134         nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1135         memcpy(&bs_dst->entries[0],
1136                &bs_src->entries[ptq->last_branch_pos],
1137                sizeof(struct branch_entry) * nr);
1138
1139         if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1140                 memcpy(&bs_dst->entries[nr],
1141                        &bs_src->entries[0],
1142                        sizeof(struct branch_entry) * ptq->last_branch_pos);
1143         }
1144 }
1145
1146 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1147 {
1148         ptq->last_branch_pos = 0;
1149         ptq->last_branch_rb->nr = 0;
1150 }
1151
1152 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1153 {
1154         const struct intel_pt_state *state = ptq->state;
1155         struct branch_stack *bs = ptq->last_branch_rb;
1156         struct branch_entry *be;
1157
1158         if (!ptq->last_branch_pos)
1159                 ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1160
1161         ptq->last_branch_pos -= 1;
1162
1163         be              = &bs->entries[ptq->last_branch_pos];
1164         be->from        = state->from_ip;
1165         be->to          = state->to_ip;
1166         be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1167         be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1168         /* No support for mispredict */
1169         be->flags.mispred = ptq->pt->mispred_all;
1170
1171         if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1172                 bs->nr += 1;
1173 }
1174
1175 static inline bool intel_pt_skip_event(struct intel_pt *pt)
1176 {
1177         return pt->synth_opts.initial_skip &&
1178                pt->num_events++ < pt->synth_opts.initial_skip;
1179 }
1180
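/*
 * Fill in the fields common to all synthesized samples (time, ip, pid/tid,
 * cpu, flags, instruction bytes) from the current decoder state.
 */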
1181 static void intel_pt_prep_b_sample(struct intel_pt *pt,
1182                                    struct intel_pt_queue *ptq,
1183                                    union perf_event *event,
1184                                    struct perf_sample *sample)
1185 {
1186         if (!pt->timeless_decoding)
1187                 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1188
1189         sample->ip = ptq->state->from_ip;
1190         sample->cpumode = intel_pt_cpumode(pt, sample->ip);
1191         sample->pid = ptq->pid;
1192         sample->tid = ptq->tid;
1193         sample->addr = ptq->state->to_ip;
1194         sample->period = 1;
1195         sample->cpu = ptq->cpu;
1196         sample->flags = ptq->flags;
1197         sample->insn_len = ptq->insn_len;
1198         memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1199
1200         event->sample.header.type = PERF_RECORD_SAMPLE;
1201         event->sample.header.misc = sample->cpumode;
1202         event->sample.header.size = sizeof(struct perf_event_header);
1203 }
1204
1205 static int intel_pt_inject_event(union perf_event *event,
1206                                  struct perf_sample *sample, u64 type)
1207 {
1208         event->header.size = perf_event__sample_event_size(sample, type, 0);
1209         return perf_event__synthesize_sample(event, type, 0, sample);
1210 }
1211
1212 static inline int intel_pt_opt_inject(struct intel_pt *pt,
1213                                       union perf_event *event,
1214                                       struct perf_sample *sample, u64 type)
1215 {
1216         if (!pt->synth_opts.inject)
1217                 return 0;
1218
1219         return intel_pt_inject_event(event, sample, type);
1220 }
1221
1222 static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
1223                                           union perf_event *event,
1224                                           struct perf_sample *sample, u64 type)
1225 {
1226         int ret;
1227
1228         ret = intel_pt_opt_inject(pt, event, sample, type);
1229         if (ret)
1230                 return ret;
1231
1232         ret = perf_session__deliver_synth_event(pt->session, event, sample);
1233         if (ret)
1234                 pr_err("Intel PT: failed to deliver event, error %d\n", ret);
1235
1236         return ret;
1237 }
1238
1239 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1240 {
1241         struct intel_pt *pt = ptq->pt;
1242         union perf_event *event = ptq->event_buf;
1243         struct perf_sample sample = { .ip = 0, };
1244         struct dummy_branch_stack {
1245                 u64                     nr;
1246                 struct branch_entry     entries;
1247         } dummy_bs;
1248
1249         if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1250                 return 0;
1251
1252         if (intel_pt_skip_event(pt))
1253                 return 0;
1254
1255         intel_pt_prep_b_sample(pt, ptq, event, &sample);
1256
1257         sample.id = ptq->pt->branches_id;
1258         sample.stream_id = ptq->pt->branches_id;
1259
1260         /*
1261          * perf report cannot handle events without a branch stack when using
1262          * SORT_MODE__BRANCH so make a dummy one.
1263          */
1264         if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1265                 dummy_bs = (struct dummy_branch_stack){
1266                         .nr = 1,
1267                         .entries = {
1268                                 .from = sample.ip,
1269                                 .to = sample.addr,
1270                         },
1271                 };
1272                 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1273         }
1274
1275         sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
1276         if (sample.cyc_cnt) {
1277                 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
1278                 ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
1279                 ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
1280         }
1281
1282         return intel_pt_deliver_synth_b_event(pt, event, &sample,
1283                                               pt->branches_sample_type);
1284 }
1285
1286 static void intel_pt_prep_sample(struct intel_pt *pt,
1287                                  struct intel_pt_queue *ptq,
1288                                  union perf_event *event,
1289                                  struct perf_sample *sample)
1290 {
1291         intel_pt_prep_b_sample(pt, ptq, event, sample);
1292
1293         if (pt->synth_opts.callchain) {
1294                 thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1295                                      pt->synth_opts.callchain_sz + 1,
1296                                      sample->ip, pt->kernel_start);
1297                 sample->callchain = ptq->chain;
1298         }
1299
1300         if (pt->synth_opts.last_branch) {
1301                 intel_pt_copy_last_branch_rb(ptq);
1302                 sample->branch_stack = ptq->last_branch;
1303         }
1304 }
1305
1306 static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
1307                                                struct intel_pt_queue *ptq,
1308                                                union perf_event *event,
1309                                                struct perf_sample *sample,
1310                                                u64 type)
1311 {
1312         int ret;
1313
1314         ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
1315
1316         if (pt->synth_opts.last_branch)
1317                 intel_pt_reset_last_branch_rb(ptq);
1318
1319         return ret;
1320 }
1321
1322 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1323 {
1324         struct intel_pt *pt = ptq->pt;
1325         union perf_event *event = ptq->event_buf;
1326         struct perf_sample sample = { .ip = 0, };
1327
1328         if (intel_pt_skip_event(pt))
1329                 return 0;
1330
1331         intel_pt_prep_sample(pt, ptq, event, &sample);
1332
1333         sample.id = ptq->pt->instructions_id;
1334         sample.stream_id = ptq->pt->instructions_id;
1335         sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1336
1337         sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
1338         if (sample.cyc_cnt) {
1339                 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
1340                 ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
1341                 ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
1342         }
1343
1344         ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1345
1346         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1347                                             pt->instructions_sample_type);
1348 }
1349
1350 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1351 {
1352         struct intel_pt *pt = ptq->pt;
1353         union perf_event *event = ptq->event_buf;
1354         struct perf_sample sample = { .ip = 0, };
1355
1356         if (intel_pt_skip_event(pt))
1357                 return 0;
1358
1359         intel_pt_prep_sample(pt, ptq, event, &sample);
1360
1361         sample.id = ptq->pt->transactions_id;
1362         sample.stream_id = ptq->pt->transactions_id;
1363
1364         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1365                                             pt->transactions_sample_type);
1366 }
1367
1368 static void intel_pt_prep_p_sample(struct intel_pt *pt,
1369                                    struct intel_pt_queue *ptq,
1370                                    union perf_event *event,
1371                                    struct perf_sample *sample)
1372 {
1373         intel_pt_prep_sample(pt, ptq, event, sample);
1374
1375         /*
1376          * Zero IP is used to mean "trace start" but that is not the case for
1377          * power or PTWRITE events with no IP, so clear the flags.
1378          */
1379         if (!sample->ip)
1380                 sample->flags = 0;
1381 }
1382
1383 static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
1384 {
1385         struct intel_pt *pt = ptq->pt;
1386         union perf_event *event = ptq->event_buf;
1387         struct perf_sample sample = { .ip = 0, };
1388         struct perf_synth_intel_ptwrite raw;
1389
1390         if (intel_pt_skip_event(pt))
1391                 return 0;
1392
1393         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1394
1395         sample.id = ptq->pt->ptwrites_id;
1396         sample.stream_id = ptq->pt->ptwrites_id;
1397
1398         raw.flags = 0;
1399         raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1400         raw.payload = cpu_to_le64(ptq->state->ptw_payload);
1401
1402         sample.raw_size = perf_synth__raw_size(raw);
1403         sample.raw_data = perf_synth__raw_data(&raw);
1404
1405         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1406                                             pt->ptwrites_sample_type);
1407 }
1408
1409 static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1410 {
1411         struct intel_pt *pt = ptq->pt;
1412         union perf_event *event = ptq->event_buf;
1413         struct perf_sample sample = { .ip = 0, };
1414         struct perf_synth_intel_cbr raw;
1415         u32 flags;
1416
1417         if (intel_pt_skip_event(pt))
1418                 return 0;
1419
1420         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1421
1422         sample.id = ptq->pt->cbr_id;
1423         sample.stream_id = ptq->pt->cbr_id;
1424
1425         flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
1426         raw.flags = cpu_to_le32(flags);
1427         raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
1428         raw.reserved3 = 0;
1429
1430         sample.raw_size = perf_synth__raw_size(raw);
1431         sample.raw_data = perf_synth__raw_data(&raw);
1432
1433         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1434                                             pt->pwr_events_sample_type);
1435 }
1436
1437 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
1438 {
1439         struct intel_pt *pt = ptq->pt;
1440         union perf_event *event = ptq->event_buf;
1441         struct perf_sample sample = { .ip = 0, };
1442         struct perf_synth_intel_mwait raw;
1443
1444         if (intel_pt_skip_event(pt))
1445                 return 0;
1446
1447         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1448
1449         sample.id = ptq->pt->mwait_id;
1450         sample.stream_id = ptq->pt->mwait_id;
1451
1452         raw.reserved = 0;
1453         raw.payload = cpu_to_le64(ptq->state->mwait_payload);
1454
1455         sample.raw_size = perf_synth__raw_size(raw);
1456         sample.raw_data = perf_synth__raw_data(&raw);
1457
1458         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1459                                             pt->pwr_events_sample_type);
1460 }
1461
1462 static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
1463 {
1464         struct intel_pt *pt = ptq->pt;
1465         union perf_event *event = ptq->event_buf;
1466         struct perf_sample sample = { .ip = 0, };
1467         struct perf_synth_intel_pwre raw;
1468
1469         if (intel_pt_skip_event(pt))
1470                 return 0;
1471
1472         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1473
1474         sample.id = ptq->pt->pwre_id;
1475         sample.stream_id = ptq->pt->pwre_id;
1476
1477         raw.reserved = 0;
1478         raw.payload = cpu_to_le64(ptq->state->pwre_payload);
1479
1480         sample.raw_size = perf_synth__raw_size(raw);
1481         sample.raw_data = perf_synth__raw_data(&raw);
1482
1483         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1484                                             pt->pwr_events_sample_type);
1485 }
1486
1487 static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
1488 {
1489         struct intel_pt *pt = ptq->pt;
1490         union perf_event *event = ptq->event_buf;
1491         struct perf_sample sample = { .ip = 0, };
1492         struct perf_synth_intel_exstop raw;
1493
1494         if (intel_pt_skip_event(pt))
1495                 return 0;
1496
1497         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1498
1499         sample.id = ptq->pt->exstop_id;
1500         sample.stream_id = ptq->pt->exstop_id;
1501
1502         raw.flags = 0;
1503         raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1504
1505         sample.raw_size = perf_synth__raw_size(raw);
1506         sample.raw_data = perf_synth__raw_data(&raw);
1507
1508         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1509                                             pt->pwr_events_sample_type);
1510 }
1511
1512 static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1513 {
1514         struct intel_pt *pt = ptq->pt;
1515         union perf_event *event = ptq->event_buf;
1516         struct perf_sample sample = { .ip = 0, };
1517         struct perf_synth_intel_pwrx raw;
1518
1519         if (intel_pt_skip_event(pt))
1520                 return 0;
1521
1522         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1523
1524         sample.id = ptq->pt->pwrx_id;
1525         sample.stream_id = ptq->pt->pwrx_id;
1526
1527         raw.reserved = 0;
1528         raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
1529
1530         sample.raw_size = perf_synth__raw_size(raw);
1531         sample.raw_data = perf_synth__raw_data(&raw);
1532
1533         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1534                                             pt->pwr_events_sample_type);
1535 }
1536
1537 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1538                                 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
1539 {
1540         union perf_event event;
1541         char msg[MAX_AUXTRACE_ERROR_MSG];
1542         int err;
1543
1544         intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1545
1546         auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1547                              code, cpu, pid, tid, ip, msg, timestamp);
1548
1549         err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1550         if (err)
1551                 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1552                        err);
1553
1554         return err;
1555 }
1556
1557 static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
1558                                  const struct intel_pt_state *state)
1559 {
1560         struct intel_pt *pt = ptq->pt;
1561         u64 tm = ptq->timestamp;
1562
1563         tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
1564
1565         return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
1566                                     ptq->tid, state->from_ip, tm);
1567 }
1568
1569 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1570 {
1571         struct auxtrace_queue *queue;
1572         pid_t tid = ptq->next_tid;
1573         int err;
1574
1575         if (tid == -1)
1576                 return 0;
1577
1578         intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1579
1580         err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1581
1582         queue = &pt->queues.queue_array[ptq->queue_nr];
1583         intel_pt_set_pid_tid_cpu(pt, queue);
1584
1585         ptq->next_tid = -1;
1586
1587         return err;
1588 }
1589
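/*
 * A branch is treated as the kernel context-switch point only if it lands on
 * the resolved switch_ip and is a plain branch, i.e. not conditional, async,
 * an interrupt or a TX abort.
 */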
1590 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1591 {
1592         struct intel_pt *pt = ptq->pt;
1593
1594         return ip == pt->switch_ip &&
1595                (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1596                !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1597                                PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1598 }
1599
1600 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1601                           INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
1602                           INTEL_PT_CBR_CHG)
1603
1604 static int intel_pt_sample(struct intel_pt_queue *ptq)
1605 {
1606         const struct intel_pt_state *state = ptq->state;
1607         struct intel_pt *pt = ptq->pt;
1608         int err;
1609
1610         if (!ptq->have_sample)
1611                 return 0;
1612
1613         ptq->have_sample = false;
1614
1615         if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
1616                 /*
1617                  * Cycle count and instruction count only go together to create
1618                  * a valid IPC ratio when the cycle count changes.
1619                  */
1620                 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
1621                 ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
1622         }
1623
1624         if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
1625                 if (state->type & INTEL_PT_CBR_CHG) {
1626                         err = intel_pt_synth_cbr_sample(ptq);
1627                         if (err)
1628                                 return err;
1629                 }
1630                 if (state->type & INTEL_PT_MWAIT_OP) {
1631                         err = intel_pt_synth_mwait_sample(ptq);
1632                         if (err)
1633                                 return err;
1634                 }
1635                 if (state->type & INTEL_PT_PWR_ENTRY) {
1636                         err = intel_pt_synth_pwre_sample(ptq);
1637                         if (err)
1638                                 return err;
1639                 }
1640                 if (state->type & INTEL_PT_EX_STOP) {
1641                         err = intel_pt_synth_exstop_sample(ptq);
1642                         if (err)
1643                                 return err;
1644                 }
1645                 if (state->type & INTEL_PT_PWR_EXIT) {
1646                         err = intel_pt_synth_pwrx_sample(ptq);
1647                         if (err)
1648                                 return err;
1649                 }
1650         }
1651
1652         if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
1653                 err = intel_pt_synth_instruction_sample(ptq);
1654                 if (err)
1655                         return err;
1656         }
1657
1658         if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
1659                 err = intel_pt_synth_transaction_sample(ptq);
1660                 if (err)
1661                         return err;
1662         }
1663
1664         if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
1665                 err = intel_pt_synth_ptwrite_sample(ptq);
1666                 if (err)
1667                         return err;
1668         }
1669
1670         if (!(state->type & INTEL_PT_BRANCH))
1671                 return 0;
1672
1673         if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1674                 thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
1675                                     state->to_ip, ptq->insn_len,
1676                                     state->trace_nr);
1677         else
1678                 thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
1679
1680         if (pt->sample_branches) {
1681                 err = intel_pt_synth_branch_sample(ptq);
1682                 if (err)
1683                         return err;
1684         }
1685
1686         if (pt->synth_opts.last_branch)
1687                 intel_pt_update_last_branch_rb(ptq);
1688
1689         if (!ptq->sync_switch)
1690                 return 0;
1691
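        /*
         * Update the context-switch state machine: hitting the switch IP
         * while already tracing means a switch event should follow, so return
         * 1 and let decoding of this queue pause until it arrives; in the
         * other states the pending tid change can be applied right away.
         */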
1692         if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1693                 switch (ptq->switch_state) {
1694                 case INTEL_PT_SS_NOT_TRACING:
1695                 case INTEL_PT_SS_UNKNOWN:
1696                 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1697                         err = intel_pt_next_tid(pt, ptq);
1698                         if (err)
1699                                 return err;
1700                         ptq->switch_state = INTEL_PT_SS_TRACING;
1701                         break;
1702                 default:
1703                         ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1704                         return 1;
1705                 }
1706         } else if (!state->to_ip) {
1707                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1708         } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1709                 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1710         } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1711                    state->to_ip == pt->ptss_ip &&
1712                    (ptq->flags & PERF_IP_FLAG_CALL)) {
1713                 ptq->switch_state = INTEL_PT_SS_TRACING;
1714         }
1715
1716         return 0;
1717 }
1718
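/*
 * Look up the kernel's __switch_to symbol to get the switch IP and, if
 * requested, resolve ptss_ip: the entry point of the function that emits the
 * sched_switch data (perf_trace_sched_switch for the tracepoint case,
 * __perf_event_task_sched_out otherwise), which intel_pt_sample() uses to
 * move from the UNKNOWN to the TRACING switch state.
 */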
1719 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1720 {
1721         struct machine *machine = pt->machine;
1722         struct map *map;
1723         struct symbol *sym, *start;
1724         u64 ip, switch_ip = 0;
1725         const char *ptss;
1726
1727         if (ptss_ip)
1728                 *ptss_ip = 0;
1729
1730         map = machine__kernel_map(machine);
1731         if (!map)
1732                 return 0;
1733
1734         if (map__load(map))
1735                 return 0;
1736
1737         start = dso__first_symbol(map->dso);
1738
1739         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1740                 if (sym->binding == STB_GLOBAL &&
1741                     !strcmp(sym->name, "__switch_to")) {
1742                         ip = map->unmap_ip(map, sym->start);
1743                         if (ip >= map->start && ip < map->end) {
1744                                 switch_ip = ip;
1745                                 break;
1746                         }
1747                 }
1748         }
1749
1750         if (!switch_ip || !ptss_ip)
1751                 return 0;
1752
1753         if (pt->have_sched_switch == 1)
1754                 ptss = "perf_trace_sched_switch";
1755         else
1756                 ptss = "__perf_event_task_sched_out";
1757
1758         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1759                 if (!strcmp(sym->name, ptss)) {
1760                         ip = map->unmap_ip(map, sym->start);
1761                         if (ip >= map->start && ip < map->end) {
1762                                 *ptss_ip = ip;
1763                                 break;
1764                         }
1765                 }
1766         }
1767
1768         return switch_ip;
1769 }
1770
1771 static void intel_pt_enable_sync_switch(struct intel_pt *pt)
1772 {
1773         unsigned int i;
1774
1775         pt->sync_switch = true;
1776
1777         for (i = 0; i < pt->queues.nr_queues; i++) {
1778                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1779                 struct intel_pt_queue *ptq = queue->priv;
1780
1781                 if (ptq)
1782                         ptq->sync_switch = true;
1783         }
1784 }
1785
1786 /*
1787  * To filter against time ranges, it is only necessary to look at the next start
1788  * or end time.
1789  */
1790 static bool intel_pt_next_time(struct intel_pt_queue *ptq)
1791 {
1792         struct intel_pt *pt = ptq->pt;
1793
1794         if (ptq->sel_start) {
1795                 /* Next time is an end time */
1796                 ptq->sel_start = false;
1797                 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
1798                 return true;
1799         } else if (ptq->sel_idx + 1 < pt->range_cnt) {
1800                 /* Next time is a start time */
1801                 ptq->sel_start = true;
1802                 ptq->sel_idx += 1;
1803                 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
1804                 return true;
1805         }
1806
1807         /* No next time */
1808         return false;
1809 }
1810
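/*
 * Apply the selected time ranges: samples before the next start time are
 * dropped and the decoder is fast forwarded to that start (*ff_timestamp
 * remembers the last fast-forward target so it is not repeated); once past
 * the last end time, decoding of the queue stops (return 1). Returns 0 while
 * inside a selected range.
 */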
1811 static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
1812 {
1813         int err;
1814
1815         while (1) {
1816                 if (ptq->sel_start) {
1817                         if (ptq->timestamp >= ptq->sel_timestamp) {
1818                                 /* After start time, so consider next time */
1819                                 intel_pt_next_time(ptq);
1820                                 if (!ptq->sel_timestamp) {
1821                                         /* No end time */
1822                                         return 0;
1823                                 }
1824                                 /* Check against end time */
1825                                 continue;
1826                         }
1827                         /* Before start time, so fast forward */
1828                         ptq->have_sample = false;
1829                         if (ptq->sel_timestamp > *ff_timestamp) {
1830                                 if (ptq->sync_switch) {
1831                                         intel_pt_next_tid(ptq->pt, ptq);
1832                                         ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1833                                 }
1834                                 *ff_timestamp = ptq->sel_timestamp;
1835                                 err = intel_pt_fast_forward(ptq->decoder,
1836                                                             ptq->sel_timestamp);
1837                                 if (err)
1838                                         return err;
1839                         }
1840                         return 0;
1841                 } else if (ptq->timestamp > ptq->sel_timestamp) {
1842                         /* After end time, so consider next time */
1843                         if (!intel_pt_next_time(ptq)) {
1844                                 /* No next time range, so stop decoding */
1845                                 ptq->have_sample = false;
1846                                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1847                                 return 1;
1848                         }
1849                         /* Check against next start time */
1850                         continue;
1851                 } else {
1852                         /* Before end time */
1853                         return 0;
1854                 }
1855         }
1856 }
1857
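/*
 * Per-queue decode loop: on first use resolve the kernel switch IP and enable
 * sync_switch where possible, then repeatedly synthesize samples for the
 * current decoder state, decode the next state, update the queue timestamp
 * (using estimated TSC where noted), apply any time filtering, and return
 * once the queue's timestamp reaches *timestamp so queues stay time ordered.
 */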
1858 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1859 {
1860         const struct intel_pt_state *state = ptq->state;
1861         struct intel_pt *pt = ptq->pt;
1862         u64 ff_timestamp = 0;
1863         int err;
1864
1865         if (!pt->kernel_start) {
1866                 pt->kernel_start = machine__kernel_start(pt->machine);
1867                 if (pt->per_cpu_mmaps &&
1868                     (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1869                     !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1870                     !pt->sampling_mode) {
1871                         pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1872                         if (pt->switch_ip) {
1873                                 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1874                                              pt->switch_ip, pt->ptss_ip);
1875                                 intel_pt_enable_sync_switch(pt);
1876                         }
1877                 }
1878         }
1879
1880         intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1881                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1882         while (1) {
1883                 err = intel_pt_sample(ptq);
1884                 if (err)
1885                         return err;
1886
1887                 state = intel_pt_decode(ptq->decoder);
1888                 if (state->err) {
1889                         if (state->err == INTEL_PT_ERR_NODATA)
1890                                 return 1;
1891                         if (ptq->sync_switch &&
1892                             state->from_ip >= pt->kernel_start) {
1893                                 ptq->sync_switch = false;
1894                                 intel_pt_next_tid(pt, ptq);
1895                         }
1896                         if (pt->synth_opts.errors) {
1897                                 err = intel_ptq_synth_error(ptq, state);
1898                                 if (err)
1899                                         return err;
1900                         }
1901                         continue;
1902                 }
1903
1904                 ptq->state = state;
1905                 ptq->have_sample = true;
1906                 intel_pt_sample_flags(ptq);
1907
1908                 /* Use estimated TSC upon return to user space */
1909                 if (pt->est_tsc &&
1910                     (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1911                     state->to_ip && state->to_ip < pt->kernel_start) {
1912                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1913                                      state->timestamp, state->est_timestamp);
1914                         ptq->timestamp = state->est_timestamp;
1915                 /* Use estimated TSC in unknown switch state */
1916                 } else if (ptq->sync_switch &&
1917                            ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1918                            intel_pt_is_switch_ip(ptq, state->to_ip) &&
1919                            ptq->next_tid == -1) {
1920                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1921                                      state->timestamp, state->est_timestamp);
1922                         ptq->timestamp = state->est_timestamp;
1923                 } else if (state->timestamp > ptq->timestamp) {
1924                         ptq->timestamp = state->timestamp;
1925                 }
1926
1927                 if (ptq->sel_timestamp) {
1928                         err = intel_pt_time_filter(ptq, &ff_timestamp);
1929                         if (err)
1930                                 return err;
1931                 }
1932
1933                 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1934                         *timestamp = ptq->timestamp;
1935                         return 0;
1936                 }
1937         }
1938         return 0;
1939 }
1940
1941 static inline int intel_pt_update_queues(struct intel_pt *pt)
1942 {
1943         if (pt->queues.new_data) {
1944                 pt->queues.new_data = false;
1945                 return intel_pt_setup_queues(pt);
1946         }
1947         return 0;
1948 }
1949
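/*
 * Process queues in timestamp order using the auxtrace heap: decode each
 * queue up to 'timestamp' or up to just past the next queue's ordinal,
 * whichever is earlier, then re-add it to the heap at its new position.
 */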
1950 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1951 {
1952         unsigned int queue_nr;
1953         u64 ts;
1954         int ret;
1955
1956         while (1) {
1957                 struct auxtrace_queue *queue;
1958                 struct intel_pt_queue *ptq;
1959
1960                 if (!pt->heap.heap_cnt)
1961                         return 0;
1962
1963                 if (pt->heap.heap_array[0].ordinal >= timestamp)
1964                         return 0;
1965
1966                 queue_nr = pt->heap.heap_array[0].queue_nr;
1967                 queue = &pt->queues.queue_array[queue_nr];
1968                 ptq = queue->priv;
1969
1970                 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1971                              queue_nr, pt->heap.heap_array[0].ordinal,
1972                              timestamp);
1973
1974                 auxtrace_heap__pop(&pt->heap);
1975
1976                 if (pt->heap.heap_cnt) {
1977                         ts = pt->heap.heap_array[0].ordinal + 1;
1978                         if (ts > timestamp)
1979                                 ts = timestamp;
1980                 } else {
1981                         ts = timestamp;
1982                 }
1983
1984                 intel_pt_set_pid_tid_cpu(pt, queue);
1985
1986                 ret = intel_pt_run_decoder(ptq, &ts);
1987
1988                 if (ret < 0) {
1989                         auxtrace_heap__add(&pt->heap, queue_nr, ts);
1990                         return ret;
1991                 }
1992
1993                 if (!ret) {
1994                         ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1995                         if (ret < 0)
1996                                 return ret;
1997                 } else {
1998                         ptq->on_heap = false;
1999                 }
2000         }
2001
2002         return 0;
2003 }
2004
2005 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
2006                                             u64 time_)
2007 {
2008         struct auxtrace_queues *queues = &pt->queues;
2009         unsigned int i;
2010         u64 ts = 0;
2011
2012         for (i = 0; i < queues->nr_queues; i++) {
2013                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
2014                 struct intel_pt_queue *ptq = queue->priv;
2015
2016                 if (ptq && (tid == -1 || ptq->tid == tid)) {
2017                         ptq->time = time_;
2018                         intel_pt_set_pid_tid_cpu(pt, queue);
2019                         intel_pt_run_decoder(ptq, &ts);
2020                 }
2021         }
2022         return 0;
2023 }
2024
2025 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
2026 {
2027         return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
2028                                     sample->pid, sample->tid, 0, sample->time);
2029 }
2030
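/*
 * Find the queue for a given cpu. Queues are normally indexed by cpu number,
 * so try that index first and then search outwards in both directions.
 */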
2031 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
2032 {
2033         unsigned i, j;
2034
2035         if (cpu < 0 || !pt->queues.nr_queues)
2036                 return NULL;
2037
2038         if ((unsigned)cpu >= pt->queues.nr_queues)
2039                 i = pt->queues.nr_queues - 1;
2040         else
2041                 i = cpu;
2042
2043         if (pt->queues.queue_array[i].cpu == cpu)
2044                 return pt->queues.queue_array[i].priv;
2045
2046         for (j = 0; i > 0; j++) {
2047                 if (pt->queues.queue_array[--i].cpu == cpu)
2048                         return pt->queues.queue_array[i].priv;
2049         }
2050
2051         for (; j < pt->queues.nr_queues; j++) {
2052                 if (pt->queues.queue_array[j].cpu == cpu)
2053                         return pt->queues.queue_array[j].priv;
2054         }
2055
2056         return NULL;
2057 }
2058
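/*
 * Feed a context switch into the queue's switch state machine. Returns 0 if
 * the tid change is deferred to the decoder (ptq->next_tid is set), 1 if the
 * caller should update the current tid itself, or a negative error code.
 */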
2059 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
2060                                 u64 timestamp)
2061 {
2062         struct intel_pt_queue *ptq;
2063         int err;
2064
2065         if (!pt->sync_switch)
2066                 return 1;
2067
2068         ptq = intel_pt_cpu_to_ptq(pt, cpu);
2069         if (!ptq || !ptq->sync_switch)
2070                 return 1;
2071
2072         switch (ptq->switch_state) {
2073         case INTEL_PT_SS_NOT_TRACING:
2074                 break;
2075         case INTEL_PT_SS_UNKNOWN:
2076         case INTEL_PT_SS_TRACING:
2077                 ptq->next_tid = tid;
2078                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
2079                 return 0;
2080         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2081                 if (!ptq->on_heap) {
2082                         ptq->timestamp = perf_time_to_tsc(timestamp,
2083                                                           &pt->tc);
2084                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
2085                                                  ptq->timestamp);
2086                         if (err)
2087                                 return err;
2088                         ptq->on_heap = true;
2089                 }
2090                 ptq->switch_state = INTEL_PT_SS_TRACING;
2091                 break;
2092         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2093                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
2094                 break;
2095         default:
2096                 break;
2097         }
2098
2099         ptq->next_tid = -1;
2100
2101         return 1;
2102 }
2103
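/*
 * Handle a sched:sched_switch tracepoint sample: extract next_pid and either
 * defer the tid change to the decoder via intel_pt_sync_switch() or apply it
 * to the machine state immediately.
 */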
2104 static int intel_pt_process_switch(struct intel_pt *pt,
2105                                    struct perf_sample *sample)
2106 {
2107         struct perf_evsel *evsel;
2108         pid_t tid;
2109         int cpu, ret;
2110
2111         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
2112         if (evsel != pt->switch_evsel)
2113                 return 0;
2114
2115         tid = perf_evsel__intval(evsel, sample, "next_pid");
2116         cpu = sample->cpu;
2117
2118         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2119                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
2120                      &pt->tc));
2121
2122         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2123         if (ret <= 0)
2124                 return ret;
2125
2126         return machine__set_current_tid(pt->machine, cpu, -1, tid);
2127 }
2128
2129 static int intel_pt_context_switch_in(struct intel_pt *pt,
2130                                       struct perf_sample *sample)
2131 {
2132         pid_t pid = sample->pid;
2133         pid_t tid = sample->tid;
2134         int cpu = sample->cpu;
2135
2136         if (pt->sync_switch) {
2137                 struct intel_pt_queue *ptq;
2138
2139                 ptq = intel_pt_cpu_to_ptq(pt, cpu);
2140                 if (ptq && ptq->sync_switch) {
2141                         ptq->next_tid = -1;
2142                         switch (ptq->switch_state) {
2143                         case INTEL_PT_SS_NOT_TRACING:
2144                         case INTEL_PT_SS_UNKNOWN:
2145                         case INTEL_PT_SS_TRACING:
2146                                 break;
2147                         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2148                         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2149                                 ptq->switch_state = INTEL_PT_SS_TRACING;
2150                                 break;
2151                         default:
2152                                 break;
2153                         }
2154                 }
2155         }
2156
2157         /*
2158          * If the current tid has not been updated yet, ensure it is updated
2159          * now that a "switch in" event has occurred.
2160          */
2161         if (machine__get_current_tid(pt->machine, cpu) == tid)
2162                 return 0;
2163
2164         return machine__set_current_tid(pt->machine, cpu, pid, tid);
2165 }
2166
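/*
 * Handle PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE. With cpu-wide
 * context switch events (have_sched_switch == 3) the incoming task is taken
 * from the switch-out event's next_prev fields; otherwise switch-out events
 * are ignored and the task comes from the switch-in sample itself.
 */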
2167 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
2168                                    struct perf_sample *sample)
2169 {
2170         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2171         pid_t pid, tid;
2172         int cpu, ret;
2173
2174         cpu = sample->cpu;
2175
2176         if (pt->have_sched_switch == 3) {
2177                 if (!out)
2178                         return intel_pt_context_switch_in(pt, sample);
2179                 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
2180                         pr_err("Expecting CPU-wide context switch event\n");
2181                         return -EINVAL;
2182                 }
2183                 pid = event->context_switch.next_prev_pid;
2184                 tid = event->context_switch.next_prev_tid;
2185         } else {
2186                 if (out)
2187                         return 0;
2188                 pid = sample->pid;
2189                 tid = sample->tid;
2190         }
2191
2192         if (tid == -1) {
2193                 pr_err("context_switch event has no tid\n");
2194                 return -EINVAL;
2195         }
2196
2197         intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2198                      cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
2199                      &pt->tc));
2200
2201         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2202         if (ret <= 0)
2203                 return ret;
2204
2205         return machine__set_current_tid(pt->machine, cpu, pid, tid);
2206 }
2207
2208 static int intel_pt_process_itrace_start(struct intel_pt *pt,
2209                                          union perf_event *event,
2210                                          struct perf_sample *sample)
2211 {
2212         if (!pt->per_cpu_mmaps)
2213                 return 0;
2214
2215         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2216                      sample->cpu, event->itrace_start.pid,
2217                      event->itrace_start.tid, sample->time,
2218                      perf_time_to_tsc(sample->time, &pt->tc));
2219
2220         return machine__set_current_tid(pt->machine, sample->cpu,
2221                                         event->itrace_start.pid,
2222                                         event->itrace_start.tid);
2223 }
2224
2225 static int intel_pt_process_event(struct perf_session *session,
2226                                   union perf_event *event,
2227                                   struct perf_sample *sample,
2228                                   struct perf_tool *tool)
2229 {
2230         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2231                                            auxtrace);
2232         u64 timestamp;
2233         int err = 0;
2234
2235         if (dump_trace)
2236                 return 0;
2237
2238         if (!tool->ordered_events) {
2239                 pr_err("Intel Processor Trace requires ordered events\n");
2240                 return -EINVAL;
2241         }
2242
2243         if (sample->time && sample->time != (u64)-1)
2244                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
2245         else
2246                 timestamp = 0;
2247
2248         if (timestamp || pt->timeless_decoding) {
2249                 err = intel_pt_update_queues(pt);
2250                 if (err)
2251                         return err;
2252         }
2253
2254         if (pt->timeless_decoding) {
2255                 if (event->header.type == PERF_RECORD_EXIT) {
2256                         err = intel_pt_process_timeless_queues(pt,
2257                                                                event->fork.tid,
2258                                                                sample->time);
2259                 }
2260         } else if (timestamp) {
2261                 err = intel_pt_process_queues(pt, timestamp);
2262         }
2263         if (err)
2264                 return err;
2265
2266         if (event->header.type == PERF_RECORD_AUX &&
2267             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
2268             pt->synth_opts.errors) {
2269                 err = intel_pt_lost(pt, sample);
2270                 if (err)
2271                         return err;
2272         }
2273
2274         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
2275                 err = intel_pt_process_switch(pt, sample);
2276         else if (event->header.type == PERF_RECORD_ITRACE_START)
2277                 err = intel_pt_process_itrace_start(pt, event, sample);
2278         else if (event->header.type == PERF_RECORD_SWITCH ||
2279                  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2280                 err = intel_pt_context_switch(pt, event, sample);
2281
2282         intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
2283                      event->header.type, sample->cpu, sample->time, timestamp);
2284         intel_pt_log_event(event);
2285
2286         return err;
2287 }
2288
2289 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
2290 {
2291         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2292                                            auxtrace);
2293         int ret;
2294
2295         if (dump_trace)
2296                 return 0;
2297
2298         if (!tool->ordered_events)
2299                 return -EINVAL;
2300
2301         ret = intel_pt_update_queues(pt);
2302         if (ret < 0)
2303                 return ret;
2304
2305         if (pt->timeless_decoding)
2306                 return intel_pt_process_timeless_queues(pt, -1,
2307                                                         MAX_TIMESTAMP - 1);
2308
2309         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
2310 }
2311
2312 static void intel_pt_free_events(struct perf_session *session)
2313 {
2314         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2315                                            auxtrace);
2316         struct auxtrace_queues *queues = &pt->queues;
2317         unsigned int i;
2318
2319         for (i = 0; i < queues->nr_queues; i++) {
2320                 intel_pt_free_queue(queues->queue_array[i].priv);
2321                 queues->queue_array[i].priv = NULL;
2322         }
2323         intel_pt_log_disable();
2324         auxtrace_queues__free(queues);
2325 }
2326
2327 static void intel_pt_free(struct perf_session *session)
2328 {
2329         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2330                                            auxtrace);
2331
2332         auxtrace_heap__free(&pt->heap);
2333         intel_pt_free_events(session);
2334         session->auxtrace = NULL;
2335         thread__put(pt->unknown_thread);
2336         addr_filters__exit(&pt->filts);
2337         zfree(&pt->filter);
2338         zfree(&pt->time_ranges);
2339         free(pt);
2340 }
2341
2342 static int intel_pt_process_auxtrace_event(struct perf_session *session,
2343                                            union perf_event *event,
2344                                            struct perf_tool *tool __maybe_unused)
2345 {
2346         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2347                                            auxtrace);
2348
2349         if (!pt->data_queued) {
2350                 struct auxtrace_buffer *buffer;
2351                 off_t data_offset;
2352                 int fd = perf_data__fd(session->data);
2353                 int err;
2354
2355                 if (perf_data__is_pipe(session->data)) {
2356                         data_offset = 0;
2357                 } else {
2358                         data_offset = lseek(fd, 0, SEEK_CUR);
2359                         if (data_offset == -1)
2360                                 return -errno;
2361                 }
2362
2363                 err = auxtrace_queues__add_event(&pt->queues, session, event,
2364                                                  data_offset, &buffer);
2365                 if (err)
2366                         return err;
2367
2368                 /* Dump here now that we have copied a piped trace out of the pipe */
2369                 if (dump_trace) {
2370                         if (auxtrace_buffer__get_data(buffer, fd)) {
2371                                 intel_pt_dump_event(pt, buffer->data,
2372                                                     buffer->size);
2373                                 auxtrace_buffer__put_data(buffer);
2374                         }
2375                 }
2376         }
2377
2378         return 0;
2379 }
2380
2381 struct intel_pt_synth {
2382         struct perf_tool dummy_tool;
2383         struct perf_session *session;
2384 };
2385
2386 static int intel_pt_event_synth(struct perf_tool *tool,
2387                                 union perf_event *event,
2388                                 struct perf_sample *sample __maybe_unused,
2389                                 struct machine *machine __maybe_unused)
2390 {
2391         struct intel_pt_synth *intel_pt_synth =
2392                         container_of(tool, struct intel_pt_synth, dummy_tool);
2393
2394         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
2395                                                  NULL);
2396 }
2397
2398 static int intel_pt_synth_event(struct perf_session *session, const char *name,
2399                                 struct perf_event_attr *attr, u64 id)
2400 {
2401         struct intel_pt_synth intel_pt_synth;
2402         int err;
2403
2404         pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2405                  name, id, (u64)attr->sample_type);
2406
2407         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
2408         intel_pt_synth.session = session;
2409
2410         err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
2411                                           &id, intel_pt_event_synth);
2412         if (err)
2413                 pr_err("%s: failed to synthesize '%s' event type\n",
2414                        __func__, name);
2415
2416         return err;
2417 }
2418
2419 static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
2420                                     const char *name)
2421 {
2422         struct perf_evsel *evsel;
2423
2424         evlist__for_each_entry(evlist, evsel) {
2425                 if (evsel->id && evsel->id[0] == id) {
2426                         if (evsel->name)
2427                                 zfree(&evsel->name);
2428                         evsel->name = strdup(name);
2429                         break;
2430                 }
2431         }
2432 }
2433
2434 static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
2435                                          struct perf_evlist *evlist)
2436 {
2437         struct perf_evsel *evsel;
2438
2439         evlist__for_each_entry(evlist, evsel) {
2440                 if (evsel->attr.type == pt->pmu_type && evsel->ids)
2441                         return evsel;
2442         }
2443
2444         return NULL;
2445 }
2446
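/*
 * Create attributes for the events that will be synthesized from the trace
 * (branches, instructions, transactions, ptwrite and power events), basing
 * them on the Intel PT evsel and allocating ids starting 1000000000 above its
 * first id.
 */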
2447 static int intel_pt_synth_events(struct intel_pt *pt,
2448                                  struct perf_session *session)
2449 {
2450         struct perf_evlist *evlist = session->evlist;
2451         struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
2452         struct perf_event_attr attr;
2453         u64 id;
2454         int err;
2455
2456         if (!evsel) {
2457                 pr_debug("There are no selected events with Intel Processor Trace data\n");
2458                 return 0;
2459         }
2460
2461         memset(&attr, 0, sizeof(struct perf_event_attr));
2462         attr.size = sizeof(struct perf_event_attr);
2463         attr.type = PERF_TYPE_HARDWARE;
2464         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
2465         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
2466                             PERF_SAMPLE_PERIOD;
2467         if (pt->timeless_decoding)
2468                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
2469         else
2470                 attr.sample_type |= PERF_SAMPLE_TIME;
2471         if (!pt->per_cpu_mmaps)
2472                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
2473         attr.exclude_user = evsel->attr.exclude_user;
2474         attr.exclude_kernel = evsel->attr.exclude_kernel;
2475         attr.exclude_hv = evsel->attr.exclude_hv;
2476         attr.exclude_host = evsel->attr.exclude_host;
2477         attr.exclude_guest = evsel->attr.exclude_guest;
2478         attr.sample_id_all = evsel->attr.sample_id_all;
2479         attr.read_format = evsel->attr.read_format;
2480
2481         id = evsel->id[0] + 1000000000;
2482         if (!id)
2483                 id = 1;
2484
2485         if (pt->synth_opts.branches) {
2486                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2487                 attr.sample_period = 1;
2488                 attr.sample_type |= PERF_SAMPLE_ADDR;
2489                 err = intel_pt_synth_event(session, "branches", &attr, id);
2490                 if (err)
2491                         return err;
2492                 pt->sample_branches = true;
2493                 pt->branches_sample_type = attr.sample_type;
2494                 pt->branches_id = id;
2495                 id += 1;
2496                 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
2497         }
2498
2499         if (pt->synth_opts.callchain)
2500                 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2501         if (pt->synth_opts.last_branch)
2502                 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2503
2504         if (pt->synth_opts.instructions) {
2505                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2506                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
2507                         attr.sample_period =
2508                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
2509                 else
2510                         attr.sample_period = pt->synth_opts.period;
2511                 err = intel_pt_synth_event(session, "instructions", &attr, id);
2512                 if (err)
2513                         return err;
2514                 pt->sample_instructions = true;
2515                 pt->instructions_sample_type = attr.sample_type;
2516                 pt->instructions_id = id;
2517                 id += 1;
2518         }
2519
2520         attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
2521         attr.sample_period = 1;
2522
2523         if (pt->synth_opts.transactions) {
2524                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2525                 err = intel_pt_synth_event(session, "transactions", &attr, id);
2526                 if (err)
2527                         return err;
2528                 pt->sample_transactions = true;
2529                 pt->transactions_sample_type = attr.sample_type;
2530                 pt->transactions_id = id;
2531                 intel_pt_set_event_name(evlist, id, "transactions");
2532                 id += 1;
2533         }
2534
2535         attr.type = PERF_TYPE_SYNTH;
2536         attr.sample_type |= PERF_SAMPLE_RAW;
2537
2538         if (pt->synth_opts.ptwrites) {
2539                 attr.config = PERF_SYNTH_INTEL_PTWRITE;
2540                 err = intel_pt_synth_event(session, "ptwrite", &attr, id);
2541                 if (err)
2542                         return err;
2543                 pt->sample_ptwrites = true;
2544                 pt->ptwrites_sample_type = attr.sample_type;
2545                 pt->ptwrites_id = id;
2546                 intel_pt_set_event_name(evlist, id, "ptwrite");
2547                 id += 1;
2548         }
2549
2550         if (pt->synth_opts.pwr_events) {
2551                 pt->sample_pwr_events = true;
2552                 pt->pwr_events_sample_type = attr.sample_type;
2553
2554                 attr.config = PERF_SYNTH_INTEL_CBR;
2555                 err = intel_pt_synth_event(session, "cbr", &attr, id);
2556                 if (err)
2557                         return err;
2558                 pt->cbr_id = id;
2559                 intel_pt_set_event_name(evlist, id, "cbr");
2560                 id += 1;
2561         }
2562
2563         if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
2564                 attr.config = PERF_SYNTH_INTEL_MWAIT;
2565                 err = intel_pt_synth_event(session, "mwait", &attr, id);
2566                 if (err)
2567                         return err;
2568                 pt->mwait_id = id;
2569                 intel_pt_set_event_name(evlist, id, "mwait");
2570                 id += 1;
2571
2572                 attr.config = PERF_SYNTH_INTEL_PWRE;
2573                 err = intel_pt_synth_event(session, "pwre", &attr, id);
2574                 if (err)
2575                         return err;
2576                 pt->pwre_id = id;
2577                 intel_pt_set_event_name(evlist, id, "pwre");
2578                 id += 1;
2579
2580                 attr.config = PERF_SYNTH_INTEL_EXSTOP;
2581                 err = intel_pt_synth_event(session, "exstop", &attr, id);
2582                 if (err)
2583                         return err;
2584                 pt->exstop_id = id;
2585                 intel_pt_set_event_name(evlist, id, "exstop");
2586                 id += 1;
2587
2588                 attr.config = PERF_SYNTH_INTEL_PWRX;
2589                 err = intel_pt_synth_event(session, "pwrx", &attr, id);
2590                 if (err)
2591                         return err;
2592                 pt->pwrx_id = id;
2593                 intel_pt_set_event_name(evlist, id, "pwrx");
2594                 id += 1;
2595         }
2596
2597         return 0;
2598 }
2599
2600 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2601 {
2602         struct perf_evsel *evsel;
2603
2604         evlist__for_each_entry_reverse(evlist, evsel) {
2605                 const char *name = perf_evsel__name(evsel);
2606
2607                 if (!strcmp(name, "sched:sched_switch"))
2608                         return evsel;
2609         }
2610
2611         return NULL;
2612 }
2613
2614 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2615 {
2616         struct perf_evsel *evsel;
2617
2618         evlist__for_each_entry(evlist, evsel) {
2619                 if (evsel->attr.context_switch)
2620                         return true;
2621         }
2622
2623         return false;
2624 }
2625
2626 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2627 {
2628         struct intel_pt *pt = data;
2629
2630         if (!strcmp(var, "intel-pt.mispred-all"))
2631                 pt->mispred_all = perf_config_bool(var, value);
2632
2633         return 0;
2634 }
2635
2636 /* Find least TSC which converts to ns or later */
2637 static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
2638 {
2639         u64 tsc, tm;
2640
2641         tsc = perf_time_to_tsc(ns, &pt->tc);
2642
2643         while (1) {
2644                 tm = tsc_to_perf_time(tsc, &pt->tc);
2645                 if (tm < ns)
2646                         break;
2647                 tsc -= 1;
2648         }
2649
2650         while (tm < ns)
2651                 tm = tsc_to_perf_time(++tsc, &pt->tc);
2652
2653         return tsc;
2654 }
2655
2656 /* Find greatest TSC which converts to ns or earlier */
2657 static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
2658 {
2659         u64 tsc, tm;
2660
2661         tsc = perf_time_to_tsc(ns, &pt->tc);
2662
2663         while (1) {
2664                 tm = tsc_to_perf_time(tsc, &pt->tc);
2665                 if (tm > ns)
2666                         break;
2667                 tsc += 1;
2668         }
2669
2670         while (tm > ns)
2671                 tm = tsc_to_perf_time(--tsc, &pt->tc);
2672
2673         return tsc;
2674 }
2675
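/*
 * Convert the selected perf-time ranges into TSC ranges, rounding each start
 * up and each end down so that a TSC range never extends beyond the requested
 * perf-time range.
 */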
2676 static int intel_pt_setup_time_ranges(struct intel_pt *pt,
2677                                       struct itrace_synth_opts *opts)
2678 {
2679         struct perf_time_interval *p = opts->ptime_range;
2680         int n = opts->range_num;
2681         int i;
2682
2683         if (!n || !p || pt->timeless_decoding)
2684                 return 0;
2685
2686         pt->time_ranges = calloc(n, sizeof(struct range));
2687         if (!pt->time_ranges)
2688                 return -ENOMEM;
2689
2690         pt->range_cnt = n;
2691
2692         intel_pt_log("%s: %u range(s)\n", __func__, n);
2693
2694         for (i = 0; i < n; i++) {
2695                 struct range *r = &pt->time_ranges[i];
2696                 u64 ts = p[i].start;
2697                 u64 te = p[i].end;
2698
2699                 /*
2700                  * Take care to ensure the TSC range matches the perf-time range
2701                  * when converted back to perf-time.
2702                  */
2703                 r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
2704                 r->end   = te ? intel_pt_tsc_end(te, pt) : 0;
2705
2706                 intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
2707                              i, ts, te);
2708                 intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
2709                              i, r->start, r->end);
2710         }
2711
2712         return 0;
2713 }
2714
2715 static const char * const intel_pt_info_fmts[] = {
2716         [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2717         [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2718         [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2719         [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2720         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2721         [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2722         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2723         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2724         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2725         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2726         [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2727         [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2728         [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2729         [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2730         [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2731         [INTEL_PT_FILTER_STR_LEN]       = "  Filter string len.  %"PRIu64"\n",
2732 };
2733
2734 static void intel_pt_print_info(u64 *arr, int start, int finish)
2735 {
2736         int i;
2737
2738         if (!dump_trace)
2739                 return;
2740
2741         for (i = start; i <= finish; i++)
2742                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2743 }
2744
2745 static void intel_pt_print_info_str(const char *name, const char *str)
2746 {
2747         if (!dump_trace)
2748                 return;
2749
2750         fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2751 }
2752
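/* Return true if the auxtrace_info event is large enough to contain priv[pos] */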
2753 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2754 {
2755         return auxtrace_info->header.size >=
2756                 sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2757 }
2758
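/*
 * Set up Intel PT decoding for the session from the PERF_RECORD_AUXTRACE_INFO
 * event: copy the recorded parameters into struct intel_pt, initialize the
 * queues, resolve sched_switch handling, apply the itrace options and
 * synthesize the requested event attributes.
 */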
2759 int intel_pt_process_auxtrace_info(union perf_event *event,
2760                                    struct perf_session *session)
2761 {
2762         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2763         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2764         struct intel_pt *pt;
2765         void *info_end;
2766         u64 *info;
2767         int err;
2768
2769         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2770                                         min_sz)
2771                 return -EINVAL;
2772
2773         pt = zalloc(sizeof(struct intel_pt));
2774         if (!pt)
2775                 return -ENOMEM;
2776
2777         addr_filters__init(&pt->filts);
2778
2779         err = perf_config(intel_pt_perf_config, pt);
2780         if (err)
2781                 goto err_free;
2782
2783         err = auxtrace_queues__init(&pt->queues);
2784         if (err)
2785                 goto err_free;
2786
2787         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2788
2789         pt->session = session;
2790         pt->machine = &session->machines.host; /* No kvm support */
2791         pt->auxtrace_type = auxtrace_info->type;
2792         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2793         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2794         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2795         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2796         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2797         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2798         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2799         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2800         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2801         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2802         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2803                             INTEL_PT_PER_CPU_MMAPS);
2804
2805         if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2806                 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2807                 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2808                 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2809                 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2810                 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2811                 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2812                                     INTEL_PT_CYC_BIT);
2813         }
2814
2815         if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2816                 pt->max_non_turbo_ratio =
2817                         auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2818                 intel_pt_print_info(&auxtrace_info->priv[0],
2819                                     INTEL_PT_MAX_NONTURBO_RATIO,
2820                                     INTEL_PT_MAX_NONTURBO_RATIO);
2821         }
2822
2823         info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2824         info_end = (void *)info + auxtrace_info->header.size;
2825
2826         if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2827                 size_t len;
2828
2829                 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2830                 intel_pt_print_info(&auxtrace_info->priv[0],
2831                                     INTEL_PT_FILTER_STR_LEN,
2832                                     INTEL_PT_FILTER_STR_LEN);
2833                 if (len) {
2834                         const char *filter = (const char *)info;
2835
2836                         len = roundup(len + 1, 8);
2837                         info += len >> 3;
2838                         if ((void *)info > info_end) {
2839                                 pr_err("%s: bad filter string length\n", __func__);
2840                                 err = -EINVAL;
2841                                 goto err_free_queues;
2842                         }
2843                         pt->filter = memdup(filter, len);
2844                         if (!pt->filter) {
2845                                 err = -ENOMEM;
2846                                 goto err_free_queues;
2847                         }
2848                         if (session->header.needs_swap)
2849                                 mem_bswap_64(pt->filter, len);
2850                         if (pt->filter[len - 1]) {
2851                                 pr_err("%s: filter string not null terminated\n", __func__);
2852                                 err = -EINVAL;
2853                                 goto err_free_queues;
2854                         }
2855                         err = addr_filters__parse_bare_filter(&pt->filts,
2856                                                               filter);
2857                         if (err)
2858                                 goto err_free_queues;
2859                 }
2860                 intel_pt_print_info_str("Filter string", pt->filter);
2861         }
2862
2863         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2864         if (pt->timeless_decoding && !pt->tc.time_mult)
2865                 pt->tc.time_mult = 1;
2866         pt->have_tsc = intel_pt_have_tsc(pt);
2867         pt->sampling_mode = false;
2868         pt->est_tsc = !pt->timeless_decoding;
2869
2870         pt->unknown_thread = thread__new(999999999, 999999999);
2871         if (!pt->unknown_thread) {
2872                 err = -ENOMEM;
2873                 goto err_free_queues;
2874         }
2875
2876         /*
2877          * Since this thread will not be kept in any rbtree nor in a
2878          * list, initialize its list node so that at thread__put() the
2879          * current thread lifetime assumption is kept and we don't segfault
2880          * at list_del_init().
2881          */
2882         INIT_LIST_HEAD(&pt->unknown_thread->node);
2883
2884         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2885         if (err)
2886                 goto err_delete_thread;
2887         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2888                 err = -ENOMEM;
2889                 goto err_delete_thread;
2890         }
2891
2892         pt->auxtrace.process_event = intel_pt_process_event;
2893         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2894         pt->auxtrace.flush_events = intel_pt_flush;
2895         pt->auxtrace.free_events = intel_pt_free_events;
2896         pt->auxtrace.free = intel_pt_free;
2897         session->auxtrace = &pt->auxtrace;
2898
2899         if (dump_trace)
2900                 return 0;
2901
2902         if (pt->have_sched_switch == 1) {
2903                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2904                 if (!pt->switch_evsel) {
2905                         pr_err("%s: missing sched_switch event\n", __func__);
2906                         err = -EINVAL;
2907                         goto err_delete_thread;
2908                 }
2909         } else if (pt->have_sched_switch == 2 &&
2910                    !intel_pt_find_switch(session->evlist)) {
2911                 pr_err("%s: missing context_switch attribute flag\n", __func__);
2912                 err = -EINVAL;
2913                 goto err_delete_thread;
2914         }
2915
2916         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2917                 pt->synth_opts = *session->itrace_synth_opts;
2918         } else {
2919                 itrace_synth_opts__set_default(&pt->synth_opts,
2920                                 session->itrace_synth_opts->default_no_sample);
2921                 if (!session->itrace_synth_opts->default_no_sample &&
2922                     !session->itrace_synth_opts->inject) {
2923                         pt->synth_opts.branches = false;
2924                         pt->synth_opts.callchain = true;
2925                 }
2926                 if (session->itrace_synth_opts)
2927                         pt->synth_opts.thread_stack =
2928                                 session->itrace_synth_opts->thread_stack;
2929         }
2930
2931         if (pt->synth_opts.log)
2932                 intel_pt_log_enable();
2933
2934         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
2935         if (pt->tc.time_mult) {
2936                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2937
2938                 if (!pt->max_non_turbo_ratio)
2939                         pt->max_non_turbo_ratio =
2940                                         (tsc_freq + 50000000) / 100000000;
2941                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2942                 intel_pt_log("Maximum non-turbo ratio %u\n",
2943                              pt->max_non_turbo_ratio);
2944                 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
2945         }
2946
2947         if (session->itrace_synth_opts) {
2948                 err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
2949                 if (err)
2950                         goto err_delete_thread;
2951         }
2952
2953         if (pt->synth_opts.calls)
2954                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2955                                        PERF_IP_FLAG_TRACE_END;
2956         if (pt->synth_opts.returns)
2957                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
2958                                        PERF_IP_FLAG_TRACE_BEGIN;
2959
2960         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2961                 symbol_conf.use_callchain = true;
2962                 if (callchain_register_param(&callchain_param) < 0) {
2963                         symbol_conf.use_callchain = false;
2964                         pt->synth_opts.callchain = false;
2965                 }
2966         }
2967
2968         err = intel_pt_synth_events(pt, session);
2969         if (err)
2970                 goto err_delete_thread;
2971
2972         err = auxtrace_queues__process_index(&pt->queues, session);
2973         if (err)
2974                 goto err_delete_thread;
2975
2976         if (pt->queues.populated)
2977                 pt->data_queued = true;
2978
2979         if (pt->timeless_decoding)
2980                 pr_debug2("Intel PT decoding without timestamps\n");
2981
2982         return 0;
2983
2984 err_delete_thread:
2985         thread__zput(pt->unknown_thread);
2986 err_free_queues:
2987         intel_pt_log_disable();
2988         auxtrace_queues__free(&pt->queues);
2989         session->auxtrace = NULL;
2990 err_free:
2991         addr_filters__exit(&pt->filts);
2992         zfree(&pt->filter);
2993         zfree(&pt->time_ranges);
2994         free(pt);
2995         return err;
2996 }