Linux 6.10-rc5
[sfrench/cifs-2.6.git] / tools / perf / util / record.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "debug.h"
3 #include "evlist.h"
4 #include "evsel.h"
5 #include "evsel_config.h"
6 #include "parse-events.h"
7 #include <errno.h>
8 #include <limits.h>
9 #include <stdlib.h>
10 #include <api/fs/fs.h>
11 #include <subcmd/parse-options.h>
12 #include <perf/cpumap.h>
13 #include "cloexec.h"
14 #include "util/perf_api_probe.h"
15 #include "record.h"
16 #include "../perf-sys.h"
17 #include "topdown.h"
18 #include "map_symbol.h"
19 #include "mem-events.h"
20
21 /*
22  * evsel__config_leader_sampling() uses special rules for leader sampling.
23  * However, if the leader is an AUX area event, then assume the event to sample
24  * is the next event.
25  */
26 static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
27 {
28         struct evsel *leader = evsel__leader(evsel);
29
30         if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
31             is_mem_loads_aux_event(leader)) {
32                 evlist__for_each_entry(evlist, evsel) {
33                         if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel))
34                                 return evsel;
35                 }
36         }
37
38         return leader;
39 }
40
41 static u64 evsel__config_term_mask(struct evsel *evsel)
42 {
43         struct evsel_config_term *term;
44         struct list_head *config_terms = &evsel->config_terms;
45         u64 term_types = 0;
46
47         list_for_each_entry(term, config_terms, list) {
48                 term_types |= 1 << term->type;
49         }
50         return term_types;
51 }
52
53 static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
54 {
55         struct perf_event_attr *attr = &evsel->core.attr;
56         struct evsel *leader = evsel__leader(evsel);
57         struct evsel *read_sampler;
58         u64 term_types, freq_mask;
59
60         if (!leader->sample_read)
61                 return;
62
63         read_sampler = evsel__read_sampler(evsel, evlist);
64
65         if (evsel == read_sampler)
66                 return;
67
68         term_types = evsel__config_term_mask(evsel);
69         /*
70          * Disable sampling for all group members except those with explicit
71          * config terms or the leader. In the case of an AUX area event, the 2nd
72          * event in the group is the one that 'leads' the sampling.
73          */
74         freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
75         if ((term_types & freq_mask) == 0) {
76                 attr->freq           = 0;
77                 attr->sample_freq    = 0;
78                 attr->sample_period  = 0;
79         }
80         if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
81                 attr->write_backward = 0;
82
83         /*
84          * We don't get a sample for slave events, we make them when delivering
85          * the group leader sample. Set the slave event to follow the master
86          * sample_type to ease up reporting.
87          * An AUX area event also has sample_type requirements, so also include
88          * the sample type bits from the leader's sample_type to cover that
89          * case.
90          */
91         attr->sample_type = read_sampler->core.attr.sample_type |
92                             leader->core.attr.sample_type;
93 }
94
95 void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
96 {
97         struct evsel *evsel;
98         bool use_sample_identifier = false;
99         bool use_comm_exec;
100         bool sample_id = opts->sample_id;
101
102         if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
103                 opts->no_inherit = true;
104
105         use_comm_exec = perf_can_comm_exec();
106
107         evlist__for_each_entry(evlist, evsel) {
108                 evsel__config(evsel, opts, callchain);
109                 if (evsel->tracking && use_comm_exec)
110                         evsel->core.attr.comm_exec = 1;
111         }
112
113         /* Configure leader sampling here now that the sample type is known */
114         evlist__for_each_entry(evlist, evsel)
115                 evsel__config_leader_sampling(evsel, evlist);
116
117         if (opts->full_auxtrace || opts->sample_identifier) {
118                 /*
119                  * Need to be able to synthesize and parse selected events with
120                  * arbitrary sample types, which requires always being able to
121                  * match the id.
122                  */
123                 use_sample_identifier = perf_can_sample_identifier();
124                 sample_id = true;
125         } else if (evlist->core.nr_entries > 1) {
126                 struct evsel *first = evlist__first(evlist);
127
128                 evlist__for_each_entry(evlist, evsel) {
129                         if (evsel->core.attr.sample_type == first->core.attr.sample_type)
130                                 continue;
131                         use_sample_identifier = perf_can_sample_identifier();
132                         break;
133                 }
134                 sample_id = true;
135         }
136
137         if (sample_id) {
138                 evlist__for_each_entry(evlist, evsel)
139                         evsel__set_sample_id(evsel, use_sample_identifier);
140         }
141
142         evlist__set_id_pos(evlist);
143 }
144
/*
 * Read the kernel/perf_event_max_sample_rate sysctl into @rate.
 * Returns whatever sysctl__read_int() returns; callers in this file treat a
 * non-zero result as failure.
 */
static int get_max_rate(unsigned int *rate)
{
	/* sysctl__read_int() works on a plain int, hence the cast. */
	int *value = (int *)rate;

	return sysctl__read_int("kernel/perf_event_max_sample_rate", value);
}
149
150 static int record_opts__config_freq(struct record_opts *opts)
151 {
152         bool user_freq = opts->user_freq != UINT_MAX;
153         bool user_interval = opts->user_interval != ULLONG_MAX;
154         unsigned int max_rate;
155
156         if (user_interval && user_freq) {
157                 pr_err("cannot set frequency and period at the same time\n");
158                 return -1;
159         }
160
161         if (user_interval)
162                 opts->default_interval = opts->user_interval;
163         if (user_freq)
164                 opts->freq = opts->user_freq;
165
166         /*
167          * User specified count overrides default frequency.
168          */
169         if (opts->default_interval)
170                 opts->freq = 0;
171         else if (opts->freq) {
172                 opts->default_interval = opts->freq;
173         } else {
174                 pr_err("frequency and count are zero, aborting\n");
175                 return -1;
176         }
177
178         if (get_max_rate(&max_rate))
179                 return 0;
180
181         /*
182          * User specified frequency is over current maximum.
183          */
184         if (user_freq && (max_rate < opts->freq)) {
185                 if (opts->strict_freq) {
186                         pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
187                                "       Please use -F freq option with a lower value or consider\n"
188                                "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
189                                max_rate);
190                         return -1;
191                 } else {
192                         pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
193                                    "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
194                                    "         The kernel will lower it when perf's interrupts take too long.\n"
195                                    "         Use --strict-freq to disable this throttling, refusing to record.\n",
196                                    max_rate, opts->freq, max_rate);
197
198                         opts->freq = max_rate;
199                 }
200         }
201
202         /*
203          * Default frequency is over current maximum.
204          */
205         if (max_rate < opts->freq) {
206                 pr_warning("Lowering default frequency rate from %u to %u.\n"
207                            "Please consider tweaking "
208                            "/proc/sys/kernel/perf_event_max_sample_rate.\n",
209                            opts->freq, max_rate);
210                 opts->freq = max_rate;
211         }
212
213         return 0;
214 }
215
/*
 * Validate and finalize @opts for recording. Currently this only resolves
 * the frequency/period settings; the result of that step is returned as is.
 */
int record_opts__config(struct record_opts *opts)
{
	int err = record_opts__config_freq(opts);

	return err;
}
220
221 bool evlist__can_select_event(struct evlist *evlist, const char *str)
222 {
223         struct evlist *temp_evlist;
224         struct evsel *evsel;
225         int err, fd;
226         struct perf_cpu cpu = { .cpu = 0 };
227         bool ret = false;
228         pid_t pid = -1;
229
230         temp_evlist = evlist__new();
231         if (!temp_evlist)
232                 return false;
233
234         err = parse_event(temp_evlist, str);
235         if (err)
236                 goto out_delete;
237
238         evsel = evlist__last(temp_evlist);
239
240         if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
241                 struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
242
243                 if (cpus)
244                         cpu =  perf_cpu_map__cpu(cpus, 0);
245
246                 perf_cpu_map__put(cpus);
247         } else {
248                 cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
249         }
250
251         while (1) {
252                 fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
253                                          perf_event_open_cloexec_flag());
254                 if (fd < 0) {
255                         if (pid == -1 && errno == EACCES) {
256                                 pid = 0;
257                                 continue;
258                         }
259                         goto out_delete;
260                 }
261                 break;
262         }
263         close(fd);
264         ret = true;
265
266 out_delete:
267         evlist__delete(temp_evlist);
268         return ret;
269 }
270
271 int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
272 {
273         unsigned int freq;
274         struct record_opts *opts = opt->value;
275
276         if (!str)
277                 return -EINVAL;
278
279         if (strcasecmp(str, "max") == 0) {
280                 if (get_max_rate(&freq)) {
281                         pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
282                         return -1;
283                 }
284                 pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
285         } else {
286                 freq = atoi(str);
287         }
288
289         opts->user_freq = freq;
290         return 0;
291 }