Merge branch 'msm-fix' of git://codeaurora.org/quic/kernel/davidb/linux-msm into...
[sfrench/cifs-2.6.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <errno.h>
33
34 enum write_mode_t {
35         WRITE_FORCE,
36         WRITE_APPEND
37 };
38
39 struct perf_record {
40         struct perf_tool        tool;
41         struct perf_record_opts opts;
42         u64                     bytes_written;
43         const char              *output_name;
44         struct perf_evlist      *evlist;
45         struct perf_session     *session;
46         const char              *progname;
47         int                     output;
48         unsigned int            page_size;
49         int                     realtime_prio;
50         enum write_mode_t       write_mode;
51         bool                    no_buildid;
52         bool                    no_buildid_cache;
53         bool                    force;
54         bool                    file_new;
55         bool                    append_file;
56         long                    samples;
57         off_t                   post_processing_offset;
58 };
59
60 static void advance_output(struct perf_record *rec, size_t size)
61 {
62         rec->bytes_written += size;
63 }
64
65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67         while (size) {
68                 int ret = write(rec->output, buf, size);
69
70                 if (ret < 0)
71                         die("failed to write");
72
73                 size -= ret;
74                 buf += ret;
75
76                 rec->bytes_written += ret;
77         }
78 }
79
80 static int process_synthesized_event(struct perf_tool *tool,
81                                      union perf_event *event,
82                                      struct perf_sample *sample __used,
83                                      struct machine *machine __used)
84 {
85         struct perf_record *rec = container_of(tool, struct perf_record, tool);
86         write_output(rec, event, event->header.size);
87         return 0;
88 }
89
90 static void perf_record__mmap_read(struct perf_record *rec,
91                                    struct perf_mmap *md)
92 {
93         unsigned int head = perf_mmap__read_head(md);
94         unsigned int old = md->prev;
95         unsigned char *data = md->base + rec->page_size;
96         unsigned long size;
97         void *buf;
98
99         if (old == head)
100                 return;
101
102         rec->samples++;
103
104         size = head - old;
105
106         if ((old & md->mask) + size != (head & md->mask)) {
107                 buf = &data[old & md->mask];
108                 size = md->mask + 1 - (old & md->mask);
109                 old += size;
110
111                 write_output(rec, buf, size);
112         }
113
114         buf = &data[old & md->mask];
115         size = head - old;
116         old += size;
117
118         write_output(rec, buf, size);
119
120         md->prev = old;
121         perf_mmap__write_tail(md, old);
122 }
123
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: remember which signal
 * arrived, note when the workload child exited, and ask the main loop
 * to wind down.
 */
static void sig_handler(int sig)
{
        switch (sig) {
        case SIGCHLD:
                child_finished = 1;
                break;
        default:
                break;
        }

        done = 1;
        signr = sig;
}
136
137 static void perf_record__sig_exit(int exit_status __used, void *arg)
138 {
139         struct perf_record *rec = arg;
140         int status;
141
142         if (rec->evlist->workload.pid > 0) {
143                 if (!child_finished)
144                         kill(rec->evlist->workload.pid, SIGTERM);
145
146                 wait(&status);
147                 if (WIFSIGNALED(status))
148                         psignal(WTERMSIG(status), rec->progname);
149         }
150
151         if (signr == -1 || signr == SIGUSR1)
152                 return;
153
154         signal(signr, SIG_DFL);
155         kill(getpid(), signr);
156 }
157
158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159                                struct perf_evlist *other)
160 {
161         struct perf_evsel *pos, *pair;
162
163         if (evlist->nr_entries != other->nr_entries)
164                 return false;
165
166         pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168         list_for_each_entry(pos, &evlist->entries, node) {
169                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170                         return false;
171                 pair = list_entry(pair->node.next, struct perf_evsel, node);
172         }
173
174         return true;
175 }
176
177 static void perf_record__open(struct perf_record *rec)
178 {
179         struct perf_evsel *pos, *first;
180         struct perf_evlist *evlist = rec->evlist;
181         struct perf_session *session = rec->session;
182         struct perf_record_opts *opts = &rec->opts;
183
184         first = list_entry(evlist->entries.next, struct perf_evsel, node);
185
186         perf_evlist__config_attrs(evlist, opts);
187
188         list_for_each_entry(pos, &evlist->entries, node) {
189                 struct perf_event_attr *attr = &pos->attr;
190                 struct xyarray *group_fd = NULL;
191                 /*
192                  * Check if parse_single_tracepoint_event has already asked for
193                  * PERF_SAMPLE_TIME.
194                  *
195                  * XXX this is kludgy but short term fix for problems introduced by
196                  * eac23d1c that broke 'perf script' by having different sample_types
197                  * when using multiple tracepoint events when we use a perf binary
198                  * that tries to use sample_id_all on an older kernel.
199                  *
200                  * We need to move counter creation to perf_session, support
201                  * different sample_types, etc.
202                  */
203                 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
204
205                 if (opts->group && pos != first)
206                         group_fd = first->fd;
207 retry_sample_id:
208                 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
209 try_again:
210                 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
211                                      opts->group, group_fd) < 0) {
212                         int err = errno;
213
214                         if (err == EPERM || err == EACCES) {
215                                 ui__error_paranoid();
216                                 exit(EXIT_FAILURE);
217                         } else if (err ==  ENODEV && opts->cpu_list) {
218                                 die("No such device - did you specify"
219                                         " an out-of-range profile CPU?\n");
220                         } else if (err == EINVAL && opts->sample_id_all_avail) {
221                                 /*
222                                  * Old kernel, no attr->sample_id_type_all field
223                                  */
224                                 opts->sample_id_all_avail = false;
225                                 if (!opts->sample_time && !opts->raw_samples && !time_needed)
226                                         attr->sample_type &= ~PERF_SAMPLE_TIME;
227
228                                 goto retry_sample_id;
229                         }
230
231                         /*
232                          * If it's cycles then fall back to hrtimer
233                          * based cpu-clock-tick sw counter, which
234                          * is always available even if no PMU support:
235                          */
236                         if (attr->type == PERF_TYPE_HARDWARE
237                                         && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
238
239                                 if (verbose)
240                                         ui__warning("The cycles event is not supported, "
241                                                     "trying to fall back to cpu-clock-ticks\n");
242                                 attr->type = PERF_TYPE_SOFTWARE;
243                                 attr->config = PERF_COUNT_SW_CPU_CLOCK;
244                                 goto try_again;
245                         }
246
247                         if (err == ENOENT) {
248                                 ui__warning("The %s event is not supported.\n",
249                                             event_name(pos));
250                                 exit(EXIT_FAILURE);
251                         }
252
253                         printf("\n");
254                         error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
255                               err, strerror(err));
256
257 #if defined(__i386__) || defined(__x86_64__)
258                         if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
259                                 die("No hardware sampling interrupt available."
260                                     " No APIC? If so then you can boot the kernel"
261                                     " with the \"lapic\" boot parameter to"
262                                     " force-enable it.\n");
263 #endif
264
265                         die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
266                 }
267         }
268
269         if (perf_evlist__set_filters(evlist)) {
270                 error("failed to set filter with %d (%s)\n", errno,
271                         strerror(errno));
272                 exit(-1);
273         }
274
275         if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
276                 if (errno == EPERM)
277                         die("Permission error mapping pages.\n"
278                             "Consider increasing "
279                             "/proc/sys/kernel/perf_event_mlock_kb,\n"
280                             "or try again with a smaller value of -m/--mmap_pages.\n"
281                             "(current value: %d)\n", opts->mmap_pages);
282                 else if (!is_power_of_2(opts->mmap_pages))
283                         die("--mmap_pages/-m value must be a power of two.");
284
285                 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
286         }
287
288         if (rec->file_new)
289                 session->evlist = evlist;
290         else {
291                 if (!perf_evlist__equal(session->evlist, evlist)) {
292                         fprintf(stderr, "incompatible append\n");
293                         exit(-1);
294                 }
295         }
296
297         perf_session__update_sample_type(session);
298 }
299
300 static int process_buildids(struct perf_record *rec)
301 {
302         u64 size = lseek(rec->output, 0, SEEK_CUR);
303
304         if (size == 0)
305                 return 0;
306
307         rec->session->fd = rec->output;
308         return __perf_session__process_events(rec->session, rec->post_processing_offset,
309                                               size - rec->post_processing_offset,
310                                               size, &build_id__mark_dso_hit_ops);
311 }
312
313 static void perf_record__exit(int status __used, void *arg)
314 {
315         struct perf_record *rec = arg;
316
317         if (!rec->opts.pipe_output) {
318                 rec->session->header.data_size += rec->bytes_written;
319
320                 if (!rec->no_buildid)
321                         process_buildids(rec);
322                 perf_session__write_header(rec->session, rec->evlist,
323                                            rec->output, true);
324                 perf_session__delete(rec->session);
325                 perf_evlist__delete(rec->evlist);
326                 symbol__exit();
327         }
328 }
329
330 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
331 {
332         int err;
333         struct perf_tool *tool = data;
334
335         if (machine__is_host(machine))
336                 return;
337
338         /*
339          *As for guest kernel when processing subcommand record&report,
340          *we arrange module mmap prior to guest kernel mmap and trigger
341          *a preload dso because default guest module symbols are loaded
342          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
343          *method is used to avoid symbol missing when the first addr is
344          *in module instead of in guest kernel.
345          */
346         err = perf_event__synthesize_modules(tool, process_synthesized_event,
347                                              machine);
348         if (err < 0)
349                 pr_err("Couldn't record guest kernel [%d]'s reference"
350                        " relocation symbol.\n", machine->pid);
351
352         /*
353          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
354          * have no _text sometimes.
355          */
356         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
357                                                  machine, "_text");
358         if (err < 0)
359                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
360                                                          machine, "_stext");
361         if (err < 0)
362                 pr_err("Couldn't record guest kernel [%d]'s reference"
363                        " relocation symbol.\n", machine->pid);
364 }
365
366 static struct perf_event_header finished_round_event = {
367         .size = sizeof(struct perf_event_header),
368         .type = PERF_RECORD_FINISHED_ROUND,
369 };
370
371 static void perf_record__mmap_read_all(struct perf_record *rec)
372 {
373         int i;
374
375         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
376                 if (rec->evlist->mmap[i].base)
377                         perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
378         }
379
380         if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
381                 write_output(rec, &finished_round_event, sizeof(finished_round_event));
382 }
383
384 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
385 {
386         struct stat st;
387         int flags;
388         int err, output;
389         unsigned long waking = 0;
390         const bool forks = argc > 0;
391         struct machine *machine;
392         struct perf_tool *tool = &rec->tool;
393         struct perf_record_opts *opts = &rec->opts;
394         struct perf_evlist *evsel_list = rec->evlist;
395         const char *output_name = rec->output_name;
396         struct perf_session *session;
397
398         rec->progname = argv[0];
399
400         rec->page_size = sysconf(_SC_PAGE_SIZE);
401
402         on_exit(perf_record__sig_exit, rec);
403         signal(SIGCHLD, sig_handler);
404         signal(SIGINT, sig_handler);
405         signal(SIGUSR1, sig_handler);
406
407         if (!output_name) {
408                 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
409                         opts->pipe_output = true;
410                 else
411                         rec->output_name = output_name = "perf.data";
412         }
413         if (output_name) {
414                 if (!strcmp(output_name, "-"))
415                         opts->pipe_output = true;
416                 else if (!stat(output_name, &st) && st.st_size) {
417                         if (rec->write_mode == WRITE_FORCE) {
418                                 char oldname[PATH_MAX];
419                                 snprintf(oldname, sizeof(oldname), "%s.old",
420                                          output_name);
421                                 unlink(oldname);
422                                 rename(output_name, oldname);
423                         }
424                 } else if (rec->write_mode == WRITE_APPEND) {
425                         rec->write_mode = WRITE_FORCE;
426                 }
427         }
428
429         flags = O_CREAT|O_RDWR;
430         if (rec->write_mode == WRITE_APPEND)
431                 rec->file_new = 0;
432         else
433                 flags |= O_TRUNC;
434
435         if (opts->pipe_output)
436                 output = STDOUT_FILENO;
437         else
438                 output = open(output_name, flags, S_IRUSR | S_IWUSR);
439         if (output < 0) {
440                 perror("failed to create output file");
441                 exit(-1);
442         }
443
444         rec->output = output;
445
446         session = perf_session__new(output_name, O_WRONLY,
447                                     rec->write_mode == WRITE_FORCE, false, NULL);
448         if (session == NULL) {
449                 pr_err("Not enough memory for reading perf file header\n");
450                 return -1;
451         }
452
453         rec->session = session;
454
455         if (!rec->no_buildid)
456                 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
457
458         if (!rec->file_new) {
459                 err = perf_session__read_header(session, output);
460                 if (err < 0)
461                         goto out_delete_session;
462         }
463
464         if (have_tracepoints(&evsel_list->entries))
465                 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
466
467         perf_header__set_feat(&session->header, HEADER_HOSTNAME);
468         perf_header__set_feat(&session->header, HEADER_OSRELEASE);
469         perf_header__set_feat(&session->header, HEADER_ARCH);
470         perf_header__set_feat(&session->header, HEADER_CPUDESC);
471         perf_header__set_feat(&session->header, HEADER_NRCPUS);
472         perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
473         perf_header__set_feat(&session->header, HEADER_CMDLINE);
474         perf_header__set_feat(&session->header, HEADER_VERSION);
475         perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
476         perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
477         perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
478         perf_header__set_feat(&session->header, HEADER_CPUID);
479
480         if (forks) {
481                 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
482                 if (err < 0) {
483                         pr_err("Couldn't run the workload!\n");
484                         goto out_delete_session;
485                 }
486         }
487
488         perf_record__open(rec);
489
490         /*
491          * perf_session__delete(session) will be called at perf_record__exit()
492          */
493         on_exit(perf_record__exit, rec);
494
495         if (opts->pipe_output) {
496                 err = perf_header__write_pipe(output);
497                 if (err < 0)
498                         return err;
499         } else if (rec->file_new) {
500                 err = perf_session__write_header(session, evsel_list,
501                                                  output, false);
502                 if (err < 0)
503                         return err;
504         }
505
506         if (!!rec->no_buildid
507             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
508                 pr_err("Couldn't generating buildids. "
509                        "Use --no-buildid to profile anyway.\n");
510                 return -1;
511         }
512
513         rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
514
515         machine = perf_session__find_host_machine(session);
516         if (!machine) {
517                 pr_err("Couldn't find native kernel information.\n");
518                 return -1;
519         }
520
521         if (opts->pipe_output) {
522                 err = perf_event__synthesize_attrs(tool, session,
523                                                    process_synthesized_event);
524                 if (err < 0) {
525                         pr_err("Couldn't synthesize attrs.\n");
526                         return err;
527                 }
528
529                 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
530                                                          machine);
531                 if (err < 0) {
532                         pr_err("Couldn't synthesize event_types.\n");
533                         return err;
534                 }
535
536                 if (have_tracepoints(&evsel_list->entries)) {
537                         /*
538                          * FIXME err <= 0 here actually means that
539                          * there were no tracepoints so its not really
540                          * an error, just that we don't need to
541                          * synthesize anything.  We really have to
542                          * return this more properly and also
543                          * propagate errors that now are calling die()
544                          */
545                         err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
546                                                                   process_synthesized_event);
547                         if (err <= 0) {
548                                 pr_err("Couldn't record tracing data.\n");
549                                 return err;
550                         }
551                         advance_output(rec, err);
552                 }
553         }
554
555         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
556                                                  machine, "_text");
557         if (err < 0)
558                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
559                                                          machine, "_stext");
560         if (err < 0)
561                 pr_err("Couldn't record kernel reference relocation symbol\n"
562                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
563                        "Check /proc/kallsyms permission or run as root.\n");
564
565         err = perf_event__synthesize_modules(tool, process_synthesized_event,
566                                              machine);
567         if (err < 0)
568                 pr_err("Couldn't record kernel module information.\n"
569                        "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
570                        "Check /proc/modules permission or run as root.\n");
571
572         if (perf_guest)
573                 perf_session__process_machines(session, tool,
574                                                perf_event__synthesize_guest_os);
575
576         if (!opts->system_wide)
577                 perf_event__synthesize_thread_map(tool, evsel_list->threads,
578                                                   process_synthesized_event,
579                                                   machine);
580         else
581                 perf_event__synthesize_threads(tool, process_synthesized_event,
582                                                machine);
583
584         if (rec->realtime_prio) {
585                 struct sched_param param;
586
587                 param.sched_priority = rec->realtime_prio;
588                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
589                         pr_err("Could not set realtime priority.\n");
590                         exit(-1);
591                 }
592         }
593
594         perf_evlist__enable(evsel_list);
595
596         /*
597          * Let the child rip
598          */
599         if (forks)
600                 perf_evlist__start_workload(evsel_list);
601
602         for (;;) {
603                 int hits = rec->samples;
604
605                 perf_record__mmap_read_all(rec);
606
607                 if (hits == rec->samples) {
608                         if (done)
609                                 break;
610                         err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
611                         waking++;
612                 }
613
614                 if (done)
615                         perf_evlist__disable(evsel_list);
616         }
617
618         if (quiet || signr == SIGUSR1)
619                 return 0;
620
621         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
622
623         /*
624          * Approximate RIP event size: 24 bytes.
625          */
626         fprintf(stderr,
627                 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
628                 (double)rec->bytes_written / 1024.0 / 1024.0,
629                 output_name,
630                 rec->bytes_written / 24);
631
632         return 0;
633
634 out_delete_session:
635         perf_session__delete(session);
636         return err;
637 }
638
639 static const char * const record_usage[] = {
640         "perf record [<options>] [<command>]",
641         "perf record [<options>] -- <command> [<options>]",
642         NULL
643 };
644
645 /*
646  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
647  * because we need to have access to it in perf_record__exit, that is called
648  * after cmd_record() exits, but since record_options need to be accessible to
649  * builtin-script, leave it here.
650  *
651  * At least we don't ouch it in all the other functions here directly.
652  *
653  * Just say no to tons of global variables, sigh.
654  */
655 static struct perf_record record = {
656         .opts = {
657                 .target_pid          = -1,
658                 .target_tid          = -1,
659                 .mmap_pages          = UINT_MAX,
660                 .user_freq           = UINT_MAX,
661                 .user_interval       = ULLONG_MAX,
662                 .freq                = 1000,
663                 .sample_id_all_avail = true,
664         },
665         .write_mode = WRITE_FORCE,
666         .file_new   = true,
667 };
668
669 /*
670  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
671  * with it and switch to use the library functions in perf_evlist that came
672  * from builtin-record.c, i.e. use perf_record_opts,
673  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
674  * using pipes, etc.
675  */
676 const struct option record_options[] = {
677         OPT_CALLBACK('e', "event", &record.evlist, "event",
678                      "event selector. use 'perf list' to list available events",
679                      parse_events_option),
680         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
681                      "event filter", parse_filter),
682         OPT_INTEGER('p', "pid", &record.opts.target_pid,
683                     "record events on existing process id"),
684         OPT_INTEGER('t', "tid", &record.opts.target_tid,
685                     "record events on existing thread id"),
686         OPT_INTEGER('r', "realtime", &record.realtime_prio,
687                     "collect data with this RT SCHED_FIFO priority"),
688         OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
689                     "collect data without buffering"),
690         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
691                     "collect raw sample records from all opened counters"),
692         OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
693                             "system-wide collection from all CPUs"),
694         OPT_BOOLEAN('A', "append", &record.append_file,
695                             "append to the output file to do incremental profiling"),
696         OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
697                     "list of cpus to monitor"),
698         OPT_BOOLEAN('f', "force", &record.force,
699                         "overwrite existing data file (deprecated)"),
700         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
701         OPT_STRING('o', "output", &record.output_name, "file",
702                     "output file name"),
703         OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
704                     "child tasks do not inherit counters"),
705         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
706         OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
707                      "number of mmap data pages"),
708         OPT_BOOLEAN(0, "group", &record.opts.group,
709                     "put the counters into a counter group"),
710         OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
711                     "do call-graph (stack chain/backtrace) recording"),
712         OPT_INCR('v', "verbose", &verbose,
713                     "be more verbose (show counter open errors, etc)"),
714         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
715         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
716                     "per thread counts"),
717         OPT_BOOLEAN('d', "data", &record.opts.sample_address,
718                     "Sample addresses"),
719         OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
720         OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
721         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
722                     "don't sample"),
723         OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
724                     "do not update the buildid cache"),
725         OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
726                     "do not collect buildids in perf.data"),
727         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
728                      "monitor event in cgroup name only",
729                      parse_cgroups),
730         OPT_END()
731 };
732
733 int cmd_record(int argc, const char **argv, const char *prefix __used)
734 {
735         int err = -ENOMEM;
736         struct perf_evsel *pos;
737         struct perf_evlist *evsel_list;
738         struct perf_record *rec = &record;
739
740         perf_header__set_cmdline(argc, argv);
741
742         evsel_list = perf_evlist__new(NULL, NULL);
743         if (evsel_list == NULL)
744                 return -ENOMEM;
745
746         rec->evlist = evsel_list;
747
748         argc = parse_options(argc, argv, record_options, record_usage,
749                             PARSE_OPT_STOP_AT_NON_OPTION);
750         if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
751                 !rec->opts.system_wide && !rec->opts.cpu_list)
752                 usage_with_options(record_usage, record_options);
753
754         if (rec->force && rec->append_file) {
755                 fprintf(stderr, "Can't overwrite and append at the same time."
756                                 " You need to choose between -f and -A");
757                 usage_with_options(record_usage, record_options);
758         } else if (rec->append_file) {
759                 rec->write_mode = WRITE_APPEND;
760         } else {
761                 rec->write_mode = WRITE_FORCE;
762         }
763
764         if (nr_cgroups && !rec->opts.system_wide) {
765                 fprintf(stderr, "cgroup monitoring only available in"
766                         " system-wide mode\n");
767                 usage_with_options(record_usage, record_options);
768         }
769
770         symbol__init();
771
772         if (symbol_conf.kptr_restrict)
773                 pr_warning(
774 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
775 "check /proc/sys/kernel/kptr_restrict.\n\n"
776 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
777 "file is not found in the buildid cache or in the vmlinux path.\n\n"
778 "Samples in kernel modules won't be resolved at all.\n\n"
779 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
780 "even with a suitable vmlinux or kallsyms file.\n\n");
781
782         if (rec->no_buildid_cache || rec->no_buildid)
783                 disable_buildid_cache();
784
785         if (evsel_list->nr_entries == 0 &&
786             perf_evlist__add_default(evsel_list) < 0) {
787                 pr_err("Not enough memory for event selector list\n");
788                 goto out_symbol_exit;
789         }
790
791         if (rec->opts.target_pid != -1)
792                 rec->opts.target_tid = rec->opts.target_pid;
793
794         if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
795                                      rec->opts.target_tid, rec->opts.cpu_list) < 0)
796                 usage_with_options(record_usage, record_options);
797
798         list_for_each_entry(pos, &evsel_list->entries, node) {
799                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
800                         goto out_free_fd;
801         }
802
803         if (rec->opts.user_interval != ULLONG_MAX)
804                 rec->opts.default_interval = rec->opts.user_interval;
805         if (rec->opts.user_freq != UINT_MAX)
806                 rec->opts.freq = rec->opts.user_freq;
807
808         /*
809          * User specified count overrides default frequency.
810          */
811         if (rec->opts.default_interval)
812                 rec->opts.freq = 0;
813         else if (rec->opts.freq) {
814                 rec->opts.default_interval = rec->opts.freq;
815         } else {
816                 fprintf(stderr, "frequency and count are zero, aborting\n");
817                 err = -EINVAL;
818                 goto out_free_fd;
819         }
820
821         err = __cmd_record(&record, argc, argv);
822 out_free_fd:
823         perf_evlist__delete_maps(evsel_list);
824 out_symbol_exit:
825         symbol__exit();
826         return err;
827 }