#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "session.h"
#include "sort.h"
#include "util.h"

static int perf_session__open(struct perf_session *self, bool force)
{
        struct stat input_stat;

        if (!strcmp(self->filename, "-")) {
                self->fd_pipe = true;
                self->fd = STDIN_FILENO;

                if (perf_header__read(self, self->fd) < 0)
                        pr_err("incompatible file format");

                return 0;
        }

        self->fd = open(self->filename, O_RDONLY);
        if (self->fd < 0) {
                int err = errno;

                pr_err("failed to open %s: %s", self->filename, strerror(err));
                if (err == ENOENT && !strcmp(self->filename, "perf.data"))
                        pr_err(" (try 'perf record' first)");
                pr_err("\n");
                return -errno;
        }

        if (fstat(self->fd, &input_stat) < 0)
                goto out_close;

        if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
                pr_err("file %s not owned by current user or root\n",
                       self->filename);
                goto out_close;
        }

        if (!input_stat.st_size) {
                pr_info("zero-sized file (%s), nothing to do!\n",
                        self->filename);
                goto out_close;
        }

        if (perf_header__read(self, self->fd) < 0) {
                pr_err("incompatible file format");
                goto out_close;
        }

        self->size = input_stat.st_size;
        return 0;

out_close:
        close(self->fd);
        self->fd = -1;
        return -1;
}

void perf_session__update_sample_type(struct perf_session *self)
{
        self->sample_type = perf_header__sample_type(&self->header);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
        int ret = machine__create_kernel_maps(&self->host_machine);

        if (ret >= 0)
                ret = machines__create_guest_kernel_maps(&self->machines);
        return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
        machine__destroy_kernel_maps(&self->host_machine);
        machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
{
        size_t len = filename ? strlen(filename) + 1 : 0;
        struct perf_session *self = zalloc(sizeof(*self) + len);

        if (self == NULL)
                goto out;

        if (perf_header__init(&self->header) < 0)
                goto out_free;

        memcpy(self->filename, filename, len);
        self->threads = RB_ROOT;
        INIT_LIST_HEAD(&self->dead_threads);
        self->hists_tree = RB_ROOT;
        self->last_match = NULL;
        /*
         * On 64bit we can mmap the data file in one go. No need for tiny mmap
         * slices. On 32bit we use 32MB.
         */
#if BITS_PER_LONG == 64
        self->mmap_window = ULLONG_MAX;
#else
        self->mmap_window = 32 * 1024 * 1024ULL;
#endif
        self->machines = RB_ROOT;
        self->repipe = repipe;
        INIT_LIST_HEAD(&self->ordered_samples.samples);
        machine__init(&self->host_machine, "", HOST_KERNEL_ID);

        if (mode == O_RDONLY) {
                if (perf_session__open(self, force) < 0)
                        goto out_delete;
        } else if (mode == O_WRONLY) {
                /*
                 * In O_RDONLY mode this will be performed when reading the
                 * kernel MMAP event, in event__process_mmap().
                 */
                if (perf_session__create_kernel_maps(self) < 0)
                        goto out_delete;
        }

        perf_session__update_sample_type(self);
out:
        return self;
out_free:
        free(self);
        return NULL;
out_delete:
        perf_session__delete(self);
        return NULL;
}

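/*
 * Usage sketch (illustrative only, not part of this file): a typical
 * reader opens an existing perf.data file read-only and tears the
 * session down when done.
 *
 *      struct perf_session *s = perf_session__new("perf.data", O_RDONLY,
 *                                                  false, false);
 *      if (s == NULL)
 *              return -1;
 *      ...
 *      perf_session__delete(s);
 */
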
static void perf_session__delete_dead_threads(struct perf_session *self)
{
        struct thread *n, *t;

        list_for_each_entry_safe(t, n, &self->dead_threads, node) {
                list_del(&t->node);
                thread__delete(t);
        }
}

static void perf_session__delete_threads(struct perf_session *self)
{
        struct rb_node *nd = rb_first(&self->threads);

        while (nd) {
                struct thread *t = rb_entry(nd, struct thread, rb_node);

                /* Fetch the successor before the node is erased. */
                nd = rb_next(nd);
                rb_erase(&t->rb_node, &self->threads);
                thread__delete(t);
        }
}

void perf_session__delete(struct perf_session *self)
{
        perf_header__exit(&self->header);
        perf_session__destroy_kernel_maps(self);
        perf_session__delete_dead_threads(self);
        perf_session__delete_threads(self);
        machine__exit(&self->host_machine);
        free(self);
}

void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
        self->last_match = NULL;
        rb_erase(&th->rb_node, &self->threads);
        /*
         * We may have references to this thread, for instance in some hist_entry
         * instances, so just move them to a separate list.
         */
        list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
        if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
                return true;

        return false;
}

struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
                                                   struct thread *thread,
                                                   struct ip_callchain *chain,
                                                   struct symbol **parent)
{
        u8 cpumode = PERF_RECORD_MISC_USER;
        unsigned int i;
        struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

        if (!syms)
                return NULL;

        for (i = 0; i < chain->nr; i++) {
                u64 ip = chain->ips[i];
                struct addr_location al;

                if (ip >= PERF_CONTEXT_MAX) {
                        /* Context markers switch the cpumode for later ips. */
                        switch (ip) {
                        case PERF_CONTEXT_HV:
                                cpumode = PERF_RECORD_MISC_HYPERVISOR;
                                break;
                        case PERF_CONTEXT_KERNEL:
                                cpumode = PERF_RECORD_MISC_KERNEL;
                                break;
                        case PERF_CONTEXT_USER:
                                cpumode = PERF_RECORD_MISC_USER;
                                break;
                        default:
                                break;
                        }
                        continue;
                }

                thread__find_addr_location(thread, self, cpumode,
                                MAP__FUNCTION, thread->pid, ip, &al, NULL);
                if (al.sym != NULL) {
                        if (sort__has_parent && !*parent &&
                            symbol__match_parent_regex(al.sym))
                                *parent = al.sym;
                        if (!symbol_conf.use_callchain)
                                break;
                        syms[i].map = al.map;
                        syms[i].sym = al.sym;
                }
        }

        return syms;
}

static int process_event_stub(event_t *event __used,
                              struct perf_session *session __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_finished_round_stub(event_t *event __used,
                                       struct perf_session *session __used,
                                       struct perf_event_ops *ops __used)
{
        dump_printf(": unhandled!\n");
        return 0;
}

static int process_finished_round(event_t *event,
                                  struct perf_session *session,
                                  struct perf_event_ops *ops);

static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
        if (handler->sample == NULL)
                handler->sample = process_event_stub;
        if (handler->mmap == NULL)
                handler->mmap = process_event_stub;
        if (handler->comm == NULL)
                handler->comm = process_event_stub;
        if (handler->fork == NULL)
                handler->fork = process_event_stub;
        if (handler->exit == NULL)
                handler->exit = process_event_stub;
        if (handler->lost == NULL)
                handler->lost = event__process_lost;
        if (handler->read == NULL)
                handler->read = process_event_stub;
        if (handler->throttle == NULL)
                handler->throttle = process_event_stub;
        if (handler->unthrottle == NULL)
                handler->unthrottle = process_event_stub;
        if (handler->attr == NULL)
                handler->attr = process_event_stub;
        if (handler->event_type == NULL)
                handler->event_type = process_event_stub;
        if (handler->tracing_data == NULL)
                handler->tracing_data = process_event_stub;
        if (handler->build_id == NULL)
                handler->build_id = process_event_stub;
        if (handler->finished_round == NULL) {
                if (handler->ordered_samples)
                        handler->finished_round = process_finished_round;
                else
                        handler->finished_round = process_finished_round_stub;
        }
}

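/*
 * Example (sketch): a tool that only cares about samples can fill in just
 * that handler and let perf_event_ops__fill_defaults() wire stubs into the
 * rest.  my_sample_handler is a hypothetical callback, not defined here.
 *
 *      static struct perf_event_ops ops = {
 *              .sample          = my_sample_handler,
 *              .ordered_samples = true,
 *      };
 */
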
void mem_bswap_64(void *src, int byte_size)
{
        u64 *m = src;

        while (byte_size > 0) {
                *m = bswap_64(*m);
                byte_size -= sizeof(u64);
                ++m;
        }
}

static void event__all64_swap(event_t *self)
{
        struct perf_event_header *hdr = &self->header;
        mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}

static void event__comm_swap(event_t *self)
{
        self->comm.pid = bswap_32(self->comm.pid);
        self->comm.tid = bswap_32(self->comm.tid);
}

static void event__mmap_swap(event_t *self)
{
        self->mmap.pid = bswap_32(self->mmap.pid);
        self->mmap.tid = bswap_32(self->mmap.tid);
        self->mmap.start = bswap_64(self->mmap.start);
        self->mmap.len = bswap_64(self->mmap.len);
        self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}

static void event__task_swap(event_t *self)
{
        self->fork.pid = bswap_32(self->fork.pid);
        self->fork.tid = bswap_32(self->fork.tid);
        self->fork.ppid = bswap_32(self->fork.ppid);
        self->fork.ptid = bswap_32(self->fork.ptid);
        self->fork.time = bswap_64(self->fork.time);
}

static void event__read_swap(event_t *self)
{
        self->read.pid = bswap_32(self->read.pid);
        self->read.tid = bswap_32(self->read.tid);
        self->read.value = bswap_64(self->read.value);
        self->read.time_enabled = bswap_64(self->read.time_enabled);
        self->read.time_running = bswap_64(self->read.time_running);
        self->read.id = bswap_64(self->read.id);
}

static void event__attr_swap(event_t *self)
{
        size_t size;

        self->attr.attr.type = bswap_32(self->attr.attr.type);
        self->attr.attr.size = bswap_32(self->attr.attr.size);
        self->attr.attr.config = bswap_64(self->attr.attr.config);
        self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period);
        self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type);
        self->attr.attr.read_format = bswap_64(self->attr.attr.read_format);
        self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events);
        self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type);
        self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr);
        self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len);

        /* The trailing ids are a variable-length array of u64s. */
        size = self->header.size;
        size -= (void *)&self->attr.id - (void *)self;
        mem_bswap_64(self->attr.id, size);
}

static void event__event_type_swap(event_t *self)
{
        self->event_type.event_type.event_id =
                bswap_64(self->event_type.event_type.event_id);
}

static void event__tracing_data_swap(event_t *self)
{
        self->tracing_data.size = bswap_32(self->tracing_data.size);
}

typedef void (*event__swap_op)(event_t *self);

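/*
 * Byte-swap dispatch, indexed by event header type.  NULL entries either
 * need no conversion beyond the header or are handled elsewhere.
 */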
static event__swap_op event__swap_ops[] = {
        [PERF_RECORD_MMAP] = event__mmap_swap,
        [PERF_RECORD_COMM] = event__comm_swap,
        [PERF_RECORD_FORK] = event__task_swap,
        [PERF_RECORD_EXIT] = event__task_swap,
        [PERF_RECORD_LOST] = event__all64_swap,
        [PERF_RECORD_READ] = event__read_swap,
        [PERF_RECORD_SAMPLE] = event__all64_swap,
        [PERF_RECORD_HEADER_ATTR] = event__attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
        [PERF_RECORD_HEADER_BUILD_ID] = NULL,
        [PERF_RECORD_HEADER_MAX] = NULL,
};

struct sample_queue {
        u64                     timestamp;
        event_t                 *event;
        struct list_head        list;
};

static void flush_sample_queue(struct perf_session *s,
                               struct perf_event_ops *ops)
{
        struct ordered_samples *os = &s->ordered_samples;
        struct list_head *head = &os->samples;
        struct sample_queue *tmp, *iter;
        u64 limit = os->next_flush;
        u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;

        if (!ops->ordered_samples || !limit)
                return;

        list_for_each_entry_safe(iter, tmp, head, list) {
                if (iter->timestamp > limit)
                        break;

                ops->sample(iter->event, s);

                os->last_flush = iter->timestamp;
                list_del(&iter->list);
                free(iter->event);
                free(iter);
        }

        /* Keep last_sample pointing into the remaining list. */
        if (list_empty(head)) {
                os->last_sample = NULL;
        } else if (last_ts <= limit) {
                os->last_sample =
                        list_entry(head->prev, struct sample_queue, list);
        }
}

/*
 * When perf record finishes a pass over every buffer, it records this
 * pseudo event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush all
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7  <--- max recorded
 *
 *      Flush all events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush all events below timestamp 7
 *      etc...
 */
static int process_finished_round(event_t *event __used,
                                  struct perf_session *session,
                                  struct perf_event_ops *ops)
{
        flush_sample_queue(session, ops);
        session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

        return 0;
}

/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
                                 struct perf_session *s)
{
        struct ordered_samples *os = &s->ordered_samples;
        struct sample_queue *sample = os->last_sample;
        u64 timestamp = new->timestamp;
        struct list_head *p;

        os->last_sample = new;

        if (!sample) {
                list_add(&new->list, &os->samples);
                os->max_timestamp = timestamp;
                return;
        }

        /*
         * last_sample might point to some random place in the list as it's
         * the last queued event. We expect that the new event is close to
         * it.
         */
        if (sample->timestamp <= timestamp) {
                while (sample->timestamp <= timestamp) {
                        p = sample->list.next;
                        if (p == &os->samples) {
                                list_add_tail(&new->list, &os->samples);
                                os->max_timestamp = timestamp;
                                return;
                        }
                        sample = list_entry(p, struct sample_queue, list);
                }
                list_add_tail(&new->list, &sample->list);
        } else {
                while (sample->timestamp > timestamp) {
                        p = sample->list.prev;
                        if (p == &os->samples) {
                                list_add(&new->list, &os->samples);
                                return;
                        }
                        sample = list_entry(p, struct sample_queue, list);
                }
                list_add(&new->list, &sample->list);
        }
}

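/*
 * Design note: insertion starts from the previously queued event instead of
 * from either end of the list.  Timestamps arrive nearly sorted, so the new
 * event usually lands within a few nodes of last_sample and the search stays
 * close to O(1); a badly out-of-order stream degrades to O(n) per event.
 */
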
static int queue_sample_event(event_t *event, struct sample_data *data,
                              struct perf_session *s)
{
        u64 timestamp = data->time;
        struct sample_queue *new;

        if (timestamp < s->ordered_samples.last_flush) {
                printf("Warning: Timestamp below last timeslice flush\n");
                return -EINVAL;
        }

        new = malloc(sizeof(*new));
        if (!new)
                return -ENOMEM;

        new->timestamp = timestamp;

        new->event = malloc(event->header.size);
        if (!new->event) {
                free(new);
                return -ENOMEM;
        }

        memcpy(new->event, event, event->header.size);

        __queue_sample_event(new, s);

        return 0;
}

static int perf_session__process_sample(event_t *event, struct perf_session *s,
                                        struct perf_event_ops *ops)
{
        struct sample_data data;

        if (!ops->ordered_samples)
                return ops->sample(event, s);

        bzero(&data, sizeof(struct sample_data));
        event__parse_sample(event, s->sample_type, &data);

        queue_sample_event(event, &data, s);

        return 0;
}

static int perf_session__process_event(struct perf_session *self,
                                       event_t *event,
                                       struct perf_event_ops *ops,
                                       u64 file_offset)
{
        if (event->header.type < PERF_RECORD_HEADER_MAX) {
                dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
                            file_offset, event->header.size,
                            event__name[event->header.type]);
                hists__inc_nr_events(&self->hists, event->header.type);
        }

        if (self->header.needs_swap && event__swap_ops[event->header.type])
                event__swap_ops[event->header.type](event);

        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
                return perf_session__process_sample(event, self, ops);
        case PERF_RECORD_MMAP:
                return ops->mmap(event, self);
        case PERF_RECORD_COMM:
                return ops->comm(event, self);
        case PERF_RECORD_FORK:
                return ops->fork(event, self);
        case PERF_RECORD_EXIT:
                return ops->exit(event, self);
        case PERF_RECORD_LOST:
                return ops->lost(event, self);
        case PERF_RECORD_READ:
                return ops->read(event, self);
        case PERF_RECORD_THROTTLE:
                return ops->throttle(event, self);
        case PERF_RECORD_UNTHROTTLE:
                return ops->unthrottle(event, self);
        case PERF_RECORD_HEADER_ATTR:
                return ops->attr(event, self);
        case PERF_RECORD_HEADER_EVENT_TYPE:
                return ops->event_type(event, self);
        case PERF_RECORD_HEADER_TRACING_DATA:
                /* setup for reading amidst mmap */
                lseek(self->fd, file_offset, SEEK_SET);
                return ops->tracing_data(event, self);
        case PERF_RECORD_HEADER_BUILD_ID:
                return ops->build_id(event, self);
        case PERF_RECORD_FINISHED_ROUND:
                return ops->finished_round(event, self, ops);
        default:
                ++self->hists.stats.nr_unknown_events;
                return -1;
        }
}

void perf_event_header__bswap(struct perf_event_header *self)
{
        self->type = bswap_32(self->type);
        self->misc = bswap_16(self->misc);
        self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
        struct thread *thread = perf_session__findnew(self, 0);

        if (thread == NULL || thread__set_comm(thread, "swapper")) {
                pr_err("problem inserting idle task.\n");
                thread = NULL;
        }

        return thread;
}

/*
 * Read exactly 'size' bytes, retrying on short reads.  Returns the number
 * of bytes read on success, or the failing read() result (0 on EOF,
 * negative on error).
 */
int do_read(int fd, void *buf, size_t size)
{
        void *buf_start = buf;

        while (size) {
                int ret = read(fd, buf, size);

                if (ret <= 0)
                        return ret;

                size -= ret;
                buf += ret;
        }

        return buf - buf_start;
}

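/*
 * Usage sketch: because do_read() hides short reads, callers can pull a
 * fixed-size structure from a pipe in one call, as the pipe reader below
 * does for event headers:
 *
 *      struct perf_event_header hdr;
 *
 *      if (do_read(fd, &hdr, sizeof(hdr)) <= 0)
 *              return -1;      // EOF or read error
 */
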
#define session_done()  (*(volatile int *)(&session_done))
volatile int session_done;

static int __perf_session__process_pipe_events(struct perf_session *self,
                                               struct perf_event_ops *ops)
{
        event_t event;
        uint32_t size;
        int skip = 0;
        u64 head;
        int err;
        void *p;

        perf_event_ops__fill_defaults(ops);

        head = 0;
more:
        err = do_read(self->fd, &event, sizeof(struct perf_event_header));
        if (err <= 0) {
                if (err == 0)
                        goto done;

                pr_err("failed to read event header\n");
                goto out_err;
        }

        if (self->header.needs_swap)
                perf_event_header__bswap(&event.header);

        size = event.header.size;
        if (size == 0)
                size = 8;

        p = &event;
        p += sizeof(struct perf_event_header);

        if (size - sizeof(struct perf_event_header)) {
                err = do_read(self->fd, p,
                              size - sizeof(struct perf_event_header));
                if (err <= 0) {
                        if (err == 0) {
                                pr_err("unexpected end of event stream\n");
                                goto done;
                        }

                        pr_err("failed to read event data\n");
                        goto out_err;
                }
        }

        if (size == 0 ||
            (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
                            head, event.header.size, event.header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
                 * increment a single u64 in the hope to catch on again 'soon'.
                 */
                if (unlikely(head & 7))
                        head &= ~7ULL;

                size = 8;
        }

        head += size;

        dump_printf("\n%#Lx [%#x]: event: %d\n",
                    head, event.header.size, event.header.type);

        if (skip > 0)
                head += skip;

        if (!session_done())
                goto more;
done:
        err = 0;
out_err:
        return err;
}

int __perf_session__process_events(struct perf_session *session,
                                   u64 data_offset, u64 data_size,
                                   u64 file_size, struct perf_event_ops *ops)
{
        u64 head, page_offset, file_offset, file_pos, progress_next;
        int err, mmap_prot, mmap_flags;
        struct ui_progress *progress;
        size_t page_size, mmap_size;
        event_t *event;
        uint32_t size;
        char *buf;

        perf_event_ops__fill_defaults(ops);

        page_size = sysconf(_SC_PAGESIZE);

        page_offset = page_size * (data_offset / page_size);
        file_offset = page_offset;
        head = data_offset - page_offset;

        if (data_offset + data_size < file_size)
                file_size = data_offset + data_size;

        progress_next = file_size / 16;
        progress = ui_progress__new("Processing events...", file_size);
        if (progress == NULL)
                return -1;

        mmap_size = session->mmap_window;
        if (mmap_size > file_size)
                mmap_size = file_size;

        mmap_prot = PROT_READ;
        mmap_flags = MAP_SHARED;

        /* In-place byte swapping needs a private, writable mapping. */
        if (session->header.needs_swap) {
                mmap_prot |= PROT_WRITE;
                mmap_flags = MAP_PRIVATE;
        }
remap:
        buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
                   file_offset);
        if (buf == MAP_FAILED) {
                pr_err("failed to mmap file\n");
                err = -errno;
                goto out_err;
        }
        file_pos = file_offset + head;

more:
        event = (event_t *)(buf + head);

        if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);
        size = event->header.size;
        if (size == 0)
                size = 8;

        if (head + event->header.size >= mmap_size) {
                int munmap_ret;

                munmap_ret = munmap(buf, mmap_size);
                assert(munmap_ret == 0);

                page_offset = page_size * (head / page_size);
                file_offset += page_offset;
                head -= page_offset;
                goto remap;
        }

        size = event->header.size;

        dump_printf("\n%#Lx [%#x]: event: %d\n",
                    file_pos, event->header.size, event->header.type);

        if (size == 0 ||
            perf_session__process_event(session, event, ops, file_pos) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
                            file_offset + head, event->header.size,
                            event->header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
                 * increment a single u64 in the hope to catch on again 'soon'.
                 */
                if (unlikely(head & 7))
                        head &= ~7ULL;

                size = 8;
        }

        head += size;
        file_pos += size;

        if (file_pos >= progress_next) {
                progress_next += file_size / 16;
                ui_progress__update(progress, file_pos);
        }

        if (file_pos < file_size)
                goto more;

        err = 0;
        /* do the final flush for ordered samples */
        session->ordered_samples.next_flush = ULLONG_MAX;
        flush_sample_queue(session, ops);
out_err:
        ui_progress__delete(progress);

        if (ops->lost == event__process_lost &&
            session->hists.stats.total_lost != 0) {
                ui__warning("Processed %Lu events and LOST %Lu!\n\n"
                            "Check IO/CPU overload!\n\n",
                            session->hists.stats.total_period,
                            session->hists.stats.total_lost);
        }

        if (session->hists.stats.nr_unknown_events != 0) {
                ui__warning("Found %u unknown events!\n\n"
                            "Is this an older tool processing a perf.data "
                            "file generated by a more recent tool?\n\n"
                            "If that is not the case, consider "
                            "reporting to linux-kernel@vger.kernel.org.\n\n",
                            session->hists.stats.nr_unknown_events);
        }

        return err;
}

int perf_session__process_events(struct perf_session *self,
                                 struct perf_event_ops *ops)
{
        int err;

        if (perf_session__register_idle_thread(self) == NULL)
                return -ENOMEM;

        if (!self->fd_pipe)
                err = __perf_session__process_events(self,
                                                     self->header.data_offset,
                                                     self->header.data_size,
                                                     self->size, ops);
        else
                err = __perf_session__process_pipe_events(self, ops);

        return err;
}

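/*
 * End-to-end sketch (hypothetical caller; my_sample_handler is an assumed
 * callback, not defined in this file):
 *
 *      static struct perf_event_ops ops = {
 *              .sample          = my_sample_handler,
 *              .ordered_samples = true,
 *      };
 *
 *      struct perf_session *s = perf_session__new("perf.data", O_RDONLY,
 *                                                  false, false);
 *      if (s != NULL) {
 *              perf_session__process_events(s, &ops);
 *              perf_session__delete(s);
 *      }
 */
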
bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
        if (!(self->sample_type & PERF_SAMPLE_RAW)) {
                pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
                return false;
        }

        return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
                                             const char *symbol_name,
                                             u64 addr)
{
        char *bracket;
        enum map_type i;
        struct ref_reloc_sym *ref;

        ref = zalloc(sizeof(struct ref_reloc_sym));
        if (ref == NULL)
                return -ENOMEM;

        ref->name = strdup(symbol_name);
        if (ref->name == NULL) {
                free(ref);
                return -ENOMEM;
        }

        /* Chop off everything from the first ']' on. */
        bracket = strchr(ref->name, ']');
        if (bracket)
                *bracket = '\0';

        ref->addr = addr;

        for (i = 0; i < MAP__NR_TYPES; ++i) {
                struct kmap *kmap = map__kmap(maps[i]);
                kmap->ref_reloc_sym = ref;
        }

        return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
        return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
               __dsos__fprintf(&self->host_machine.user_dsos, fp) +
               machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
                                          bool with_hits)
{
        size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);

        return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}