perf diff: Introduce tool to show performance difference
I guess it is enough to show some examples: [root@doppio linux-2.6-tip]# rm -f perf.data* [root@doppio linux-2.6-tip]# ls -la perf.data* ls: cannot access perf.data*: No such file or directory [root@doppio linux-2.6-tip]# perf record -f find / > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.062 MB perf.data (~2699 samples) ] [root@doppio linux-2.6-tip]# ls -la perf.data* -rw------- 1 root root 74440 2009-12-14 20:03 perf.data [root@doppio linux-2.6-tip]# perf record -f find / > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.062 MB perf.data (~2692 samples) ] [root@doppio linux-2.6-tip]# ls -la perf.data* -rw------- 1 root root 74280 2009-12-14 20:03 perf.data -rw------- 1 root root 74440 2009-12-14 20:03 perf.data.old [root@doppio linux-2.6-tip]# perf diff | head -5 1 -34994580 /lib64/libc-2.10.1.so _IO_vfprintf_internal 2 -15307806 [kernel.kallsyms] __kmalloc 3 +1 +3665941 /lib64/libc-2.10.1.so __GI_memmove 4 +4 +23508995 /lib64/libc-2.10.1.so _int_malloc 5 +7 +38538813 [kernel.kallsyms] __d_lookup [root@doppio linux-2.6-tip]# perf diff -p | head -5 1 +1.00% /lib64/libc-2.10.1.so _IO_vfprintf_internal 2 [kernel.kallsyms] __kmalloc 3 +1 /lib64/libc-2.10.1.so __GI_memmove 4 +4 /lib64/libc-2.10.1.so _int_malloc 5 +7 -1.00% [kernel.kallsyms] __d_lookup [root@doppio linux-2.6-tip]# perf diff -v | head -5 1 361449551 326454971 -34994580 /lib64/libc-2.10.1.so _IO_vfprintf_internal 2 151009241 135701435 -15307806 [kernel.kallsyms] __kmalloc 3 +1 101805328 105471269 +3665941 /lib64/libc-2.10.1.so __GI_memmove 4 +4 78041440 101550435 +23508995 /lib64/libc-2.10.1.so _int_malloc 5 +7 59536172 98074985 +38538813 [kernel.kallsyms] __d_lookup [root@doppio linux-2.6-tip]# perf diff -vp | head -5 1 9.00% 8.00% +1.00% /lib64/libc-2.10.1.so _IO_vfprintf_internal 2 3.00% 3.00% [kernel.kallsyms] __kmalloc 3 +1 2.00% 2.00% /lib64/libc-2.10.1.so __GI_memmove 4 +4 2.00% 2.00% /lib64/libc-2.10.1.so _int_malloc 5 +7 1.00% 2.00% -1.00% [kernel.kallsyms] __d_lookup [root@doppio linux-2.6-tip]# This should be enough for diffs where the system is non volatile, i.e. when one doesn't updates binaries. For volatile environments, stay tuned for the next perf tool feature: a buildid cache populated by 'perf record', managed by 'perf buildid-cache' a-la ccache, and used by all the report tools. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Stephen Hemminger <shemminger@vyatta.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> LKML-Reference: <1260828571-3613-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
b38d34645c
commit
86a9eee047
7 changed files with 329 additions and 2 deletions
31
tools/perf/Documentation/perf-diff.txt
Normal file
31
tools/perf/Documentation/perf-diff.txt
Normal file
|
@ -0,0 +1,31 @@
|
|||
perf-diff(1)
|
||||
==============
|
||||
|
||||
NAME
|
||||
----
|
||||
perf-diff - Read perf.data (created by perf record) and display the profile
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf diff' [oldfile] [newfile]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
This command displays the performance difference among two perf.data files
|
||||
captured via perf record.
|
||||
|
||||
If no parameters are passed it will assume perf.data.old and perf.data.
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
-p::
|
||||
--percentage::
|
||||
Show percentages instead of raw counters
|
||||
-v::
|
||||
--verbose::
|
||||
Be verbose, for instance, show the raw counters in addition to the
|
||||
diff.
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1]
|
|
@ -427,6 +427,7 @@ BUILTIN_OBJS += bench/sched-messaging.o
|
|||
BUILTIN_OBJS += bench/sched-pipe.o
|
||||
BUILTIN_OBJS += bench/mem-memcpy.o
|
||||
|
||||
BUILTIN_OBJS += builtin-diff.o
|
||||
BUILTIN_OBJS += builtin-help.o
|
||||
BUILTIN_OBJS += builtin-sched.o
|
||||
BUILTIN_OBJS += builtin-buildid-list.o
|
||||
|
|
288
tools/perf/builtin-diff.c
Normal file
288
tools/perf/builtin-diff.c
Normal file
|
@ -0,0 +1,288 @@
|
|||
/*
|
||||
* builtin-diff.c
|
||||
*
|
||||
* Builtin diff command: Analyze two perf.data input files, look up and read
|
||||
* DSOs and symbol information, sort them and produce a diff.
|
||||
*/
|
||||
#include "builtin.h"
|
||||
|
||||
#include "util/debug.h"
|
||||
#include "util/event.h"
|
||||
#include "util/hist.h"
|
||||
#include "util/session.h"
|
||||
#include "util/sort.h"
|
||||
#include "util/symbol.h"
|
||||
#include "util/util.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
static char const *input_old = "perf.data.old",
|
||||
*input_new = "perf.data";
|
||||
static int force;
|
||||
static bool show_percent;
|
||||
|
||||
struct symbol_conf symbol_conf;
|
||||
|
||||
static int perf_session__add_hist_entry(struct perf_session *self,
|
||||
struct addr_location *al, u64 count)
|
||||
{
|
||||
bool hit;
|
||||
struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL,
|
||||
count, &hit);
|
||||
if (he == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (hit)
|
||||
he->count += count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int diff__process_sample_event(event_t *event, struct perf_session *session)
|
||||
{
|
||||
struct addr_location al;
|
||||
struct sample_data data = { .period = 1, };
|
||||
|
||||
dump_printf("(IP, %d): %d: %p\n", event->header.misc,
|
||||
event->ip.pid, (void *)(long)event->ip.ip);
|
||||
|
||||
if (event__preprocess_sample(event, session, &al, NULL) < 0) {
|
||||
pr_warning("problem processing %d event, skipping it.\n",
|
||||
event->header.type);
|
||||
return -1;
|
||||
}
|
||||
|
||||
event__parse_sample(event, session->sample_type, &data);
|
||||
|
||||
if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
|
||||
pr_warning("problem incrementing symbol count, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
session->events_stats.total += data.period;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct perf_event_ops event_ops = {
|
||||
.process_sample_event = diff__process_sample_event,
|
||||
.process_mmap_event = event__process_mmap,
|
||||
.process_comm_event = event__process_comm,
|
||||
.process_exit_event = event__process_task,
|
||||
.process_fork_event = event__process_task,
|
||||
.process_lost_event = event__process_lost,
|
||||
};
|
||||
|
||||
static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
|
||||
struct hist_entry *he)
|
||||
{
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct hist_entry *iter;
|
||||
|
||||
while (*p != NULL) {
|
||||
int cmp;
|
||||
parent = *p;
|
||||
iter = rb_entry(parent, struct hist_entry, rb_node);
|
||||
|
||||
cmp = strcmp(he->map->dso->name, iter->map->dso->name);
|
||||
if (cmp > 0)
|
||||
p = &(*p)->rb_left;
|
||||
else if (cmp < 0)
|
||||
p = &(*p)->rb_right;
|
||||
else {
|
||||
cmp = strcmp(he->sym->name, iter->sym->name);
|
||||
if (cmp > 0)
|
||||
p = &(*p)->rb_left;
|
||||
else
|
||||
p = &(*p)->rb_right;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&he->rb_node, parent, p);
|
||||
rb_insert_color(&he->rb_node, root);
|
||||
}
|
||||
|
||||
static void perf_session__resort_by_name(struct perf_session *self)
|
||||
{
|
||||
unsigned long position = 1;
|
||||
struct rb_root tmp = RB_ROOT;
|
||||
struct rb_node *next = rb_first(&self->hists);
|
||||
|
||||
while (next != NULL) {
|
||||
struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
|
||||
|
||||
next = rb_next(&n->rb_node);
|
||||
rb_erase(&n->rb_node, &self->hists);
|
||||
n->position = position++;
|
||||
perf_session__insert_hist_entry_by_name(&tmp, n);
|
||||
}
|
||||
|
||||
self->hists = tmp;
|
||||
}
|
||||
|
||||
static struct hist_entry *
|
||||
perf_session__find_hist_entry_by_name(struct perf_session *self,
|
||||
struct hist_entry *he)
|
||||
{
|
||||
struct rb_node *n = self->hists.rb_node;
|
||||
|
||||
while (n) {
|
||||
struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
|
||||
int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
|
||||
|
||||
if (cmp > 0)
|
||||
n = n->rb_left;
|
||||
else if (cmp < 0)
|
||||
n = n->rb_right;
|
||||
else {
|
||||
cmp = strcmp(he->sym->name, iter->sym->name);
|
||||
if (cmp > 0)
|
||||
n = n->rb_left;
|
||||
else if (cmp < 0)
|
||||
n = n->rb_right;
|
||||
else
|
||||
return iter;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void perf_session__match_hists(struct perf_session *old_session,
|
||||
struct perf_session *new_session)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
|
||||
perf_session__resort_by_name(old_session);
|
||||
|
||||
for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
|
||||
struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
|
||||
pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t hist_entry__fprintf_matched(struct hist_entry *self,
|
||||
unsigned long pos,
|
||||
struct perf_session *session,
|
||||
struct perf_session *pair_session,
|
||||
FILE *fp)
|
||||
{
|
||||
u64 old_count = 0;
|
||||
char displacement[16];
|
||||
size_t printed;
|
||||
|
||||
if (self->pair != NULL) {
|
||||
long pdiff = (long)self->pair->position - (long)pos;
|
||||
old_count = self->pair->count;
|
||||
if (pdiff == 0)
|
||||
goto blank;
|
||||
snprintf(displacement, sizeof(displacement), "%+4ld", pdiff);
|
||||
} else {
|
||||
blank: memset(displacement, ' ', sizeof(displacement));
|
||||
}
|
||||
|
||||
printed = fprintf(fp, "%4lu %5.5s ", pos, displacement);
|
||||
|
||||
if (show_percent) {
|
||||
double old_percent = (old_count * 100) / pair_session->events_stats.total,
|
||||
new_percent = (self->count * 100) / session->events_stats.total;
|
||||
double diff = old_percent - new_percent;
|
||||
|
||||
if (verbose)
|
||||
printed += fprintf(fp, " %3.2f%% %3.2f%%", old_percent, new_percent);
|
||||
|
||||
if ((u64)diff != 0)
|
||||
printed += fprintf(fp, " %+4.2F%%", diff);
|
||||
else
|
||||
printed += fprintf(fp, " ");
|
||||
} else {
|
||||
if (verbose)
|
||||
printed += fprintf(fp, " %9Lu %9Lu", old_count, self->count);
|
||||
printed += fprintf(fp, " %+9Ld", (s64)self->count - (s64)old_count);
|
||||
}
|
||||
|
||||
return printed + fprintf(fp, " %25.25s %s\n",
|
||||
self->map->dso->name, self->sym->name);
|
||||
}
|
||||
|
||||
static size_t perf_session__fprintf_matched_hists(struct perf_session *self,
|
||||
struct perf_session *pair,
|
||||
FILE *fp)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
size_t printed = 0;
|
||||
unsigned long pos = 1;
|
||||
|
||||
for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) {
|
||||
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
|
||||
printed += hist_entry__fprintf_matched(he, pos++, self, pair, fp);
|
||||
}
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
static int __cmd_diff(void)
|
||||
{
|
||||
int ret, i;
|
||||
struct perf_session *session[2];
|
||||
|
||||
session[0] = perf_session__new(input_old, O_RDONLY, force, &symbol_conf);
|
||||
session[1] = perf_session__new(input_new, O_RDONLY, force, &symbol_conf);
|
||||
if (session[0] == NULL || session[1] == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
ret = perf_session__process_events(session[i], &event_ops);
|
||||
if (ret)
|
||||
goto out_delete;
|
||||
perf_session__output_resort(session[i], session[i]->events_stats.total);
|
||||
}
|
||||
|
||||
perf_session__match_hists(session[0], session[1]);
|
||||
perf_session__fprintf_matched_hists(session[1], session[0], stdout);
|
||||
out_delete:
|
||||
for (i = 0; i < 2; ++i)
|
||||
perf_session__delete(session[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *const diff_usage[] = {
|
||||
"perf diff [<options>] [old_file] [new_file]",
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_BOOLEAN('v', "verbose", &verbose,
|
||||
"be more verbose (show symbol address, etc)"),
|
||||
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
|
||||
"dump raw trace in ASCII"),
|
||||
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
|
||||
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
|
||||
"load module symbols - WARNING: use only with -k and LIVE kernel"),
|
||||
OPT_BOOLEAN('p', "percentages", &show_percent,
|
||||
"Don't shorten the pathnames taking into account the cwd"),
|
||||
OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
|
||||
"Don't shorten the pathnames taking into account the cwd"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
int cmd_diff(int argc, const char **argv, const char *prefix __used)
|
||||
{
|
||||
if (symbol__init(&symbol_conf) < 0)
|
||||
return -1;
|
||||
|
||||
setup_sorting(diff_usage, options);
|
||||
|
||||
argc = parse_options(argc, argv, options, diff_usage, 0);
|
||||
if (argc) {
|
||||
if (argc > 2)
|
||||
usage_with_options(diff_usage, options);
|
||||
if (argc == 2) {
|
||||
input_old = argv[0];
|
||||
input_new = argv[1];
|
||||
} else
|
||||
input_new = argv[0];
|
||||
}
|
||||
|
||||
setup_pager();
|
||||
return __cmd_diff();
|
||||
}
|
|
@ -17,6 +17,7 @@ extern int check_pager_config(const char *cmd);
|
|||
extern int cmd_annotate(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_bench(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_diff(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_help(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_sched(int argc, const char **argv, const char *prefix);
|
||||
extern int cmd_list(int argc, const char **argv, const char *prefix);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
perf-annotate mainporcelain common
|
||||
perf-bench mainporcelain common
|
||||
perf-buildid-list mainporcelain common
|
||||
perf-diff mainporcelain common
|
||||
perf-list mainporcelain common
|
||||
perf-sched mainporcelain common
|
||||
perf-record mainporcelain common
|
||||
|
|
|
@ -286,6 +286,7 @@ static void handle_internal_command(int argc, const char **argv)
|
|||
const char *cmd = argv[0];
|
||||
static struct cmd_struct commands[] = {
|
||||
{ "buildid-list", cmd_buildid_list, 0 },
|
||||
{ "diff", cmd_diff, 0 },
|
||||
{ "help", cmd_help, 0 },
|
||||
{ "list", cmd_list, 0 },
|
||||
{ "record", cmd_record, 0 },
|
||||
|
|
|
@ -49,9 +49,13 @@ struct hist_entry {
|
|||
struct symbol *sym;
|
||||
u64 ip;
|
||||
char level;
|
||||
struct symbol *parent;
|
||||
struct symbol *parent;
|
||||
struct callchain_node callchain;
|
||||
struct rb_root sorted_chain;
|
||||
union {
|
||||
unsigned long position;
|
||||
struct hist_entry *pair;
|
||||
struct rb_root sorted_chain;
|
||||
};
|
||||
};
|
||||
|
||||
enum sort_type {
|
||||
|
|
Loading…
Reference in a new issue