From 2190de2f59b3a371f7a5bf8dcc7a0c3f71723679 Mon Sep 17 00:00:00 2001 From: Thavidu Ranatunga Date: Mon, 5 Jul 2010 18:00:14 +1000 Subject: [PATCH 1/5] perf: Version String fix, using kernel version Changes the Perf --version string such that it shows the kernel version as suggested by Ingo as follows: That way the perf that comes with v2.6.34 will be: perf version v2.6.34 while interim versions will have the version of the interim kernel - for example: perf version v2.6.35-rc4-70-g39ef13a This functionality was already in the perf version generator file except that it was looking for a .git in the perf directory instead of the kernel directory. Signed-off-by: Thavidu Ranatunga Acked-by: Ian Munsie Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1278316815-6099-1-git-send-email-tharan@au1.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/util/PERF-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 49ece7921914..1b32e8c0253f 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -15,7 +15,7 @@ LF=' if test -f version then VN=$(cat version) || VN="$DEF_VER" -elif test -d .git -o -f .git && +elif test -d ../../.git -o -f ../../.git && VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; From 869599ceda4a035cdb3345c563b74cdeef10f790 Mon Sep 17 00:00:00 2001 From: Thavidu Ranatunga Date: Mon, 5 Jul 2010 18:00:15 +1000 Subject: [PATCH 2/5] perf: Version String fix, for fallback if not from git This gets rid of the default version fallback for Perf and changes it so that it returns the version of the kernel from its Makefile (if sources were not from git, i.e.
if it was downloaded from a tarball) Signed-off-by: Thavidu Ranatunga Acked-by: Ian Munsie Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1278316815-6099-2-git-send-email-tharan@au1.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/util/PERF-VERSION-GEN | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 1b32e8c0253f..97d76562a1a0 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -5,17 +5,13 @@ if [ $# -eq 1 ] ; then fi GVF=${OUTPUT}PERF-VERSION-FILE -DEF_VER=v0.0.2.PERF LF=' ' -# First see if there is a version file (included in release tarballs), -# then try git-describe, then default. -if test -f version -then - VN=$(cat version) || VN="$DEF_VER" -elif test -d ../../.git -o -f ../../.git && +# First check if there is a .git to get the version from git describe +# otherwise try to get the version from the kernel makefile +if test -d ../../.git -o -f ../../.git && VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; @@ -27,7 +23,12 @@ elif test -d ../../.git -o -f ../../.git && then VN=$(echo "$VN" | sed -e 's/-/./g'); else - VN="$DEF_VER" + eval `grep '^VERSION\s*=' ../../Makefile|tr -d ' '` + eval `grep '^PATCHLEVEL\s*=' ../../Makefile|tr -d ' '` + eval `grep '^SUBLEVEL\s*=' ../../Makefile|tr -d ' '` + eval `grep '^EXTRAVERSION\s*=' ../../Makefile|tr -d ' '` + + VN="${VERSION}.${PATCHLEVEL}.${SUBLEVEL}${EXTRAVERSION}" fi VN=$(expr "$VN" : v*'\(.*\)') From 97aa1052739c6a06cb6b0467dbf410613d20bc97 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Jul 2010 06:06:17 +0200 Subject: [PATCH 3/5] perf: Resurrect flat callchains Initialize the callchain radix tree root correctly. When we walk through the parents, we must stop after the root, but since it wasn't well initialized, its parent pointer was random. 
Also the number of hits was random because it was uninitialized, hence it was part of the callchain while the root doesn't contain anything. This fixes segfaults and percentages followed by empty callchains while running: perf report -g flat Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: 2.6.31.x-2.6.34.x --- tools/perf/util/callchain.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 1ca73e4a2723..22dbaec003d5 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -49,6 +49,9 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->brothers); INIT_LIST_HEAD(&node->children); INIT_LIST_HEAD(&node->val); + + node->parent = NULL; + node->hit = 0; } static inline u64 cumul_hits(struct callchain_node *node) From 108553e1f3c45a92d23681a378ad9e4c3230eebc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Jul 2010 03:41:46 +0200 Subject: [PATCH 4/5] perf: Sync callchains with period based hits Hists have their hits increased by the event period. And this period based counting is the foundation of all the stats in perf report. But callchains still use the raw number of hits, without taking the period into account. So when we compute the percentage, absolute based percentages are totally broken, and relative ones too in the first parent level, because we pass the number of events multiplied by their period as the total number of hits to the callchain filtering, while callchains expect this number to be the number of raw hits. perf report -g graph was simply not working, showing no graph unless the min percent was zero. And even there the percentage of the branches was always 0. And maybe fractal filtering was broken on the first branch level too. flat also was broken, but it was hidden because of other breakages.
Anyway fix this by counting using periods on callchains. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras --- tools/perf/builtin-report.c | 2 +- tools/perf/util/callchain.c | 35 ++++++++++++++++++----------------- tools/perf/util/callchain.h | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 359205782964..fd7407c7205c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -107,7 +107,7 @@ static int perf_session__add_hist_entry(struct perf_session *self, goto out_free_syms; err = 0; if (symbol_conf.use_callchain) { - err = append_chain(he->callchain, data->callchain, syms); + err = append_chain(he->callchain, data->callchain, syms, data->period); if (err) goto out_free_syms; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 62b69ad4aa73..52c777e451ed 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -230,7 +230,7 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) static void add_child(struct callchain_node *parent, struct resolved_chain *chain, - int start) + int start, u64 period) { struct callchain_node *new; @@ -238,7 +238,7 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain, fill_node(new, chain, start); new->children_hit = 0; - new->hit = 1; + new->hit = period; } /* @@ -248,7 +248,8 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain, */ static void split_add_child(struct callchain_node *parent, struct resolved_chain *chain, - struct callchain_list *to_split, int idx_parents, int idx_local) + struct callchain_list *to_split, int idx_parents, int idx_local, + u64 period) { struct callchain_node *new; struct list_head *old_tail; @@ -275,41 +276,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, /* create a new child for the new 
branch if any */ if (idx_total < chain->nr) { parent->hit = 0; - add_child(parent, chain, idx_total); - parent->children_hit++; + add_child(parent, chain, idx_total, period); + parent->children_hit += period; } else { - parent->hit = 1; + parent->hit = period; } } static int __append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start); + unsigned int start, u64 period); static void __append_chain_children(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start) + unsigned int start, u64 period) { struct callchain_node *rnode; /* lookup in childrens */ chain_for_each_child(rnode, root) { - unsigned int ret = __append_chain(rnode, chain, start); + unsigned int ret = __append_chain(rnode, chain, start, period); if (!ret) goto inc_children_hit; } /* nothing in children, add to the current node */ - add_child(root, chain, start); + add_child(root, chain, start, period); inc_children_hit: - root->children_hit++; + root->children_hit += period; } static int __append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start) + unsigned int start, u64 period) { struct callchain_list *cnode; unsigned int i = start; @@ -345,18 +346,18 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain, /* we match only a part of the node. 
Split it and add the new chain */ if (i - start < root->val_nr) { - split_add_child(root, chain, cnode, start, i - start); + split_add_child(root, chain, cnode, start, i - start, period); return 0; } /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { - root->hit++; + root->hit += period; return 0; } /* We match the node and still have a part remaining */ - __append_chain_children(root, chain, i); + __append_chain_children(root, chain, i, period); return 0; } @@ -380,7 +381,7 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new, int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms) + struct map_symbol *syms, u64 period) { struct resolved_chain *filtered; @@ -397,7 +398,7 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain, if (!filtered->nr) goto end; - __append_chain_children(root, filtered, 0); + __append_chain_children(root, filtered, 0, period); end: free(filtered); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 22dbaec003d5..f2e9ee164bd8 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -61,7 +61,7 @@ static inline u64 cumul_hits(struct callchain_node *node) int register_callchain_param(struct callchain_param *param); int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms); + struct map_symbol *syms, u64 period); bool ip_callchain__valid(struct ip_callchain *chain, event_t *event); #endif /* __PERF_CALLCHAIN_H */ From 44a54f787c0abcf75a2ed49b8ec8b2b512468f73 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 Jul 2010 15:41:44 -0400 Subject: [PATCH 5/5] tracing: Add alignment to syscall metadata declarations For some reason if we declare a static variable and then assign it later, and the assignment contains a __attribute__((__aligned__(#))), some versions of gcc will ignore it. 
This caused the syscall meta data to not be compact in its section and caused a kernel oops when the section was being read. The fix for these versions of gcc seems to be to add the aligned attribute to the declaration as well. This fixes the BZ regression: https://bugzilla.kernel.org/show_bug.cgi?id=16353 Reported-by: Zeev Tarantov Tested-by: Zeev Tarantov Acked-by: Frederic Weisbecker LKML-Reference: Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7f614ce274a9..13ebb5413a79 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -124,7 +124,8 @@ extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ @@ -138,7 +139,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ - static struct syscall_metadata __syscall_meta_##sname; \ + static struct syscall_metadata \ + __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call \ __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \