From fa48c892645dfd3159e5aa6eb9cefd00d5cb347a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 16:04:26 -0300 Subject: perf script: Fix --per-event-dump for auxtrace synth evsels When processing PERF_RECORD_AUXTRACE_INFO several perf_evsel entries will be synthesized and inserted into session->evlist, eventually ending in perf_script.tool.sample(), which ends up calling builtin-script.c's process_event(), that expects evsel->priv to be a perf_evsel_script object with a valid FILE pointer in fp. So we need to intercept the processing of PERF_RECORD_AUXTRACE_INFO and then setup evsel->priv for these newly created perf_evsel instances, do it to fix the segfault in process_event() trying to use a NULL for that FILE pointer. Reported-by: Alexander Shishkin Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Wang Nan Cc: yuzhoujian Fixes: a14390fde64e ("perf script: Allow creating per-event dump files") Link: http://lkml.kernel.org/n/tip-bthnur8r8de01gxvn2qayx6e@git.kernel.org [ Merge fix by Ravi Bangoria before pushing upstream to preserv bisectability ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 68f36dc0344f..9b43bda45a41 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { + /* + * Already setup? I.e. we may be called twice in cases like + * Intel PT, one for the intel_pt// and dummy events, then + * for the evsels syntheized from the auxtrace info. + * + * Ses perf_script__process_auxtrace_info. + */ + if (evsel->priv != NULL) + continue; + evsel->priv = perf_evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; @@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +#ifdef HAVE_AUXTRACE_SUPPORT +static int perf_script__process_auxtrace_info(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + int ret = perf_event__process_auxtrace_info(tool, event, session); + + if (ret == 0) { + struct perf_script *script = container_of(tool, struct perf_script, tool); + + ret = perf_script__setup_per_event_dump(script); + } + + return ret; +} +#else +#define perf_script__process_auxtrace_info 0 +#endif + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv) .feature = perf_event__process_feature, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, - .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace_info = perf_script__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, .auxtrace_error = perf_event__process_auxtrace_error, .stat = perf_event__process_stat_event, -- cgit v1.2.1 From 5039c8a28fa97b8dce7b363a5ecd4bee2b87bf03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:26 -0800 Subject: perf script: Allow printing period for non freq mode groups When using leader sampling the values of the not sampled but counted events are shown by perf script in "period". Currently printing period is only allowed when the main event has a period, that is it is in frequency mode. This implies that we cannot dump the values of counted events when the leader event is not in frequency mode. Just remove the check that the period must be set on all events. It will just be printed as 0 instead if it's not available. This fixes the following: $ perf record -c 100000 -e '{cycles,branches}:S' $ perf script -F event,period Further commentary by Jiri Olsa: The period will be the value of configured period, not 0: int perf_evsel__parse_sample(struct ... ... data->period = evsel->attr.sample_period; $ perf record -c 100000 $ perf script -F event,period | head -3 Failed to open /tmp/perf-2048.map, continuing without symbols 100000 cycles:ppp: 100000 cycles:ppp: other than that I think we can remove that check, because we will have always sane number in period Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9b43bda45a41..ee7c7aaaae72 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -423,11 +423,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; - if (PRINT_FIELD(PERIOD) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD", - PERF_OUTPUT_PERIOD)) - return -EINVAL; - if (PRINT_FIELD(IREGS) && perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) -- cgit v1.2.1 From 4bd1bef8bba2f99ff472ae3617864dda301f81bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:43:00 -0800 Subject: perf script: Allow computing 'perf stat' style metrics Add support for computing 'perf stat' style metrics in 'perf script'. When using leader sampling we can get metrics for each sampling period by computing formulas over the values of the different group members. This allows things like fine grained IPC tracking through sampling, much more fine grained than with 'perf stat'. The metric is still averaged over the sampling period, it is not just for the sampling point. This patch adds a new metric output field for 'perf script' that uses the existing 'perf stat' metrics infrastructure to compute any metrics supported by 'perf stat'. For example to sample IPC: $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1 $ perf script -F metric,ip,sym,time,cpu,comm ... alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: metric: 0.13 insn per cycle swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: metric: 0.23 insn per cycle qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: metric: 0.46 insn per cycle :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: metric: 0.45 insn per cycle TopDown: This requires disabling SMT if you have it enabled, because SMT would require sampling per core, which is not supported. $ perf record -e '{ref-cycles,topdown-fetch-bubbles,\ topdown-recovery-bubbles,\ topdown-slots-retired,topdown-total-slots,\ topdown-slots-issued}:S' -a sleep 1 $ perf script --header -I -F cpu,ip,sym,event,metric,period ... [000] 121108 ref-cycles: ffffffff8165222e copy_user_enhanced_fast_string [000] 190350 topdown-fetch-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 2055 topdown-recovery-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 148729 topdown-slots-retired: ffffffff8165222e copy_user_enhanced_fast_string [000] 144324 topdown-total-slots: ffffffff8165222e copy_user_enhanced_fast_string [000] 160852 topdown-slots-issued: ffffffff8165222e copy_user_enhanced_fast_string [000] metric: 33.0% frontend bound [000] metric: 3.5% bad speculation [000] metric: 25.8% retiring [000] metric: 37.7% backend bound [000] 112112 ref-cycles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 357222 topdown-fetch-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 3325 topdown-recovery-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 323553 topdown-slots-retired: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 270507 topdown-total-slots: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 341226 topdown-slots-issued: ffffffff8165aec8 _raw_spin_lock_irqsave [000] metric: 33.0% frontend bound [000] metric: 2.9% bad speculation [000] metric: 29.9% retiring [000] metric: 34.2% backend bound ... v2: Use evsel->priv for new fields Port to new base line, support fp output. Handle stats in ->stats, not ->priv Minor cleanups Extra explanation about the use of the term 'averaging', from Andi in the thread in the Link: tag below: The current samples contains the sum of event counts for a sampling period. EventA-1 EventA-2 EventA-3 EventA-4 EventB-1 EventB-2 EventC-3 gap with no events overflow |-----------------------------------------------------------------| period-start period-end ^ ^ | | previous sample current sample So EventA = 4 and EventB = 3 at the sample point I generate a metric, let's say EventA / EventB. It applies to the whole period. But the metric is over a longer time which does not have the same behavior. For example the gap above doesn't have any events, while they are clustered at the beginning and end of the sample period. But we're summing everything together. The metric doesn't know that the gap is different than the busy period. That's what I'm trying to express with averaging. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 97 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ee7c7aaaae72..39d8b55f0db3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -22,6 +22,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/color.h" #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" @@ -90,6 +91,7 @@ enum perf_output_field { PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, + PERF_OUTPUT_METRIC = 1U << 28, }; struct output_option { @@ -124,6 +126,7 @@ struct output_option { {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, + {.str = "metric", .field = PERF_OUTPUT_METRIC}, }; enum { @@ -215,12 +218,20 @@ struct perf_evsel_script { char *filename; FILE *fp; u64 samples; + /* For metric output */ + u64 val; + int gnum; }; +static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel) +{ + return (struct perf_evsel_script *)evsel->priv; +} + static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, struct perf_data *data) { - struct perf_evsel_script *es = malloc(sizeof(*es)); + struct perf_evsel_script *es = zalloc(sizeof(*es)); if (es != NULL) { if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) @@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel es->fp = fopen(es->filename, "w"); if (es->fp == NULL) goto out_free_filename; - es->samples = 0; } return es; @@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp) return fprintf(fp, "%-*s", maxlen, out); } +struct metric_ctx { + struct perf_sample *sample; + struct thread *thread; + struct perf_evsel *evsel; + FILE *fp; +}; + +static void script_print_metric(void *ctx, const char *color, + const char *fmt, + const char *unit, double val) +{ + struct metric_ctx *mctx = ctx; + + if (!fmt) + return; + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); + if (color) + color_fprintf(mctx->fp, color, fmt, val); + else + printf(fmt, val); + fprintf(mctx->fp, " %s\n", unit); +} + +static void script_new_line(void *ctx) +{ + struct metric_ctx *mctx = ctx; + + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); +} + +static void perf_sample__fprint_metric(struct perf_script *script, + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + FILE *fp) +{ + struct perf_stat_output_ctx ctx = { + .print_metric = script_print_metric, + .new_line = script_new_line, + .ctx = &(struct metric_ctx) { + .sample = sample, + .thread = thread, + .evsel = evsel, + .fp = fp, + }, + .force_header = false, + }; + struct perf_evsel *ev2; + static bool init; + u64 val; + + if (!init) { + perf_stat__init_shadow_stats(); + init = true; + } + if (!evsel->stats) + perf_evlist__alloc_stats(script->session->evlist, false); + if (evsel_script(evsel->leader)->gnum++ == 0) + perf_stat__reset_shadow_stats(); + val = sample->period * evsel->scale; + perf_stat__update_shadow_stats(evsel, + val, + sample->cpu); + evsel_script(evsel)->val = val; + if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { + for_each_group_member (ev2, evsel->leader) { + perf_stat__print_shadow_stats(ev2, + evsel_script(ev2)->val, + sample->cpu, + &ctx, + NULL); + } + evsel_script(evsel->leader)->gnum = 0; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); fprintf(fp, "\n"); + + if (PRINT_FIELD(METRIC)) + perf_sample__fprint_metric(script, thread, evsel, sample, fp); } static struct scripting_ops *scripting_ops; -- cgit v1.2.1 From 1fcd03946b52b8a57a6692fedd4406b45baedfe6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:04 +0800 Subject: perf stat: Update per-thread shadow stats The functions perf_stat__update_shadow_stats() is called to update the shadow stats on a set of static variables. But the static variables are the limitations to be extended to support per-thread shadow stats. This patch lets the perf_stat__update_shadow_stats() support to update the shadow stats on a input parameter 'st' and uses update_runtime_stat() to update the stats. It will not directly update the static variables as before. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..81b395040298 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, val, - sample->cpu); + sample->cpu, + &rt_stat); evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { -- cgit v1.2.1 From e0128b30dbfb2884530251b4accdffdbf55a6b72 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:05 +0800 Subject: perf stat: Print per-thread shadow stats The function perf_stat__print_shadow_stats() is called to print the shadow stats on a set of static variables. But the static variables are the limitations to support per-thread shadow stats. This patch lets the perf_stat__print_shadow_stats() support to print the shadow stats from a input parameter 'st'. It will not directly get value from static variable. Instead, it now uses runtime_stat_avg() and runtime_stat_n() to get and compute the values. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-6-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 81b395040298..fac6f053e4da 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1557,7 +1557,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(ev2)->val, sample->cpu, &ctx, - NULL); + NULL, + &rt_stat); } evsel_script(evsel->leader)->gnum = 0; } -- cgit v1.2.1 From 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf utils: Move is_directory() to path.h So that it can be used more widely, like in the next patch, when it will be used to fix a bug in 'perf test' handling of dirent.d_type == DT_UNKNOWN. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch, removed needless includes in path.h ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fac6f053e4da..77e47cf39f2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include #include @@ -2401,19 +2402,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ -- cgit v1.2.1 From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:46 +0800 Subject: perf script: Support time percent and multiple time ranges perf script has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf script --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 77e47cf39f2c..330dcd9b9b8f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } +#define PTIME_RANGE_MAX 10 + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1449,7 +1451,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; }; static int perf_evlist__max_name_len(struct perf_evlist *evlist) @@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool, struct perf_script *scr = container_of(tool, struct perf_script, tool); struct addr_location al; - if (perf_time__skip_sample(&scr->ptime, sample->time)) + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { return 0; + } if (debug_mode) { if (sample->time < last_timestamp) { @@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv) goto out_delete; /* needs to be parsed after looking up reference time */ - if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { - pr_err("Invalid time string\n"); - err = -EINVAL; - goto out_delete; + if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + err = -EINVAL; + goto out_delete; + } + + script.range_num = perf_time__percent_parse_str( + script.ptime_range, PTIME_RANGE_MAX, + script.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (script.range_num < 0) { + pr_err("Invalid time string\n"); + err = -EINVAL; + goto out_delete; + } + } else { + script.range_num = 1; } err = __cmd_script(&script); -- cgit v1.2.1 From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:52 +0100 Subject: perf script: Add support to display sample misc field Adding support to display sample misc field in form of letter for each bit: # perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... sched-messaging 1407 U 28690.636600: 325620 cycles ... sched-messaging 1414 K 28690.636608: 19473 cycles ... misc field __________/ The misc bits are assigned to following letters: PERF_RECORD_MISC_KERNEL K PERF_RECORD_MISC_USER U PERF_RECORD_MISC_HYPERVISOR H PERF_RECORD_MISC_GUEST_KERNEL G PERF_RECORD_MISC_GUEST_USER g PERF_RECORD_MISC_MMAP_DATA* M PERF_RECORD_MISC_COMM_EXEC E PERF_RECORD_MISC_SWITCH_OUT S Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 74 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 11 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 330dcd9b9b8f..bb603495cf4a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -93,6 +93,7 @@ enum perf_output_field { PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, PERF_OUTPUT_METRIC = 1U << 28, + PERF_OUTPUT_MISC = 1U << 29, }; struct output_option { @@ -128,6 +129,7 @@ struct output_option { {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, {.str = "metric", .field = PERF_OUTPUT_METRIC}, + {.str = "misc", .field = PERF_OUTPUT_MISC}, }; enum { @@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, static int perf_sample__fprintf_start(struct perf_sample *sample, struct thread *thread, - struct perf_evsel *evsel, FILE *fp) + struct perf_evsel *evsel, + u32 type, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; @@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "[%03d] ", sample->cpu); } + if (PRINT_FIELD(MISC)) { + int ret = 0; + + #define has(m) \ + (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m + + if (has(KERNEL)) + ret += fprintf(fp, "K"); + if (has(USER)) + ret += fprintf(fp, "U"); + if (has(HYPERVISOR)) + ret += fprintf(fp, "H"); + if (has(GUEST_KERNEL)) + ret += fprintf(fp, "G"); + if (has(GUEST_USER)) + ret += fprintf(fp, "g"); + + switch (type) { + case PERF_RECORD_MMAP: + case PERF_RECORD_MMAP2: + if (has(MMAP_DATA)) + ret += fprintf(fp, "M"); + break; + case PERF_RECORD_COMM: + if (has(COMM_EXEC)) + ret += fprintf(fp, "E"); + break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + if (has(SWITCH_OUT)) + ret += fprintf(fp, "S"); + default: + break; + } + + #undef has + + ret += fprintf(fp, "%*s", 6 - ret, " "); + printed += ret; + } + if (PRINT_FIELD(TIME)) { nsecs = sample->time; secs = nsecs / NSEC_PER_SEC; @@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color, if (!fmt) return; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) color_fprintf(mctx->fp, color, fmt, val); @@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx) struct metric_ctx *mctx = ctx; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, fp); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) fprintf(fp, "%10" PRIu64 " ", sample->period); @@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_COMM, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); perf_event__fprintf(event, stdout); thread__put(thread); @@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) @@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; -- cgit v1.2.1 From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:53 +0100 Subject: perf script: Add support to display lost events Adding option to display lost events: $ perf script --show-lost-events ... mplayer 13810 [002] 468011.402396: 100 cycles:ppp: ff.. mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880 mplayer 13810 [002] 468011.402397: 100 cycles:ppp: ff.. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org [ Use PRIu64 when printing u64 values, fixing the build in some arches ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb603495cf4a..c1cce474c0f1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_mmap_events; bool show_switch_events; bool show_namespace_events; + bool show_lost_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool, return 0; } +static int +process_lost_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct thread *thread; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) + return -1; + + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (script->show_lost_events) + script->tool.lost = process_lost_event; if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, + "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), -- cgit v1.2.1 From eabad8c6856f185f876b54c426c2cc69fe0f0a7d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 16:48:46 -0300 Subject: perf unwind: Do not look just at the global callchain_param.record_mode When setting up DWARF callchains on specific events, without using 'record' or 'trace' --call-graph, but instead doing it like: perf trace -e cycles/call-graph=dwarf/ The unwind__prepare_access() call in thread__insert_map() when we process PERF_RECORD_MMAP(2) metadata events were not being performed, precluding us from using per-event DWARF callchains, handling them just when we asked for all events to be DWARF, using "--call-graph dwarf". We do it in the PERF_RECORD_MMAP because we have to look at one of the executable maps to figure out the executable type (64-bit, 32-bit) of the DSO laid out in that mmap. Also to look at the architecture where the perf.data file was recorded. All this probably should be deferred to when we process a sample for some thread that has callchains, so that we do this processing only for the threads with samples, not for all of them. For now, fix using DWARF on specific events. Before: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms 0.000 probe_libc:inet_pton:(7fe9597bb350)) Problem processing probe_libc:inet_pton callchain, skipping... # After: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms 0.000 probe_libc:inet_pton:(7fd4aa930350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa804e51af3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa804e51b379] (/usr/bin/ping) # # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms 0.000 probe_libc:inet_pton:(7f9363b9e350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffa9e8a14e0f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffa9e8a14e1379] (/usr/bin/ping) # # perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms 0.000 probe_libc:inet_pton:(7f4947e1c350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa716d88ef3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa716d88f379] (/usr/bin/ping) # # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms 0.000 probe_libc:inet_pton:(7fa157696350)) __GI___inet_pton (/usr/lib64/libc-2.26.so) getaddrinfo (/usr/lib64/libc-2.26.so) [0xffffa9ba39c74f40] (/usr/bin/ping) # Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/r/20180116182650.GE16107@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c1cce474c0f1..08bc818f371b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script) if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { callchain_param.record_mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; -- cgit v1.2.1 From 1e2778e91616086177a255f3fc8c72ecaa564ae6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:27 +0800 Subject: perf script: Improve error msg when no first/last sample time found The following message will be returned to user when executing 'perf script --time' if perf data file doesn't contain the first/last sample time. "HINT: no first/last sample time found in perf data. Please use latest perf binary to execute 'perf record' (if '--buildid-all' is enabled, needs to set '--timestamp-boundary')." Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 08bc818f371b..ac781916e51e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3449,7 +3449,9 @@ int cmd_script(int argc, const char **argv) if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { - pr_err("No first/last sample time in perf data\n"); + pr_err("HINT: no first/last sample time found in perf data.\n" + "Please use latest perf binary to execute 'perf record'\n" + "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); err = -EINVAL; goto out_delete; } -- cgit v1.2.1 From cc2ef584a863b7c8033b78723cd253ca47e9a589 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:33 +0800 Subject: perf script: Remove the time slices number limitation Previously it was only allowed to use at most 10 time slices in 'perf script --time'. This patch removes this limitation. For example, following command line is OK (12 time slices) perf script --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12 Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-9-git-send-email-yao.jin@linux.intel.com [ No need to check for NULL to call free, use zfree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac781916e51e..3499d68e1d70 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } -#define PTIME_RANGE_MAX 10 - struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1496,7 +1494,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + struct perf_time_interval *ptime_range; + int range_size; int range_num; }; @@ -3445,6 +3444,13 @@ int cmd_script(int argc, const char **argv) if (err < 0) goto out_delete; + script.ptime_range = perf_time__range_alloc(script.time_str, + &script.range_size); + if (!script.ptime_range) { + err = -ENOMEM; + goto out_delete; + } + /* needs to be parsed after looking up reference time */ if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && @@ -3457,7 +3463,7 @@ int cmd_script(int argc, const char **argv) } script.range_num = perf_time__percent_parse_str( - script.ptime_range, PTIME_RANGE_MAX, + script.ptime_range, script.range_size, script.time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); @@ -3476,6 +3482,8 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: + zfree(&script.ptime_range); + perf_evlist__free_stats(session->evlist); perf_session__delete(session); -- cgit v1.2.1 From bafae98e7a95df74ce4529ae96251cb12c86fdf3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 16:42:16 -0300 Subject: perf evlist: Remove fcntl.h from evlist.h Not needed there, fixup the places where it is needed and was getting only by luck via evlist.h. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yxjpetn64z8vjuguu84gr6x6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3499d68e1d70..ab19a6ee4093 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "sane_ctype.h" -- cgit v1.2.1