mirror of
https://github.com/hardkernel/linux.git
synced 2026-03-26 12:30:23 +09:00
[ Upstream commit b63fd11cce ]

When using 'perf stat' with repeat and interval option, it shows wrong
values for events.

The wrong values will be shown for the first interval on the second and
subsequent repetitions.

Without the fix:

  # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5

     2.000282489                             53      faults
     2.000282489                            513      sched:sched_switch
     4.005478208                          3,721      faults
     4.005478208                          2,666      sched:sched_switch
     5.025470933                            395      faults
     5.025470933                          1,307      sched:sched_switch
     2.009602825 1,84,46,74,40,73,70,95,47,520      faults              <------
     2.009602825 1,84,46,74,40,73,70,95,49,568      sched:sched_switch  <------
     4.019612206                          4,730      faults
     4.019612206                          2,746      sched:sched_switch
     5.039615484                          3,953      faults
     5.039615484                          1,496      sched:sched_switch
     2.000274620 1,84,46,74,40,73,70,95,47,520      faults              <------
     2.000274620 1,84,46,74,40,73,70,95,47,520      sched:sched_switch  <------
     4.000480342                          4,282      faults
     4.000480342                          2,303      sched:sched_switch
     5.000916811                          1,322      faults
     5.000916811                          1,064      sched:sched_switch
  #

prev_raw_counts is allocated when using intervals. This is used when
calculating the difference in the counts of events when using interval.

The current counts are stored in prev_raw_counts to calculate the
differences in the next iteration.

On the first interval of the second and subsequent repetitions,
prev_raw_counts would be the values stored in the last interval of the
previous repetitions, while the current counts will only be for the
first interval of the current repetition.

Hence there is a possibility of events showing up as a big number.

Fix this by resetting prev_raw_counts whenever perf stat repeats the
command.
With the fix:

  # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5

     2.019349347              2,597      faults
     2.019349347              2,753      sched:sched_switch
     4.019577372              3,098      faults
     4.019577372              2,532      sched:sched_switch
     5.019415481              1,879      faults
     5.019415481              1,356      sched:sched_switch
     2.000178813              8,468      faults
     2.000178813              2,254      sched:sched_switch
     4.000404621              7,440      faults
     4.000404621              1,266      sched:sched_switch
     5.040196079              2,458      faults
     5.040196079                556      sched:sched_switch
     2.000191939              6,870      faults
     2.000191939              1,170      sched:sched_switch
     4.000414103                541      faults
     4.000414103                902      sched:sched_switch
     5.000809863                450      faults
     5.000809863                364      sched:sched_switch
  #

Committer notes:

This was broken since the cset introducing the --interval feature, i.e.
--repeat + --interval wasn't tested at that point, add the Fixes tag so
that automatic scripts can pick this up.

Fixes: 13370a9b5b ("perf stat: Add interval printing")
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: stable@vger.kernel.org # v3.9+
Link: http://lore.kernel.org/lkml/20190904094738.9558-2-srikar@linux.vnet.ibm.com
[ Fixed up conflicts with libperf, i.e. some perf_{evsel,evlist} lost the 'perf' prefix ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
439 lines
9.9 KiB
C
439 lines
9.9 KiB
C
#include <math.h>
|
|
#include "stat.h"
|
|
#include "evlist.h"
|
|
#include "evsel.h"
|
|
#include "thread_map.h"
|
|
|
|
/*
 * Fold one more sample into the running statistics.
 *
 * Uses a Welford-style incremental update: the sample count is bumped,
 * the mean is moved towards @val, and M2 accumulates the product of the
 * deviation from the old mean and the deviation from the new mean.
 * The observed maximum and minimum are tracked as well.
 */
void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n += 1;

	/* Deviation from the mean *before* this sample is folded in. */
	delta = val - stats->mean;
	stats->mean += delta / stats->n;

	/* Second factor uses the mean *after* the update above. */
	stats->M2 += delta * (val - stats->mean);

	if (stats->max < val)
		stats->max = val;

	if (stats->min > val)
		stats->min = val;
}
|
|
|
|
double avg_stats(struct stats *stats)
|
|
{
|
|
return stats->mean;
|
|
}
|
|
|
|
/*
|
|
* http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
|
*
|
|
* (\Sum n_i^2) - ((\Sum n_i)^2)/n
|
|
* s^2 = -------------------------------
|
|
* n - 1
|
|
*
|
|
* http://en.wikipedia.org/wiki/Stddev
|
|
*
|
|
* The std dev of the mean is related to the std dev by:
|
|
*
|
|
* s
|
|
* s_mean = -------
|
|
* sqrt(n)
|
|
*
|
|
*/
|
|
double stddev_stats(struct stats *stats)
|
|
{
|
|
double variance, variance_mean;
|
|
|
|
if (stats->n < 2)
|
|
return 0.0;
|
|
|
|
variance = stats->M2 / (stats->n - 1);
|
|
variance_mean = variance / stats->n;
|
|
|
|
return sqrt(variance_mean);
|
|
}
|
|
|
|
/*
 * Express @stddev as a percentage of @avg.
 *
 * A zero average yields 0.0 rather than dividing by zero.
 */
double rel_stddev_stats(double stddev, double avg)
{
	if (!avg)
		return 0.0;

	return 100.0 * stddev / avg;
}
|
|
|
|
bool __perf_evsel_stat__is(struct perf_evsel *evsel,
|
|
enum perf_stat_evsel_id id)
|
|
{
|
|
struct perf_stat_evsel *ps = evsel->priv;
|
|
|
|
return ps->id == id;
|
|
}
|
|
|
|
#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
|
|
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
|
|
ID(NONE, x),
|
|
ID(CYCLES_IN_TX, cpu/cycles-t/),
|
|
ID(TRANSACTION_START, cpu/tx-start/),
|
|
ID(ELISION_START, cpu/el-start/),
|
|
ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
|
|
ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
|
|
ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
|
|
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
|
|
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
|
|
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
|
|
};
|
|
#undef ID
|
|
|
|
void perf_stat_evsel_id_init(struct perf_evsel *evsel)
|
|
{
|
|
struct perf_stat_evsel *ps = evsel->priv;
|
|
int i;
|
|
|
|
/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
|
|
|
|
for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
|
|
if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
|
|
ps->id = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
|
|
{
|
|
int i;
|
|
struct perf_stat_evsel *ps = evsel->priv;
|
|
|
|
for (i = 0; i < 3; i++)
|
|
init_stats(&ps->res_stats[i]);
|
|
|
|
perf_stat_evsel_id_init(evsel);
|
|
}
|
|
|
|
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
|
|
{
|
|
evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
|
|
if (evsel->priv == NULL)
|
|
return -ENOMEM;
|
|
perf_evsel__reset_stat_priv(evsel);
|
|
return 0;
|
|
}
|
|
|
|
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
|
|
{
|
|
zfree(&evsel->priv);
|
|
}
|
|
|
|
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
|
|
int ncpus, int nthreads)
|
|
{
|
|
struct perf_counts *counts;
|
|
|
|
counts = perf_counts__new(ncpus, nthreads);
|
|
if (counts)
|
|
evsel->prev_raw_counts = counts;
|
|
|
|
return counts ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
|
|
{
|
|
perf_counts__delete(evsel->prev_raw_counts);
|
|
evsel->prev_raw_counts = NULL;
|
|
}
|
|
|
|
static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel)
|
|
{
|
|
if (evsel->prev_raw_counts) {
|
|
evsel->prev_raw_counts->aggr.val = 0;
|
|
evsel->prev_raw_counts->aggr.ena = 0;
|
|
evsel->prev_raw_counts->aggr.run = 0;
|
|
}
|
|
}
|
|
|
|
static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
|
|
{
|
|
int ncpus = perf_evsel__nr_cpus(evsel);
|
|
int nthreads = thread_map__nr(evsel->threads);
|
|
|
|
if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
|
|
perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
|
|
(alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
|
|
evlist__for_each_entry(evlist, evsel) {
|
|
if (perf_evsel__alloc_stats(evsel, alloc_raw))
|
|
goto out_free;
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_free:
|
|
perf_evlist__free_stats(evlist);
|
|
return -1;
|
|
}
|
|
|
|
void perf_evlist__free_stats(struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
|
|
evlist__for_each_entry(evlist, evsel) {
|
|
perf_evsel__free_stat_priv(evsel);
|
|
perf_evsel__free_counts(evsel);
|
|
perf_evsel__free_prev_raw_counts(evsel);
|
|
}
|
|
}
|
|
|
|
void perf_evlist__reset_stats(struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
|
|
evlist__for_each_entry(evlist, evsel) {
|
|
perf_evsel__reset_stat_priv(evsel);
|
|
perf_evsel__reset_counts(evsel);
|
|
}
|
|
}
|
|
|
|
void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
|
|
evlist__for_each_entry(evlist, evsel)
|
|
perf_evsel__reset_prev_raw_counts(evsel);
|
|
}
|
|
|
|
static void zero_per_pkg(struct perf_evsel *counter)
|
|
{
|
|
if (counter->per_pkg_mask)
|
|
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
|
|
}
|
|
|
|
static int check_per_pkg(struct perf_evsel *counter,
|
|
struct perf_counts_values *vals, int cpu, bool *skip)
|
|
{
|
|
unsigned long *mask = counter->per_pkg_mask;
|
|
struct cpu_map *cpus = perf_evsel__cpus(counter);
|
|
int s;
|
|
|
|
*skip = false;
|
|
|
|
if (!counter->per_pkg)
|
|
return 0;
|
|
|
|
if (cpu_map__empty(cpus))
|
|
return 0;
|
|
|
|
if (!mask) {
|
|
mask = zalloc(MAX_NR_CPUS);
|
|
if (!mask)
|
|
return -ENOMEM;
|
|
|
|
counter->per_pkg_mask = mask;
|
|
}
|
|
|
|
/*
|
|
* we do not consider an event that has not run as a good
|
|
* instance to mark a package as used (skip=1). Otherwise
|
|
* we may run into a situation where the first CPU in a package
|
|
* is not running anything, yet the second is, and this function
|
|
* would mark the package as used after the first CPU and would
|
|
* not read the values from the second CPU.
|
|
*/
|
|
if (!(vals->run && vals->ena))
|
|
return 0;
|
|
|
|
s = cpu_map__get_socket(cpus, cpu, NULL);
|
|
if (s < 0)
|
|
return -1;
|
|
|
|
*skip = test_and_set_bit(s, mask) == 1;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
|
|
int cpu, int thread,
|
|
struct perf_counts_values *count)
|
|
{
|
|
struct perf_counts_values *aggr = &evsel->counts->aggr;
|
|
static struct perf_counts_values zero;
|
|
bool skip = false;
|
|
|
|
if (check_per_pkg(evsel, count, cpu, &skip)) {
|
|
pr_err("failed to read per-pkg counter\n");
|
|
return -1;
|
|
}
|
|
|
|
if (skip)
|
|
count = &zero;
|
|
|
|
switch (config->aggr_mode) {
|
|
case AGGR_THREAD:
|
|
case AGGR_CORE:
|
|
case AGGR_SOCKET:
|
|
case AGGR_NONE:
|
|
if (!evsel->snapshot)
|
|
perf_evsel__compute_deltas(evsel, cpu, thread, count);
|
|
perf_counts_values__scale(count, config->scale, NULL);
|
|
if (config->aggr_mode == AGGR_NONE)
|
|
perf_stat__update_shadow_stats(evsel, count->values, cpu);
|
|
break;
|
|
case AGGR_GLOBAL:
|
|
aggr->val += count->val;
|
|
if (config->scale) {
|
|
aggr->ena += count->ena;
|
|
aggr->run += count->run;
|
|
}
|
|
case AGGR_UNSET:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int process_counter_maps(struct perf_stat_config *config,
|
|
struct perf_evsel *counter)
|
|
{
|
|
int nthreads = thread_map__nr(counter->threads);
|
|
int ncpus = perf_evsel__nr_cpus(counter);
|
|
int cpu, thread;
|
|
|
|
if (counter->system_wide)
|
|
nthreads = 1;
|
|
|
|
for (thread = 0; thread < nthreads; thread++) {
|
|
for (cpu = 0; cpu < ncpus; cpu++) {
|
|
if (process_counter_values(config, counter, cpu, thread,
|
|
perf_counts(counter->counts, cpu, thread)))
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int perf_stat_process_counter(struct perf_stat_config *config,
|
|
struct perf_evsel *counter)
|
|
{
|
|
struct perf_counts_values *aggr = &counter->counts->aggr;
|
|
struct perf_stat_evsel *ps = counter->priv;
|
|
u64 *count = counter->counts->aggr.values;
|
|
u64 val;
|
|
int i, ret;
|
|
|
|
aggr->val = aggr->ena = aggr->run = 0;
|
|
|
|
/*
|
|
* We calculate counter's data every interval,
|
|
* and the display code shows ps->res_stats
|
|
* avg value. We need to zero the stats for
|
|
* interval mode, otherwise overall avg running
|
|
* averages will be shown for each interval.
|
|
*/
|
|
if (config->interval)
|
|
init_stats(ps->res_stats);
|
|
|
|
if (counter->per_pkg)
|
|
zero_per_pkg(counter);
|
|
|
|
ret = process_counter_maps(config, counter);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (config->aggr_mode != AGGR_GLOBAL)
|
|
return 0;
|
|
|
|
if (!counter->snapshot)
|
|
perf_evsel__compute_deltas(counter, -1, -1, aggr);
|
|
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
|
|
|
|
for (i = 0; i < 3; i++)
|
|
update_stats(&ps->res_stats[i], count[i]);
|
|
|
|
if (verbose) {
|
|
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
|
|
perf_evsel__name(counter), count[0], count[1], count[2]);
|
|
}
|
|
|
|
/*
|
|
* Save the full runtime - to allow normalization during printout:
|
|
*/
|
|
val = counter->scale * *count;
|
|
perf_stat__update_shadow_stats(counter, &val, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Handle a stat event read from a session: look up the evsel the event
 * belongs to by id and store the event's val/ena/run into that evsel's
 * counts slot for the event's cpu and thread.  The evsel is also marked
 * as supported.
 *
 * Returns 0 on success, -EINVAL if no evsel matches the event id.
 */
int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
				   union perf_event *event,
				   struct perf_session *session)
{
	struct stat_event *st = &event->stat;
	struct perf_counts_values reading;
	struct perf_evsel *counter;

	reading.val = st->val;
	reading.ena = st->ena;
	reading.run = st->run;

	counter = perf_evlist__id2evsel(session->evlist, st->id);
	if (counter == NULL) {
		pr_err("Failed to resolve counter for stat event.\n");
		return -EINVAL;
	}

	*perf_counts(counter->counts, st->cpu, st->thread) = reading;
	counter->supported = true;

	return 0;
}
|
|
|
|
size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
|
|
{
|
|
struct stat_event *st = (struct stat_event *) event;
|
|
size_t ret;
|
|
|
|
ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
|
|
st->id, st->cpu, st->thread);
|
|
ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
|
|
st->val, st->ena, st->run);
|
|
|
|
return ret;
|
|
}
|
|
|
|
size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
|
|
{
|
|
struct stat_round_event *rd = (struct stat_round_event *)event;
|
|
size_t ret;
|
|
|
|
ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
|
|
rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
|
|
|
|
return ret;
|
|
}
|
|
|
|
size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
|
|
{
|
|
struct perf_stat_config sc;
|
|
size_t ret;
|
|
|
|
perf_event__read_stat_config(&sc, &event->stat_config);
|
|
|
|
ret = fprintf(fp, "\n");
|
|
ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
|
|
ret += fprintf(fp, "... scale %d\n", sc.scale);
|
|
ret += fprintf(fp, "... interval %u\n", sc.interval);
|
|
|
|
return ret;
|
|
}
|