From 270cbf3b28a98f2071f54862e7cf967e52e98356 Mon Sep 17 00:00:00 2001 From: Hanjie Lin Date: Tue, 27 Aug 2019 16:21:46 +0800 Subject: [PATCH] perf: pmu fine-tune for aarch32/64 of A53/A55/A73 [1/1] PD#SWPL-13243 Problem: pmu events are not accurate or not complete on A53/A55/A73. Solution: 1, modify event config for A53/A55/A73. 2, perf executable file must be compiled from latest kernel(5.1+) 3, A55 events are the most complete, A73 events are the least complete (eg: fewer ld_retired/st_retired/stall/prefetch events) 4, A55/A53 event meanings are similar, but A73 differs more (eg: L1/L2 dcache/icache loads meanings) sample commands: a55 arm64: perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_refill_inner/,armv8_pmuv3/a55_l1d_cache_refill_outer/,armv8_pmuv3/a55_l1d_cache_refill_prefetch/,armv8_pmuv3/a55_l2d_cache_refill_prefetch/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a55_stall_backend_ld_cache/,armv8_pmuv3/a55_stall_backend_ld_tlb/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls a53 arm64: perf stat -e 
task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a53_cache_refill_prefetch/,armv8_pmuv3/a53_scu_snooped/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls a73 arm64: (w400 bind to a73 cpu2) perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_wr/ busybox taskset 4 ls a55 arm: perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_refill_inner/,armv7_cortex_a15/a55_l1d_cache_refill_outer/,armv7_cortex_a15/a55_l1d_cache_refill_prefetch/,armv7_cortex_a15/a55_l2d_cache_refill_prefetch/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a55_stall_backend_ld_cache/,armv7_cortex_a15/a55_stall_backend_ld_tlb/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls 
a53 arm: perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a53_cache_refill_prefetch/,armv7_cortex_a15/a53_scu_snooped/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls a73 arm: (w400 bind to a73 cpu2) perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_wr/ busybox taskset 4 ls Verify: ac200/u200/w400 Change-Id: I7f11e1480c3c27d016b011d2a84c33e824f69b08 Signed-off-by: Hanjie Lin --- arch/arm/kernel/perf_event_v7.c | 83 +++++++++++++++++++++++++++++++ arch/arm64/kernel/perf_event.c | 87 +++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index e3a3ebc6f23f..a35b80329507 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -612,6 +612,49 @@ ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); +#ifdef CONFIG_AMLOGIC_MODIFY +/* a53/a55 common events */ +ARMV7_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1); +ARMV7_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2); 
+ARMV7_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3); +ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5); +ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6); +ARMV7_EVENT_ATTR(a5x_stall_backend_ld, 0xe7); +ARMV7_EVENT_ATTR(a5x_stall_backend_st, 0xe8); +ARMV7_EVENT_ATTR(a5x_l2d_cache, 0x16); +ARMV7_EVENT_ATTR(a5x_l2d_cache_refill, 0x17); + +/* a55 events */ +ARMV7_EVENT_ATTR(a55_stall_frontend, 0x23); +ARMV7_EVENT_ATTR(a55_stall_backend, 0x24); +ARMV7_EVENT_ATTR(a55_stall_backend_ilock, 0xe4); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1); +ARMV7_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0); +ARMV7_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9); +ARMV7_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea); +ARMV7_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb); +ARMV7_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec); +ARMV7_EVENT_ATTR(a55_l1d_cache_rd, 0x40); +ARMV7_EVENT_ATTR(a55_l1d_cache_wr, 0x41); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_rd, 0x42); +ARMV7_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43); +ARMV7_EVENT_ATTR(a55_l2d_cache_rd, 0x50); +ARMV7_EVENT_ATTR(a55_l2d_cache_wr, 0x51); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52); +ARMV7_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53); +ARMV7_EVENT_ATTR(a55_l3d_cache_rd, 0xa0); +ARMV7_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2); + +/* a53 events */ +ARMV7_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2); +ARMV7_EVENT_ATTR(a53_scu_snooped, 0xc8); +ARMV7_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7); +ARMV7_EVENT_ATTR(a53_stall_frontend_other, 0xe0); +#endif + static struct attribute *armv7_pmuv2_event_attrs[] = { &armv7_event_attr_sw_incr.attr.attr, &armv7_event_attr_l1i_cache_refill.attr.attr, @@ -643,6 +686,46 @@ static struct attribute *armv7_pmuv2_event_attrs[] = { &armv7_event_attr_inst_spec.attr.attr, &armv7_event_attr_ttbr_write_retired.attr.attr, 
&armv7_event_attr_bus_cycles.attr.attr, +#ifdef CONFIG_AMLOGIC_MODIFY + /* a55/a53 common events */ + &armv7_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1 + &armv7_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2 + &armv7_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3 + &armv7_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5 + &armv7_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6 + &armv7_event_attr_a5x_stall_backend_ld.attr.attr, //0xe7 + &armv7_event_attr_a5x_stall_backend_st.attr.attr, //0xe8 + &armv7_event_attr_a5x_l2d_cache.attr.attr, //0x16 + &armv7_event_attr_a5x_l2d_cache_refill.attr.attr, //0x17 + /* a55 events */ + &armv7_event_attr_a55_stall_frontend.attr.attr, //0x23 + &armv7_event_attr_a55_stall_backend.attr.attr, //0x24 + &armv7_event_attr_a55_stall_backend_ilock.attr.attr, //0xe4 + &armv7_event_attr_a55_stall_backend_ld_cache.attr.attr, //0xe9 + &armv7_event_attr_a55_stall_backend_ld_tlb.attr.attr, //0xea + &armv7_event_attr_a55_stall_backend_st_stb.attr.attr, //0xeb + &armv7_event_attr_a55_stall_backend_st_tlb.attr.attr, //0xec + &armv7_event_attr_a55_l1d_cache_refill_inner.attr.attr, //0x44 + &armv7_event_attr_a55_l1d_cache_refill_outer.attr.attr, //0x45 + &armv7_event_attr_a55_l1d_cache_refill_prefetch.attr.attr, //0xc2 + &armv7_event_attr_a55_l2d_cache_refill_prefetch.attr.attr, //0xc1 + &armv7_event_attr_a55_l3d_cache_refill_prefetch.attr.attr, //0xc0 + &armv7_event_attr_a55_l1d_cache_rd.attr.attr, //0x40 + &armv7_event_attr_a55_l1d_cache_wr.attr.attr, //0x41 + &armv7_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42 + &armv7_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43 + &armv7_event_attr_a55_l2d_cache_rd.attr.attr, //0x50 + &armv7_event_attr_a55_l2d_cache_wr.attr.attr, //0x51 + &armv7_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52 + &armv7_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53 + &armv7_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0 + 
&armv7_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2 + /* a53 events */ + &armv7_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2 + &armv7_event_attr_a53_scu_snooped.attr.attr, //0xc8 + &armv7_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7 + &armv7_event_attr_a53_stall_frontend_other.attr.attr, //0xe0 +#endif NULL, }; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9b3b5dd17cb3..0042224d5300 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -449,6 +449,49 @@ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); +#ifdef CONFIG_AMLOGIC_MODIFY +/* a53/a55 common */ +ARMV8_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1); +ARMV8_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2); +ARMV8_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3); +ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5); +ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6); +ARMV8_EVENT_ATTR(a5x_stall_backend_ld, 0xe7); +ARMV8_EVENT_ATTR(a5x_stall_backend_st, 0xe8); +ARMV8_EVENT_ATTR(a5x_l2d_cache, 0x16); +ARMV8_EVENT_ATTR(a5x_l2d_cache_refill, 0x17); + +/* a55 events */ +ARMV8_EVENT_ATTR(a55_stall_frontend, 0x23); +ARMV8_EVENT_ATTR(a55_stall_backend, 0x24); +ARMV8_EVENT_ATTR(a55_stall_backend_ilock, 0xe4); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1); +ARMV8_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0); +ARMV8_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9); +ARMV8_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea); +ARMV8_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb); +ARMV8_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec); +ARMV8_EVENT_ATTR(a55_l1d_cache_rd, 0x40); +ARMV8_EVENT_ATTR(a55_l1d_cache_wr, 0x41); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_rd, 
0x42); +ARMV8_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43); +ARMV8_EVENT_ATTR(a55_l2d_cache_rd, 0x50); +ARMV8_EVENT_ATTR(a55_l2d_cache_wr, 0x51); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52); +ARMV8_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53); +ARMV8_EVENT_ATTR(a55_l3d_cache_rd, 0xa0); +ARMV8_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2); + +/* a53 events */ +ARMV8_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2); +ARMV8_EVENT_ATTR(a53_scu_snooped, 0xc8); +ARMV8_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7); +ARMV8_EVENT_ATTR(a53_stall_frontend_other, 0xe0); +#endif + static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_sw_incr.attr.attr, &armv8_event_attr_l1i_cache_refill.attr.attr, @@ -498,6 +541,46 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_l2i_tlb_refill.attr.attr, &armv8_event_attr_l2d_tlb.attr.attr, &armv8_event_attr_l2i_tlb.attr.attr, +#ifdef CONFIG_AMLOGIC_MODIFY + /* a55/a53 common events */ + &armv8_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1 + &armv8_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2 + &armv8_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3 + &armv8_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5 + &armv8_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6 + &armv8_event_attr_a5x_stall_backend_ld.attr.attr, //0xe7 + &armv8_event_attr_a5x_stall_backend_st.attr.attr, //0xe8 + &armv8_event_attr_a5x_l2d_cache.attr.attr, //0x16 + &armv8_event_attr_a5x_l2d_cache_refill.attr.attr, //0x17 + /* a55 events */ + &armv8_event_attr_a55_stall_frontend.attr.attr, //0x23 + &armv8_event_attr_a55_stall_backend.attr.attr, //0x24 + &armv8_event_attr_a55_stall_backend_ilock.attr.attr, //0xe4 + &armv8_event_attr_a55_stall_backend_ld_cache.attr.attr, //0xe9 + &armv8_event_attr_a55_stall_backend_ld_tlb.attr.attr, //0xea + &armv8_event_attr_a55_stall_backend_st_stb.attr.attr, //0xeb + &armv8_event_attr_a55_stall_backend_st_tlb.attr.attr, //0xec + 
&armv8_event_attr_a55_l1d_cache_refill_inner.attr.attr, //0x44 + &armv8_event_attr_a55_l1d_cache_refill_outer.attr.attr, //0x45 + &armv8_event_attr_a55_l1d_cache_refill_prefetch.attr.attr, //0xc2 + &armv8_event_attr_a55_l2d_cache_refill_prefetch.attr.attr, //0xc1 + &armv8_event_attr_a55_l3d_cache_refill_prefetch.attr.attr, //0xc0 + &armv8_event_attr_a55_l1d_cache_rd.attr.attr, //0x40 + &armv8_event_attr_a55_l1d_cache_wr.attr.attr, //0x41 + &armv8_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42 + &armv8_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43 + &armv8_event_attr_a55_l2d_cache_rd.attr.attr, //0x50 + &armv8_event_attr_a55_l2d_cache_wr.attr.attr, //0x51 + &armv8_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52 + &armv8_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53 + &armv8_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0 + &armv8_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2 + /* a53 events */ + &armv8_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2 + &armv8_event_attr_a53_scu_snooped.attr.attr, //0xc8 + &armv8_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7 + &armv8_event_attr_a53_stall_frontend_other.attr.attr, //0xe0 +#endif NULL, }; @@ -505,6 +588,9 @@ static umode_t armv8pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr, int unused) { +#ifdef CONFIG_AMLOGIC_MODIFY + return 0444; +#else struct device *dev = kobj_to_dev(kobj); struct pmu *pmu = dev_get_drvdata(dev); struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); @@ -516,6 +602,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, return attr->mode; return 0; +#endif } static struct attribute_group armv8_pmuv3_events_attr_group = {