mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Merge 52abb27abf ("Merge tag 'slab-for-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab") into android-mainline
Steps on the way to 6.1-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I56d49191f3a4419cda96f59c2aab815d0be54538
This commit is contained in:
@@ -25,6 +25,7 @@ Required properties:
|
||||
For those SoCs that use SYST
|
||||
* "mediatek,mt8183-timer" for MT8183 compatible timers (SYST)
|
||||
* "mediatek,mt8186-timer" for MT8186 compatible timers (SYST)
|
||||
* "mediatek,mt8188-timer" for MT8188 compatible timers (SYST)
|
||||
* "mediatek,mt8192-timer" for MT8192 compatible timers (SYST)
|
||||
* "mediatek,mt8195-timer" for MT8195 compatible timers (SYST)
|
||||
* "mediatek,mt7629-timer" for MT7629 compatible timers (SYST)
|
||||
|
||||
@@ -32,6 +32,10 @@ properties:
|
||||
clock-names:
|
||||
const: per
|
||||
|
||||
nxp,no-divider:
|
||||
description: if present, means there is no internal base clk divider.
|
||||
type: boolean
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
@@ -37,6 +37,7 @@ properties:
|
||||
- renesas,tmu-r8a77990 # R-Car E3
|
||||
- renesas,tmu-r8a77995 # R-Car D3
|
||||
- renesas,tmu-r8a779a0 # R-Car V3U
|
||||
- renesas,tmu-r8a779f0 # R-Car S4-8
|
||||
- const: renesas,tmu
|
||||
|
||||
reg:
|
||||
|
||||
@@ -25,6 +25,7 @@ properties:
|
||||
- samsung,exynos4412-mct
|
||||
- items:
|
||||
- enum:
|
||||
- axis,artpec8-mct
|
||||
- samsung,exynos3250-mct
|
||||
- samsung,exynos5250-mct
|
||||
- samsung,exynos5260-mct
|
||||
@@ -45,6 +46,19 @@ properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
samsung,frc-shared:
|
||||
type: boolean
|
||||
description: |
|
||||
Indicates that the hardware requires that this processor share the
|
||||
free-running counter with a different (main) processor.
|
||||
|
||||
samsung,local-timers:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
minItems: 1
|
||||
maxItems: 16
|
||||
description: |
|
||||
List of indices of local timers usable from this processor.
|
||||
|
||||
interrupts:
|
||||
description: |
|
||||
Interrupts should be put in specific order. This is, the local timer
|
||||
@@ -74,6 +88,17 @@ required:
|
||||
- reg
|
||||
|
||||
allOf:
|
||||
- if:
|
||||
not:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- axis,artpec8-mct
|
||||
then:
|
||||
properties:
|
||||
samsung,local-timers: false
|
||||
samsung,frc-shared: false
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
@@ -101,6 +126,7 @@ allOf:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- axis,artpec8-mct
|
||||
- samsung,exynos5260-mct
|
||||
- samsung,exynos5420-mct
|
||||
- samsung,exynos5433-mct
|
||||
|
||||
@@ -400,21 +400,30 @@ information:
|
||||
allocated objects. The output is sorted by frequency of each trace.
|
||||
|
||||
Information in the output:
|
||||
Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
|
||||
pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.
|
||||
Number of objects, allocating function, possible memory wastage of
|
||||
kmalloc objects (total/per-object), minimal/average/maximal jiffies
|
||||
since alloc, pid range of the allocating processes, cpu mask of
|
||||
allocating cpus, numa node mask of origins of memory, and stack trace.
|
||||
|
||||
Example:::
|
||||
|
||||
1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
|
||||
__slab_alloc+0x6d/0x90
|
||||
kmem_cache_alloc_trace+0x2eb/0x300
|
||||
populate_error_injection_list+0x97/0x110
|
||||
init_error_injection+0x1b/0x71
|
||||
do_one_initcall+0x5f/0x2d0
|
||||
kernel_init_freeable+0x26f/0x2d7
|
||||
kernel_init+0xe/0x118
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
338 pci_alloc_dev+0x2c/0xa0 waste=521872/1544 age=290837/291891/293509 pid=1 cpus=106 nodes=0-1
|
||||
__kmem_cache_alloc_node+0x11f/0x4e0
|
||||
kmalloc_trace+0x26/0xa0
|
||||
pci_alloc_dev+0x2c/0xa0
|
||||
pci_scan_single_device+0xd2/0x150
|
||||
pci_scan_slot+0xf7/0x2d0
|
||||
pci_scan_child_bus_extend+0x4e/0x360
|
||||
acpi_pci_root_create+0x32e/0x3b0
|
||||
pci_acpi_scan_root+0x2b9/0x2d0
|
||||
acpi_pci_root_add.cold.11+0x110/0xb0a
|
||||
acpi_bus_attach+0x262/0x3f0
|
||||
device_for_each_child+0xb7/0x110
|
||||
acpi_dev_for_each_child+0x77/0xa0
|
||||
acpi_bus_attach+0x108/0x3f0
|
||||
device_for_each_child+0xb7/0x110
|
||||
acpi_dev_for_each_child+0x77/0xa0
|
||||
acpi_bus_attach+0x108/0x3f0
|
||||
|
||||
2. free_traces::
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
@@ -129,7 +130,14 @@ struct breakpoint {
|
||||
bool ptrace_bp;
|
||||
};
|
||||
|
||||
/*
|
||||
* While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
|
||||
* rely on it safely synchronizing internals here; however, we can rely on it
|
||||
* not requesting more breakpoints than available.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(cpu_bps_lock);
|
||||
static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
|
||||
static DEFINE_SPINLOCK(task_bps_lock);
|
||||
static LIST_HEAD(task_bps);
|
||||
|
||||
static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
|
||||
@@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp)
|
||||
if (IS_ERR(tmp))
|
||||
return PTR_ERR(tmp);
|
||||
|
||||
spin_lock(&task_bps_lock);
|
||||
list_add(&tmp->list, &task_bps);
|
||||
spin_unlock(&task_bps_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp)
|
||||
{
|
||||
struct list_head *pos, *q;
|
||||
|
||||
spin_lock(&task_bps_lock);
|
||||
list_for_each_safe(pos, q, &task_bps) {
|
||||
struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
|
||||
|
||||
@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&task_bps_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
|
||||
static bool all_task_bps_check(struct perf_event *bp)
|
||||
{
|
||||
struct breakpoint *tmp;
|
||||
bool ret = false;
|
||||
|
||||
spin_lock(&task_bps_lock);
|
||||
list_for_each_entry(tmp, &task_bps, list) {
|
||||
if (!can_co_exist(tmp, bp))
|
||||
return true;
|
||||
if (!can_co_exist(tmp, bp)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
spin_unlock(&task_bps_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
|
||||
static bool same_task_bps_check(struct perf_event *bp)
|
||||
{
|
||||
struct breakpoint *tmp;
|
||||
bool ret = false;
|
||||
|
||||
spin_lock(&task_bps_lock);
|
||||
list_for_each_entry(tmp, &task_bps, list) {
|
||||
if (tmp->bp->hw.target == bp->hw.target &&
|
||||
!can_co_exist(tmp, bp))
|
||||
return true;
|
||||
!can_co_exist(tmp, bp)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
spin_unlock(&task_bps_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int cpu_bps_add(struct perf_event *bp)
|
||||
@@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp)
|
||||
if (IS_ERR(tmp))
|
||||
return PTR_ERR(tmp);
|
||||
|
||||
spin_lock(&cpu_bps_lock);
|
||||
cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
|
||||
for (i = 0; i < nr_wp_slots(); i++) {
|
||||
if (!cpu_bp[i]) {
|
||||
@@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp)
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&cpu_bps_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
|
||||
struct breakpoint **cpu_bp;
|
||||
int i = 0;
|
||||
|
||||
spin_lock(&cpu_bps_lock);
|
||||
cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
|
||||
for (i = 0; i < nr_wp_slots(); i++) {
|
||||
if (!cpu_bp[i])
|
||||
@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&cpu_bps_lock);
|
||||
}
|
||||
|
||||
static bool cpu_bps_check(int cpu, struct perf_event *bp)
|
||||
{
|
||||
struct breakpoint **cpu_bp;
|
||||
bool ret = false;
|
||||
int i;
|
||||
|
||||
spin_lock(&cpu_bps_lock);
|
||||
cpu_bp = per_cpu_ptr(cpu_bps, cpu);
|
||||
for (i = 0; i < nr_wp_slots(); i++) {
|
||||
if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
|
||||
return true;
|
||||
if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
spin_unlock(&cpu_bps_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool all_cpu_bps_check(struct perf_event *bp)
|
||||
@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't use any locks to serialize accesses to cpu_bps or task_bps
|
||||
* because we are already inside nr_bp_mutex.
|
||||
*/
|
||||
int arch_reserve_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -2314,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
cpuhw = this_cpu_ptr(&cpu_hw_events);
|
||||
power_pmu_bhrb_read(event, cpuhw);
|
||||
data.br_stack = &cpuhw->bhrb_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
|
||||
ppmu->get_mem_data_src)
|
||||
ppmu->get_mem_data_src) {
|
||||
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
|
||||
data.sample_flags |= PERF_SAMPLE_DATA_SRC;
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
|
||||
ppmu->get_mem_weight)
|
||||
ppmu->get_mem_weight) {
|
||||
ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
|
||||
|
||||
data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
power_pmu_stop(event, 0);
|
||||
} else if (period) {
|
||||
|
||||
@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
|
||||
raw.frag.data = cpuhw->stop;
|
||||
raw.size = raw.frag.size;
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
||||
@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
|
||||
raw.frag.data = cpump->save;
|
||||
raw.size = raw.frag.size;
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
||||
@@ -48,10 +48,7 @@ struct pmu;
|
||||
/* Maximum number of UBC channels */
|
||||
#define HBP_NUM 2
|
||||
|
||||
static inline int hw_breakpoint_slots(int type)
|
||||
{
|
||||
return HBP_NUM;
|
||||
}
|
||||
#define hw_breakpoint_slots(type) (HBP_NUM)
|
||||
|
||||
/* arch/sh/kernel/hw_breakpoint.c */
|
||||
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-y += core.o probe.o
|
||||
obj-y += core.o probe.o utils.o
|
||||
obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
|
||||
obj-y += amd/
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += core.o
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += core.o lbr.o
|
||||
obj-$(CONFIG_PERF_EVENTS_AMD_BRS) += brs.o
|
||||
obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
|
||||
|
||||
@@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void)
|
||||
* a br_sel_map. Software filtering is not supported because it would not correlate well
|
||||
* with a sampling period.
|
||||
*/
|
||||
int amd_brs_setup_filter(struct perf_event *event)
|
||||
static int amd_brs_setup_filter(struct perf_event *event)
|
||||
{
|
||||
u64 type = event->attr.branch_sample_type;
|
||||
|
||||
@@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int amd_is_brs_event(struct perf_event *e)
|
||||
{
|
||||
return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
|
||||
}
|
||||
|
||||
int amd_brs_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Due to interrupt holding, BRS is not recommended in
|
||||
* counting mode.
|
||||
*/
|
||||
if (!is_sampling_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Due to the way BRS operates by holding the interrupt until
|
||||
* lbr_nr entries have been captured, it does not make sense
|
||||
* to allow sampling on BRS with an event that does not match
|
||||
* what BRS is capturing, i.e., retired taken branches.
|
||||
* Otherwise the correlation with the event's period is even
|
||||
* more loose:
|
||||
*
|
||||
* With retired taken branch:
|
||||
* Effective P = P + 16 + X
|
||||
* With any other event:
|
||||
* Effective P = P + Y + X
|
||||
*
|
||||
* Where X is the number of taken branches due to interrupt
|
||||
* skid. Skid is large.
|
||||
*
|
||||
* Where Y is the occurrences of the event while BRS is
|
||||
* capturing the lbr_nr entries.
|
||||
*
|
||||
* By using retired taken branches, we limit the impact on the
|
||||
* Y variable. We know it cannot be more than the depth of
|
||||
* BRS.
|
||||
*/
|
||||
if (!amd_is_brs_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* BRS implementation does not work with frequency mode
|
||||
* reprogramming of the period.
|
||||
*/
|
||||
if (event->attr.freq)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* The kernel subtracts BRS depth from period, so it must
|
||||
* be big enough.
|
||||
*/
|
||||
if (event->attr.sample_period <= x86_pmu.lbr_nr)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Check if we can allow PERF_SAMPLE_BRANCH_STACK
|
||||
*/
|
||||
ret = amd_brs_setup_filter(event);
|
||||
|
||||
/* only set in case of success */
|
||||
if (!ret)
|
||||
event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* tos = top of stack, i.e., last valid entry written */
|
||||
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
|
||||
{
|
||||
|
||||
@@ -330,16 +330,10 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
|
||||
}
|
||||
}
|
||||
|
||||
#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
|
||||
static inline int amd_is_brs_event(struct perf_event *e)
|
||||
{
|
||||
return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
|
||||
}
|
||||
DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
|
||||
|
||||
static int amd_core_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (event->attr.exclude_host && event->attr.exclude_guest)
|
||||
/*
|
||||
* When HO == GO == 1 the hardware treats that as GO == HO == 0
|
||||
@@ -356,66 +350,10 @@ static int amd_core_hw_config(struct perf_event *event)
|
||||
if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
|
||||
event->hw.flags |= PERF_X86_EVENT_PAIR;
|
||||
|
||||
/*
|
||||
* if branch stack is requested
|
||||
*/
|
||||
if (has_branch_stack(event)) {
|
||||
/*
|
||||
* Due to interrupt holding, BRS is not recommended in
|
||||
* counting mode.
|
||||
*/
|
||||
if (!is_sampling_event(event))
|
||||
return -EINVAL;
|
||||
if (has_branch_stack(event))
|
||||
return static_call(amd_pmu_branch_hw_config)(event);
|
||||
|
||||
/*
|
||||
* Due to the way BRS operates by holding the interrupt until
|
||||
* lbr_nr entries have been captured, it does not make sense
|
||||
* to allow sampling on BRS with an event that does not match
|
||||
* what BRS is capturing, i.e., retired taken branches.
|
||||
* Otherwise the correlation with the event's period is even
|
||||
* more loose:
|
||||
*
|
||||
* With retired taken branch:
|
||||
* Effective P = P + 16 + X
|
||||
* With any other event:
|
||||
* Effective P = P + Y + X
|
||||
*
|
||||
* Where X is the number of taken branches due to interrupt
|
||||
* skid. Skid is large.
|
||||
*
|
||||
* Where Y is the occurrences of the event while BRS is
|
||||
* capturing the lbr_nr entries.
|
||||
*
|
||||
* By using retired taken branches, we limit the impact on the
|
||||
* Y variable. We know it cannot be more than the depth of
|
||||
* BRS.
|
||||
*/
|
||||
if (!amd_is_brs_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* BRS implementation does not work with frequency mode
|
||||
* reprogramming of the period.
|
||||
*/
|
||||
if (event->attr.freq)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* The kernel subtracts BRS depth from period, so it must
|
||||
* be big enough.
|
||||
*/
|
||||
if (event->attr.sample_period <= x86_pmu.lbr_nr)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Check if we can allow PERF_SAMPLE_BRANCH_STACK
|
||||
*/
|
||||
ret = amd_brs_setup_filter(event);
|
||||
|
||||
/* only set in case of success */
|
||||
if (!ret)
|
||||
event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
|
||||
@@ -582,8 +520,14 @@ static struct amd_nb *amd_alloc_nb(int cpu)
|
||||
return nb;
|
||||
}
|
||||
|
||||
typedef void (amd_pmu_branch_reset_t)(void);
|
||||
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t);
|
||||
|
||||
static void amd_pmu_cpu_reset(int cpu)
|
||||
{
|
||||
if (x86_pmu.lbr_nr)
|
||||
static_call(amd_pmu_branch_reset)();
|
||||
|
||||
if (x86_pmu.version < 2)
|
||||
return;
|
||||
|
||||
@@ -598,16 +542,24 @@ static int amd_pmu_cpu_prepare(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL,
|
||||
cpu_to_node(cpu));
|
||||
if (!cpuc->lbr_sel)
|
||||
return -ENOMEM;
|
||||
|
||||
WARN_ON_ONCE(cpuc->amd_nb);
|
||||
|
||||
if (!x86_pmu.amd_nb_constraints)
|
||||
return 0;
|
||||
|
||||
cpuc->amd_nb = amd_alloc_nb(cpu);
|
||||
if (!cpuc->amd_nb)
|
||||
return -ENOMEM;
|
||||
if (cpuc->amd_nb)
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
kfree(cpuc->lbr_sel);
|
||||
cpuc->lbr_sel = NULL;
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_starting(int cpu)
|
||||
@@ -640,19 +592,19 @@ static void amd_pmu_cpu_starting(int cpu)
|
||||
cpuc->amd_nb->nb_id = nb_id;
|
||||
cpuc->amd_nb->refcnt++;
|
||||
|
||||
amd_brs_reset();
|
||||
amd_pmu_cpu_reset(cpu);
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_dead(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
kfree(cpuhw->lbr_sel);
|
||||
cpuhw->lbr_sel = NULL;
|
||||
|
||||
if (!x86_pmu.amd_nb_constraints)
|
||||
return;
|
||||
|
||||
cpuhw = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
if (cpuhw->amd_nb) {
|
||||
struct amd_nb *nb = cpuhw->amd_nb;
|
||||
|
||||
@@ -677,7 +629,7 @@ static inline u64 amd_pmu_get_global_status(void)
|
||||
/* PerfCntrGlobalStatus is read-only */
|
||||
rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
|
||||
|
||||
return status & amd_pmu_global_cntr_mask;
|
||||
return status;
|
||||
}
|
||||
|
||||
static inline void amd_pmu_ack_global_status(u64 status)
|
||||
@@ -688,8 +640,6 @@ static inline void amd_pmu_ack_global_status(u64 status)
|
||||
* clears the same bit in PerfCntrGlobalStatus
|
||||
*/
|
||||
|
||||
/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
|
||||
status &= amd_pmu_global_cntr_mask;
|
||||
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
|
||||
}
|
||||
|
||||
@@ -799,11 +749,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event)
|
||||
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
|
||||
}
|
||||
|
||||
static void amd_pmu_v2_enable_all(int added)
|
||||
static __always_inline void amd_pmu_core_enable_all(void)
|
||||
{
|
||||
amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
|
||||
}
|
||||
|
||||
static void amd_pmu_v2_enable_all(int added)
|
||||
{
|
||||
amd_pmu_lbr_enable_all();
|
||||
amd_pmu_core_enable_all();
|
||||
}
|
||||
|
||||
static void amd_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
x86_pmu_disable_event(event);
|
||||
@@ -828,23 +784,32 @@ static void amd_pmu_disable_all(void)
|
||||
amd_pmu_check_overflow();
|
||||
}
|
||||
|
||||
static __always_inline void amd_pmu_core_disable_all(void)
|
||||
{
|
||||
amd_pmu_set_global_ctl(0);
|
||||
}
|
||||
|
||||
static void amd_pmu_v2_disable_all(void)
|
||||
{
|
||||
/* Disable all PMCs */
|
||||
amd_pmu_set_global_ctl(0);
|
||||
amd_pmu_core_disable_all();
|
||||
amd_pmu_lbr_disable_all();
|
||||
amd_pmu_check_overflow();
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add);
|
||||
|
||||
static void amd_pmu_add_event(struct perf_event *event)
|
||||
{
|
||||
if (needs_branch_stack(event))
|
||||
amd_pmu_brs_add(event);
|
||||
static_call(amd_pmu_branch_add)(event);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del);
|
||||
|
||||
static void amd_pmu_del_event(struct perf_event *event)
|
||||
{
|
||||
if (needs_branch_stack(event))
|
||||
amd_pmu_brs_del(event);
|
||||
static_call(amd_pmu_branch_del)(event);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -930,8 +895,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
pmu_enabled = cpuc->enabled;
|
||||
cpuc->enabled = 0;
|
||||
|
||||
/* Stop counting */
|
||||
amd_pmu_v2_disable_all();
|
||||
/* Stop counting but do not disable LBR */
|
||||
amd_pmu_core_disable_all();
|
||||
|
||||
status = amd_pmu_get_global_status();
|
||||
|
||||
@@ -939,6 +904,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
if (!status)
|
||||
goto done;
|
||||
|
||||
/* Read branch records before unfreezing */
|
||||
if (status & GLOBAL_STATUS_LBRS_FROZEN) {
|
||||
amd_pmu_lbr_read();
|
||||
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
||||
if (!test_bit(idx, cpuc->active_mask))
|
||||
continue;
|
||||
@@ -958,6 +929,11 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
if (!x86_perf_event_set_period(event))
|
||||
continue;
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
|
||||
@@ -971,7 +947,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
*/
|
||||
WARN_ON(status > 0);
|
||||
|
||||
/* Clear overflow bits */
|
||||
/* Clear overflow and freeze bits */
|
||||
amd_pmu_ack_global_status(~status);
|
||||
|
||||
/*
|
||||
@@ -985,7 +961,7 @@ done:
|
||||
|
||||
/* Resume counting only if PMU is active */
|
||||
if (pmu_enabled)
|
||||
amd_pmu_v2_enable_all(0);
|
||||
amd_pmu_core_enable_all();
|
||||
|
||||
return amd_pmu_adjust_nmi_window(handled);
|
||||
}
|
||||
@@ -1248,23 +1224,14 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
|
||||
return x86_event_sysfs_show(page, config, event);
|
||||
}
|
||||
|
||||
static void amd_pmu_sched_task(struct perf_event_context *ctx,
|
||||
bool sched_in)
|
||||
{
|
||||
if (sched_in && x86_pmu.lbr_nr)
|
||||
amd_pmu_brs_sched_task(ctx, sched_in);
|
||||
}
|
||||
|
||||
static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
|
||||
static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
|
||||
{
|
||||
/*
|
||||
* Decrease period by the depth of the BRS feature to get the last N
|
||||
* taken branches and approximate the desired period
|
||||
*/
|
||||
if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
|
||||
left -= x86_pmu.lbr_nr;
|
||||
|
||||
return left;
|
||||
if (has_branch_stack(event) && *left > x86_pmu.lbr_nr)
|
||||
*left -= x86_pmu.lbr_nr;
|
||||
}
|
||||
|
||||
static __initconst const struct x86_pmu amd_pmu = {
|
||||
@@ -1311,23 +1278,25 @@ static ssize_t branches_show(struct device *cdev,
|
||||
|
||||
static DEVICE_ATTR_RO(branches);
|
||||
|
||||
static struct attribute *amd_pmu_brs_attrs[] = {
|
||||
static struct attribute *amd_pmu_branches_attrs[] = {
|
||||
&dev_attr_branches.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static umode_t
|
||||
amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
{
|
||||
return x86_pmu.lbr_nr ? attr->mode : 0;
|
||||
}
|
||||
|
||||
static struct attribute_group group_caps_amd_brs = {
|
||||
static struct attribute_group group_caps_amd_branches = {
|
||||
.name = "caps",
|
||||
.attrs = amd_pmu_brs_attrs,
|
||||
.is_visible = amd_brs_is_visible,
|
||||
.attrs = amd_pmu_branches_attrs,
|
||||
.is_visible = amd_branches_is_visible,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
|
||||
|
||||
EVENT_ATTR_STR(branch-brs, amd_branch_brs,
|
||||
"event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
|
||||
|
||||
@@ -1336,15 +1305,26 @@ static struct attribute *amd_brs_events_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static umode_t
|
||||
amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||
{
|
||||
return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ?
|
||||
attr->mode : 0;
|
||||
}
|
||||
|
||||
static struct attribute_group group_events_amd_brs = {
|
||||
.name = "events",
|
||||
.attrs = amd_brs_events_attrs,
|
||||
.is_visible = amd_brs_is_visible,
|
||||
};
|
||||
|
||||
#endif /* CONFIG_PERF_EVENTS_AMD_BRS */
|
||||
|
||||
static const struct attribute_group *amd_attr_update[] = {
|
||||
&group_caps_amd_brs,
|
||||
&group_caps_amd_branches,
|
||||
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
|
||||
&group_events_amd_brs,
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -1421,13 +1401,27 @@ static int __init amd_core_pmu_init(void)
|
||||
x86_pmu.flags |= PMU_FL_PAIR;
|
||||
}
|
||||
|
||||
/*
|
||||
* BRS requires special event constraints and flushing on ctxsw.
|
||||
*/
|
||||
if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
|
||||
/* LBR and BRS are mutually exclusive features */
|
||||
if (!amd_pmu_lbr_init()) {
|
||||
/* LBR requires flushing on context switch */
|
||||
x86_pmu.sched_task = amd_pmu_lbr_sched_task;
|
||||
static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
|
||||
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
|
||||
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
|
||||
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
|
||||
} else if (!amd_brs_init()) {
|
||||
/*
|
||||
* BRS requires special event constraints and flushing on ctxsw.
|
||||
*/
|
||||
x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
|
||||
x86_pmu.sched_task = amd_pmu_sched_task;
|
||||
x86_pmu.sched_task = amd_pmu_brs_sched_task;
|
||||
x86_pmu.limit_period = amd_pmu_limit_period;
|
||||
|
||||
static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config);
|
||||
static_call_update(amd_pmu_branch_reset, amd_brs_reset);
|
||||
static_call_update(amd_pmu_branch_add, amd_pmu_brs_add);
|
||||
static_call_update(amd_pmu_branch_del, amd_pmu_brs_del);
|
||||
|
||||
/*
|
||||
* put_event_constraints callback same as Fam17h, set above
|
||||
*/
|
||||
|
||||
@@ -300,16 +300,6 @@ static int perf_ibs_init(struct perf_event *event)
|
||||
hwc->config_base = perf_ibs->msr;
|
||||
hwc->config = config;
|
||||
|
||||
/*
|
||||
* rip recorded by IbsOpRip will not be consistent with rsp and rbp
|
||||
* recorded as part of interrupt regs. Thus we need to use rip from
|
||||
* interrupt regs while unwinding call stack. Setting _EARLY flag
|
||||
* makes sure we unwind call-stack before perf sample rip is set to
|
||||
* IbsOpRip.
|
||||
*/
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -688,6 +678,339 @@ static struct perf_ibs perf_ibs_op = {
|
||||
.get_count = get_ibs_op_count,
|
||||
};
|
||||
|
||||
static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
union perf_mem_data_src *data_src = &data->data_src;
|
||||
|
||||
data_src->mem_op = PERF_MEM_OP_NA;
|
||||
|
||||
if (op_data3->ld_op)
|
||||
data_src->mem_op = PERF_MEM_OP_LOAD;
|
||||
else if (op_data3->st_op)
|
||||
data_src->mem_op = PERF_MEM_OP_STORE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 have
|
||||
* finer-grained DataSrc encodings. Others have coarse ones.
|
||||
*/
|
||||
static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
|
||||
{
|
||||
if (ibs_caps & IBS_CAPS_ZEN4)
|
||||
return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;
|
||||
|
||||
return op_data2->data_src_lo;
|
||||
}
|
||||
|
||||
static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
|
||||
union ibs_op_data3 *op_data3,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
union perf_mem_data_src *data_src = &data->data_src;
|
||||
u8 ibs_data_src = perf_ibs_data_src(op_data2);
|
||||
|
||||
data_src->mem_lvl = 0;
|
||||
|
||||
/*
|
||||
* DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
|
||||
* memory accesses. So, check DcUcMemAcc bit early.
|
||||
*/
|
||||
if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* L1 Hit */
|
||||
if (op_data3->dc_miss == 0) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* L2 Hit */
|
||||
if (op_data3->l2_miss == 0) {
|
||||
/* Erratum #1293 */
|
||||
if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
|
||||
!(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* OP_DATA2 is valid only for load ops. Skip all checks which
|
||||
* use OP_DATA2[DataSrc].
|
||||
*/
|
||||
if (data_src->mem_op != PERF_MEM_OP_LOAD)
|
||||
goto check_mab;
|
||||
|
||||
/* L3 Hit */
|
||||
if (ibs_caps & IBS_CAPS_ZEN4) {
|
||||
if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
|
||||
PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* A peer cache in a near CCX */
|
||||
if (ibs_caps & IBS_CAPS_ZEN4 &&
|
||||
ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* A peer cache in a far CCX */
|
||||
if (ibs_caps & IBS_CAPS_ZEN4) {
|
||||
if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* DRAM */
|
||||
if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
|
||||
if (op_data2->rmt_node == 0)
|
||||
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
|
||||
else
|
||||
data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
/* PMEM */
|
||||
if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
|
||||
data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
|
||||
if (op_data2->rmt_node) {
|
||||
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
|
||||
/* IBS doesn't provide Remote socket detail */
|
||||
data_src->mem_hops = PERF_MEM_HOPS_1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Extension Memory */
|
||||
if (ibs_caps & IBS_CAPS_ZEN4 &&
|
||||
ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
|
||||
data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
|
||||
if (op_data2->rmt_node) {
|
||||
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
|
||||
/* IBS doesn't provide Remote socket detail */
|
||||
data_src->mem_hops = PERF_MEM_HOPS_1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* IO */
|
||||
if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_IO;
|
||||
data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
|
||||
if (op_data2->rmt_node) {
|
||||
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
|
||||
/* IBS doesn't provide Remote socket detail */
|
||||
data_src->mem_hops = PERF_MEM_HOPS_1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
check_mab:
|
||||
/*
|
||||
* MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
|
||||
* DC misses. However, such data may come from any level in mem
|
||||
* hierarchy. IBS provides detail about both MAB as well as actual
|
||||
* DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
|
||||
* MAB only when IBS fails to provide DataSrc.
|
||||
*/
|
||||
if (op_data3->dc_miss_no_mab_alloc) {
|
||||
data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
data_src->mem_lvl = PERF_MEM_LVL_NA;
|
||||
}
|
||||
|
||||
static bool perf_ibs_cache_hit_st_valid(void)
|
||||
{
|
||||
/* 0: Uninitialized, 1: Valid, -1: Invalid */
|
||||
static int cache_hit_st_valid;
|
||||
|
||||
if (unlikely(!cache_hit_st_valid)) {
|
||||
if (boot_cpu_data.x86 == 0x19 &&
|
||||
(boot_cpu_data.x86_model <= 0xF ||
|
||||
(boot_cpu_data.x86_model >= 0x20 &&
|
||||
boot_cpu_data.x86_model <= 0x5F))) {
|
||||
cache_hit_st_valid = -1;
|
||||
} else {
|
||||
cache_hit_st_valid = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return cache_hit_st_valid == 1;
|
||||
}
|
||||
|
||||
static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
union perf_mem_data_src *data_src = &data->data_src;
|
||||
u8 ibs_data_src;
|
||||
|
||||
data_src->mem_snoop = PERF_MEM_SNOOP_NA;
|
||||
|
||||
if (!perf_ibs_cache_hit_st_valid() ||
|
||||
data_src->mem_op != PERF_MEM_OP_LOAD ||
|
||||
data_src->mem_lvl & PERF_MEM_LVL_L1 ||
|
||||
data_src->mem_lvl & PERF_MEM_LVL_L2 ||
|
||||
op_data2->cache_hit_st)
|
||||
return;
|
||||
|
||||
ibs_data_src = perf_ibs_data_src(op_data2);
|
||||
|
||||
if (ibs_caps & IBS_CAPS_ZEN4) {
|
||||
if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
|
||||
ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
|
||||
ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
|
||||
data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
|
||||
} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
|
||||
data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
union perf_mem_data_src *data_src = &data->data_src;
|
||||
|
||||
data_src->mem_dtlb = PERF_MEM_TLB_NA;
|
||||
|
||||
if (!op_data3->dc_lin_addr_valid)
|
||||
return;
|
||||
|
||||
if (!op_data3->dc_l1tlb_miss) {
|
||||
data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!op_data3->dc_l2tlb_miss) {
|
||||
data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
|
||||
return;
|
||||
}
|
||||
|
||||
data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
|
||||
}
|
||||
|
||||
static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
union perf_mem_data_src *data_src = &data->data_src;
|
||||
|
||||
data_src->mem_lock = PERF_MEM_LOCK_NA;
|
||||
|
||||
if (op_data3->dc_locked_op)
|
||||
data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
|
||||
}
|
||||
|
||||
/*
 * Offset of an IBS Op MSR from MSR_AMD64_IBSOPCTL, used as the index into
 * perf_ibs_data.regs[]. The argument is parenthesized so that any expression
 * (e.g. a conditional) can be passed safely.
 */
#define ibs_op_msr_idx(msr)	((msr) - MSR_AMD64_IBSOPCTL)
|
||||
|
||||
/*
 * Populate the data_src fields of @data from the decoded IBS_OP_DATA2 and
 * IBS_OP_DATA3 values: memory level, snoop, TLB level and lock state.
 *
 * The memory level is decoded first because the snoop decoding reads the
 * mem_lvl field. @ibs_data is not used by this function.
 */
static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
				  struct perf_sample_data *data,
				  union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3)
{
	/* Memory hierarchy level, then snoop state derived from it */
	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	perf_ibs_get_mem_snoop(op_data2, data);

	/* Translation and locking attributes come from OP_DATA3 only */
	perf_ibs_get_tlb_lvl(op_data3, data);
	perf_ibs_get_mem_lock(op_data3, data);
}
|
||||
|
||||
static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
|
||||
union ibs_op_data3 *op_data3)
|
||||
{
|
||||
__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];
|
||||
|
||||
/* Erratum #1293 */
|
||||
if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
|
||||
(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
|
||||
/*
|
||||
* OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
|
||||
* DataSrc=0 is 'No valid status' and RmtNode is invalid when
|
||||
* DataSrc=0.
|
||||
*/
|
||||
val = 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Decode the load/store information of an IBS Op sample into @data.
 *
 * data->data_src is reset to PERF_MEM_NA and the memory operation is
 * decoded; anything that is neither a load nor a store is left at that.
 * For loads and stores, each of the following is filled in only when
 * requested in @sample_type, and the matching bit is set in
 * data->sample_flags:
 *   - PERF_SAMPLE_DATA_SRC:    data source fields
 *   - PERF_SAMPLE_WEIGHT_TYPE: latencies, for loads that missed the DC
 *   - PERF_SAMPLE_ADDR:        linear address, when flagged valid
 *   - PERF_SAMPLE_PHYS_ADDR:   physical address, when flagged valid
 */
static void perf_ibs_parse_ld_st_data(__u64 sample_type,
				      struct perf_ibs_data *ibs_data,
				      struct perf_sample_data *data)
{
	union ibs_op_data3 d3;
	union ibs_op_data2 d2;
	union ibs_op_data d1;

	data->data_src.val = PERF_MEM_NA;
	d3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];

	perf_ibs_get_mem_op(&d3, data);

	switch (data->data_src.mem_op) {
	case PERF_MEM_OP_LOAD:
	case PERF_MEM_OP_STORE:
		break;
	default:
		/* Not a memory access: nothing more to decode */
		return;
	}

	d2.val = perf_ibs_get_op_data2(ibs_data, &d3);

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		perf_ibs_get_data_src(ibs_data, data, &d2, &d3);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	/* Latency is only reported for loads that missed the data cache */
	if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && d3.dc_miss &&
	    data->data_src.mem_op == PERF_MEM_OP_LOAD) {
		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
			d1.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
			data->weight.var1_dw = d3.dc_miss_lat;
			data->weight.var2_w = d1.tag_to_ret_ctr;
		} else if (sample_type & PERF_SAMPLE_WEIGHT) {
			data->weight.full = d3.dc_miss_lat;
		}
		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if ((sample_type & PERF_SAMPLE_ADDR) && d3.dc_lin_addr_valid) {
		data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if ((sample_type & PERF_SAMPLE_PHYS_ADDR) && d3.dc_phy_addr_valid) {
		data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
	}
}
|
||||
|
||||
/*
 * Return the highest register offset to read for a sample.
 *
 * All registers (perf_ibs->offset_max) are needed for raw samples, and for
 * IBS Op samples that request data source, weight, address or physical
 * address. Otherwise the result is 3 when @check_rip is set and 1 when it
 * is not.
 */
static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
				   int check_rip)
{
	/* Sample types decoded from the IBS Op data registers */
	const u64 op_decoded = PERF_SAMPLE_DATA_SRC |
			       PERF_SAMPLE_WEIGHT_TYPE |
			       PERF_SAMPLE_ADDR |
			       PERF_SAMPLE_PHYS_ADDR;

	if (sample_type & PERF_SAMPLE_RAW)
		return perf_ibs->offset_max;

	if (perf_ibs == &perf_ibs_op && (sample_type & op_decoded))
		return perf_ibs->offset_max;

	return check_rip ? 3 : 1;
}
|
||||
|
||||
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
||||
{
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
@@ -735,12 +1058,9 @@ fail:
|
||||
size = 1;
|
||||
offset = 1;
|
||||
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW)
|
||||
offset_max = perf_ibs->offset_max;
|
||||
else if (check_rip)
|
||||
offset_max = 3;
|
||||
else
|
||||
offset_max = 1;
|
||||
|
||||
offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
|
||||
|
||||
do {
|
||||
rdmsrl(msr + offset, *buf++);
|
||||
size++;
|
||||
@@ -791,15 +1111,21 @@ fail:
|
||||
},
|
||||
};
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
if (perf_ibs == &perf_ibs_op)
|
||||
perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);
|
||||
|
||||
/*
|
||||
* rip recorded by IbsOpRip will not be consistent with rsp and rbp
|
||||
* recorded as part of interrupt regs. Thus we need to use rip from
|
||||
* interrupt regs while unwinding call stack.
|
||||
*/
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data.callchain = perf_callchain(event, iregs);
|
||||
data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
throttle = perf_event_overflow(event, &data, ®s);
|
||||
out:
|
||||
|
||||
439
arch/x86/events/amd/lbr.c
Normal file
439
arch/x86/events/amd/lbr.c
Normal file
@@ -0,0 +1,439 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
|
||||
/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * Bit positions of the LBR Branch Select filter. A set bit ensures that
 * the corresponding type of branch is NOT recorded.
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

/* Single-bit masks for the positions above */
#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)

/* Sentinel values used in the filter lookup table instead of select bits */
#define LBR_NOT_SUPP	(-1)	/* unsupported filter */
#define LBR_IGNORE	0

/* Every branch-type bit, i.e. everything except the privilege-level bits */
#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
|
||||
|
||||
struct branch_entry {
|
||||
union {
|
||||
struct {
|
||||
u64 ip:58;
|
||||
u64 ip_sign_ext:5;
|
||||
u64 mispredict:1;
|
||||
} split;
|
||||
u64 full;
|
||||
} from;
|
||||
|
||||
union {
|
||||
struct {
|
||||
u64 ip:58;
|
||||
u64 ip_sign_ext:3;
|
||||
u64 reserved:1;
|
||||
u64 spec:1;
|
||||
u64 valid:1;
|
||||
} split;
|
||||
u64 full;
|
||||
} to;
|
||||
};
|
||||
|
||||
/* Write @val to the branch FROM MSR of LBR record @idx. */
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
	/* FROM and TO MSRs alternate, so record idx starts at base + 2 * idx */
	const unsigned int from_msr = MSR_AMD_SAMP_BR_FROM + idx * 2;

	wrmsrl(from_msr, val);
}
|
||||
|
||||
/* Write @val to the branch TO MSR of LBR record @idx. */
static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
	/* The TO MSR directly follows the FROM MSR of the same record */
	const unsigned int to_msr = MSR_AMD_SAMP_BR_FROM + idx * 2 + 1;

	wrmsrl(to_msr, val);
}
|
||||
|
||||
/* Read and return the raw branch FROM MSR of LBR record @idx. */
static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
	/* FROM and TO MSRs alternate, so record idx starts at base + 2 * idx */
	const unsigned int from_msr = MSR_AMD_SAMP_BR_FROM + idx * 2;
	u64 raw;

	rdmsrl(from_msr, raw);

	return raw;
}
|
||||
|
||||
/* Read and return the raw branch TO MSR of LBR record @idx. */
static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
	/* The TO MSR directly follows the FROM MSR of the same record */
	const unsigned int to_msr = MSR_AMD_SAMP_BR_FROM + idx * 2 + 1;
	u64 raw;

	rdmsrl(to_msr, raw);

	return raw;
}
|
||||
|
||||
/*
 * Sign-extend a recorded branch address from the CPU's virtual address
 * width (boot_cpu_data.x86_virt_bits) to a full 64-bit value.
 */
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
	const u32 unused_bits = 64 - boot_cpu_data.x86_virt_bits;
	/* Move the top implemented bit into bit 63 ... */
	s64 shifted = (s64)ip << unused_bits;

	/* ... and let the arithmetic right shift replicate it downwards */
	return (u64)(shifted >> unused_bits);
}
|
||||
|
||||
static void amd_pmu_lbr_filter(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int br_sel = cpuc->br_sel, offset, type, i, j;
|
||||
bool compress = false;
|
||||
bool fused_only = false;
|
||||
u64 from, to;
|
||||
|
||||
/* If sampling all branches, there is nothing to filter */
|
||||
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
|
||||
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
|
||||
fused_only = true;
|
||||
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
|
||||
from = cpuc->lbr_entries[i].from;
|
||||
to = cpuc->lbr_entries[i].to;
|
||||
type = branch_type_fused(from, to, 0, &offset);
|
||||
|
||||
/*
|
||||
* Adjust the branch from address in case of instruction
|
||||
* fusion where it points to an instruction preceding the
|
||||
* actual branch
|
||||
*/
|
||||
if (offset) {
|
||||
cpuc->lbr_entries[i].from += offset;
|
||||
if (fused_only)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If type does not correspond, then discard */
|
||||
if (type == X86_BR_NONE || (br_sel & type) != type) {
|
||||
cpuc->lbr_entries[i].from = 0; /* mark invalid */
|
||||
compress = true;
|
||||
}
|
||||
|
||||
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
|
||||
cpuc->lbr_entries[i].type = common_branch_type(type);
|
||||
}
|
||||
|
||||
if (!compress)
|
||||
return;
|
||||
|
||||
/* Remove all invalid entries */
|
||||
for (i = 0; i < cpuc->lbr_stack.nr; ) {
|
||||
if (!cpuc->lbr_entries[i].from) {
|
||||
j = i;
|
||||
while (++j < cpuc->lbr_stack.nr)
|
||||
cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
|
||||
cpuc->lbr_stack.nr--;
|
||||
if (!cpuc->lbr_entries[i].from)
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
|
||||
PERF_BR_SPEC_NA,
|
||||
PERF_BR_SPEC_WRONG_PATH,
|
||||
PERF_BR_NON_SPEC_CORRECT_PATH,
|
||||
PERF_BR_SPEC_CORRECT_PATH,
|
||||
};
|
||||
|
||||
void amd_pmu_lbr_read(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_branch_entry *br = cpuc->lbr_entries;
|
||||
struct branch_entry entry;
|
||||
int out = 0, idx, i;
|
||||
|
||||
if (!cpuc->lbr_users)
|
||||
return;
|
||||
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
entry.from.full = amd_pmu_lbr_get_from(i);
|
||||
entry.to.full = amd_pmu_lbr_get_to(i);
|
||||
|
||||
/*
|
||||
* Check if a branch has been logged; if valid = 0, spec = 0
|
||||
* then no branch was recorded
|
||||
*/
|
||||
if (!entry.to.split.valid && !entry.to.split.spec)
|
||||
continue;
|
||||
|
||||
perf_clear_branch_entry_bitfields(br + out);
|
||||
|
||||
br[out].from = sign_ext_branch_ip(entry.from.split.ip);
|
||||
br[out].to = sign_ext_branch_ip(entry.to.split.ip);
|
||||
br[out].mispred = entry.from.split.mispredict;
|
||||
br[out].predicted = !br[out].mispred;
|
||||
|
||||
/*
|
||||
* Set branch speculation information using the status of
|
||||
* the valid and spec bits.
|
||||
*
|
||||
* When valid = 0, spec = 0, no branch was recorded and the
|
||||
* entry is discarded as seen above.
|
||||
*
|
||||
* When valid = 0, spec = 1, the recorded branch was
|
||||
* speculative but took the wrong path.
|
||||
*
|
||||
* When valid = 1, spec = 0, the recorded branch was
|
||||
* non-speculative but took the correct path.
|
||||
*
|
||||
* When valid = 1, spec = 1, the recorded branch was
|
||||
* speculative and took the correct path
|
||||
*/
|
||||
idx = (entry.to.split.valid << 1) | entry.to.split.spec;
|
||||
br[out].spec = lbr_spec_map[idx];
|
||||
out++;
|
||||
}
|
||||
|
||||
cpuc->lbr_stack.nr = out;
|
||||
|
||||
/*
|
||||
* Internal register renaming always ensures that LBR From[0] and
|
||||
* LBR To[0] always represent the TOS
|
||||
*/
|
||||
cpuc->lbr_stack.hw_idx = 0;
|
||||
|
||||
/* Perform further software filtering */
|
||||
amd_pmu_lbr_filter();
|
||||
}
|
||||
|
||||
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
|
||||
[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
|
||||
[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
|
||||
[PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,
|
||||
|
||||
[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
|
||||
[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
|
||||
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
|
||||
[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
|
||||
[PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
|
||||
[PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
|
||||
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
|
||||
|
||||
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
|
||||
[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
|
||||
[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
|
||||
|
||||
[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
|
||||
[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
|
||||
};
|
||||
|
||||
static int amd_pmu_lbr_setup_filter(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event_extra *reg = &event->hw.branch_reg;
|
||||
u64 br_type = event->attr.branch_sample_type;
|
||||
u64 mask = 0, v;
|
||||
int i;
|
||||
|
||||
/* No LBR support */
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_USER)
|
||||
mask |= X86_BR_USER;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
|
||||
mask |= X86_BR_KERNEL;
|
||||
|
||||
/* Ignore BRANCH_HV here */
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY)
|
||||
mask |= X86_BR_ANY;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
|
||||
mask |= X86_BR_ANY_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
|
||||
mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
|
||||
mask |= X86_BR_IND_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_COND)
|
||||
mask |= X86_BR_JCC;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
|
||||
mask |= X86_BR_IND_JMP;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_CALL)
|
||||
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
|
||||
|
||||
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
|
||||
mask |= X86_BR_TYPE_SAVE;
|
||||
|
||||
reg->reg = mask;
|
||||
mask = 0;
|
||||
|
||||
for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
|
||||
if (!(br_type & BIT_ULL(i)))
|
||||
continue;
|
||||
|
||||
v = lbr_select_map[i];
|
||||
if (v == LBR_NOT_SUPP)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (v != LBR_IGNORE)
|
||||
mask |= v;
|
||||
}
|
||||
|
||||
/* Filter bits operate in suppress mode */
|
||||
reg->config = mask ^ LBR_SELECT_MASK;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amd_pmu_lbr_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* LBR is not recommended in counting mode */
|
||||
if (!is_sampling_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
ret = amd_pmu_lbr_setup_filter(event);
|
||||
if (!ret)
|
||||
event->attach_state |= PERF_ATTACH_SCHED_CB;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amd_pmu_lbr_reset(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int i;
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
/* Reset all branch records individually */
|
||||
for (i = 0; i < x86_pmu.lbr_nr; i++) {
|
||||
amd_pmu_lbr_set_from(i, 0);
|
||||
amd_pmu_lbr_set_to(i, 0);
|
||||
}
|
||||
|
||||
cpuc->last_task_ctx = NULL;
|
||||
cpuc->last_log_id = 0;
|
||||
wrmsrl(MSR_AMD64_LBR_SELECT, 0);
|
||||
}
|
||||
|
||||
void amd_pmu_lbr_add(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event_extra *reg = &event->hw.branch_reg;
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
cpuc->lbr_select = 1;
|
||||
cpuc->lbr_sel->config = reg->config;
|
||||
cpuc->br_sel = reg->reg;
|
||||
}
|
||||
|
||||
perf_sched_cb_inc(event->ctx->pmu);
|
||||
|
||||
if (!cpuc->lbr_users++ && !event->total_time_running)
|
||||
amd_pmu_lbr_reset();
|
||||
}
|
||||
|
||||
void amd_pmu_lbr_del(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
cpuc->lbr_select = 0;
|
||||
|
||||
cpuc->lbr_users--;
|
||||
WARN_ON_ONCE(cpuc->lbr_users < 0);
|
||||
perf_sched_cb_dec(event->ctx->pmu);
|
||||
}
|
||||
|
||||
/*
 * Context-switch hook: wipe the LBR records when a task is scheduled in
 * and this CPU has LBR users. @ctx is not used.
 */
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!sched_in || !cpuc->lbr_users)
		return;

	/*
	 * A context switch can flip the address space and LBR entries are
	 * not tagged with an identifier. Hence, branches cannot be resolved
	 * from the old address space and the LBR records should be wiped.
	 */
	amd_pmu_lbr_reset();
}
|
||||
|
||||
void amd_pmu_lbr_enable_all(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
u64 lbr_select, dbg_ctl, dbg_extn_cfg;
|
||||
|
||||
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
/* Set hardware branch filter */
|
||||
if (cpuc->lbr_select) {
|
||||
lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
|
||||
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
|
||||
}
|
||||
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
|
||||
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
|
||||
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
||||
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
|
||||
}
|
||||
|
||||
void amd_pmu_lbr_disable_all(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
u64 dbg_ctl, dbg_extn_cfg;
|
||||
|
||||
if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
|
||||
rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
|
||||
|
||||
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
|
||||
wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
|
||||
}
|
||||
|
||||
__init int amd_pmu_lbr_init(void)
|
||||
{
|
||||
union cpuid_0x80000022_ebx ebx;
|
||||
|
||||
if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Set number of entries */
|
||||
ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
|
||||
x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
|
||||
|
||||
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_set_period, *x86_pmu.set_period);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_update, *x86_pmu.update);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
|
||||
@@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event)
|
||||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
|
||||
return x86_pmu.update_topdown_event(event);
|
||||
|
||||
/*
|
||||
* Careful: an NMI might modify the previous event value.
|
||||
*
|
||||
@@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event)
|
||||
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
|
||||
|
||||
if (event->attr.sample_period && x86_pmu.limit_period) {
|
||||
if (x86_pmu.limit_period(event, event->attr.sample_period) >
|
||||
event->attr.sample_period)
|
||||
s64 left = event->attr.sample_period;
|
||||
x86_pmu.limit_period(event, &left);
|
||||
if (left > event->attr.sample_period)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -1354,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu)
|
||||
static_call(x86_pmu_enable_all)(added);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
|
||||
DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
|
||||
|
||||
/*
|
||||
* Set the next IRQ period, based on the hwc->period_left value.
|
||||
@@ -1370,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event)
|
||||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
if (unlikely(is_topdown_count(event)) &&
|
||||
x86_pmu.set_topdown_event_period)
|
||||
return x86_pmu.set_topdown_event_period(event);
|
||||
|
||||
/*
|
||||
* If we are way outside a reasonable range then just skip forward:
|
||||
*/
|
||||
@@ -1399,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event)
|
||||
if (left > x86_pmu.max_period)
|
||||
left = x86_pmu.max_period;
|
||||
|
||||
if (x86_pmu.limit_period)
|
||||
left = x86_pmu.limit_period(event, left);
|
||||
static_call_cond(x86_pmu_limit_period)(event, &left);
|
||||
|
||||
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
|
||||
this_cpu_write(pmc_prev_left[idx], left);
|
||||
|
||||
/*
|
||||
* The hw event starts counting from this event offset,
|
||||
@@ -1419,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event)
|
||||
if (is_counter_pair(hwc))
|
||||
wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
|
||||
|
||||
/*
|
||||
* Due to erratum on certan cpu we need
|
||||
* a second write to be sure the register
|
||||
* is updated properly
|
||||
*/
|
||||
if (x86_pmu.perfctr_second_write) {
|
||||
wrmsrl(hwc->event_base,
|
||||
(u64)(-left) & x86_pmu.cntval_mask);
|
||||
}
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return ret;
|
||||
@@ -1518,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
|
||||
|
||||
if (flags & PERF_EF_RELOAD) {
|
||||
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
|
||||
x86_perf_event_set_period(event);
|
||||
static_call(x86_pmu_set_period)(event);
|
||||
}
|
||||
|
||||
event->hw.state = 0;
|
||||
@@ -1610,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
|
||||
* Drain the remaining delta count out of a event
|
||||
* that we are disabling:
|
||||
*/
|
||||
x86_perf_event_update(event);
|
||||
static_call(x86_pmu_update)(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
}
|
||||
@@ -1700,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
|
||||
|
||||
event = cpuc->events[idx];
|
||||
|
||||
val = x86_perf_event_update(event);
|
||||
val = static_call(x86_pmu_update)(event);
|
||||
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
|
||||
continue;
|
||||
|
||||
@@ -1709,13 +1696,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
|
||||
*/
|
||||
handled++;
|
||||
|
||||
if (!x86_perf_event_set_period(event))
|
||||
if (!static_call(x86_pmu_set_period)(event))
|
||||
continue;
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (has_branch_stack(event))
|
||||
if (has_branch_stack(event)) {
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
@@ -2023,6 +2012,10 @@ static void x86_pmu_static_call_update(void)
|
||||
static_call_update(x86_pmu_del, x86_pmu.del);
|
||||
static_call_update(x86_pmu_read, x86_pmu.read);
|
||||
|
||||
static_call_update(x86_pmu_set_period, x86_pmu.set_period);
|
||||
static_call_update(x86_pmu_update, x86_pmu.update);
|
||||
static_call_update(x86_pmu_limit_period, x86_pmu.limit_period);
|
||||
|
||||
static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
|
||||
static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
|
||||
static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
|
||||
@@ -2042,7 +2035,7 @@ static void x86_pmu_static_call_update(void)
|
||||
|
||||
static void _x86_pmu_read(struct perf_event *event)
|
||||
{
|
||||
x86_perf_event_update(event);
|
||||
static_call(x86_pmu_update)(event);
|
||||
}
|
||||
|
||||
void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
|
||||
@@ -2149,6 +2142,12 @@ static int __init init_hw_perf_events(void)
|
||||
if (!x86_pmu.guest_get_msrs)
|
||||
x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
|
||||
|
||||
if (!x86_pmu.set_period)
|
||||
x86_pmu.set_period = x86_perf_event_set_period;
|
||||
|
||||
if (!x86_pmu.update)
|
||||
x86_pmu.update = x86_perf_event_update;
|
||||
|
||||
x86_pmu_static_call_update();
|
||||
|
||||
/*
|
||||
@@ -2670,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
|
||||
return -EINVAL;
|
||||
|
||||
if (value && x86_pmu.limit_period) {
|
||||
if (x86_pmu.limit_period(event, value) > value)
|
||||
s64 left = value;
|
||||
x86_pmu.limit_period(event, &left);
|
||||
if (left > value)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
@@ -2199,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi)
|
||||
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
|
||||
|
||||
intel_pmu_lbr_enable_all(pmi);
|
||||
|
||||
if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
|
||||
wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
|
||||
cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
|
||||
}
|
||||
|
||||
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
|
||||
intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
|
||||
|
||||
@@ -2311,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void)
|
||||
for (i = 0; i < 4; i++) {
|
||||
event = cpuc->events[i];
|
||||
if (event)
|
||||
x86_perf_event_update(event);
|
||||
static_call(x86_pmu_update)(event);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
@@ -2326,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void)
|
||||
event = cpuc->events[i];
|
||||
|
||||
if (event) {
|
||||
x86_perf_event_set_period(event);
|
||||
static_call(x86_pmu_set_period)(event);
|
||||
__x86_pmu_enable_event(&event->hw,
|
||||
ARCH_PERFMON_EVENTSEL_ENABLE);
|
||||
} else
|
||||
@@ -2416,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
|
||||
|
||||
static void intel_pmu_disable_fixed(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 ctrl_val, mask;
|
||||
int idx = hwc->idx;
|
||||
u64 mask;
|
||||
|
||||
if (is_topdown_idx(idx)) {
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
@@ -2435,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
|
||||
intel_clear_masks(event, idx);
|
||||
|
||||
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
|
||||
rdmsrl(hwc->config_base, ctrl_val);
|
||||
ctrl_val &= ~mask;
|
||||
wrmsrl(hwc->config_base, ctrl_val);
|
||||
cpuc->fixed_ctrl_val &= ~mask;
|
||||
}
|
||||
|
||||
static void intel_pmu_disable_event(struct perf_event *event)
|
||||
@@ -2530,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event)
|
||||
return icl_set_topdown_event_period(event);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
|
||||
|
||||
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
|
||||
{
|
||||
u32 val;
|
||||
@@ -2680,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event)
|
||||
return icl_update_topdown_event(event);
|
||||
}
|
||||
|
||||
DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
|
||||
|
||||
static void intel_pmu_read_topdown_event(struct perf_event *event)
|
||||
{
|
||||
@@ -2691,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
|
||||
return;
|
||||
|
||||
perf_pmu_disable(event->pmu);
|
||||
x86_pmu.update_topdown_event(event);
|
||||
static_call(intel_pmu_update_topdown_event)(event);
|
||||
perf_pmu_enable(event->pmu);
|
||||
}
|
||||
|
||||
@@ -2699,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event)
|
||||
{
|
||||
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
|
||||
intel_pmu_auto_reload_read(event);
|
||||
else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
|
||||
else if (is_topdown_count(event))
|
||||
intel_pmu_read_topdown_event(event);
|
||||
else
|
||||
x86_perf_event_update(event);
|
||||
@@ -2707,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event)
|
||||
|
||||
static void intel_pmu_enable_fixed(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 ctrl_val, mask, bits = 0;
|
||||
u64 mask, bits = 0;
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (is_topdown_idx(idx)) {
|
||||
@@ -2752,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
|
||||
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
|
||||
}
|
||||
|
||||
rdmsrl(hwc->config_base, ctrl_val);
|
||||
ctrl_val &= ~mask;
|
||||
ctrl_val |= bits;
|
||||
wrmsrl(hwc->config_base, ctrl_val);
|
||||
cpuc->fixed_ctrl_val &= ~mask;
|
||||
cpuc->fixed_ctrl_val |= bits;
|
||||
}
|
||||
|
||||
static void intel_pmu_enable_event(struct perf_event *event)
|
||||
@@ -2803,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event)
|
||||
*/
|
||||
int intel_pmu_save_and_restart(struct perf_event *event)
|
||||
{
|
||||
x86_perf_event_update(event);
|
||||
static_call(x86_pmu_update)(event);
|
||||
/*
|
||||
* For a checkpointed counter always reset back to 0. This
|
||||
* avoids a situation where the counter overflows, aborts the
|
||||
@@ -2815,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event)
|
||||
wrmsrl(event->hw.event_base, 0);
|
||||
local64_set(&event->hw.prev_count, 0);
|
||||
}
|
||||
return static_call(x86_pmu_set_period)(event);
|
||||
}
|
||||
|
||||
/*
 * Intel implementation of the set_period callback.
 *
 * Events for which is_topdown_count() is true are dispatched through the
 * intel_pmu_set_topdown_event_period static call; all other events go
 * through x86_perf_event_set_period(). The callee's return value is
 * handed back to the caller unchanged.
 */
static int intel_pmu_set_period(struct perf_event *event)
|
||||
{
|
||||
if (unlikely(is_topdown_count(event)))
|
||||
return static_call(intel_pmu_set_topdown_event_period)(event);
|
||||
|
||||
return x86_perf_event_set_period(event);
|
||||
}
|
||||
|
||||
/*
 * Intel implementation of the update callback.
 *
 * Events for which is_topdown_count() is true are dispatched through the
 * intel_pmu_update_topdown_event static call; all other events go through
 * x86_perf_event_update(). The callee's u64 result is returned unchanged.
 */
static u64 intel_pmu_update(struct perf_event *event)
|
||||
{
|
||||
if (unlikely(is_topdown_count(event)))
|
||||
return static_call(intel_pmu_update_topdown_event)(event);
|
||||
|
||||
return x86_perf_event_update(event);
|
||||
}
|
||||
|
||||
static void intel_pmu_reset(void)
|
||||
{
|
||||
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
|
||||
@@ -2980,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
*/
|
||||
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
|
||||
handled++;
|
||||
if (x86_pmu.update_topdown_event)
|
||||
x86_pmu.update_topdown_event(NULL);
|
||||
static_call(intel_pmu_update_topdown_event)(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3004,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (has_branch_stack(event))
|
||||
if (has_branch_stack(event)) {
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
@@ -3853,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
}
|
||||
if (x86_pmu.pebs_aliases)
|
||||
x86_pmu.pebs_aliases(event);
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
|
||||
}
|
||||
|
||||
if (needs_branch_stack(event)) {
|
||||
@@ -4334,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void)
|
||||
* Therefore the effective (average) period matches the requested period,
|
||||
* despite coarser hardware granularity.
|
||||
*/
|
||||
static u64 bdw_limit_period(struct perf_event *event, u64 left)
|
||||
static void bdw_limit_period(struct perf_event *event, s64 *left)
|
||||
{
|
||||
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
|
||||
X86_CONFIG(.event=0xc0, .umask=0x01)) {
|
||||
if (left < 128)
|
||||
left = 128;
|
||||
left &= ~0x3fULL;
|
||||
if (*left < 128)
|
||||
*left = 128;
|
||||
*left &= ~0x3fULL;
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
static u64 nhm_limit_period(struct perf_event *event, u64 left)
|
||||
static void nhm_limit_period(struct perf_event *event, s64 *left)
|
||||
{
|
||||
return max(left, 32ULL);
|
||||
*left = max(*left, 32LL);
|
||||
}
|
||||
|
||||
static u64 spr_limit_period(struct perf_event *event, u64 left)
|
||||
static void spr_limit_period(struct perf_event *event, s64 *left)
|
||||
{
|
||||
if (event->attr.precise_ip == 3)
|
||||
return max(left, 128ULL);
|
||||
|
||||
return left;
|
||||
*left = max(*left, 128LL);
|
||||
}
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7" );
|
||||
@@ -4794,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = {
|
||||
.add = intel_pmu_add_event,
|
||||
.del = intel_pmu_del_event,
|
||||
.read = intel_pmu_read_event,
|
||||
.set_period = intel_pmu_set_period,
|
||||
.update = intel_pmu_update,
|
||||
.hw_config = intel_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
|
||||
@@ -6312,8 +6332,10 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_skl(pmem);
|
||||
x86_pmu.num_topdown_events = 4;
|
||||
x86_pmu.update_topdown_event = icl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
|
||||
static_call_update(intel_pmu_update_topdown_event,
|
||||
&icl_update_topdown_event);
|
||||
static_call_update(intel_pmu_set_topdown_event_period,
|
||||
&icl_set_topdown_event_period);
|
||||
pr_cont("Icelake events, ");
|
||||
name = "icelake";
|
||||
break;
|
||||
@@ -6348,8 +6370,10 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_skl(pmem);
|
||||
x86_pmu.num_topdown_events = 8;
|
||||
x86_pmu.update_topdown_event = icl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
|
||||
static_call_update(intel_pmu_update_topdown_event,
|
||||
&icl_update_topdown_event);
|
||||
static_call_update(intel_pmu_set_topdown_event_period,
|
||||
&icl_set_topdown_event_period);
|
||||
pr_cont("Sapphire Rapids events, ");
|
||||
name = "sapphire_rapids";
|
||||
break;
|
||||
@@ -6358,6 +6382,7 @@ __init int intel_pmu_init(void)
|
||||
case INTEL_FAM6_ALDERLAKE_L:
|
||||
case INTEL_FAM6_RAPTORLAKE:
|
||||
case INTEL_FAM6_RAPTORLAKE_P:
|
||||
case INTEL_FAM6_RAPTORLAKE_S:
|
||||
/*
|
||||
* Alder Lake has 2 types of CPU, core and atom.
|
||||
*
|
||||
@@ -6382,8 +6407,10 @@ __init int intel_pmu_init(void)
|
||||
intel_pmu_pebs_data_source_adl();
|
||||
x86_pmu.pebs_latency_data = adl_latency_data_small;
|
||||
x86_pmu.num_topdown_events = 8;
|
||||
x86_pmu.update_topdown_event = adl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
|
||||
static_call_update(intel_pmu_update_topdown_event,
|
||||
&adl_update_topdown_event);
|
||||
static_call_update(intel_pmu_set_topdown_event_period,
|
||||
&adl_set_topdown_event_period);
|
||||
|
||||
x86_pmu.filter_match = intel_pmu_filter_match;
|
||||
x86_pmu.get_event_constraints = adl_get_event_constraints;
|
||||
|
||||
@@ -685,6 +685,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
|
||||
{ },
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
|
||||
|
||||
@@ -1540,14 +1540,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
/*
|
||||
* Use latency for weight (only avail with PEBS-LL)
|
||||
*/
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
|
||||
data->weight.full = pebs->lat;
|
||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
|
||||
/*
|
||||
* data.data_src encodes the data source
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC) {
|
||||
data->data_src.val = get_data_src(event, pebs->dse);
|
||||
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
|
||||
}
|
||||
|
||||
/*
|
||||
* We must however always use iregs for the unwinder to stay sane; the
|
||||
@@ -1555,8 +1559,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data->callchain = perf_callchain(event, iregs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use the interrupt regs as a base because the PEBS record does not
|
||||
@@ -1628,17 +1634,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
|
||||
|
||||
if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
|
||||
x86_pmu.intel_cap.pebs_format >= 1)
|
||||
x86_pmu.intel_cap.pebs_format >= 1) {
|
||||
data->addr = pebs->dla;
|
||||
data->sample_flags |= PERF_SAMPLE_ADDR;
|
||||
}
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_format >= 2) {
|
||||
/* Only set the TSX weight when no memory weight. */
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
|
||||
data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION) {
|
||||
data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
|
||||
pebs->ax);
|
||||
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1648,11 +1659,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
* We can only do this for the default trace clock.
|
||||
*/
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3 &&
|
||||
event->attr.use_clockid == 0)
|
||||
event->attr.use_clockid == 0) {
|
||||
data->time = native_sched_clock_from_tsc(pebs->tsc);
|
||||
data->sample_flags |= PERF_SAMPLE_TIME;
|
||||
}
|
||||
|
||||
if (has_branch_stack(event))
|
||||
if (has_branch_stack(event)) {
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
}
|
||||
|
||||
static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
@@ -1710,8 +1725,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||
data->period = event->hw.last_period;
|
||||
|
||||
if (event->attr.use_clockid == 0)
|
||||
if (event->attr.use_clockid == 0) {
|
||||
data->time = native_sched_clock_from_tsc(basic->tsc);
|
||||
data->sample_flags |= PERF_SAMPLE_TIME;
|
||||
}
|
||||
|
||||
/*
|
||||
* We must however always use iregs for the unwinder to stay sane; the
|
||||
@@ -1719,8 +1736,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data->callchain = perf_callchain(event, iregs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
*regs = *iregs;
|
||||
/* The ip in basic is EventingIP */
|
||||
@@ -1771,17 +1790,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
|
||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
||||
}
|
||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC) {
|
||||
data->data_src.val = get_data_src(event, meminfo->aux);
|
||||
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_ADDR_TYPE)
|
||||
if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
|
||||
data->addr = meminfo->address;
|
||||
data->sample_flags |= PERF_SAMPLE_ADDR;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
if (sample_type & PERF_SAMPLE_TRANSACTION) {
|
||||
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
|
||||
gprs ? gprs->ax : 0);
|
||||
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
|
||||
}
|
||||
}
|
||||
|
||||
if (format_size & PEBS_DATACFG_XMMS) {
|
||||
@@ -1800,6 +1826,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
if (has_branch_stack(event)) {
|
||||
intel_pmu_store_pebs_lbrs(lbr);
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
|
||||
@@ -65,65 +64,6 @@
|
||||
|
||||
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
|
||||
|
||||
/*
|
||||
* x86 control flow change classification
|
||||
* x86 control flow changes include branches, interrupts, traps, faults
|
||||
*/
|
||||
enum {
|
||||
X86_BR_NONE = 0, /* unknown */
|
||||
|
||||
X86_BR_USER = 1 << 0, /* branch target is user */
|
||||
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
|
||||
|
||||
X86_BR_CALL = 1 << 2, /* call */
|
||||
X86_BR_RET = 1 << 3, /* return */
|
||||
X86_BR_SYSCALL = 1 << 4, /* syscall */
|
||||
X86_BR_SYSRET = 1 << 5, /* syscall return */
|
||||
X86_BR_INT = 1 << 6, /* sw interrupt */
|
||||
X86_BR_IRET = 1 << 7, /* return from interrupt */
|
||||
X86_BR_JCC = 1 << 8, /* conditional */
|
||||
X86_BR_JMP = 1 << 9, /* jump */
|
||||
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
|
||||
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
|
||||
X86_BR_ABORT = 1 << 12,/* transaction abort */
|
||||
X86_BR_IN_TX = 1 << 13,/* in transaction */
|
||||
X86_BR_NO_TX = 1 << 14,/* not in transaction */
|
||||
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
|
||||
X86_BR_CALL_STACK = 1 << 16,/* call stack */
|
||||
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
|
||||
|
||||
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
|
||||
|
||||
};
|
||||
|
||||
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
|
||||
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
|
||||
|
||||
#define X86_BR_ANY \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_RET |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_SYSRET |\
|
||||
X86_BR_INT |\
|
||||
X86_BR_IRET |\
|
||||
X86_BR_JCC |\
|
||||
X86_BR_JMP |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_ABORT |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_IND_JMP |\
|
||||
X86_BR_ZERO_CALL)
|
||||
|
||||
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
|
||||
|
||||
#define X86_BR_ANY_CALL \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_ZERO_CALL |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_INT)
|
||||
|
||||
/*
|
||||
* Intel LBR_CTL bits
|
||||
*
|
||||
@@ -1151,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* return the type of control flow change at address "from"
|
||||
* instruction is not necessarily a branch (in case of interrupt).
|
||||
*
|
||||
* The branch type returned also includes the priv level of the
|
||||
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
|
||||
*
|
||||
* If a branch type is unknown OR the instruction cannot be
|
||||
* decoded (e.g., text page not present), then X86_BR_NONE is
|
||||
* returned.
|
||||
*/
|
||||
static int branch_type(unsigned long from, unsigned long to, int abort)
|
||||
{
|
||||
struct insn insn;
|
||||
void *addr;
|
||||
int bytes_read, bytes_left;
|
||||
int ret = X86_BR_NONE;
|
||||
int ext, to_plm, from_plm;
|
||||
u8 buf[MAX_INSN_SIZE];
|
||||
int is64 = 0;
|
||||
|
||||
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
|
||||
/*
|
||||
* maybe zero if lbr did not fill up after a reset by the time
|
||||
* we get a PMU interrupt
|
||||
*/
|
||||
if (from == 0 || to == 0)
|
||||
return X86_BR_NONE;
|
||||
|
||||
if (abort)
|
||||
return X86_BR_ABORT | to_plm;
|
||||
|
||||
if (from_plm == X86_BR_USER) {
|
||||
/*
|
||||
* can happen if measuring at the user level only
|
||||
* and we interrupt in a kernel thread, e.g., idle.
|
||||
*/
|
||||
if (!current->mm)
|
||||
return X86_BR_NONE;
|
||||
|
||||
/* may fail if text not present */
|
||||
bytes_left = copy_from_user_nmi(buf, (void __user *)from,
|
||||
MAX_INSN_SIZE);
|
||||
bytes_read = MAX_INSN_SIZE - bytes_left;
|
||||
if (!bytes_read)
|
||||
return X86_BR_NONE;
|
||||
|
||||
addr = buf;
|
||||
} else {
|
||||
/*
|
||||
* The LBR logs any address in the IP, even if the IP just
|
||||
* faulted. This means userspace can control the from address.
|
||||
* Ensure we don't blindly read any address by validating it is
|
||||
* a known text address.
|
||||
*/
|
||||
if (kernel_text_address(from)) {
|
||||
addr = (void *)from;
|
||||
/*
|
||||
* Assume we can get the maximum possible size
|
||||
* when grabbing kernel data. This is not
|
||||
* _strictly_ true since we could possibly be
|
||||
* executing up next to a memory hole, but
|
||||
* it is very unlikely to be a problem.
|
||||
*/
|
||||
bytes_read = MAX_INSN_SIZE;
|
||||
} else {
|
||||
return X86_BR_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* decoder needs to know the ABI especially
|
||||
* on 64-bit systems running 32-bit apps
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
|
||||
#endif
|
||||
insn_init(&insn, addr, bytes_read, is64);
|
||||
if (insn_get_opcode(&insn))
|
||||
return X86_BR_ABORT;
|
||||
|
||||
switch (insn.opcode.bytes[0]) {
|
||||
case 0xf:
|
||||
switch (insn.opcode.bytes[1]) {
|
||||
case 0x05: /* syscall */
|
||||
case 0x34: /* sysenter */
|
||||
ret = X86_BR_SYSCALL;
|
||||
break;
|
||||
case 0x07: /* sysret */
|
||||
case 0x35: /* sysexit */
|
||||
ret = X86_BR_SYSRET;
|
||||
break;
|
||||
case 0x80 ... 0x8f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
break;
|
||||
case 0x70 ... 0x7f: /* conditional */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xc2: /* near ret */
|
||||
case 0xc3: /* near ret */
|
||||
case 0xca: /* far ret */
|
||||
case 0xcb: /* far ret */
|
||||
ret = X86_BR_RET;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
ret = X86_BR_IRET;
|
||||
break;
|
||||
case 0xcc ... 0xce: /* int */
|
||||
ret = X86_BR_INT;
|
||||
break;
|
||||
case 0xe8: /* call near rel */
|
||||
if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
|
||||
/* zero length call */
|
||||
ret = X86_BR_ZERO_CALL;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
case 0x9a: /* call far absolute */
|
||||
ret = X86_BR_CALL;
|
||||
break;
|
||||
case 0xe0 ... 0xe3: /* loop jmp */
|
||||
ret = X86_BR_JCC;
|
||||
break;
|
||||
case 0xe9 ... 0xeb: /* jmp */
|
||||
ret = X86_BR_JMP;
|
||||
break;
|
||||
case 0xff: /* call near absolute, call far absolute ind */
|
||||
if (insn_get_modrm(&insn))
|
||||
return X86_BR_ABORT;
|
||||
|
||||
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
|
||||
switch (ext) {
|
||||
case 2: /* near ind call */
|
||||
case 3: /* far ind call */
|
||||
ret = X86_BR_IND_CALL;
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
ret = X86_BR_IND_JMP;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ret = X86_BR_NONE;
|
||||
}
|
||||
/*
|
||||
* interrupts, traps, faults (and thus ring transition) may
|
||||
* occur on any instructions. Thus, to classify them correctly,
|
||||
* we need to first look at the from and to priv levels. If they
|
||||
* are different and to is in the kernel, then it indicates
|
||||
* a ring transition. If the from instruction is not a ring
|
||||
* transition instr (syscall, sysenter, int), then it means
|
||||
* it was an irq, trap or fault.
|
||||
*
|
||||
* we have no way of detecting kernel to kernel faults.
|
||||
*/
|
||||
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
|
||||
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
|
||||
ret = X86_BR_IRQ;
|
||||
|
||||
/*
|
||||
* branch priv level determined by target as
|
||||
* is done by HW when LBR_SELECT is implemented
|
||||
*/
|
||||
if (ret != X86_BR_NONE)
|
||||
ret |= to_plm;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define X86_BR_TYPE_MAP_MAX 16
|
||||
|
||||
static int branch_map[X86_BR_TYPE_MAP_MAX] = {
|
||||
PERF_BR_CALL, /* X86_BR_CALL */
|
||||
PERF_BR_RET, /* X86_BR_RET */
|
||||
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
|
||||
PERF_BR_SYSRET, /* X86_BR_SYSRET */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_INT */
|
||||
PERF_BR_ERET, /* X86_BR_IRET */
|
||||
PERF_BR_COND, /* X86_BR_JCC */
|
||||
PERF_BR_UNCOND, /* X86_BR_JMP */
|
||||
PERF_BR_IRQ, /* X86_BR_IRQ */
|
||||
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
|
||||
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
|
||||
PERF_BR_IND, /* X86_BR_IND_JMP */
|
||||
};
|
||||
|
||||
static int
|
||||
common_branch_type(int type)
|
||||
{
|
||||
int i;
|
||||
|
||||
type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
|
||||
|
||||
if (type) {
|
||||
i = __ffs(type);
|
||||
if (i < X86_BR_TYPE_MAP_MAX)
|
||||
return branch_map[i];
|
||||
}
|
||||
|
||||
return PERF_BR_UNKNOWN;
|
||||
}
|
||||
|
||||
enum {
|
||||
ARCH_LBR_BR_TYPE_JCC = 0,
|
||||
ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1,
|
||||
|
||||
@@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * P4 implementation of the set_period callback.
 *
 * Calls x86_perf_event_set_period() and then, when hwc->event_base is
 * non-zero, writes (u64)(-left) masked with x86_pmu.cntval_mask to the
 * counter MSR as the extra write described in the erratum comment below.
 * Returns whatever x86_perf_event_set_period() returned.
 *
 * NOTE(review): 'left' is read from pmc_prev_left[hwc->idx] before
 * x86_perf_event_set_period() is called. If that call refreshes
 * pmc_prev_left, the extra write uses the value from before the refresh --
 * confirm this ordering is intended.
 */
static int p4_pmu_set_period(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
s64 left = this_cpu_read(pmc_prev_left[hwc->idx]);
|
||||
int ret;
|
||||
|
||||
ret = x86_perf_event_set_period(event);
|
||||
|
||||
if (hwc->event_base) {
|
||||
/*
|
||||
* This handles erratum N15 in intel doc 249199-029,
|
||||
* the counter may not be updated correctly on write
|
||||
* so we need a second write operation to do the trick
|
||||
* (the official workaround didn't work)
|
||||
*
|
||||
* the former idea is taken from OProfile code
|
||||
*/
|
||||
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int p4_pmu_handle_irq(struct pt_regs *regs)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
@@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
|
||||
/* event overflow for sure */
|
||||
perf_sample_data_init(&data, 0, hwc->last_period);
|
||||
|
||||
if (!x86_perf_event_set_period(event))
|
||||
if (!static_call(x86_pmu_set_period)(event))
|
||||
continue;
|
||||
|
||||
|
||||
@@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = {
|
||||
.enable_all = p4_pmu_enable_all,
|
||||
.enable = p4_pmu_enable_event,
|
||||
.disable = p4_pmu_disable_event,
|
||||
|
||||
.set_period = p4_pmu_set_period,
|
||||
|
||||
.eventsel = MSR_P4_BPU_CCCR0,
|
||||
.perfctr = MSR_P4_BPU_PERFCTR0,
|
||||
.event_map = p4_pmu_event_map,
|
||||
@@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = {
|
||||
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
|
||||
.hw_config = p4_hw_config,
|
||||
.schedule_events = p4_pmu_schedule_events,
|
||||
/*
|
||||
* This handles erratum N15 in intel doc 249199-029,
|
||||
* the counter may not be updated correctly on write
|
||||
* so we need a second write operation to do the trick
|
||||
* (the official workaround didn't work)
|
||||
*
|
||||
* the former idea is taken from OProfile code
|
||||
*/
|
||||
.perfctr_second_write = 1,
|
||||
|
||||
.format_attrs = intel_p4_formats_attr,
|
||||
};
|
||||
|
||||
@@ -1831,6 +1831,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
|
||||
{},
|
||||
|
||||
@@ -106,6 +106,7 @@ static bool test_intel(int idx, void *data)
|
||||
case INTEL_FAM6_ALDERLAKE_N:
|
||||
case INTEL_FAM6_RAPTORLAKE:
|
||||
case INTEL_FAM6_RAPTORLAKE_P:
|
||||
case INTEL_FAM6_RAPTORLAKE_S:
|
||||
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
|
||||
return true;
|
||||
break;
|
||||
|
||||
@@ -64,27 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
|
||||
return ((ecode & c->cmask) - c->code) <= (u64)c->size;
|
||||
}
|
||||
|
||||
#define PERF_ARCH(name, val) \
|
||||
PERF_X86_EVENT_##name = val,
|
||||
|
||||
/*
|
||||
* struct hw_perf_event.flags flags
|
||||
*/
|
||||
#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */
|
||||
#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */
|
||||
#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */
|
||||
#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */
|
||||
#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */
|
||||
enum {
|
||||
#include "perf_event_flags.h"
|
||||
};
|
||||
|
||||
#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */
|
||||
#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */
|
||||
#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */
|
||||
#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */
|
||||
#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */
|
||||
#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */
|
||||
#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
|
||||
#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
|
||||
#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
|
||||
#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */
|
||||
#undef PERF_ARCH
|
||||
|
||||
#define PERF_ARCH(name, val) \
|
||||
static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \
|
||||
PERF_X86_EVENT_##name);
|
||||
|
||||
#include "perf_event_flags.h"
|
||||
|
||||
#undef PERF_ARCH
|
||||
|
||||
static inline bool is_topdown_count(struct perf_event *event)
|
||||
{
|
||||
@@ -272,6 +270,10 @@ struct cpu_hw_events {
|
||||
u64 active_pebs_data_cfg;
|
||||
int pebs_record_size;
|
||||
|
||||
/* Intel Fixed counter configuration */
|
||||
u64 fixed_ctrl_val;
|
||||
u64 active_fixed_ctrl_val;
|
||||
|
||||
/*
|
||||
* Intel LBR bits
|
||||
*/
|
||||
@@ -745,6 +747,8 @@ struct x86_pmu {
|
||||
void (*add)(struct perf_event *);
|
||||
void (*del)(struct perf_event *);
|
||||
void (*read)(struct perf_event *event);
|
||||
int (*set_period)(struct perf_event *event);
|
||||
u64 (*update)(struct perf_event *event);
|
||||
int (*hw_config)(struct perf_event *event);
|
||||
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||
unsigned eventsel;
|
||||
@@ -780,8 +784,7 @@ struct x86_pmu {
|
||||
|
||||
struct event_constraint *event_constraints;
|
||||
struct x86_pmu_quirk *quirks;
|
||||
int perfctr_second_write;
|
||||
u64 (*limit_period)(struct perf_event *event, u64 l);
|
||||
void (*limit_period)(struct perf_event *event, s64 *l);
|
||||
|
||||
/* PMI handler bits */
|
||||
unsigned int late_ack :1,
|
||||
@@ -889,8 +892,6 @@ struct x86_pmu {
|
||||
* Intel perf metrics
|
||||
*/
|
||||
int num_topdown_events;
|
||||
u64 (*update_topdown_event)(struct perf_event *event);
|
||||
int (*set_topdown_event_period)(struct perf_event *event);
|
||||
|
||||
/*
|
||||
* perf task context (i.e. struct perf_event_context::task_ctx_data)
|
||||
@@ -1044,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
|
||||
struct pmu *x86_get_pmu(unsigned int cpu);
|
||||
extern struct x86_pmu x86_pmu __read_mostly;
|
||||
|
||||
DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
|
||||
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
|
||||
|
||||
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
|
||||
{
|
||||
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
|
||||
@@ -1059,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void)
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
|
||||
DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
|
||||
|
||||
int x86_perf_event_set_period(struct perf_event *event);
|
||||
|
||||
@@ -1210,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
|
||||
regs->ip = ip;
|
||||
}
|
||||
|
||||
/*
|
||||
* x86 control flow change classification
|
||||
* x86 control flow changes include branches, interrupts, traps, faults
|
||||
*/
|
||||
enum {
|
||||
X86_BR_NONE = 0, /* unknown */
|
||||
|
||||
X86_BR_USER = 1 << 0, /* branch target is user */
|
||||
X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
|
||||
|
||||
X86_BR_CALL = 1 << 2, /* call */
|
||||
X86_BR_RET = 1 << 3, /* return */
|
||||
X86_BR_SYSCALL = 1 << 4, /* syscall */
|
||||
X86_BR_SYSRET = 1 << 5, /* syscall return */
|
||||
X86_BR_INT = 1 << 6, /* sw interrupt */
|
||||
X86_BR_IRET = 1 << 7, /* return from interrupt */
|
||||
X86_BR_JCC = 1 << 8, /* conditional */
|
||||
X86_BR_JMP = 1 << 9, /* jump */
|
||||
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
|
||||
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
|
||||
X86_BR_ABORT = 1 << 12,/* transaction abort */
|
||||
X86_BR_IN_TX = 1 << 13,/* in transaction */
|
||||
X86_BR_NO_TX = 1 << 14,/* not in transaction */
|
||||
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
|
||||
X86_BR_CALL_STACK = 1 << 16,/* call stack */
|
||||
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
|
||||
|
||||
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
|
||||
|
||||
};
|
||||
|
||||
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
|
||||
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
|
||||
|
||||
#define X86_BR_ANY \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_RET |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_SYSRET |\
|
||||
X86_BR_INT |\
|
||||
X86_BR_IRET |\
|
||||
X86_BR_JCC |\
|
||||
X86_BR_JMP |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_ABORT |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_IND_JMP |\
|
||||
X86_BR_ZERO_CALL)
|
||||
|
||||
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
|
||||
|
||||
#define X86_BR_ANY_CALL \
|
||||
(X86_BR_CALL |\
|
||||
X86_BR_IND_CALL |\
|
||||
X86_BR_ZERO_CALL |\
|
||||
X86_BR_SYSCALL |\
|
||||
X86_BR_IRQ |\
|
||||
X86_BR_INT)
|
||||
|
||||
int common_branch_type(int type);
|
||||
int branch_type(unsigned long from, unsigned long to, int abort);
|
||||
int branch_type_fused(unsigned long from, unsigned long to, int abort,
|
||||
int *offset);
|
||||
|
||||
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
|
||||
ssize_t intel_event_sysfs_show(char *page, u64 config);
|
||||
|
||||
@@ -1232,7 +1301,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
|
||||
|
||||
int amd_pmu_init(void);
|
||||
|
||||
int amd_pmu_lbr_init(void);
|
||||
void amd_pmu_lbr_reset(void);
|
||||
void amd_pmu_lbr_read(void);
|
||||
void amd_pmu_lbr_add(struct perf_event *event);
|
||||
void amd_pmu_lbr_del(struct perf_event *event);
|
||||
void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
|
||||
void amd_pmu_lbr_enable_all(void);
|
||||
void amd_pmu_lbr_disable_all(void);
|
||||
int amd_pmu_lbr_hw_config(struct perf_event *event);
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
|
||||
|
||||
#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
|
||||
|
||||
int amd_brs_init(void);
|
||||
void amd_brs_disable(void);
|
||||
void amd_brs_enable(void);
|
||||
@@ -1241,7 +1323,7 @@ void amd_brs_disable_all(void);
|
||||
void amd_brs_drain(void);
|
||||
void amd_brs_lopwr_init(void);
|
||||
void amd_brs_disable_all(void);
|
||||
int amd_brs_setup_filter(struct perf_event *event);
|
||||
int amd_brs_hw_config(struct perf_event *event);
|
||||
void amd_brs_reset(void);
|
||||
|
||||
static inline void amd_pmu_brs_add(struct perf_event *event)
|
||||
@@ -1277,7 +1359,7 @@ static inline void amd_brs_enable(void) {}
|
||||
static inline void amd_brs_drain(void) {}
|
||||
static inline void amd_brs_lopwr_init(void) {}
|
||||
static inline void amd_brs_disable_all(void) {}
|
||||
static inline int amd_brs_setup_filter(struct perf_event *event)
|
||||
static inline int amd_brs_hw_config(struct perf_event *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
22
arch/x86/events/perf_event_flags.h
Normal file
22
arch/x86/events/perf_event_flags.h
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
/*
|
||||
* struct hw_perf_event.flags flags
|
||||
*/
|
||||
PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
|
||||
PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
|
||||
PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
|
||||
PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
|
||||
PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
|
||||
PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
|
||||
PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
|
||||
/* 0x00080 */
|
||||
PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
|
||||
PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
|
||||
PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
|
||||
PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
|
||||
PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
|
||||
PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
|
||||
PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
|
||||
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
|
||||
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
|
||||
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
|
||||
251
arch/x86/events/utils.c
Normal file
251
arch/x86/events/utils.c
Normal file
@@ -0,0 +1,251 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static int decode_branch_type(struct insn *insn)
|
||||
{
|
||||
int ext;
|
||||
|
||||
if (insn_get_opcode(insn))
|
||||
return X86_BR_ABORT;
|
||||
|
||||
switch (insn->opcode.bytes[0]) {
|
||||
case 0xf:
|
||||
switch (insn->opcode.bytes[1]) {
|
||||
case 0x05: /* syscall */
|
||||
case 0x34: /* sysenter */
|
||||
return X86_BR_SYSCALL;
|
||||
case 0x07: /* sysret */
|
||||
case 0x35: /* sysexit */
|
||||
return X86_BR_SYSRET;
|
||||
case 0x80 ... 0x8f: /* conditional */
|
||||
return X86_BR_JCC;
|
||||
}
|
||||
return X86_BR_NONE;
|
||||
case 0x70 ... 0x7f: /* conditional */
|
||||
return X86_BR_JCC;
|
||||
case 0xc2: /* near ret */
|
||||
case 0xc3: /* near ret */
|
||||
case 0xca: /* far ret */
|
||||
case 0xcb: /* far ret */
|
||||
return X86_BR_RET;
|
||||
case 0xcf: /* iret */
|
||||
return X86_BR_IRET;
|
||||
case 0xcc ... 0xce: /* int */
|
||||
return X86_BR_INT;
|
||||
case 0xe8: /* call near rel */
|
||||
if (insn_get_immediate(insn) || insn->immediate1.value == 0) {
|
||||
/* zero length call */
|
||||
return X86_BR_ZERO_CALL;
|
||||
}
|
||||
fallthrough;
|
||||
case 0x9a: /* call far absolute */
|
||||
return X86_BR_CALL;
|
||||
case 0xe0 ... 0xe3: /* loop jmp */
|
||||
return X86_BR_JCC;
|
||||
case 0xe9 ... 0xeb: /* jmp */
|
||||
return X86_BR_JMP;
|
||||
case 0xff: /* call near absolute, call far absolute ind */
|
||||
if (insn_get_modrm(insn))
|
||||
return X86_BR_ABORT;
|
||||
|
||||
ext = (insn->modrm.bytes[0] >> 3) & 0x7;
|
||||
switch (ext) {
|
||||
case 2: /* near ind call */
|
||||
case 3: /* far ind call */
|
||||
return X86_BR_IND_CALL;
|
||||
case 4:
|
||||
case 5:
|
||||
return X86_BR_IND_JMP;
|
||||
}
|
||||
return X86_BR_NONE;
|
||||
}
|
||||
|
||||
return X86_BR_NONE;
|
||||
}
|
||||
|
||||
/*
|
||||
* return the type of control flow change at address "from"
|
||||
* instruction is not necessarily a branch (in case of interrupt).
|
||||
*
|
||||
* The branch type returned also includes the priv level of the
|
||||
* target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
|
||||
*
|
||||
* If a branch type is unknown OR the instruction cannot be
|
||||
* decoded (e.g., text page not present), then X86_BR_NONE is
|
||||
* returned.
|
||||
*
|
||||
* While recording branches, some processors can report the "from"
|
||||
* address to be that of an instruction preceding the actual branch
|
||||
* when instruction fusion occurs. If fusion is expected, attempt to
|
||||
* find the type of the first branch instruction within the next
|
||||
* MAX_INSN_SIZE bytes and if found, provide the offset between the
|
||||
* reported "from" address and the actual branch instruction address.
|
||||
*/
|
||||
static int get_branch_type(unsigned long from, unsigned long to, int abort,
|
||||
bool fused, int *offset)
|
||||
{
|
||||
struct insn insn;
|
||||
void *addr;
|
||||
int bytes_read, bytes_left, insn_offset;
|
||||
int ret = X86_BR_NONE;
|
||||
int to_plm, from_plm;
|
||||
u8 buf[MAX_INSN_SIZE];
|
||||
int is64 = 0;
|
||||
|
||||
/* make sure we initialize offset */
|
||||
if (offset)
|
||||
*offset = 0;
|
||||
|
||||
to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
|
||||
|
||||
/*
|
||||
* maybe zero if lbr did not fill up after a reset by the time
|
||||
* we get a PMU interrupt
|
||||
*/
|
||||
if (from == 0 || to == 0)
|
||||
return X86_BR_NONE;
|
||||
|
||||
if (abort)
|
||||
return X86_BR_ABORT | to_plm;
|
||||
|
||||
if (from_plm == X86_BR_USER) {
|
||||
/*
|
||||
* can happen if measuring at the user level only
|
||||
* and we interrupt in a kernel thread, e.g., idle.
|
||||
*/
|
||||
if (!current->mm)
|
||||
return X86_BR_NONE;
|
||||
|
||||
/* may fail if text not present */
|
||||
bytes_left = copy_from_user_nmi(buf, (void __user *)from,
|
||||
MAX_INSN_SIZE);
|
||||
bytes_read = MAX_INSN_SIZE - bytes_left;
|
||||
if (!bytes_read)
|
||||
return X86_BR_NONE;
|
||||
|
||||
addr = buf;
|
||||
} else {
|
||||
/*
|
||||
* The LBR logs any address in the IP, even if the IP just
|
||||
* faulted. This means userspace can control the from address.
|
||||
* Ensure we don't blindly read any address by validating it is
|
||||
* a known text address.
|
||||
*/
|
||||
if (kernel_text_address(from)) {
|
||||
addr = (void *)from;
|
||||
/*
|
||||
* Assume we can get the maximum possible size
|
||||
* when grabbing kernel data. This is not
|
||||
* _strictly_ true since we could possibly be
|
||||
* executing up next to a memory hole, but
|
||||
* it is very unlikely to be a problem.
|
||||
*/
|
||||
bytes_read = MAX_INSN_SIZE;
|
||||
} else {
|
||||
return X86_BR_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* decoder needs to know the ABI especially
|
||||
* on 64-bit systems running 32-bit apps
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
|
||||
#endif
|
||||
insn_init(&insn, addr, bytes_read, is64);
|
||||
ret = decode_branch_type(&insn);
|
||||
insn_offset = 0;
|
||||
|
||||
/* Check for the possibility of branch fusion */
|
||||
while (fused && ret == X86_BR_NONE) {
|
||||
/* Check for decoding errors */
|
||||
if (insn_get_length(&insn) || !insn.length)
|
||||
break;
|
||||
|
||||
insn_offset += insn.length;
|
||||
bytes_read -= insn.length;
|
||||
if (bytes_read < 0)
|
||||
break;
|
||||
|
||||
insn_init(&insn, addr + insn_offset, bytes_read, is64);
|
||||
ret = decode_branch_type(&insn);
|
||||
}
|
||||
|
||||
if (offset)
|
||||
*offset = insn_offset;
|
||||
|
||||
/*
|
||||
* interrupts, traps, faults (and thus ring transition) may
|
||||
* occur on any instructions. Thus, to classify them correctly,
|
||||
* we need to first look at the from and to priv levels. If they
|
||||
* are different and to is in the kernel, then it indicates
|
||||
* a ring transition. If the from instruction is not a ring
|
||||
* transition instr (syscall, systenter, int), then it means
|
||||
* it was a irq, trap or fault.
|
||||
*
|
||||
* we have no way of detecting kernel to kernel faults.
|
||||
*/
|
||||
if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
|
||||
&& ret != X86_BR_SYSCALL && ret != X86_BR_INT)
|
||||
ret = X86_BR_IRQ;
|
||||
|
||||
/*
|
||||
* branch priv level determined by target as
|
||||
* is done by HW when LBR_SELECT is implemented
|
||||
*/
|
||||
if (ret != X86_BR_NONE)
|
||||
ret |= to_plm;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int branch_type(unsigned long from, unsigned long to, int abort)
|
||||
{
|
||||
return get_branch_type(from, to, abort, false, NULL);
|
||||
}
|
||||
|
||||
/*
 * Classify the control flow change @from -> @to with fusion handling
 * enabled; @offset is passed through to get_branch_type(), which stores
 * the offset it computed there.
 */
int branch_type_fused(unsigned long from, unsigned long to, int abort,
		      int *offset)
{
	const bool expect_fusion = true;

	return get_branch_type(from, to, abort, expect_fusion, offset);
}
|
||||
|
||||
#define X86_BR_TYPE_MAP_MAX 16
|
||||
|
||||
static int branch_map[X86_BR_TYPE_MAP_MAX] = {
|
||||
PERF_BR_CALL, /* X86_BR_CALL */
|
||||
PERF_BR_RET, /* X86_BR_RET */
|
||||
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
|
||||
PERF_BR_SYSRET, /* X86_BR_SYSRET */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_INT */
|
||||
PERF_BR_ERET, /* X86_BR_IRET */
|
||||
PERF_BR_COND, /* X86_BR_JCC */
|
||||
PERF_BR_UNCOND, /* X86_BR_JMP */
|
||||
PERF_BR_IRQ, /* X86_BR_IRQ */
|
||||
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
|
||||
PERF_BR_NO_TX, /* X86_BR_NO_TX */
|
||||
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
|
||||
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
|
||||
PERF_BR_IND, /* X86_BR_IND_JMP */
|
||||
};
|
||||
|
||||
int common_branch_type(int type)
|
||||
{
|
||||
int i;
|
||||
|
||||
type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
|
||||
|
||||
if (type) {
|
||||
i = __ffs(type);
|
||||
if (i < X86_BR_TYPE_MAP_MAX)
|
||||
return branch_map[i];
|
||||
}
|
||||
|
||||
return PERF_BR_UNKNOWN;
|
||||
}
|
||||
@@ -6,6 +6,22 @@
|
||||
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
/* IBS_OP_DATA2 DataSrc */
|
||||
#define IBS_DATA_SRC_LOC_CACHE 2
|
||||
#define IBS_DATA_SRC_DRAM 3
|
||||
#define IBS_DATA_SRC_REM_CACHE 4
|
||||
#define IBS_DATA_SRC_IO 7
|
||||
|
||||
/* IBS_OP_DATA2 DataSrc Extension */
|
||||
#define IBS_DATA_SRC_EXT_LOC_CACHE 1
|
||||
#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2
|
||||
#define IBS_DATA_SRC_EXT_DRAM 3
|
||||
#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5
|
||||
#define IBS_DATA_SRC_EXT_PMEM 6
|
||||
#define IBS_DATA_SRC_EXT_IO 7
|
||||
#define IBS_DATA_SRC_EXT_EXT_MEM 8
|
||||
#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12
|
||||
|
||||
/*
|
||||
* IBS Hardware MSRs
|
||||
*/
|
||||
|
||||
@@ -96,7 +96,7 @@
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
|
||||
/* FREE! ( 3*32+17) */
|
||||
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
|
||||
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
|
||||
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
|
||||
/* Total number of available HW breakpoint registers */
|
||||
#define HBP_NUM 4
|
||||
|
||||
static inline int hw_breakpoint_slots(int type)
|
||||
{
|
||||
return HBP_NUM;
|
||||
}
|
||||
#define hw_breakpoint_slots(type) (HBP_NUM)
|
||||
|
||||
struct perf_event_attr;
|
||||
struct perf_event;
|
||||
|
||||
@@ -590,6 +590,9 @@
|
||||
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
|
||||
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
|
||||
|
||||
/* AMD Last Branch Record MSRs */
|
||||
#define MSR_AMD64_LBR_SELECT 0xc000010e
|
||||
|
||||
/* Fam 17h MSRs */
|
||||
#define MSR_F17H_IRPERF 0xc00000e9
|
||||
|
||||
@@ -761,6 +764,8 @@
|
||||
#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
|
||||
#define MSR_AMD_SAMP_BR_FROM 0xc0010300
|
||||
|
||||
#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6)
|
||||
|
||||
#define MSR_IA32_MPERF 0x000000e7
|
||||
#define MSR_IA32_APERF 0x000000e8
|
||||
|
||||
|
||||
@@ -207,7 +207,8 @@ union cpuid_0x80000022_ebx {
|
||||
struct {
|
||||
/* Number of Core Performance Counters */
|
||||
unsigned int num_core_pmc:4;
|
||||
unsigned int reserved:6;
|
||||
/* Number of available LBR Stack Entries */
|
||||
unsigned int lbr_v2_stack_sz:6;
|
||||
/* Number of Data Fabric Counters */
|
||||
unsigned int num_df_pmc:6;
|
||||
} split;
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
*/
|
||||
#ifdef CONFIG_64BIT
|
||||
|
||||
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
|
||||
__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
|
||||
#define __pv_queued_spin_unlock __pv_queued_spin_unlock
|
||||
#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
|
||||
#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
|
||||
@@ -20,9 +20,10 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
|
||||
/*
|
||||
* Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
|
||||
* which combines the registers saving thunk and the body of the following
|
||||
* C code:
|
||||
* C code. Note that it puts the code in the .spinlock.text section which
|
||||
* is equivalent to adding __lockfunc in the C code:
|
||||
*
|
||||
* void __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
* {
|
||||
* u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
|
||||
*
|
||||
@@ -36,7 +37,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
|
||||
* rsi = lockval (second argument)
|
||||
* rdx = internal variable (set to 0)
|
||||
*/
|
||||
asm (".pushsection .text;"
|
||||
asm (".pushsection .spinlock.text;"
|
||||
".globl " PV_UNLOCK ";"
|
||||
".type " PV_UNLOCK ", @function;"
|
||||
".align 4,0x90;"
|
||||
@@ -65,8 +66,8 @@ asm (".pushsection .text;"
|
||||
|
||||
#else /* CONFIG_64BIT */
|
||||
|
||||
extern void __pv_queued_spin_unlock(struct qspinlock *lock);
|
||||
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
|
||||
extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
|
||||
__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
|
||||
|
||||
#endif /* CONFIG_64BIT */
|
||||
#endif
|
||||
|
||||
@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = {
|
||||
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
|
||||
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
|
||||
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
|
||||
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
|
||||
{ 0, 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
|
||||
@@ -434,7 +434,7 @@ config ATMEL_TCB_CLKSRC
|
||||
config CLKSRC_EXYNOS_MCT
|
||||
bool "Exynos multi core timer driver" if COMPILE_TEST
|
||||
depends on ARM || ARM64
|
||||
depends on ARCH_EXYNOS || COMPILE_TEST
|
||||
depends on ARCH_ARTPEC || ARCH_EXYNOS || COMPILE_TEST
|
||||
help
|
||||
Support for Multi Core Timer controller on Exynos SoCs.
|
||||
|
||||
|
||||
@@ -44,8 +44,8 @@
|
||||
#define CNTACR_RWVT BIT(4)
|
||||
#define CNTACR_RWPT BIT(5)
|
||||
|
||||
#define CNTVCT_LO 0x00
|
||||
#define CNTPCT_LO 0x08
|
||||
#define CNTPCT_LO 0x00
|
||||
#define CNTVCT_LO 0x08
|
||||
#define CNTFRQ 0x10
|
||||
#define CNTP_CVAL_LO 0x20
|
||||
#define CNTP_CTL 0x2c
|
||||
@@ -473,6 +473,8 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
|
||||
.desc = "ARM erratum 858921",
|
||||
.read_cntpct_el0 = arm64_858921_read_cntpct_el0,
|
||||
.read_cntvct_el0 = arm64_858921_read_cntvct_el0,
|
||||
.set_next_event_phys = erratum_set_next_event_phys,
|
||||
.set_next_event_virt = erratum_set_next_event_virt,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
#define EXYNOS4_MCT_G_INT_ENB EXYNOS4_MCTREG(0x248)
|
||||
#define EXYNOS4_MCT_G_WSTAT EXYNOS4_MCTREG(0x24C)
|
||||
#define _EXYNOS4_MCT_L_BASE EXYNOS4_MCTREG(0x300)
|
||||
#define EXYNOS4_MCT_L_BASE(x) (_EXYNOS4_MCT_L_BASE + (0x100 * x))
|
||||
#define EXYNOS4_MCT_L_BASE(x) (_EXYNOS4_MCT_L_BASE + (0x100 * (x)))
|
||||
#define EXYNOS4_MCT_L_MASK (0xffffff00)
|
||||
|
||||
#define MCT_L_TCNTB_OFFSET (0x00)
|
||||
@@ -66,6 +66,8 @@
|
||||
#define MCT_L0_IRQ 4
|
||||
/* Max number of IRQ as per DT binding document */
|
||||
#define MCT_NR_IRQS 20
|
||||
/* Max number of local timers */
|
||||
#define MCT_NR_LOCAL (MCT_NR_IRQS - MCT_L0_IRQ)
|
||||
|
||||
enum {
|
||||
MCT_INT_SPI,
|
||||
@@ -233,9 +235,16 @@ static cycles_t exynos4_read_current_timer(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __init exynos4_clocksource_init(void)
|
||||
static int __init exynos4_clocksource_init(bool frc_shared)
|
||||
{
|
||||
exynos4_mct_frc_start();
|
||||
/*
|
||||
* When the frc is shared, the main processor should have already
|
||||
* turned it on and we shouldn't be writing to TCON.
|
||||
*/
|
||||
if (frc_shared)
|
||||
mct_frc.resume = NULL;
|
||||
else
|
||||
exynos4_mct_frc_start();
|
||||
|
||||
#if defined(CONFIG_ARM)
|
||||
exynos4_delay_timer.read_current_timer = &exynos4_read_current_timer;
|
||||
@@ -449,7 +458,6 @@ static int exynos4_mct_starting_cpu(unsigned int cpu)
|
||||
per_cpu_ptr(&percpu_mct_tick, cpu);
|
||||
struct clock_event_device *evt = &mevt->evt;
|
||||
|
||||
mevt->base = EXYNOS4_MCT_L_BASE(cpu);
|
||||
snprintf(mevt->name, sizeof(mevt->name), "mct_tick%d", cpu);
|
||||
|
||||
evt->name = mevt->name;
|
||||
@@ -520,8 +528,17 @@ static int __init exynos4_timer_resources(struct device_node *np)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* exynos4_timer_interrupts - initialize MCT interrupts
|
||||
* @np: device node for MCT
|
||||
* @int_type: interrupt type, MCT_INT_PPI or MCT_INT_SPI
|
||||
* @local_idx: array mapping CPU numbers to local timer indices
|
||||
* @nr_local: size of @local_idx array
|
||||
*/
|
||||
static int __init exynos4_timer_interrupts(struct device_node *np,
|
||||
unsigned int int_type)
|
||||
unsigned int int_type,
|
||||
const u32 *local_idx,
|
||||
size_t nr_local)
|
||||
{
|
||||
int nr_irqs, i, err, cpu;
|
||||
|
||||
@@ -554,13 +571,21 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
|
||||
} else {
|
||||
for_each_possible_cpu(cpu) {
|
||||
int mct_irq;
|
||||
unsigned int irq_idx;
|
||||
struct mct_clock_event_device *pcpu_mevt =
|
||||
per_cpu_ptr(&percpu_mct_tick, cpu);
|
||||
|
||||
if (cpu >= nr_local) {
|
||||
err = -EINVAL;
|
||||
goto out_irq;
|
||||
}
|
||||
|
||||
irq_idx = MCT_L0_IRQ + local_idx[cpu];
|
||||
|
||||
pcpu_mevt->evt.irq = -1;
|
||||
if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs))
|
||||
if (irq_idx >= ARRAY_SIZE(mct_irqs))
|
||||
break;
|
||||
mct_irq = mct_irqs[MCT_L0_IRQ + cpu];
|
||||
mct_irq = mct_irqs[irq_idx];
|
||||
|
||||
irq_set_status_flags(mct_irq, IRQ_NOAUTOEN);
|
||||
if (request_irq(mct_irq,
|
||||
@@ -576,6 +601,17 @@ static int __init exynos4_timer_interrupts(struct device_node *np,
|
||||
}
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct mct_clock_event_device *mevt = per_cpu_ptr(&percpu_mct_tick, cpu);
|
||||
|
||||
if (cpu >= nr_local) {
|
||||
err = -EINVAL;
|
||||
goto out_irq;
|
||||
}
|
||||
|
||||
mevt->base = EXYNOS4_MCT_L_BASE(local_idx[cpu]);
|
||||
}
|
||||
|
||||
/* Install hotplug callbacks which configure the timer on this CPU */
|
||||
err = cpuhp_setup_state(CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
|
||||
"clockevents/exynos4/mct_timer:starting",
|
||||
@@ -605,20 +641,49 @@ out_irq:
|
||||
|
||||
static int __init mct_init_dt(struct device_node *np, unsigned int int_type)
|
||||
{
|
||||
bool frc_shared = of_property_read_bool(np, "samsung,frc-shared");
|
||||
u32 local_idx[MCT_NR_LOCAL] = {0};
|
||||
int nr_local;
|
||||
int ret;
|
||||
|
||||
nr_local = of_property_count_u32_elems(np, "samsung,local-timers");
|
||||
if (nr_local == 0)
|
||||
return -EINVAL;
|
||||
if (nr_local > 0) {
|
||||
if (nr_local > ARRAY_SIZE(local_idx))
|
||||
return -EINVAL;
|
||||
|
||||
ret = of_property_read_u32_array(np, "samsung,local-timers",
|
||||
local_idx, nr_local);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
int i;
|
||||
|
||||
nr_local = ARRAY_SIZE(local_idx);
|
||||
for (i = 0; i < nr_local; i++)
|
||||
local_idx[i] = i;
|
||||
}
|
||||
|
||||
ret = exynos4_timer_resources(np);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = exynos4_timer_interrupts(np, int_type);
|
||||
ret = exynos4_timer_interrupts(np, int_type, local_idx, nr_local);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = exynos4_clocksource_init();
|
||||
ret = exynos4_clocksource_init(frc_shared);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* When the FRC is shared with a main processor, this secondary
|
||||
* processor cannot use the global comparator.
|
||||
*/
|
||||
if (frc_shared)
|
||||
return ret;
|
||||
|
||||
return exynos4_clockevent_init();
|
||||
}
|
||||
|
||||
|
||||
@@ -224,7 +224,7 @@ err_free:
|
||||
|
||||
TIMER_OF_DECLARE(ostm, "renesas,ostm", ostm_init);
|
||||
|
||||
#ifdef CONFIG_ARCH_R9A07G044
|
||||
#ifdef CONFIG_ARCH_RZG2L
|
||||
static int __init ostm_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
|
||||
@@ -171,6 +171,7 @@ static int gxp_timer_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct platform_device *gxp_watchdog_device;
|
||||
struct device *dev = &pdev->dev;
|
||||
int ret;
|
||||
|
||||
if (!gxp_timer) {
|
||||
pr_err("Gxp Timer not initialized, cannot create watchdog");
|
||||
@@ -187,7 +188,11 @@ static int gxp_timer_probe(struct platform_device *pdev)
|
||||
gxp_watchdog_device->dev.platform_data = gxp_timer->counter;
|
||||
gxp_watchdog_device->dev.parent = dev;
|
||||
|
||||
return platform_device_add(gxp_watchdog_device);
|
||||
ret = platform_device_add(gxp_watchdog_device);
|
||||
if (ret)
|
||||
platform_device_put(gxp_watchdog_device);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct of_device_id gxp_timer_of_match[] = {
|
||||
|
||||
@@ -134,8 +134,10 @@ static int __init sysctr_timer_init(struct device_node *np)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* system counter clock is divided by 3 internally */
|
||||
to_sysctr.of_clk.rate /= SYS_CTR_CLK_DIV;
|
||||
if (!of_property_read_bool(np, "nxp,no-divider")) {
|
||||
/* system counter clock is divided by 3 internally */
|
||||
to_sysctr.of_clk.rate /= SYS_CTR_CLK_DIV;
|
||||
}
|
||||
|
||||
sys_ctr_base = timer_of_base(&to_sysctr);
|
||||
cmpcr = readl(sys_ctr_base + CMPCR);
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define TIMER_IRQ_EN_REG 0x00
|
||||
#define TIMER_IRQ_EN(val) BIT(val)
|
||||
#define TIMER_IRQ_ST_REG 0x04
|
||||
#define TIMER_IRQ_CLEAR(val) BIT(val)
|
||||
#define TIMER_CTL_REG(val) (0x10 * val + 0x10)
|
||||
#define TIMER_CTL_ENABLE BIT(0)
|
||||
#define TIMER_CTL_RELOAD BIT(1)
|
||||
@@ -123,7 +124,7 @@ static int sun4i_clkevt_next_event(unsigned long evt,
|
||||
|
||||
static void sun4i_timer_clear_interrupt(void __iomem *base)
|
||||
{
|
||||
writel(TIMER_IRQ_EN(0), base + TIMER_IRQ_ST_REG);
|
||||
writel(TIMER_IRQ_CLEAR(0), base + TIMER_IRQ_ST_REG);
|
||||
}
|
||||
|
||||
static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -44,7 +44,9 @@
|
||||
* This allows us to perform the check, i.e, perfmon_capable(),
|
||||
* in the context of the event owner, once, during the event_init().
|
||||
*/
|
||||
#define SPE_PMU_HW_FLAGS_CX BIT(0)
|
||||
#define SPE_PMU_HW_FLAGS_CX 0x00001
|
||||
|
||||
static_assert((PERF_EVENT_FLAG_ARCH & SPE_PMU_HW_FLAGS_CX) == SPE_PMU_HW_FLAGS_CX);
|
||||
|
||||
static void set_spe_event_has_cx(struct perf_event *event)
|
||||
{
|
||||
|
||||
13
fs/dcache.c
13
fs/dcache.c
@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash);
|
||||
|
||||
static inline unsigned start_dir_add(struct inode *dir)
|
||||
{
|
||||
/*
|
||||
* The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
|
||||
* kernels spin_lock() implicitly disables preemption, but not on
|
||||
* PREEMPT_RT. So for RT it has to be done explicitly to protect
|
||||
* the sequence count write side critical section against a reader
|
||||
* or another writer preempting, which would result in a live lock.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
for (;;) {
|
||||
unsigned n = dir->i_dir_seq;
|
||||
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
|
||||
@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
|
||||
wait_queue_head_t *d_wait)
|
||||
{
|
||||
smp_store_release(&dir->i_dir_seq, n + 2);
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
wake_up_all(d_wait);
|
||||
}
|
||||
|
||||
|
||||
@@ -52,10 +52,6 @@
|
||||
#define OMAP_TIMER_TRIGGER_OVERFLOW 0x01
|
||||
#define OMAP_TIMER_TRIGGER_OVERFLOW_AND_COMPARE 0x02
|
||||
|
||||
/* posted mode types */
|
||||
#define OMAP_TIMER_NONPOSTED 0x00
|
||||
#define OMAP_TIMER_POSTED 0x01
|
||||
|
||||
/* timer capabilities used in hwmod database */
|
||||
#define OMAP_TIMER_SECURE 0x80000000
|
||||
#define OMAP_TIMER_ALWON 0x40000000
|
||||
@@ -63,73 +59,13 @@
|
||||
#define OMAP_TIMER_NEEDS_RESET 0x10000000
|
||||
#define OMAP_TIMER_HAS_DSP_IRQ 0x08000000
|
||||
|
||||
/*
|
||||
* timer errata flags
|
||||
*
|
||||
* Errata i103/i767 impacts all OMAP3/4/5 devices including AM33xx. This
|
||||
* errata prevents us from using posted mode on these devices, unless the
|
||||
* timer counter register is never read. For more details please refer to
|
||||
* the OMAP3/4/5 errata documents.
|
||||
*/
|
||||
#define OMAP_TIMER_ERRATA_I103_I767 0x80000000
|
||||
|
||||
struct timer_regs {
|
||||
u32 ocp_cfg;
|
||||
u32 tidr;
|
||||
u32 tier;
|
||||
u32 twer;
|
||||
u32 tclr;
|
||||
u32 tcrr;
|
||||
u32 tldr;
|
||||
u32 ttrg;
|
||||
u32 twps;
|
||||
u32 tmar;
|
||||
u32 tcar1;
|
||||
u32 tsicr;
|
||||
u32 tcar2;
|
||||
u32 tpir;
|
||||
u32 tnir;
|
||||
u32 tcvr;
|
||||
u32 tocr;
|
||||
u32 towr;
|
||||
};
|
||||
|
||||
struct omap_dm_timer {
|
||||
int id;
|
||||
int irq;
|
||||
struct clk *fclk;
|
||||
|
||||
void __iomem *io_base;
|
||||
void __iomem *irq_stat; /* TISR/IRQSTATUS interrupt status */
|
||||
void __iomem *irq_ena; /* irq enable */
|
||||
void __iomem *irq_dis; /* irq disable, only on v2 ip */
|
||||
void __iomem *pend; /* write pending */
|
||||
void __iomem *func_base; /* function register base */
|
||||
|
||||
atomic_t enabled;
|
||||
unsigned long rate;
|
||||
unsigned reserved:1;
|
||||
unsigned posted:1;
|
||||
struct timer_regs context;
|
||||
int revision;
|
||||
u32 capability;
|
||||
u32 errata;
|
||||
struct platform_device *pdev;
|
||||
struct list_head node;
|
||||
struct notifier_block nb;
|
||||
};
|
||||
|
||||
int omap_dm_timer_reserve_systimer(int id);
|
||||
struct omap_dm_timer *omap_dm_timer_request_by_cap(u32 cap);
|
||||
|
||||
int omap_dm_timer_get_irq(struct omap_dm_timer *timer);
|
||||
|
||||
u32 omap_dm_timer_modify_idlect_mask(u32 inputmask);
|
||||
|
||||
int omap_dm_timer_trigger(struct omap_dm_timer *timer);
|
||||
|
||||
int omap_dm_timers_active(void);
|
||||
|
||||
/*
|
||||
* Do not use the defines below, they are not needed. They should be only
|
||||
* used by dmtimer.c and sys_timer related code.
|
||||
@@ -199,52 +135,4 @@ int omap_dm_timers_active(void);
|
||||
#define _OMAP_TIMER_TICK_INT_MASK_SET_OFFSET 0x54 /* TOCR, 34xx only */
|
||||
#define _OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET 0x58 /* TOWR, 34xx only */
|
||||
|
||||
/* register offsets with the write pending bit encoded */
|
||||
#define WPSHIFT 16
|
||||
|
||||
#define OMAP_TIMER_WAKEUP_EN_REG (_OMAP_TIMER_WAKEUP_EN_OFFSET \
|
||||
| (WP_NONE << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_CTRL_REG (_OMAP_TIMER_CTRL_OFFSET \
|
||||
| (WP_TCLR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_COUNTER_REG (_OMAP_TIMER_COUNTER_OFFSET \
|
||||
| (WP_TCRR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_LOAD_REG (_OMAP_TIMER_LOAD_OFFSET \
|
||||
| (WP_TLDR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TRIGGER_REG (_OMAP_TIMER_TRIGGER_OFFSET \
|
||||
| (WP_TTGR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_WRITE_PEND_REG (_OMAP_TIMER_WRITE_PEND_OFFSET \
|
||||
| (WP_NONE << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_MATCH_REG (_OMAP_TIMER_MATCH_OFFSET \
|
||||
| (WP_TMAR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_CAPTURE_REG (_OMAP_TIMER_CAPTURE_OFFSET \
|
||||
| (WP_NONE << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_IF_CTRL_REG (_OMAP_TIMER_IF_CTRL_OFFSET \
|
||||
| (WP_NONE << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_CAPTURE2_REG (_OMAP_TIMER_CAPTURE2_OFFSET \
|
||||
| (WP_NONE << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TICK_POS_REG (_OMAP_TIMER_TICK_POS_OFFSET \
|
||||
| (WP_TPIR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TICK_NEG_REG (_OMAP_TIMER_TICK_NEG_OFFSET \
|
||||
| (WP_TNIR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TICK_COUNT_REG (_OMAP_TIMER_TICK_COUNT_OFFSET \
|
||||
| (WP_TCVR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TICK_INT_MASK_SET_REG \
|
||||
(_OMAP_TIMER_TICK_INT_MASK_SET_OFFSET | (WP_TOCR << WPSHIFT))
|
||||
|
||||
#define OMAP_TIMER_TICK_INT_MASK_COUNT_REG \
|
||||
(_OMAP_TIMER_TICK_INT_MASK_COUNT_OFFSET | (WP_TOWR << WPSHIFT))
|
||||
|
||||
#endif /* __CLOCKSOURCE_DMTIMER_H */
|
||||
|
||||
@@ -35,7 +35,8 @@
|
||||
|
||||
/*
|
||||
* Note: do not use this directly. Instead, use __alloc_size() since it is conditionally
|
||||
* available and includes other attributes.
|
||||
* available and includes other attributes. For GCC < 9.1, __alloc_size__ gets undefined
|
||||
* in compiler-gcc.h, due to misbehaviors.
|
||||
*
|
||||
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute
|
||||
* clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size
|
||||
|
||||
@@ -271,14 +271,16 @@ struct ftrace_likely_data {
|
||||
|
||||
/*
|
||||
* Any place that could be marked with the "alloc_size" attribute is also
|
||||
* a place to be marked with the "malloc" attribute. Do this as part of the
|
||||
* __alloc_size macro to avoid redundant attributes and to avoid missing a
|
||||
* __malloc marking.
|
||||
* a place to be marked with the "malloc" attribute, except those that may
|
||||
* be performing a _reallocation_, as that may alias the existing pointer.
|
||||
* For these, use __realloc_size().
|
||||
*/
|
||||
#ifdef __alloc_size__
|
||||
# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc
|
||||
# define __realloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__)
|
||||
#else
|
||||
# define __alloc_size(x, ...) __malloc
|
||||
# define __realloc_size(x, ...)
|
||||
#endif
|
||||
|
||||
#ifndef asm_volatile_goto
|
||||
|
||||
@@ -74,12 +74,12 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
|
||||
extern int register_perf_hw_breakpoint(struct perf_event *bp);
|
||||
extern void unregister_hw_breakpoint(struct perf_event *bp);
|
||||
extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
|
||||
extern bool hw_breakpoint_is_used(void);
|
||||
|
||||
extern int dbg_reserve_bp_slot(struct perf_event *bp);
|
||||
extern int dbg_release_bp_slot(struct perf_event *bp);
|
||||
extern int reserve_bp_slot(struct perf_event *bp);
|
||||
extern void release_bp_slot(struct perf_event *bp);
|
||||
int hw_breakpoint_weight(struct perf_event *bp);
|
||||
int arch_reserve_bp_slot(struct perf_event *bp);
|
||||
void arch_release_bp_slot(struct perf_event *bp);
|
||||
void arch_unregister_hw_breakpoint(struct perf_event *bp);
|
||||
@@ -121,6 +121,8 @@ register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
|
||||
static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
|
||||
static inline void
|
||||
unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { }
|
||||
static inline bool hw_breakpoint_is_used(void) { return false; }
|
||||
|
||||
static inline int
|
||||
reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
|
||||
static inline void release_bp_slot(struct perf_event *bp) { }
|
||||
|
||||
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
|
||||
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM_IRQSOFF
|
||||
#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
|
||||
#else
|
||||
#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_VIRTUAL
|
||||
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
|
||||
#else
|
||||
|
||||
@@ -121,9 +121,15 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
|
||||
extern void percpu_down_write(struct percpu_rw_semaphore *);
|
||||
extern void percpu_up_write(struct percpu_rw_semaphore *);
|
||||
|
||||
static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
return atomic_read(&sem->block);
|
||||
}
|
||||
|
||||
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
|
||||
const char *, struct lock_class_key *);
|
||||
|
||||
|
||||
@@ -24,10 +24,11 @@
|
||||
/*
|
||||
* ARM PMU hw_event flags
|
||||
*/
|
||||
/* Event uses a 64bit counter */
|
||||
#define ARMPMU_EVT_64BIT 1
|
||||
/* Event uses a 47bit counter */
|
||||
#define ARMPMU_EVT_47BIT 2
|
||||
#define ARMPMU_EVT_64BIT 0x00001 /* Event uses a 64bit counter */
|
||||
#define ARMPMU_EVT_47BIT 0x00002 /* Event uses a 47bit counter */
|
||||
|
||||
static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_64BIT) == ARMPMU_EVT_64BIT);
|
||||
static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_47BIT) == ARMPMU_EVT_47BIT);
|
||||
|
||||
#define HW_OP_UNSUPPORTED 0xFFFF
|
||||
#define C(_x) PERF_COUNT_HW_CACHE_##_x
|
||||
|
||||
@@ -36,6 +36,7 @@ struct perf_guest_info_callbacks {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
#include <linux/rhashtable-types.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
#endif
|
||||
|
||||
@@ -60,6 +61,7 @@ struct perf_guest_info_callbacks {
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/static_call.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
struct perf_callchain_entry {
|
||||
@@ -137,9 +139,11 @@ struct hw_perf_event_extra {
|
||||
* PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
|
||||
* usage.
|
||||
*/
|
||||
#define PERF_EVENT_FLAG_ARCH 0x0000ffff
|
||||
#define PERF_EVENT_FLAG_ARCH 0x000fffff
|
||||
#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
|
||||
|
||||
static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
|
||||
|
||||
/**
|
||||
* struct hw_perf_event - performance event hardware details:
|
||||
*/
|
||||
@@ -178,7 +182,7 @@ struct hw_perf_event {
|
||||
* creation and event initialization.
|
||||
*/
|
||||
struct arch_hw_breakpoint info;
|
||||
struct list_head bp_list;
|
||||
struct rhlist_head bp_list;
|
||||
};
|
||||
#endif
|
||||
struct { /* amd_iommu */
|
||||
@@ -631,7 +635,23 @@ struct pmu_event_list {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/*
|
||||
* event->sibling_list is modified while holding both ctx->lock and ctx->mutex
|
||||
* as such iteration must hold either lock. However, since ctx->lock is an IRQ
|
||||
* safe lock, and is only held by the CPU doing the modification, having IRQs
|
||||
* disabled is sufficient since it will hold-off the IPIs.
|
||||
*/
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
#define lockdep_assert_event_ctx(event) \
|
||||
WARN_ON_ONCE(__lockdep_enabled && \
|
||||
(this_cpu_read(hardirqs_enabled) && \
|
||||
lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
|
||||
#else
|
||||
#define lockdep_assert_event_ctx(event)
|
||||
#endif
|
||||
|
||||
#define for_each_sibling_event(sibling, event) \
|
||||
lockdep_assert_event_ctx(event); \
|
||||
if ((event)->group_leader == (event)) \
|
||||
list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
|
||||
|
||||
@@ -1007,18 +1027,20 @@ struct perf_sample_data {
|
||||
* Fields set by perf_sample_data_init(), group so as to
|
||||
* minimize the cachelines touched.
|
||||
*/
|
||||
u64 addr;
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
u64 sample_flags;
|
||||
u64 period;
|
||||
union perf_sample_weight weight;
|
||||
u64 txn;
|
||||
union perf_mem_data_src data_src;
|
||||
|
||||
/*
|
||||
* The other fields, optionally {set,used} by
|
||||
* perf_{prepare,output}_sample().
|
||||
*/
|
||||
struct perf_branch_stack *br_stack;
|
||||
union perf_sample_weight weight;
|
||||
union perf_mem_data_src data_src;
|
||||
u64 txn;
|
||||
u64 addr;
|
||||
struct perf_raw_record *raw;
|
||||
|
||||
u64 type;
|
||||
u64 ip;
|
||||
struct {
|
||||
@@ -1056,13 +1078,13 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
u64 addr, u64 period)
|
||||
{
|
||||
/* remaining struct members initialized in perf_prepare_sample() */
|
||||
data->addr = addr;
|
||||
data->raw = NULL;
|
||||
data->br_stack = NULL;
|
||||
data->sample_flags = PERF_SAMPLE_PERIOD;
|
||||
data->period = period;
|
||||
data->weight.full = 0;
|
||||
data->data_src.val = PERF_MEM_NA;
|
||||
data->txn = 0;
|
||||
|
||||
if (addr) {
|
||||
data->addr = addr;
|
||||
data->sample_flags |= PERF_SAMPLE_ADDR;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1078,6 +1100,7 @@ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *b
|
||||
br->abort = 0;
|
||||
br->cycles = 0;
|
||||
br->type = 0;
|
||||
br->spec = PERF_BR_SPEC_NA;
|
||||
br->reserved = 0;
|
||||
}
|
||||
|
||||
@@ -1702,4 +1725,30 @@ static inline void perf_lopwr_cb(bool mode)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
static inline bool branch_sample_no_flags(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_no_cycles(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_type(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_hw_index(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_priv(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
#endif /* _LINUX_PERF_EVENT_H */
|
||||
|
||||
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/**
|
||||
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
|
||||
*
|
||||
* Use for code which requires preemption protection inside a critical
|
||||
* section which has preemption disabled implicitly on non-PREEMPT_RT
|
||||
* enabled kernels, by e.g.:
|
||||
* - holding a spinlock/rwlock
|
||||
* - soft interrupt context
|
||||
* - regular interrupt handlers
|
||||
*
|
||||
* On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
|
||||
* interrupt context and regular interrupt handlers are preemptible and
|
||||
* only prevent migration. preempt_disable_nested() ensures that preemption
|
||||
* is disabled for cases which require CPU local serialization even on
|
||||
* PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
|
||||
*
|
||||
* The use cases are code sequences which are not serialized by a
|
||||
* particular lock instance, e.g.:
|
||||
* - seqcount write side critical sections where the seqcount is not
|
||||
* associated to a particular lock and therefore the automatic
|
||||
* protection mechanism does not work. This prevents a live lock
|
||||
* against a preempting high priority reader.
|
||||
* - RMW per CPU variable updates like vmstat.
|
||||
*/
|
||||
/* Macro to avoid header recursion hell vs. lockdep */
|
||||
#define preempt_disable_nested() \
|
||||
do { \
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT)) \
|
||||
preempt_disable(); \
|
||||
else \
|
||||
lockdep_assert_preemption_disabled(); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* preempt_enable_nested - Undo the effect of preempt_disable_nested()
|
||||
*/
|
||||
static __always_inline void preempt_enable_nested(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#endif /* __LINUX_PREEMPT_H */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __LINUX_RWLOCK_H
|
||||
#define __LINUX_RWLOCK_H
|
||||
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#ifndef __LINUX_INSIDE_SPINLOCK_H
|
||||
# error "please don't include this file directly"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U)
|
||||
/* DEBUG: Poison objects */
|
||||
#define SLAB_POISON ((slab_flags_t __force)0x00000800U)
|
||||
/* Indicate a kmalloc slab */
|
||||
#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U)
|
||||
/* Align objs on cache lines */
|
||||
#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U)
|
||||
/* Use GFP_DMA memory */
|
||||
@@ -184,11 +186,25 @@ int kmem_cache_shrink(struct kmem_cache *s);
|
||||
/*
|
||||
* Common kmalloc functions provided by all allocators
|
||||
*/
|
||||
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
|
||||
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
|
||||
void kfree(const void *objp);
|
||||
void kfree_sensitive(const void *objp);
|
||||
size_t __ksize(const void *objp);
|
||||
|
||||
/**
|
||||
* ksize - Report actual allocation size of associated object
|
||||
*
|
||||
* @objp: Pointer returned from a prior kmalloc()-family allocation.
|
||||
*
|
||||
* This should not be used for writing beyond the originally requested
|
||||
* allocation size. Either use krealloc() or round up the allocation size
|
||||
* with kmalloc_size_roundup() prior to allocation. If this is used to
|
||||
* access beyond the originally requested allocation size, UBSAN_BOUNDS
|
||||
* and/or FORTIFY_SOURCE may trip, since they only know about the
|
||||
* originally allocated size via the __alloc_size attribute.
|
||||
*/
|
||||
size_t ksize(const void *objp);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
bool kmem_valid_obj(void *object);
|
||||
void kmem_dump_obj(void *object);
|
||||
@@ -243,27 +259,17 @@ static inline unsigned int arch_slab_minalign(void)
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
/*
|
||||
* The largest kmalloc size supported by the SLAB allocators is
|
||||
* 32 megabyte (2^25) or the maximum allocatable page order if that is
|
||||
* less than 32 MB.
|
||||
*
|
||||
* WARNING: Its not easy to increase this value since the allocators have
|
||||
* to do various tricks to work around compiler limitations in order to
|
||||
* ensure proper constant folding.
|
||||
* SLAB and SLUB directly allocate requests fitting into an order-1 page
|
||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
|
||||
(MAX_ORDER + PAGE_SHIFT - 1) : 25)
|
||||
#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 5
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
/*
|
||||
* SLUB directly allocates requests fitting in to an order-1 page
|
||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
@@ -415,10 +421,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
|
||||
if (size <= 512 * 1024) return 19;
|
||||
if (size <= 1024 * 1024) return 20;
|
||||
if (size <= 2 * 1024 * 1024) return 21;
|
||||
if (size <= 4 * 1024 * 1024) return 22;
|
||||
if (size <= 8 * 1024 * 1024) return 23;
|
||||
if (size <= 16 * 1024 * 1024) return 24;
|
||||
if (size <= 32 * 1024 * 1024) return 25;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
|
||||
BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
|
||||
@@ -428,6 +430,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
|
||||
/* Will never be reached. Needed because the compiler may complain */
|
||||
return -1;
|
||||
}
|
||||
static_assert(PAGE_SHIFT <= 20);
|
||||
#define kmalloc_index(s) __kmalloc_index(s, true)
|
||||
#endif /* !CONFIG_SLOB */
|
||||
|
||||
@@ -456,42 +459,22 @@ static __always_inline void kfree_bulk(size_t size, void **p)
|
||||
kmem_cache_free_bulk(NULL, size, p);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
|
||||
__alloc_size(1);
|
||||
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
|
||||
__malloc;
|
||||
#else
|
||||
static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __kmalloc(size, flags);
|
||||
}
|
||||
|
||||
static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node)
|
||||
{
|
||||
return kmem_cache_alloc(s, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
|
||||
__assume_slab_alignment __alloc_size(3);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size) __assume_slab_alignment
|
||||
__alloc_size(4);
|
||||
#else
|
||||
static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
||||
gfp_t gfpflags, int node, size_t size)
|
||||
{
|
||||
return kmem_cache_alloc_trace(s, gfpflags, size);
|
||||
}
|
||||
#endif /* CONFIG_NUMA */
|
||||
void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
|
||||
__assume_kmalloc_alignment __alloc_size(3);
|
||||
|
||||
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size) __assume_kmalloc_alignment
|
||||
__alloc_size(4);
|
||||
#else /* CONFIG_TRACING */
|
||||
static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
|
||||
gfp_t flags, size_t size)
|
||||
/* Save a function call when CONFIG_TRACING=n */
|
||||
static __always_inline __alloc_size(3)
|
||||
void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
|
||||
{
|
||||
void *ret = kmem_cache_alloc(s, flags);
|
||||
|
||||
@@ -499,8 +482,9 @@ static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size)
|
||||
static __always_inline __alloc_size(4)
|
||||
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size)
|
||||
{
|
||||
void *ret = kmem_cache_alloc_node(s, gfpflags, node);
|
||||
|
||||
@@ -509,25 +493,11 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g
|
||||
}
|
||||
#endif /* CONFIG_TRACING */
|
||||
|
||||
extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
|
||||
__alloc_size(1);
|
||||
void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
|
||||
__alloc_size(1);
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
||||
__assume_page_alignment __alloc_size(1);
|
||||
#else
|
||||
static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags,
|
||||
unsigned int order)
|
||||
{
|
||||
return kmalloc_order(size, flags, order);
|
||||
}
|
||||
#endif
|
||||
|
||||
static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags)
|
||||
{
|
||||
unsigned int order = get_order(size);
|
||||
return kmalloc_order_trace(size, flags, order);
|
||||
}
|
||||
void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment
|
||||
__alloc_size(1);
|
||||
|
||||
/**
|
||||
* kmalloc - allocate memory
|
||||
@@ -597,7 +567,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
|
||||
if (!index)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
return kmem_cache_alloc_trace(
|
||||
return kmalloc_trace(
|
||||
kmalloc_caches[kmalloc_type(flags)][index],
|
||||
flags, size);
|
||||
#endif
|
||||
@@ -605,23 +575,35 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
|
||||
return __kmalloc(size, flags);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_SLOB
|
||||
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
#ifndef CONFIG_SLOB
|
||||
if (__builtin_constant_p(size) &&
|
||||
size <= KMALLOC_MAX_CACHE_SIZE) {
|
||||
unsigned int i = kmalloc_index(size);
|
||||
if (__builtin_constant_p(size)) {
|
||||
unsigned int index;
|
||||
|
||||
if (!i)
|
||||
if (size > KMALLOC_MAX_CACHE_SIZE)
|
||||
return kmalloc_large_node(size, flags, node);
|
||||
|
||||
index = kmalloc_index(size);
|
||||
|
||||
if (!index)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
return kmem_cache_alloc_node_trace(
|
||||
kmalloc_caches[kmalloc_type(flags)][i],
|
||||
flags, node, size);
|
||||
return kmalloc_node_trace(
|
||||
kmalloc_caches[kmalloc_type(flags)][index],
|
||||
flags, node, size);
|
||||
}
|
||||
#endif
|
||||
return __kmalloc_node(size, flags, node);
|
||||
}
|
||||
#else
|
||||
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
|
||||
return kmalloc_large_node(size, flags, node);
|
||||
|
||||
return __kmalloc_node(size, flags, node);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kmalloc_array - allocate memory for an array.
|
||||
@@ -647,10 +629,10 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_
|
||||
* @new_size: new size of a single member of the array
|
||||
* @flags: the type of memory to allocate (see kmalloc)
|
||||
*/
|
||||
static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
|
||||
size_t new_n,
|
||||
size_t new_size,
|
||||
gfp_t flags)
|
||||
static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
|
||||
size_t new_n,
|
||||
size_t new_size,
|
||||
gfp_t flags)
|
||||
{
|
||||
size_t bytes;
|
||||
|
||||
@@ -671,6 +653,12 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag
|
||||
return kmalloc_array(n, size, flags | __GFP_ZERO);
|
||||
}
|
||||
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
|
||||
unsigned long caller) __alloc_size(1);
|
||||
#define kmalloc_node_track_caller(size, flags, node) \
|
||||
__kmalloc_node_track_caller(size, flags, node, \
|
||||
_RET_IP_)
|
||||
|
||||
/*
|
||||
* kmalloc_track_caller is a special version of kmalloc that records the
|
||||
* calling function of the routine calling it for slab leak tracking instead
|
||||
@@ -679,9 +667,9 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag
|
||||
* allocator where we care about the real place the memory allocation
|
||||
* request comes from.
|
||||
*/
|
||||
extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
|
||||
#define kmalloc_track_caller(size, flags) \
|
||||
__kmalloc_track_caller(size, flags, _RET_IP_)
|
||||
__kmalloc_node_track_caller(size, flags, \
|
||||
NUMA_NO_NODE, _RET_IP_)
|
||||
|
||||
static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
|
||||
int node)
|
||||
@@ -700,21 +688,6 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t
|
||||
return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
|
||||
unsigned long caller) __alloc_size(1);
|
||||
#define kmalloc_node_track_caller(size, flags, node) \
|
||||
__kmalloc_node_track_caller(size, flags, node, \
|
||||
_RET_IP_)
|
||||
|
||||
#else /* CONFIG_NUMA */
|
||||
|
||||
#define kmalloc_node_track_caller(size, flags, node) \
|
||||
kmalloc_track_caller(size, flags)
|
||||
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
/*
|
||||
* Shortcuts
|
||||
*/
|
||||
@@ -774,11 +747,28 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla
|
||||
}
|
||||
|
||||
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
|
||||
__alloc_size(3);
|
||||
__realloc_size(3);
|
||||
extern void kvfree(const void *addr);
|
||||
extern void kvfree_sensitive(const void *addr, size_t len);
|
||||
|
||||
unsigned int kmem_cache_size(struct kmem_cache *s);
|
||||
|
||||
/**
|
||||
* kmalloc_size_roundup - Report allocation bucket size for the given size
|
||||
*
|
||||
* @size: Number of bytes to round up from.
|
||||
*
|
||||
* This returns the number of bytes that would be available in a kmalloc()
|
||||
* allocation of @size bytes. For example, a 126 byte request would be
|
||||
* rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
|
||||
* for the general-purpose kmalloc()-based allocations, and is not for the
|
||||
* pre-sized kmem_cache_alloc()-based allocations.)
|
||||
*
|
||||
* Use this to kmalloc() the full bucket size ahead of time instead of using
|
||||
* ksize() to query the size after an allocation.
|
||||
*/
|
||||
size_t kmalloc_size_roundup(size_t size);
|
||||
|
||||
void __init kmem_cache_init_late(void);
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#define __LINUX_SPINLOCK_H
|
||||
#define __LINUX_INSIDE_SPINLOCK_H
|
||||
|
||||
/*
|
||||
* include/linux/spinlock.h - generic spinlock/rwlock declarations
|
||||
@@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
|
||||
|
||||
void free_bucket_spinlocks(spinlock_t *locks);
|
||||
|
||||
#undef __LINUX_INSIDE_SPINLOCK_H
|
||||
#endif /* __LINUX_SPINLOCK_H */
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __LINUX_SPINLOCK_API_SMP_H
|
||||
#define __LINUX_SPINLOCK_API_SMP_H
|
||||
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#ifndef __LINUX_INSIDE_SPINLOCK_H
|
||||
# error "please don't include this file directly"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __LINUX_SPINLOCK_API_UP_H
|
||||
#define __LINUX_SPINLOCK_API_UP_H
|
||||
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#ifndef __LINUX_INSIDE_SPINLOCK_H
|
||||
# error "please don't include this file directly"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#ifndef __LINUX_SPINLOCK_RT_H
|
||||
#define __LINUX_SPINLOCK_RT_H
|
||||
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#ifndef __LINUX_INSIDE_SPINLOCK_H
|
||||
#error Do not include directly. Use spinlock.h
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __LINUX_SPINLOCK_UP_H
|
||||
#define __LINUX_SPINLOCK_UP_H
|
||||
|
||||
#ifndef __LINUX_SPINLOCK_H
|
||||
#ifndef __LINUX_INSIDE_SPINLOCK_H
|
||||
# error "please don't include this file directly"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Key points :
|
||||
*
|
||||
* - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
|
||||
* - Use a seqcount on 32-bit
|
||||
* - The whole thing is a no-op on 64-bit architectures.
|
||||
*
|
||||
* Usage constraints:
|
||||
@@ -20,7 +20,8 @@
|
||||
* writer and also spin forever.
|
||||
*
|
||||
* 3) Write side must use the _irqsave() variant if other writers, or a reader,
|
||||
* can be invoked from an IRQ context.
|
||||
* can be invoked from an IRQ context. On 64bit systems this variant does not
|
||||
* disable interrupts.
|
||||
*
|
||||
* 4) If reader fetches several counters, there is no guarantee the whole values
|
||||
* are consistent w.r.t. each other (remember point #2: seqcounts are not
|
||||
@@ -29,11 +30,6 @@
|
||||
* 5) Readers are allowed to sleep or be preempted/interrupted: they perform
|
||||
* pure reads.
|
||||
*
|
||||
* 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
|
||||
* might be updated from a hardirq or softirq context (remember point #1:
|
||||
* seqcounts are not used for UP kernels). 32-bit UP stat readers could read
|
||||
* corrupted 64-bit values otherwise.
|
||||
*
|
||||
* Usage :
|
||||
*
|
||||
* Stats producer (writer) should use following template granted it already got
|
||||
@@ -66,7 +62,7 @@
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
struct u64_stats_sync {
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
#if BITS_PER_LONG == 32
|
||||
seqcount_t seq;
|
||||
#endif
|
||||
};
|
||||
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
|
||||
local64_inc(&p->v);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
|
||||
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
|
||||
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
|
||||
static inline unsigned long __u64_stats_irqsave(void) { return 0; }
|
||||
static inline void __u64_stats_irqrestore(unsigned long flags) { }
|
||||
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
|
||||
unsigned int start)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#else /* 64 bit */
|
||||
|
||||
typedef struct {
|
||||
u64 v;
|
||||
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
|
||||
{
|
||||
p->v++;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
|
||||
#else
|
||||
static inline void u64_stats_init(struct u64_stats_sync *syncp)
|
||||
{
|
||||
seqcount_init(&syncp->seq);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
|
||||
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
write_seqcount_begin(&syncp->seq);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
|
||||
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
write_seqcount_end(&syncp->seq);
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
#endif
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
|
||||
static inline unsigned long __u64_stats_irqsave(void)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
unsigned long flags;
|
||||
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
else
|
||||
local_irq_save(flags);
|
||||
write_seqcount_begin(&syncp->seq);
|
||||
#endif
|
||||
local_irq_save(flags);
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline void
|
||||
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
|
||||
unsigned long flags)
|
||||
static inline void __u64_stats_irqrestore(unsigned long flags)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
write_seqcount_end(&syncp->seq);
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
else
|
||||
local_irq_restore(flags);
|
||||
#endif
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
return read_seqcount_begin(&syncp->seq);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
|
||||
unsigned int start)
|
||||
{
|
||||
return read_seqcount_retry(&syncp->seq, start);
|
||||
}
|
||||
#endif /* !64 bit */
|
||||
|
||||
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
|
||||
{
|
||||
__u64_stats_update_begin(syncp);
|
||||
}
|
||||
|
||||
static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
|
||||
{
|
||||
__u64_stats_update_end(syncp);
|
||||
}
|
||||
|
||||
static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
|
||||
{
|
||||
unsigned long flags = __u64_stats_irqsave();
|
||||
|
||||
__u64_stats_update_begin(syncp);
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
|
||||
unsigned long flags)
|
||||
{
|
||||
__u64_stats_update_end(syncp);
|
||||
__u64_stats_irqrestore(flags);
|
||||
}
|
||||
|
||||
static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
#endif
|
||||
return __u64_stats_fetch_begin(syncp);
|
||||
}
|
||||
|
||||
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
|
||||
unsigned int start)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
|
||||
return read_seqcount_retry(&syncp->seq, start);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
|
||||
unsigned int start)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
#endif
|
||||
return __u64_stats_fetch_retry(syncp, start);
|
||||
}
|
||||
|
||||
/*
|
||||
* In case irq handlers can update u64 counters, readers can use following helpers
|
||||
* - SMP 32bit arches use seqcount protection, irq safe.
|
||||
* - UP 32bit must disable irqs.
|
||||
* - 64bit have no problem atomically reading u64 values, irq safe.
|
||||
*/
|
||||
/* Obsolete interfaces */
|
||||
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
|
||||
preempt_disable();
|
||||
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
|
||||
local_irq_disable();
|
||||
#endif
|
||||
return __u64_stats_fetch_begin(syncp);
|
||||
return u64_stats_fetch_begin(syncp);
|
||||
}
|
||||
|
||||
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
|
||||
unsigned int start)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
|
||||
preempt_enable();
|
||||
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
|
||||
local_irq_enable();
|
||||
#endif
|
||||
return __u64_stats_fetch_retry(syncp, start);
|
||||
return u64_stats_fetch_retry(syncp, start);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_U64_STATS_SYNC_H */
|
||||
|
||||
@@ -9,73 +9,15 @@
|
||||
#include <linux/tracepoint.h>
|
||||
#include <trace/events/mmflags.h>
|
||||
|
||||
DECLARE_EVENT_CLASS(kmem_alloc,
|
||||
TRACE_EVENT(kmem_cache_alloc,
|
||||
|
||||
TP_PROTO(unsigned long call_site,
|
||||
const void *ptr,
|
||||
struct kmem_cache *s,
|
||||
size_t bytes_req,
|
||||
size_t bytes_alloc,
|
||||
gfp_t gfp_flags),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, call_site )
|
||||
__field( const void *, ptr )
|
||||
__field( size_t, bytes_req )
|
||||
__field( size_t, bytes_alloc )
|
||||
__field( unsigned long, gfp_flags )
|
||||
__field( bool, accounted )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->call_site = call_site;
|
||||
__entry->ptr = ptr;
|
||||
__entry->bytes_req = bytes_req;
|
||||
__entry->bytes_alloc = bytes_alloc;
|
||||
__entry->gfp_flags = (__force unsigned long)gfp_flags;
|
||||
__entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
|
||||
((gfp_flags & __GFP_ACCOUNT) ||
|
||||
(s && s->flags & SLAB_ACCOUNT)) : false;
|
||||
),
|
||||
|
||||
TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
|
||||
(void *)__entry->call_site,
|
||||
__entry->ptr,
|
||||
__entry->bytes_req,
|
||||
__entry->bytes_alloc,
|
||||
show_gfp_flags(__entry->gfp_flags),
|
||||
__entry->accounted ? "true" : "false")
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kmem_alloc, kmalloc,
|
||||
|
||||
TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
|
||||
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
|
||||
|
||||
TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s,
|
||||
size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(kmem_alloc_node,
|
||||
|
||||
TP_PROTO(unsigned long call_site,
|
||||
const void *ptr,
|
||||
struct kmem_cache *s,
|
||||
size_t bytes_req,
|
||||
size_t bytes_alloc,
|
||||
gfp_t gfp_flags,
|
||||
int node),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node),
|
||||
TP_ARGS(call_site, ptr, s, gfp_flags, node),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, call_site )
|
||||
@@ -90,13 +32,13 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
|
||||
TP_fast_assign(
|
||||
__entry->call_site = call_site;
|
||||
__entry->ptr = ptr;
|
||||
__entry->bytes_req = bytes_req;
|
||||
__entry->bytes_alloc = bytes_alloc;
|
||||
__entry->bytes_req = s->object_size;
|
||||
__entry->bytes_alloc = s->size;
|
||||
__entry->gfp_flags = (__force unsigned long)gfp_flags;
|
||||
__entry->node = node;
|
||||
__entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
|
||||
((gfp_flags & __GFP_ACCOUNT) ||
|
||||
(s && s->flags & SLAB_ACCOUNT)) : false;
|
||||
(s->flags & SLAB_ACCOUNT)) : false;
|
||||
),
|
||||
|
||||
TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
|
||||
@@ -109,22 +51,44 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
|
||||
__entry->accounted ? "true" : "false")
|
||||
);
|
||||
|
||||
DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
|
||||
TRACE_EVENT(kmalloc,
|
||||
|
||||
TP_PROTO(unsigned long call_site, const void *ptr,
|
||||
struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
|
||||
gfp_t gfp_flags, int node),
|
||||
TP_PROTO(unsigned long call_site,
|
||||
const void *ptr,
|
||||
size_t bytes_req,
|
||||
size_t bytes_alloc,
|
||||
gfp_t gfp_flags,
|
||||
int node),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
|
||||
);
|
||||
TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
|
||||
|
||||
DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, call_site )
|
||||
__field( const void *, ptr )
|
||||
__field( size_t, bytes_req )
|
||||
__field( size_t, bytes_alloc )
|
||||
__field( unsigned long, gfp_flags )
|
||||
__field( int, node )
|
||||
),
|
||||
|
||||
TP_PROTO(unsigned long call_site, const void *ptr,
|
||||
struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc,
|
||||
gfp_t gfp_flags, int node),
|
||||
TP_fast_assign(
|
||||
__entry->call_site = call_site;
|
||||
__entry->ptr = ptr;
|
||||
__entry->bytes_req = bytes_req;
|
||||
__entry->bytes_alloc = bytes_alloc;
|
||||
__entry->gfp_flags = (__force unsigned long)gfp_flags;
|
||||
__entry->node = node;
|
||||
),
|
||||
|
||||
TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node)
|
||||
TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s",
|
||||
(void *)__entry->call_site,
|
||||
__entry->ptr,
|
||||
__entry->bytes_req,
|
||||
__entry->bytes_alloc,
|
||||
show_gfp_flags(__entry->gfp_flags),
|
||||
__entry->node,
|
||||
(IS_ENABLED(CONFIG_MEMCG_KMEM) &&
|
||||
(__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false")
|
||||
);
|
||||
|
||||
TRACE_EVENT(kfree,
|
||||
@@ -149,20 +113,20 @@ TRACE_EVENT(kfree,
|
||||
|
||||
TRACE_EVENT(kmem_cache_free,
|
||||
|
||||
TP_PROTO(unsigned long call_site, const void *ptr, const char *name),
|
||||
TP_PROTO(unsigned long call_site, const void *ptr, const struct kmem_cache *s),
|
||||
|
||||
TP_ARGS(call_site, ptr, name),
|
||||
TP_ARGS(call_site, ptr, s),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, call_site )
|
||||
__field( const void *, ptr )
|
||||
__string( name, name )
|
||||
__string( name, s->name )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->call_site = call_site;
|
||||
__entry->ptr = ptr;
|
||||
__assign_str(name, name);
|
||||
__assign_str(name, s->name);
|
||||
),
|
||||
|
||||
TP_printk("call_site=%pS ptr=%p name=%s",
|
||||
|
||||
@@ -164,8 +164,6 @@ enum perf_event_sample_format {
|
||||
PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
|
||||
|
||||
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
|
||||
};
|
||||
|
||||
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
|
||||
@@ -204,6 +202,8 @@ enum perf_branch_sample_type_shift {
|
||||
|
||||
PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
|
||||
|
||||
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
|
||||
|
||||
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
|
||||
};
|
||||
|
||||
@@ -233,6 +233,8 @@ enum perf_branch_sample_type {
|
||||
|
||||
PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
|
||||
|
||||
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
|
||||
|
||||
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
|
||||
};
|
||||
|
||||
@@ -253,9 +255,48 @@ enum {
|
||||
PERF_BR_COND_RET = 10, /* conditional function return */
|
||||
PERF_BR_ERET = 11, /* exception return */
|
||||
PERF_BR_IRQ = 12, /* irq */
|
||||
PERF_BR_SERROR = 13, /* system error */
|
||||
PERF_BR_NO_TX = 14, /* not in transaction */
|
||||
PERF_BR_EXTEND_ABI = 15, /* extend ABI */
|
||||
PERF_BR_MAX,
|
||||
};
|
||||
|
||||
/*
|
||||
* Common branch speculation outcome classification
|
||||
*/
|
||||
enum {
|
||||
PERF_BR_SPEC_NA = 0, /* Not available */
|
||||
PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
|
||||
PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
|
||||
PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
|
||||
PERF_BR_SPEC_MAX,
|
||||
};
|
||||
|
||||
enum {
|
||||
PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
|
||||
PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
|
||||
PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
|
||||
PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
|
||||
PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
|
||||
PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
|
||||
PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
|
||||
PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
|
||||
PERF_BR_NEW_MAX,
|
||||
};
|
||||
|
||||
enum {
|
||||
PERF_BR_PRIV_UNKNOWN = 0,
|
||||
PERF_BR_PRIV_USER = 1,
|
||||
PERF_BR_PRIV_KERNEL = 2,
|
||||
PERF_BR_PRIV_HV = 3,
|
||||
};
|
||||
|
||||
#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
|
||||
#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
|
||||
#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
|
||||
#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
|
||||
#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
|
||||
|
||||
#define PERF_SAMPLE_BRANCH_PLM_ALL \
|
||||
(PERF_SAMPLE_BRANCH_USER|\
|
||||
PERF_SAMPLE_BRANCH_KERNEL|\
|
||||
@@ -1295,7 +1336,9 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
|
||||
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
|
||||
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
|
||||
/* 5-0xa available */
|
||||
/* 5-0x8 available */
|
||||
#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */
|
||||
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
|
||||
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
|
||||
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
|
||||
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
|
||||
@@ -1313,7 +1356,7 @@ union perf_mem_data_src {
|
||||
#define PERF_MEM_SNOOP_SHIFT 19
|
||||
|
||||
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
|
||||
/* 1 free */
|
||||
#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
|
||||
#define PERF_MEM_SNOOPX_SHIFT 38
|
||||
|
||||
/* locked instruction */
|
||||
@@ -1363,6 +1406,7 @@ union perf_mem_data_src {
|
||||
* abort: aborting a hardware transaction
|
||||
* cycles: cycles from last branch (or 0 if not supported)
|
||||
* type: branch type
|
||||
* spec: branch speculation info (or 0 if not supported)
|
||||
*/
|
||||
struct perf_branch_entry {
|
||||
__u64 from;
|
||||
@@ -1373,7 +1417,10 @@ struct perf_branch_entry {
|
||||
abort:1, /* transaction abort */
|
||||
cycles:16, /* cycle count to last branch */
|
||||
type:4, /* branch type */
|
||||
reserved:40;
|
||||
spec:2, /* branch speculation info */
|
||||
new_type:4, /* additional branch type */
|
||||
priv:3, /* privilege level */
|
||||
reserved:31;
|
||||
};
|
||||
|
||||
union perf_sample_weight {
|
||||
|
||||
@@ -338,7 +338,7 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
|
||||
int ret;
|
||||
|
||||
/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
|
||||
if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
|
||||
if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
|
||||
return bpf_get_stackid((unsigned long)(ctx->regs),
|
||||
(unsigned long) map, flags, 0, 0);
|
||||
|
||||
@@ -506,7 +506,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
|
||||
int err = -EINVAL;
|
||||
__u64 nr_kernel;
|
||||
|
||||
if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
|
||||
if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
|
||||
return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
|
||||
|
||||
@@ -2,4 +2,5 @@
|
||||
obj-y := core.o ring_buffer.o callchain.o
|
||||
|
||||
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
|
||||
obj-$(CONFIG_HW_BREAKPOINT_KUNIT_TEST) += hw_breakpoint_test.o
|
||||
obj-$(CONFIG_UPROBES) += uprobes.o
|
||||
|
||||
@@ -1468,6 +1468,8 @@ static void __update_context_time(struct perf_event_context *ctx, bool adv)
|
||||
{
|
||||
u64 now = perf_clock();
|
||||
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
if (adv)
|
||||
ctx->time += now - ctx->timestamp;
|
||||
ctx->timestamp = now;
|
||||
@@ -2224,16 +2226,22 @@ static inline int __pmu_filter_match(struct perf_event *event)
|
||||
static inline int pmu_filter_match(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *sibling;
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
if (!__pmu_filter_match(event))
|
||||
return 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
for_each_sibling_event(sibling, event) {
|
||||
if (!__pmu_filter_match(sibling))
|
||||
return 0;
|
||||
if (!__pmu_filter_match(sibling)) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int
|
||||
@@ -6780,11 +6788,10 @@ out_put:
|
||||
|
||||
static void __perf_event_header__init_id(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event)
|
||||
struct perf_event *event,
|
||||
u64 sample_type)
|
||||
{
|
||||
u64 sample_type = event->attr.sample_type;
|
||||
|
||||
data->type = sample_type;
|
||||
data->type = event->attr.sample_type;
|
||||
header->size += event->id_header_size;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TID) {
|
||||
@@ -6813,7 +6820,7 @@ void perf_event_header__init_id(struct perf_event_header *header,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (event->attr.sample_id_all)
|
||||
__perf_event_header__init_id(header, data, event);
|
||||
__perf_event_header__init_id(header, data, event, event->attr.sample_type);
|
||||
}
|
||||
|
||||
static void __perf_event__output_id_sample(struct perf_output_handle *handle,
|
||||
@@ -6962,11 +6969,6 @@ static void perf_output_read(struct perf_output_handle *handle,
|
||||
perf_output_read_one(handle, event, enabled, running);
|
||||
}
|
||||
|
||||
static inline bool perf_sample_save_hw_index(struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
|
||||
}
|
||||
|
||||
void perf_output_sample(struct perf_output_handle *handle,
|
||||
struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
@@ -7048,14 +7050,14 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (data->br_stack) {
|
||||
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
|
||||
size_t size;
|
||||
|
||||
size = data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
|
||||
perf_output_put(handle, data->br_stack->nr);
|
||||
if (perf_sample_save_hw_index(event))
|
||||
if (branch_sample_hw_index(event))
|
||||
perf_output_put(handle, data->br_stack->hw_idx);
|
||||
perf_output_copy(handle, data->br_stack->entries, size);
|
||||
} else {
|
||||
@@ -7298,6 +7300,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
u64 sample_type = event->attr.sample_type;
|
||||
u64 filtered_sample_type;
|
||||
|
||||
header->type = PERF_RECORD_SAMPLE;
|
||||
header->size = sizeof(*header) + event->header_size;
|
||||
@@ -7305,7 +7308,12 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
header->misc = 0;
|
||||
header->misc |= perf_misc_flags(regs);
|
||||
|
||||
__perf_event_header__init_id(header, data, event);
|
||||
/*
|
||||
* Clear the sample flags that have already been done by the
|
||||
* PMU driver.
|
||||
*/
|
||||
filtered_sample_type = sample_type & ~data->sample_flags;
|
||||
__perf_event_header__init_id(header, data, event, filtered_sample_type);
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
|
||||
data->ip = perf_instruction_pointer(regs);
|
||||
@@ -7313,7 +7321,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
int size = 1;
|
||||
|
||||
if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
|
||||
if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
|
||||
size += data->callchain->nr;
|
||||
@@ -7325,7 +7333,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
struct perf_raw_record *raw = data->raw;
|
||||
int size;
|
||||
|
||||
if (raw) {
|
||||
if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
|
||||
struct perf_raw_frag *frag = &raw->frag;
|
||||
u32 sum = 0;
|
||||
|
||||
@@ -7341,6 +7349,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
frag->pad = raw->size - sum;
|
||||
} else {
|
||||
size = sizeof(u64);
|
||||
data->raw = NULL;
|
||||
}
|
||||
|
||||
header->size += size;
|
||||
@@ -7348,8 +7357,8 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
int size = sizeof(u64); /* nr */
|
||||
if (data->br_stack) {
|
||||
if (perf_sample_save_hw_index(event))
|
||||
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (branch_sample_hw_index(event))
|
||||
size += sizeof(u64);
|
||||
|
||||
size += data->br_stack->nr
|
||||
@@ -7398,6 +7407,20 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
header->size += size;
|
||||
}
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
|
||||
data->weight.full = 0;
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
data->data_src.val = PERF_MEM_NA;
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
data->txn = 0;
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_ADDR)
|
||||
data->addr = 0;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_REGS_INTR) {
|
||||
/* regs dump ABI info */
|
||||
int size = sizeof(u64);
|
||||
@@ -7413,7 +7436,8 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
header->size += size;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||
if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
|
||||
filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||
data->phys_addr = perf_virt_to_phys(data->addr);
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
@@ -9984,8 +10008,16 @@ static void bpf_overflow_handler(struct perf_event *event,
|
||||
goto out;
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(event->prog);
|
||||
if (prog)
|
||||
if (prog) {
|
||||
if (prog->call_get_stack &&
|
||||
(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
|
||||
!(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
ret = bpf_prog_run(prog, &ctx);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
@@ -10011,7 +10043,7 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
|
||||
|
||||
if (event->attr.precise_ip &&
|
||||
prog->call_get_stack &&
|
||||
(!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
|
||||
(!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
|
||||
event->attr.exclude_callchain_kernel ||
|
||||
event->attr.exclude_callchain_user)) {
|
||||
/*
|
||||
@@ -10928,7 +10960,7 @@ static ssize_t nr_addr_filters_show(struct device *dev,
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
|
||||
return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
|
||||
}
|
||||
DEVICE_ATTR_RO(nr_addr_filters);
|
||||
|
||||
@@ -10939,7 +10971,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
|
||||
return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->type);
|
||||
}
|
||||
static DEVICE_ATTR_RO(type);
|
||||
|
||||
@@ -10950,7 +10982,7 @@ perf_event_mux_interval_ms_show(struct device *dev,
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
|
||||
return scnprintf(page, PAGE_SIZE - 1, "%d\n", pmu->hrtimer_interval_ms);
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(mux_interval_mutex);
|
||||
@@ -11704,11 +11736,9 @@ err_pmu:
|
||||
event->destroy(event);
|
||||
module_put(pmu->module);
|
||||
err_ns:
|
||||
if (event->ns)
|
||||
put_pid_ns(event->ns);
|
||||
if (event->hw.target)
|
||||
put_task_struct(event->hw.target);
|
||||
kmem_cache_free(perf_event_cache, event);
|
||||
call_rcu(&event->rcu_head, free_event_rcu);
|
||||
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
@@ -17,61 +17,276 @@
|
||||
* This file contains the arch-independent routines.
|
||||
*/
|
||||
|
||||
#include <linux/hw_breakpoint.h>
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/percpu-rwsem.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/bug.h>
|
||||
|
||||
#include <linux/hw_breakpoint.h>
|
||||
/*
|
||||
* Constraints data
|
||||
* Datastructure to track the total uses of N slots across tasks or CPUs;
|
||||
* bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
|
||||
*/
|
||||
struct bp_slots_histogram {
|
||||
#ifdef hw_breakpoint_slots
|
||||
atomic_t count[hw_breakpoint_slots(0)];
|
||||
#else
|
||||
atomic_t *count;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-CPU constraints data.
|
||||
*/
|
||||
struct bp_cpuinfo {
|
||||
/* Number of pinned cpu breakpoints in a cpu */
|
||||
unsigned int cpu_pinned;
|
||||
/* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
|
||||
unsigned int *tsk_pinned;
|
||||
/* Number of non-pinned cpu/task breakpoints in a cpu */
|
||||
unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
|
||||
/* Number of pinned CPU breakpoints in a CPU. */
|
||||
unsigned int cpu_pinned;
|
||||
/* Histogram of pinned task breakpoints in a CPU. */
|
||||
struct bp_slots_histogram tsk_pinned;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
|
||||
static int nr_slots[TYPE_MAX];
|
||||
|
||||
static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
|
||||
{
|
||||
return per_cpu_ptr(bp_cpuinfo + type, cpu);
|
||||
}
|
||||
|
||||
/* Number of pinned CPU breakpoints globally. */
|
||||
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
|
||||
/* Number of pinned CPU-independent task breakpoints. */
|
||||
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];
|
||||
|
||||
/* Keep track of the breakpoints attached to tasks */
|
||||
static LIST_HEAD(bp_task_head);
|
||||
|
||||
static int constraints_initialized;
|
||||
|
||||
/* Gather the number of total pinned and un-pinned bp in a cpuset */
|
||||
struct bp_busy_slots {
|
||||
unsigned int pinned;
|
||||
unsigned int flexible;
|
||||
static struct rhltable task_bps_ht;
|
||||
static const struct rhashtable_params task_bps_ht_params = {
|
||||
.head_offset = offsetof(struct hw_perf_event, bp_list),
|
||||
.key_offset = offsetof(struct hw_perf_event, target),
|
||||
.key_len = sizeof_field(struct hw_perf_event, target),
|
||||
.automatic_shrinking = true,
|
||||
};
|
||||
|
||||
/* Serialize accesses to the above constraints */
|
||||
static DEFINE_MUTEX(nr_bp_mutex);
|
||||
static bool constraints_initialized __ro_after_init;
|
||||
|
||||
__weak int hw_breakpoint_weight(struct perf_event *bp)
|
||||
/*
|
||||
* Synchronizes accesses to the per-CPU constraints; the locking rules are:
|
||||
*
|
||||
* 1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
|
||||
* (due to bp_slots_histogram::count being atomic, no update are lost).
|
||||
*
|
||||
* 2. Holding a write-lock is required for computations that require a
|
||||
* stable snapshot of all bp_cpuinfo::tsk_pinned.
|
||||
*
|
||||
* 3. In all other cases, non-atomic accesses require the appropriately held
|
||||
* lock (read-lock for read-only accesses; write-lock for reads/writes).
|
||||
*/
|
||||
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);
|
||||
|
||||
/*
|
||||
* Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
|
||||
* rhltable synchronizes concurrent insertions/deletions, independent tasks may
|
||||
* insert/delete concurrently; therefore, a mutex per task is sufficient.
|
||||
*
|
||||
* Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
|
||||
* hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
|
||||
* that hw_breakpoint may contend with per-task perf event list management. The
|
||||
* assumption is that perf usecases involving hw_breakpoints are very unlikely
|
||||
* to result in unnecessary contention.
|
||||
*/
|
||||
static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
|
||||
{
|
||||
struct task_struct *tsk = bp->hw.target;
|
||||
|
||||
return tsk ? &tsk->perf_event_mutex : NULL;
|
||||
}
|
||||
|
||||
static struct mutex *bp_constraints_lock(struct perf_event *bp)
|
||||
{
|
||||
struct mutex *tsk_mtx = get_task_bps_mutex(bp);
|
||||
|
||||
if (tsk_mtx) {
|
||||
/*
|
||||
* Fully analogous to the perf_try_init_event() nesting
|
||||
* argument in the comment near perf_event_ctx_lock_nested();
|
||||
* this child->perf_event_mutex cannot ever deadlock against
|
||||
* the parent->perf_event_mutex usage from
|
||||
* perf_event_task_{en,dis}able().
|
||||
*
|
||||
* Specifically, inherited events will never occur on
|
||||
* ->perf_event_list.
|
||||
*/
|
||||
mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
|
||||
percpu_down_read(&bp_cpuinfo_sem);
|
||||
} else {
|
||||
percpu_down_write(&bp_cpuinfo_sem);
|
||||
}
|
||||
|
||||
return tsk_mtx;
|
||||
}
|
||||
|
||||
static void bp_constraints_unlock(struct mutex *tsk_mtx)
|
||||
{
|
||||
if (tsk_mtx) {
|
||||
percpu_up_read(&bp_cpuinfo_sem);
|
||||
mutex_unlock(tsk_mtx);
|
||||
} else {
|
||||
percpu_up_write(&bp_cpuinfo_sem);
|
||||
}
|
||||
}
|
||||
|
||||
static bool bp_constraints_is_locked(struct perf_event *bp)
|
||||
{
|
||||
struct mutex *tsk_mtx = get_task_bps_mutex(bp);
|
||||
|
||||
return percpu_is_write_locked(&bp_cpuinfo_sem) ||
|
||||
(tsk_mtx ? mutex_is_locked(tsk_mtx) :
|
||||
percpu_is_read_locked(&bp_cpuinfo_sem));
|
||||
}
|
||||
|
||||
static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
|
||||
{
|
||||
struct mutex *tsk_mtx = get_task_bps_mutex(bp);
|
||||
|
||||
if (tsk_mtx)
|
||||
lockdep_assert_held(tsk_mtx);
|
||||
lockdep_assert_held(&bp_cpuinfo_sem);
|
||||
}
|
||||
|
||||
#ifdef hw_breakpoint_slots
|
||||
/*
|
||||
* Number of breakpoint slots is constant, and the same for all types.
|
||||
*/
|
||||
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
|
||||
static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); }
|
||||
static inline int init_breakpoint_slots(void) { return 0; }
|
||||
#else
|
||||
/*
|
||||
* Dynamic number of breakpoint slots.
|
||||
*/
|
||||
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
|
||||
|
||||
static inline int hw_breakpoint_slots_cached(int type)
|
||||
{
|
||||
return __nr_bp_slots[type];
|
||||
}
|
||||
|
||||
static __init bool
|
||||
bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
|
||||
{
|
||||
hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
|
||||
return hist->count;
|
||||
}
|
||||
|
||||
static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
|
||||
{
|
||||
kfree(hist->count);
|
||||
}
|
||||
|
||||
static __init int init_breakpoint_slots(void)
|
||||
{
|
||||
int i, cpu, err_cpu;
|
||||
|
||||
for (i = 0; i < TYPE_MAX; i++)
|
||||
__nr_bp_slots[i] = hw_breakpoint_slots(i);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
for (i = 0; i < TYPE_MAX; i++) {
|
||||
struct bp_cpuinfo *info = get_bp_info(cpu, i);
|
||||
|
||||
if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < TYPE_MAX; i++) {
|
||||
if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
|
||||
goto err;
|
||||
if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
for_each_possible_cpu(err_cpu) {
|
||||
for (i = 0; i < TYPE_MAX; i++)
|
||||
bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
|
||||
if (err_cpu == cpu)
|
||||
break;
|
||||
}
|
||||
for (i = 0; i < TYPE_MAX; i++) {
|
||||
bp_slots_histogram_free(&cpu_pinned[i]);
|
||||
bp_slots_histogram_free(&tsk_pinned_all[i]);
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
|
||||
{
|
||||
const int old_idx = old - 1;
|
||||
const int new_idx = old_idx + val;
|
||||
|
||||
if (old_idx >= 0)
|
||||
WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
|
||||
if (new_idx >= 0)
|
||||
WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
|
||||
}
|
||||
|
||||
static int
|
||||
bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
|
||||
{
|
||||
for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
|
||||
const int count = atomic_read(&hist->count[i]);
|
||||
|
||||
/* Catch unexpected writers; we want a stable snapshot. */
|
||||
ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
|
||||
if (count > 0)
|
||||
return i + 1;
|
||||
WARN(count < 0, "inconsistent breakpoint slots histogram");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
|
||||
enum bp_type_idx type)
|
||||
{
|
||||
for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
|
||||
const int count1 = atomic_read(&hist1->count[i]);
|
||||
const int count2 = atomic_read(&hist2->count[i]);
|
||||
|
||||
/* Catch unexpected writers; we want a stable snapshot. */
|
||||
ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
|
||||
ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
|
||||
if (count1 + count2 > 0)
|
||||
return i + 1;
|
||||
WARN(count1 < 0, "inconsistent breakpoint slots histogram");
|
||||
WARN(count2 < 0, "inconsistent breakpoint slots histogram");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef hw_breakpoint_weight
|
||||
static inline int hw_breakpoint_weight(struct perf_event *bp)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline enum bp_type_idx find_slot_idx(u64 bp_type)
|
||||
{
|
||||
@@ -82,39 +297,61 @@ static inline enum bp_type_idx find_slot_idx(u64 bp_type)
|
||||
}
|
||||
|
||||
/*
|
||||
* Report the maximum number of pinned breakpoints a task
|
||||
* have in this cpu
|
||||
* Return the maximum number of pinned breakpoints a task has in this CPU.
|
||||
*/
|
||||
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
|
||||
{
|
||||
unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
|
||||
int i;
|
||||
struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;
|
||||
|
||||
for (i = nr_slots[type] - 1; i >= 0; i--) {
|
||||
if (tsk_pinned[i] > 0)
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
/*
|
||||
* At this point we want to have acquired the bp_cpuinfo_sem as a
|
||||
* writer to ensure that there are no concurrent writers in
|
||||
* toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot.
|
||||
*/
|
||||
lockdep_assert_held_write(&bp_cpuinfo_sem);
|
||||
return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Count the number of breakpoints of the same type and same task.
|
||||
* The given event must be not on the list.
|
||||
*
|
||||
* If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
|
||||
* returns a negative value.
|
||||
*/
|
||||
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
|
||||
{
|
||||
struct task_struct *tsk = bp->hw.target;
|
||||
struct rhlist_head *head, *pos;
|
||||
struct perf_event *iter;
|
||||
int count = 0;
|
||||
|
||||
list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
|
||||
if (iter->hw.target == tsk &&
|
||||
find_slot_idx(iter->attr.bp_type) == type &&
|
||||
(iter->cpu < 0 || cpu == iter->cpu))
|
||||
count += hw_breakpoint_weight(iter);
|
||||
/*
|
||||
* We need a stable snapshot of the per-task breakpoint list.
|
||||
*/
|
||||
assert_bp_constraints_lock_held(bp);
|
||||
|
||||
rcu_read_lock();
|
||||
head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
|
||||
if (!head)
|
||||
goto out;
|
||||
|
||||
rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
|
||||
if (find_slot_idx(iter->attr.bp_type) != type)
|
||||
continue;
|
||||
|
||||
if (iter->cpu >= 0) {
|
||||
if (cpu == -1) {
|
||||
count = -1;
|
||||
goto out;
|
||||
} else if (cpu != iter->cpu)
|
||||
continue;
|
||||
}
|
||||
|
||||
count += hw_breakpoint_weight(iter);
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -126,16 +363,29 @@ static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Report the number of pinned/un-pinned breakpoints we have in
|
||||
* a given cpu (cpu > -1) or in all of them (cpu = -1).
|
||||
* Returns the max pinned breakpoint slots in a given
|
||||
* CPU (cpu > -1) or across all of them (cpu = -1).
|
||||
*/
|
||||
static void
|
||||
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
|
||||
enum bp_type_idx type)
|
||||
static int
|
||||
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
|
||||
{
|
||||
const struct cpumask *cpumask = cpumask_of_bp(bp);
|
||||
int pinned_slots = 0;
|
||||
int cpu;
|
||||
|
||||
if (bp->hw.target && bp->cpu < 0) {
|
||||
int max_pinned = task_bp_pinned(-1, bp, type);
|
||||
|
||||
if (max_pinned >= 0) {
|
||||
/*
|
||||
* Fast path: task_bp_pinned() is CPU-independent and
|
||||
* returns the same value for any CPU.
|
||||
*/
|
||||
max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
|
||||
return max_pinned;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, cpumask) {
|
||||
struct bp_cpuinfo *info = get_bp_info(cpu, type);
|
||||
int nr;
|
||||
@@ -146,71 +396,131 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
|
||||
else
|
||||
nr += task_bp_pinned(cpu, bp, type);
|
||||
|
||||
if (nr > slots->pinned)
|
||||
slots->pinned = nr;
|
||||
|
||||
nr = info->flexible;
|
||||
if (nr > slots->flexible)
|
||||
slots->flexible = nr;
|
||||
pinned_slots = max(nr, pinned_slots);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For now, continue to consider flexible as pinned, until we can
|
||||
* ensure no flexible event can ever be scheduled before a pinned event
|
||||
* in a same cpu.
|
||||
*/
|
||||
static void
|
||||
fetch_this_slot(struct bp_busy_slots *slots, int weight)
|
||||
{
|
||||
slots->pinned += weight;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a pinned breakpoint for the given task in our constraint table
|
||||
*/
|
||||
static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
|
||||
enum bp_type_idx type, int weight)
|
||||
{
|
||||
unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
|
||||
int old_idx, new_idx;
|
||||
|
||||
old_idx = task_bp_pinned(cpu, bp, type) - 1;
|
||||
new_idx = old_idx + weight;
|
||||
|
||||
if (old_idx >= 0)
|
||||
tsk_pinned[old_idx]--;
|
||||
if (new_idx >= 0)
|
||||
tsk_pinned[new_idx]++;
|
||||
return pinned_slots;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add/remove the given breakpoint in our constraint table
|
||||
*/
|
||||
static void
|
||||
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
|
||||
int weight)
|
||||
static int
|
||||
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
|
||||
{
|
||||
const struct cpumask *cpumask = cpumask_of_bp(bp);
|
||||
int cpu;
|
||||
int cpu, next_tsk_pinned;
|
||||
|
||||
if (!enable)
|
||||
weight = -weight;
|
||||
|
||||
/* Pinned counter cpu profiling */
|
||||
if (!bp->hw.target) {
|
||||
get_bp_info(bp->cpu, type)->cpu_pinned += weight;
|
||||
return;
|
||||
/*
|
||||
* Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
|
||||
* global histogram.
|
||||
*/
|
||||
struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
|
||||
|
||||
lockdep_assert_held_write(&bp_cpuinfo_sem);
|
||||
bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
|
||||
info->cpu_pinned += weight;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Pinned counter task profiling */
|
||||
for_each_cpu(cpu, cpumask)
|
||||
toggle_bp_task_slot(bp, cpu, type, weight);
|
||||
/*
|
||||
* If bp->hw.target, tsk_pinned is only modified, but not used
|
||||
* otherwise. We can permit concurrent updates as long as there are no
|
||||
* other uses: having acquired bp_cpuinfo_sem as a reader allows
|
||||
* concurrent updates here. Uses of tsk_pinned will require acquiring
|
||||
* bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
|
||||
*/
|
||||
lockdep_assert_held_read(&bp_cpuinfo_sem);
|
||||
|
||||
/*
|
||||
* Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
|
||||
* histogram. We need to take care of 4 cases:
|
||||
*
|
||||
* 1. This breakpoint targets all CPUs (cpu < 0), and there may only
|
||||
* exist other task breakpoints targeting all CPUs. In this case we
|
||||
* can simply update the global slots histogram.
|
||||
*
|
||||
* 2. This breakpoint targets a specific CPU (cpu >= 0), but there may
|
||||
* only exist other task breakpoints targeting all CPUs.
|
||||
*
|
||||
* a. On enable: remove the existing breakpoints from the global
|
||||
* slots histogram and use the per-CPU histogram.
|
||||
*
|
||||
* b. On disable: re-insert the existing breakpoints into the global
|
||||
* slots histogram and remove from per-CPU histogram.
|
||||
*
|
||||
* 3. Some other existing task breakpoints target specific CPUs. Only
|
||||
* update the per-CPU slots histogram.
|
||||
*/
|
||||
|
||||
if (!enable) {
|
||||
/*
|
||||
* Remove before updating histograms so we can determine if this
|
||||
* was the last task breakpoint for a specific CPU.
|
||||
*/
|
||||
int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
|
||||
*/
|
||||
next_tsk_pinned = task_bp_pinned(-1, bp, type);
|
||||
|
||||
if (next_tsk_pinned >= 0) {
|
||||
if (bp->cpu < 0) { /* Case 1: fast path */
|
||||
if (!enable)
|
||||
next_tsk_pinned += hw_breakpoint_weight(bp);
|
||||
bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
|
||||
} else if (enable) { /* Case 2.a: slow path */
|
||||
/* Add existing to per-CPU histograms. */
|
||||
for_each_possible_cpu(cpu) {
|
||||
bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
|
||||
0, next_tsk_pinned);
|
||||
}
|
||||
/* Add this first CPU-pinned task breakpoint. */
|
||||
bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
|
||||
next_tsk_pinned, weight);
|
||||
/* Rebalance global task pinned histogram. */
|
||||
bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
|
||||
-next_tsk_pinned);
|
||||
} else { /* Case 2.b: slow path */
|
||||
/* Remove this last CPU-pinned task breakpoint. */
|
||||
bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
|
||||
next_tsk_pinned + hw_breakpoint_weight(bp), weight);
|
||||
/* Remove all from per-CPU histograms. */
|
||||
for_each_possible_cpu(cpu) {
|
||||
bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
|
||||
next_tsk_pinned, -next_tsk_pinned);
|
||||
}
|
||||
/* Rebalance global task pinned histogram. */
|
||||
bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
|
||||
}
|
||||
} else { /* Case 3: slow path */
|
||||
const struct cpumask *cpumask = cpumask_of_bp(bp);
|
||||
|
||||
for_each_cpu(cpu, cpumask) {
|
||||
next_tsk_pinned = task_bp_pinned(cpu, bp, type);
|
||||
if (!enable)
|
||||
next_tsk_pinned += hw_breakpoint_weight(bp);
|
||||
bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
|
||||
next_tsk_pinned, weight);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Readers want a stable snapshot of the per-task breakpoint list.
|
||||
*/
|
||||
assert_bp_constraints_lock_held(bp);
|
||||
|
||||
if (enable)
|
||||
list_add_tail(&bp->hw.bp_list, &bp_task_head);
|
||||
else
|
||||
list_del(&bp->hw.bp_list);
|
||||
return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__weak int arch_reserve_bp_slot(struct perf_event *bp)
|
||||
@@ -234,7 +544,12 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Constraints to check before allowing this new breakpoint counter:
|
||||
* Constraints to check before allowing this new breakpoint counter.
|
||||
*
|
||||
* Note: Flexible breakpoints are currently unimplemented, but outlined in the
|
||||
* below algorithm for completeness. The implementation treats flexible as
|
||||
* pinned due to no guarantee that we currently always schedule flexible events
|
||||
* before a pinned event in a same CPU.
|
||||
*
|
||||
* == Non-pinned counter == (Considered as pinned for now)
|
||||
*
|
||||
@@ -276,8 +591,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
|
||||
*/
|
||||
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
|
||||
{
|
||||
struct bp_busy_slots slots = {0};
|
||||
enum bp_type_idx type;
|
||||
int max_pinned_slots;
|
||||
int weight;
|
||||
int ret;
|
||||
|
||||
@@ -293,36 +608,24 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
|
||||
type = find_slot_idx(bp_type);
|
||||
weight = hw_breakpoint_weight(bp);
|
||||
|
||||
fetch_bp_busy_slots(&slots, bp, type);
|
||||
/*
|
||||
* Simulate the addition of this breakpoint to the constraints
|
||||
* and see the result.
|
||||
*/
|
||||
fetch_this_slot(&slots, weight);
|
||||
|
||||
/* Flexible counters need to keep at least one slot */
|
||||
if (slots.pinned + (!!slots.flexible) > nr_slots[type])
|
||||
/* Check if this new breakpoint can be satisfied across all CPUs. */
|
||||
max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
|
||||
if (max_pinned_slots > hw_breakpoint_slots_cached(type))
|
||||
return -ENOSPC;
|
||||
|
||||
ret = arch_reserve_bp_slot(bp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
toggle_bp_slot(bp, true, type, weight);
|
||||
|
||||
return 0;
|
||||
return toggle_bp_slot(bp, true, type, weight);
|
||||
}
|
||||
|
||||
int reserve_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&nr_bp_mutex);
|
||||
|
||||
ret = __reserve_bp_slot(bp, bp->attr.bp_type);
|
||||
|
||||
mutex_unlock(&nr_bp_mutex);
|
||||
struct mutex *mtx = bp_constraints_lock(bp);
|
||||
int ret = __reserve_bp_slot(bp, bp->attr.bp_type);
|
||||
|
||||
bp_constraints_unlock(mtx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -335,17 +638,16 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
|
||||
|
||||
type = find_slot_idx(bp_type);
|
||||
weight = hw_breakpoint_weight(bp);
|
||||
toggle_bp_slot(bp, false, type, weight);
|
||||
WARN_ON(toggle_bp_slot(bp, false, type, weight));
|
||||
}
|
||||
|
||||
void release_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
mutex_lock(&nr_bp_mutex);
|
||||
struct mutex *mtx = bp_constraints_lock(bp);
|
||||
|
||||
arch_unregister_hw_breakpoint(bp);
|
||||
__release_bp_slot(bp, bp->attr.bp_type);
|
||||
|
||||
mutex_unlock(&nr_bp_mutex);
|
||||
bp_constraints_unlock(mtx);
|
||||
}
|
||||
|
||||
static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
|
||||
@@ -372,11 +674,10 @@ static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
|
||||
|
||||
static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
|
||||
{
|
||||
int ret;
|
||||
struct mutex *mtx = bp_constraints_lock(bp);
|
||||
int ret = __modify_bp_slot(bp, old_type, new_type);
|
||||
|
||||
mutex_lock(&nr_bp_mutex);
|
||||
ret = __modify_bp_slot(bp, old_type, new_type);
|
||||
mutex_unlock(&nr_bp_mutex);
|
||||
bp_constraints_unlock(mtx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -387,18 +688,28 @@ static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
|
||||
*/
|
||||
int dbg_reserve_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
if (mutex_is_locked(&nr_bp_mutex))
|
||||
int ret;
|
||||
|
||||
if (bp_constraints_is_locked(bp))
|
||||
return -1;
|
||||
|
||||
return __reserve_bp_slot(bp, bp->attr.bp_type);
|
||||
/* Locks aren't held; disable lockdep assert checking. */
|
||||
lockdep_off();
|
||||
ret = __reserve_bp_slot(bp, bp->attr.bp_type);
|
||||
lockdep_on();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int dbg_release_bp_slot(struct perf_event *bp)
|
||||
{
|
||||
if (mutex_is_locked(&nr_bp_mutex))
|
||||
if (bp_constraints_is_locked(bp))
|
||||
return -1;
|
||||
|
||||
/* Locks aren't held; disable lockdep assert checking. */
|
||||
lockdep_off();
|
||||
__release_bp_slot(bp, bp->attr.bp_type);
|
||||
lockdep_on();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -604,6 +915,50 @@ void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
|
||||
|
||||
/**
|
||||
* hw_breakpoint_is_used - check if breakpoints are currently used
|
||||
*
|
||||
* Returns: true if breakpoints are used, false otherwise.
|
||||
*/
|
||||
bool hw_breakpoint_is_used(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!constraints_initialized)
|
||||
return false;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
for (int type = 0; type < TYPE_MAX; ++type) {
|
||||
struct bp_cpuinfo *info = get_bp_info(cpu, type);
|
||||
|
||||
if (info->cpu_pinned)
|
||||
return true;
|
||||
|
||||
for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
|
||||
if (atomic_read(&info->tsk_pinned.count[slot]))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int type = 0; type < TYPE_MAX; ++type) {
|
||||
for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
|
||||
/*
|
||||
* Warn, because if there are CPU pinned counters,
|
||||
* should never get here; bp_cpuinfo::cpu_pinned should
|
||||
* be consistent with the global cpu_pinned histogram.
|
||||
*/
|
||||
if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
|
||||
return true;
|
||||
|
||||
if (atomic_read(&tsk_pinned_all[type].count[slot]))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct notifier_block hw_breakpoint_exceptions_nb = {
|
||||
.notifier_call = hw_breakpoint_exceptions_notify,
|
||||
/* we need to be notified first */
|
||||
@@ -678,38 +1033,19 @@ static struct pmu perf_breakpoint = {
|
||||
|
||||
int __init init_hw_breakpoint(void)
|
||||
{
|
||||
int cpu, err_cpu;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
for (i = 0; i < TYPE_MAX; i++)
|
||||
nr_slots[i] = hw_breakpoint_slots(i);
|
||||
ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
for (i = 0; i < TYPE_MAX; i++) {
|
||||
struct bp_cpuinfo *info = get_bp_info(cpu, i);
|
||||
ret = init_breakpoint_slots();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
|
||||
GFP_KERNEL);
|
||||
if (!info->tsk_pinned)
|
||||
goto err_alloc;
|
||||
}
|
||||
}
|
||||
|
||||
constraints_initialized = 1;
|
||||
constraints_initialized = true;
|
||||
|
||||
perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
|
||||
|
||||
return register_die_notifier(&hw_breakpoint_exceptions_nb);
|
||||
|
||||
err_alloc:
|
||||
for_each_possible_cpu(err_cpu) {
|
||||
for (i = 0; i < TYPE_MAX; i++)
|
||||
kfree(get_bp_info(err_cpu, i)->tsk_pinned);
|
||||
if (err_cpu == cpu)
|
||||
break;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
||||
|
||||
333
kernel/events/hw_breakpoint_test.c
Normal file
333
kernel/events/hw_breakpoint_test.c
Normal file
@@ -0,0 +1,333 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* KUnit test for hw_breakpoint constraints accounting logic.
|
||||
*
|
||||
* Copyright (C) 2022, Google LLC.
|
||||
*/
|
||||
|
||||
#include <kunit/test.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
|
||||
/*
 * Calls kunit_skip() on @test when @slots exceeds the number of breakpoint
 * slots reported by get_test_bp_slots(). Note that @slots is evaluated more
 * than once.
 */
#define TEST_REQUIRES_BP_SLOTS(test, slots)					\
	do {									\
		if ((slots) > get_test_bp_slots()) {				\
			kunit_skip((test),					\
				   "Requires breakpoint slots: %d > %d",	\
				   (slots), get_test_bp_slots());		\
		}								\
	} while (0)
|
||||
|
||||
/*
 * Expect that @expr yields a pointer whose PTR_ERR() value is -ENOSPC.
 * Expands to a KUnit expectation on a variable named `test`, which must be
 * in scope at the point of use.
 */
#define TEST_EXPECT_NOSPC(expr) KUNIT_EXPECT_EQ(test, -ENOSPC, PTR_ERR(expr))
|
||||
|
||||
/* Size of the break_vars[] and test_bps[] tables below. */
#define MAX_TEST_BREAKPOINTS 512
|
||||
|
||||
/* Breakpoint target bytes: register_test_bp() watches &break_vars[idx]. */
static char break_vars[MAX_TEST_BREAKPOINTS];
|
||||
/* Breakpoints registered by the tests; unused entries are NULL. */
static struct perf_event *test_bps[MAX_TEST_BREAKPOINTS];
|
||||
/* Second task used by some tests; created on demand by get_other_task(). */
static struct task_struct *__other_task;
|
||||
|
||||
static struct perf_event *register_test_bp(int cpu, struct task_struct *tsk, int idx)
|
||||
{
|
||||
struct perf_event_attr attr = {};
|
||||
|
||||
if (WARN_ON(idx < 0 || idx >= MAX_TEST_BREAKPOINTS))
|
||||
return NULL;
|
||||
|
||||
hw_breakpoint_init(&attr);
|
||||
attr.bp_addr = (unsigned long)&break_vars[idx];
|
||||
attr.bp_len = HW_BREAKPOINT_LEN_1;
|
||||
attr.bp_type = HW_BREAKPOINT_RW;
|
||||
return perf_event_create_kernel_counter(&attr, cpu, tsk, NULL, NULL);
|
||||
}
|
||||
|
||||
static void unregister_test_bp(struct perf_event **bp)
|
||||
{
|
||||
if (WARN_ON(IS_ERR(*bp)))
|
||||
return;
|
||||
if (WARN_ON(!*bp))
|
||||
return;
|
||||
unregister_hw_breakpoint(*bp);
|
||||
*bp = NULL;
|
||||
}
|
||||
|
||||
static int get_test_bp_slots(void)
|
||||
{
|
||||
static int slots;
|
||||
|
||||
if (!slots)
|
||||
slots = hw_breakpoint_slots(TYPE_DATA);
|
||||
|
||||
return slots;
|
||||
}
|
||||
|
||||
static void fill_one_bp_slot(struct kunit *test, int *id, int cpu, struct task_struct *tsk)
|
||||
{
|
||||
struct perf_event *bp = register_test_bp(cpu, tsk, *id);
|
||||
|
||||
KUNIT_ASSERT_NOT_NULL(test, bp);
|
||||
KUNIT_ASSERT_FALSE(test, IS_ERR(bp));
|
||||
KUNIT_ASSERT_NULL(test, test_bps[*id]);
|
||||
test_bps[(*id)++] = bp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fills up the given @cpu/@tsk with breakpoints, only leaving @skip slots free.
|
||||
*
|
||||
* Returns true if this can be called again, continuing at @id.
|
||||
*/
|
||||
static bool fill_bp_slots(struct kunit *test, int *id, int cpu, struct task_struct *tsk, int skip)
|
||||
{
|
||||
for (int i = 0; i < get_test_bp_slots() - skip; ++i)
|
||||
fill_one_bp_slot(test, id, cpu, tsk);
|
||||
|
||||
return *id + get_test_bp_slots() <= MAX_TEST_BREAKPOINTS;
|
||||
}
|
||||
|
||||
/* Thread function for the helper task: ignores @arg and returns 0. */
static int dummy_kthread(void *arg)
{
	return 0;
}
|
||||
|
||||
static struct task_struct *get_other_task(struct kunit *test)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
|
||||
if (__other_task)
|
||||
return __other_task;
|
||||
|
||||
tsk = kthread_create(dummy_kthread, NULL, "hw_breakpoint_dummy_task");
|
||||
KUNIT_ASSERT_FALSE(test, IS_ERR(tsk));
|
||||
__other_task = tsk;
|
||||
return __other_task;
|
||||
}
|
||||
|
||||
static int get_test_cpu(int num)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
WARN_ON(num < 0);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (num-- <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return cpu;
|
||||
}
|
||||
|
||||
/* ===== Test cases ===== */
|
||||
|
||||
static void test_one_cpu(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
fill_bp_slots(test, &idx, get_test_cpu(0), NULL, 0);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
}
|
||||
|
||||
static void test_many_cpus(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
int cpu;
|
||||
|
||||
/* Test that CPUs are independent. */
|
||||
for_each_online_cpu(cpu) {
|
||||
bool do_continue = fill_bp_slots(test, &idx, cpu, NULL, 0);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(cpu, NULL, idx));
|
||||
if (!do_continue)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void test_one_task_on_all_cpus(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
fill_bp_slots(test, &idx, -1, current, 0);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
/* Remove one and adding back CPU-target should work. */
|
||||
unregister_test_bp(&test_bps[0]);
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
|
||||
}
|
||||
|
||||
static void test_two_tasks_on_all_cpus(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
/* Test that tasks are independent. */
|
||||
fill_bp_slots(test, &idx, -1, current, 0);
|
||||
fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
/* Remove one from first task and adding back CPU-target should not work. */
|
||||
unregister_test_bp(&test_bps[0]);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
}
|
||||
|
||||
static void test_one_task_on_one_cpu(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
/*
|
||||
* Remove one and adding back CPU-target should work; this case is
|
||||
* special vs. above because the task's constraints are CPU-dependent.
|
||||
*/
|
||||
unregister_test_bp(&test_bps[0]);
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
|
||||
}
|
||||
|
||||
static void test_one_task_mixed(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
TEST_REQUIRES_BP_SLOTS(test, 3);
|
||||
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
|
||||
fill_bp_slots(test, &idx, -1, current, 1);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
|
||||
/* Transition from CPU-dependent pinned count to CPU-independent. */
|
||||
unregister_test_bp(&test_bps[0]);
|
||||
unregister_test_bp(&test_bps[1]);
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), NULL);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
}
|
||||
|
||||
static void test_two_tasks_on_one_cpu(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
|
||||
fill_bp_slots(test, &idx, get_test_cpu(0), get_other_task(test), 0);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
/* Can still create breakpoints on some other CPU. */
|
||||
fill_bp_slots(test, &idx, get_test_cpu(1), NULL, 0);
|
||||
}
|
||||
|
||||
static void test_two_tasks_on_one_all_cpus(struct kunit *test)
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
fill_bp_slots(test, &idx, get_test_cpu(0), current, 0);
|
||||
fill_bp_slots(test, &idx, -1, get_other_task(test), 0);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), get_other_task(test), idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
/* Cannot create breakpoints on some other CPU either. */
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
|
||||
}
|
||||
|
||||
static void test_task_on_all_and_one_cpu(struct kunit *test)
|
||||
{
|
||||
int tsk_on_cpu_idx, cpu_idx;
|
||||
int idx = 0;
|
||||
|
||||
TEST_REQUIRES_BP_SLOTS(test, 3);
|
||||
|
||||
fill_bp_slots(test, &idx, -1, current, 2);
|
||||
/* Transitioning from only all CPU breakpoints to mixed. */
|
||||
tsk_on_cpu_idx = idx;
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(0), current);
|
||||
fill_one_bp_slot(test, &idx, -1, current);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
|
||||
/* We should still be able to use up another CPU's slots. */
|
||||
cpu_idx = idx;
|
||||
fill_one_bp_slot(test, &idx, get_test_cpu(1), NULL);
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
|
||||
|
||||
/* Transitioning back to task target on all CPUs. */
|
||||
unregister_test_bp(&test_bps[tsk_on_cpu_idx]);
|
||||
/* Still have a CPU target breakpoint in get_test_cpu(1). */
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
/* Remove it and try again. */
|
||||
unregister_test_bp(&test_bps[cpu_idx]);
|
||||
fill_one_bp_slot(test, &idx, -1, current);
|
||||
|
||||
TEST_EXPECT_NOSPC(register_test_bp(-1, current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), current, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(0), NULL, idx));
|
||||
TEST_EXPECT_NOSPC(register_test_bp(get_test_cpu(1), NULL, idx));
|
||||
}
|
||||
|
||||
static struct kunit_case hw_breakpoint_test_cases[] = {
|
||||
KUNIT_CASE(test_one_cpu),
|
||||
KUNIT_CASE(test_many_cpus),
|
||||
KUNIT_CASE(test_one_task_on_all_cpus),
|
||||
KUNIT_CASE(test_two_tasks_on_all_cpus),
|
||||
KUNIT_CASE(test_one_task_on_one_cpu),
|
||||
KUNIT_CASE(test_one_task_mixed),
|
||||
KUNIT_CASE(test_two_tasks_on_one_cpu),
|
||||
KUNIT_CASE(test_two_tasks_on_one_all_cpus),
|
||||
KUNIT_CASE(test_task_on_all_and_one_cpu),
|
||||
{},
|
||||
};
|
||||
|
||||
/*
 * Per-test setup: check the preconditions the test cases rely on.
 *
 * An unmet precondition is a property of the machine, not a defect in the
 * code under test, so the test is marked as skipped via kunit_skip() instead
 * of being reported as failed through an error return.
 *
 * Returns 0; precondition failures do not return but skip the test.
 */
static int test_init(struct kunit *test)
{
	/* Most test cases want 2 distinct CPUs. */
	if (num_online_cpus() < 2)
		kunit_skip(test, "not enough cpus");

	/* Want the system to not use breakpoints elsewhere. */
	if (hw_breakpoint_is_used())
		kunit_skip(test, "hw breakpoint already in use");

	return 0;
}
|
||||
|
||||
static void test_exit(struct kunit *test)
|
||||
{
|
||||
for (int i = 0; i < MAX_TEST_BREAKPOINTS; ++i) {
|
||||
if (test_bps[i])
|
||||
unregister_test_bp(&test_bps[i]);
|
||||
}
|
||||
|
||||
if (__other_task) {
|
||||
kthread_stop(__other_task);
|
||||
__other_task = NULL;
|
||||
}
|
||||
|
||||
/* Verify that internal state agrees that no breakpoints are in use. */
|
||||
KUNIT_EXPECT_FALSE(test, hw_breakpoint_is_used());
|
||||
}
|
||||
|
||||
static struct kunit_suite hw_breakpoint_test_suite = {
|
||||
.name = "hw_breakpoint",
|
||||
.test_cases = hw_breakpoint_test_cases,
|
||||
.init = test_init,
|
||||
.exit = test_exit,
|
||||
};
|
||||
|
||||
kunit_test_suites(&hw_breakpoint_test_suite);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Marco Elver <elver@google.com>");
|
||||
@@ -934,8 +934,10 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
|
||||
* Huh! same key, different name? Did someone trample
|
||||
* on some memory? We're most confused.
|
||||
*/
|
||||
WARN_ON_ONCE(class->name != lock->name &&
|
||||
lock->key != &__lockdep_no_validate__);
|
||||
WARN_ONCE(class->name != lock->name &&
|
||||
lock->key != &__lockdep_no_validate__,
|
||||
"Looking for class \"%s\" with key %ps, but found a different class \"%s\" with the same key\n",
|
||||
lock->name, lock->key, class->name);
|
||||
return class;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,6 +192,12 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
|
||||
__sum; \
|
||||
})
|
||||
|
||||
bool percpu_is_read_locked(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
return per_cpu_sum(*sem->read_count) != 0 && !atomic_read(&sem->block);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(percpu_is_read_locked);
|
||||
|
||||
/*
|
||||
* Return true if the modular sum of the sem->read_count per-CPU variable is
|
||||
* zero. If this sum is zero, then it is stable due to the fact that if any
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
* queued_read_lock_slowpath - acquire read lock of a queued rwlock
|
||||
* @lock: Pointer to queued rwlock structure
|
||||
*/
|
||||
void queued_read_lock_slowpath(struct qrwlock *lock)
|
||||
void __lockfunc queued_read_lock_slowpath(struct qrwlock *lock)
|
||||
{
|
||||
/*
|
||||
* Readers come here when they cannot get the lock without waiting
|
||||
@@ -63,7 +63,7 @@ EXPORT_SYMBOL(queued_read_lock_slowpath);
|
||||
* queued_write_lock_slowpath - acquire write lock of a queued rwlock
|
||||
* @lock : Pointer to queued rwlock structure
|
||||
*/
|
||||
void queued_write_lock_slowpath(struct qrwlock *lock)
|
||||
void __lockfunc queued_write_lock_slowpath(struct qrwlock *lock)
|
||||
{
|
||||
int cnts;
|
||||
|
||||
|
||||
@@ -313,7 +313,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
|
||||
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
|
||||
* queue : ^--' :
|
||||
*/
|
||||
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
{
|
||||
struct mcs_spinlock *prev, *next, *node;
|
||||
u32 old, tail;
|
||||
|
||||
@@ -489,7 +489,7 @@ gotlock:
|
||||
* PV versions of the unlock fastpath and slowpath functions to be used
|
||||
* instead of queued_spin_unlock().
|
||||
*/
|
||||
__visible void
|
||||
__visible __lockfunc void
|
||||
__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
|
||||
{
|
||||
struct pv_node *node;
|
||||
@@ -544,7 +544,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
|
||||
#include <asm/qspinlock_paravirt.h>
|
||||
|
||||
#ifndef __pv_queued_spin_unlock
|
||||
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
u8 locked;
|
||||
|
||||
|
||||
@@ -133,14 +133,19 @@
|
||||
* the owner value concurrently without lock. Read from owner, however,
|
||||
* may not need READ_ONCE() as long as the pointer value is only used
|
||||
* for comparison and isn't being dereferenced.
|
||||
*
|
||||
* Both rwsem_{set,clear}_owner() functions should be in the same
|
||||
* preempt disable section as the atomic op that changes sem->count.
|
||||
*/
|
||||
static inline void rwsem_set_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
lockdep_assert_preemption_disabled();
|
||||
atomic_long_set(&sem->owner, (long)current);
|
||||
}
|
||||
|
||||
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
lockdep_assert_preemption_disabled();
|
||||
atomic_long_set(&sem->owner, 0);
|
||||
}
|
||||
|
||||
@@ -251,13 +256,16 @@ static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
|
||||
static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp = RWSEM_UNLOCKED_VALUE;
|
||||
bool ret = false;
|
||||
|
||||
preempt_disable();
|
||||
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
|
||||
rwsem_set_owner(sem);
|
||||
return true;
|
||||
ret = true;
|
||||
}
|
||||
|
||||
return false;
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1352,8 +1360,10 @@ static inline void __up_write(struct rw_semaphore *sem)
|
||||
DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
|
||||
!rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
|
||||
|
||||
preempt_disable();
|
||||
rwsem_clear_owner(sem);
|
||||
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
|
||||
preempt_enable();
|
||||
if (unlikely(tmp & RWSEM_FLAG_WAITERS))
|
||||
rwsem_wake(sem);
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@ static noinline void __up(struct semaphore *sem);
|
||||
* Use of this function is deprecated, please use down_interruptible() or
|
||||
* down_killable() instead.
|
||||
*/
|
||||
void down(struct semaphore *sem)
|
||||
void __sched down(struct semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(down);
|
||||
* If the sleep is interrupted by a signal, this function will return -EINTR.
|
||||
* If the semaphore is successfully acquired, this function returns 0.
|
||||
*/
|
||||
int down_interruptible(struct semaphore *sem)
|
||||
int __sched down_interruptible(struct semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
int result = 0;
|
||||
@@ -101,7 +101,7 @@ EXPORT_SYMBOL(down_interruptible);
|
||||
* -EINTR. If the semaphore is successfully acquired, this function returns
|
||||
* 0.
|
||||
*/
|
||||
int down_killable(struct semaphore *sem)
|
||||
int __sched down_killable(struct semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
int result = 0;
|
||||
@@ -131,7 +131,7 @@ EXPORT_SYMBOL(down_killable);
|
||||
* Unlike mutex_trylock, this function can be used from interrupt context,
|
||||
* and the semaphore can be released by any task or interrupt.
|
||||
*/
|
||||
int down_trylock(struct semaphore *sem)
|
||||
int __sched down_trylock(struct semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
int count;
|
||||
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(down_trylock);
|
||||
* If the semaphore is not released within the specified number of jiffies,
|
||||
* this function returns -ETIME. It returns 0 if the semaphore was acquired.
|
||||
*/
|
||||
int down_timeout(struct semaphore *sem, long timeout)
|
||||
int __sched down_timeout(struct semaphore *sem, long timeout)
|
||||
{
|
||||
unsigned long flags;
|
||||
int result = 0;
|
||||
@@ -180,7 +180,7 @@ EXPORT_SYMBOL(down_timeout);
|
||||
* Release the semaphore. Unlike mutexes, up() may be called from any
|
||||
* context and even by tasks which have never called down().
|
||||
*/
|
||||
void up(struct semaphore *sem)
|
||||
void __sched up(struct semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ BUILD_LOCK_OPS(write, rwlock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_TRYLOCK
|
||||
int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
|
||||
noinline int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
|
||||
{
|
||||
return __raw_spin_trylock(lock);
|
||||
}
|
||||
@@ -141,7 +141,7 @@ EXPORT_SYMBOL(_raw_spin_trylock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
|
||||
int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
|
||||
noinline int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
|
||||
{
|
||||
return __raw_spin_trylock_bh(lock);
|
||||
}
|
||||
@@ -149,7 +149,7 @@ EXPORT_SYMBOL(_raw_spin_trylock_bh);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_LOCK
|
||||
void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_lock(lock);
|
||||
}
|
||||
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(_raw_spin_lock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
|
||||
unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
|
||||
noinline unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
|
||||
{
|
||||
return __raw_spin_lock_irqsave(lock);
|
||||
}
|
||||
@@ -165,7 +165,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irqsave);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
|
||||
void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_lock_irq(lock);
|
||||
}
|
||||
@@ -173,7 +173,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_LOCK_BH
|
||||
void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_lock_bh(lock);
|
||||
}
|
||||
@@ -181,7 +181,7 @@ EXPORT_SYMBOL(_raw_spin_lock_bh);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
|
||||
void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_unlock(lock);
|
||||
}
|
||||
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(_raw_spin_unlock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
|
||||
void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
|
||||
noinline void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
|
||||
{
|
||||
__raw_spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
@@ -197,7 +197,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
|
||||
void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_unlock_irq(lock);
|
||||
}
|
||||
@@ -205,7 +205,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
|
||||
void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
|
||||
noinline void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
|
||||
{
|
||||
__raw_spin_unlock_bh(lock);
|
||||
}
|
||||
@@ -215,7 +215,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_bh);
|
||||
#ifndef CONFIG_PREEMPT_RT
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_TRYLOCK
|
||||
int __lockfunc _raw_read_trylock(rwlock_t *lock)
|
||||
noinline int __lockfunc _raw_read_trylock(rwlock_t *lock)
|
||||
{
|
||||
return __raw_read_trylock(lock);
|
||||
}
|
||||
@@ -223,7 +223,7 @@ EXPORT_SYMBOL(_raw_read_trylock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_LOCK
|
||||
void __lockfunc _raw_read_lock(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_lock(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_lock(lock);
|
||||
}
|
||||
@@ -231,7 +231,7 @@ EXPORT_SYMBOL(_raw_read_lock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
|
||||
unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
|
||||
noinline unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
|
||||
{
|
||||
return __raw_read_lock_irqsave(lock);
|
||||
}
|
||||
@@ -239,7 +239,7 @@ EXPORT_SYMBOL(_raw_read_lock_irqsave);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_LOCK_IRQ
|
||||
void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_lock_irq(lock);
|
||||
}
|
||||
@@ -247,7 +247,7 @@ EXPORT_SYMBOL(_raw_read_lock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_LOCK_BH
|
||||
void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_lock_bh(lock);
|
||||
}
|
||||
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(_raw_read_lock_bh);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_UNLOCK
|
||||
void __lockfunc _raw_read_unlock(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_unlock(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_unlock(lock);
|
||||
}
|
||||
@@ -263,7 +263,7 @@ EXPORT_SYMBOL(_raw_read_unlock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
|
||||
void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
noinline void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
{
|
||||
__raw_read_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
|
||||
void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_unlock_irq(lock);
|
||||
}
|
||||
@@ -279,7 +279,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_READ_UNLOCK_BH
|
||||
void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
__raw_read_unlock_bh(lock);
|
||||
}
|
||||
@@ -287,7 +287,7 @@ EXPORT_SYMBOL(_raw_read_unlock_bh);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_TRYLOCK
|
||||
int __lockfunc _raw_write_trylock(rwlock_t *lock)
|
||||
noinline int __lockfunc _raw_write_trylock(rwlock_t *lock)
|
||||
{
|
||||
return __raw_write_trylock(lock);
|
||||
}
|
||||
@@ -295,7 +295,7 @@ EXPORT_SYMBOL(_raw_write_trylock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_LOCK
|
||||
void __lockfunc _raw_write_lock(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_lock(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_lock(lock);
|
||||
}
|
||||
@@ -313,7 +313,7 @@ EXPORT_SYMBOL(_raw_write_lock_nested);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
|
||||
unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
|
||||
noinline unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
|
||||
{
|
||||
return __raw_write_lock_irqsave(lock);
|
||||
}
|
||||
@@ -321,7 +321,7 @@ EXPORT_SYMBOL(_raw_write_lock_irqsave);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
|
||||
void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_lock_irq(lock);
|
||||
}
|
||||
@@ -329,7 +329,7 @@ EXPORT_SYMBOL(_raw_write_lock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_LOCK_BH
|
||||
void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_lock_bh(lock);
|
||||
}
|
||||
@@ -337,7 +337,7 @@ EXPORT_SYMBOL(_raw_write_lock_bh);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_UNLOCK
|
||||
void __lockfunc _raw_write_unlock(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_unlock(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_unlock(lock);
|
||||
}
|
||||
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(_raw_write_unlock);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
|
||||
void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
noinline void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
|
||||
{
|
||||
__raw_write_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
|
||||
void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_unlock_irq(lock);
|
||||
}
|
||||
@@ -361,7 +361,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irq);
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
|
||||
void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
|
||||
noinline void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
|
||||
{
|
||||
__raw_write_unlock_bh(lock);
|
||||
}
|
||||
|
||||
@@ -1706,6 +1706,9 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
|
||||
if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
|
||||
return -ENOENT;
|
||||
|
||||
if (unlikely(!br_stack))
|
||||
return -ENOENT;
|
||||
|
||||
|
||||
@@ -805,6 +805,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
|
||||
An architecture should select this when it can successfully
|
||||
build and run DEBUG_VM_PGTABLE.
|
||||
|
||||
config DEBUG_VM_IRQSOFF
|
||||
def_bool DEBUG_VM && !PREEMPT_RT
|
||||
|
||||
config DEBUG_VM
|
||||
bool "Debug VM"
|
||||
depends on DEBUG_KERNEL
|
||||
@@ -2556,6 +2559,16 @@ config FORTIFY_KUNIT_TEST
|
||||
by the str*() and mem*() family of functions. For testing runtime
|
||||
traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests.
|
||||
|
||||
config HW_BREAKPOINT_KUNIT_TEST
|
||||
bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
|
||||
depends on HAVE_HW_BREAKPOINT
|
||||
depends on KUNIT=y
|
||||
default KUNIT_ALL_TESTS
|
||||
help
|
||||
Tests for hw_breakpoint constraints accounting.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config TEST_UDELAY
|
||||
tristate "udelay test driver"
|
||||
help
|
||||
|
||||
@@ -70,6 +70,7 @@ bool fprop_new_period(struct fprop_global *p, int periods)
|
||||
*/
|
||||
if (events <= 1)
|
||||
return false;
|
||||
preempt_disable_nested();
|
||||
write_seqcount_begin(&p->sequence);
|
||||
if (periods < 64)
|
||||
events -= events >> periods;
|
||||
@@ -77,6 +78,7 @@ bool fprop_new_period(struct fprop_global *p, int periods)
|
||||
percpu_counter_add(&p->events, -events);
|
||||
p->period += periods;
|
||||
write_seqcount_end(&p->sequence);
|
||||
preempt_enable_nested();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -579,6 +579,12 @@ config COMPACTION
|
||||
it and then we would be really interested to hear about that at
|
||||
linux-mm@kvack.org.
|
||||
|
||||
config COMPACT_UNEVICTABLE_DEFAULT
|
||||
int
|
||||
depends on COMPACTION
|
||||
default 0 if PREEMPT_RT
|
||||
default 1
|
||||
|
||||
#
|
||||
# support for free page reporting
|
||||
config PAGE_REPORTING
|
||||
|
||||
@@ -1727,11 +1727,7 @@ typedef enum {
|
||||
* Allow userspace to control policy on scanning the unevictable LRU for
|
||||
* compactable pages.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
int sysctl_compact_unevictable_allowed __read_mostly = 0;
|
||||
#else
|
||||
int sysctl_compact_unevictable_allowed __read_mostly = 1;
|
||||
#endif
|
||||
int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;
|
||||
|
||||
static inline void
|
||||
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
|
||||
|
||||
@@ -86,6 +86,7 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
|
||||
/* Also the *_bulk() variants by only checking prefixes. */
|
||||
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
|
||||
goto found;
|
||||
|
||||
@@ -597,25 +597,18 @@ static u64 flush_next_time;
|
||||
*/
|
||||
static void memcg_stats_lock(void)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
preempt_disable();
|
||||
#else
|
||||
VM_BUG_ON(!irqs_disabled());
|
||||
#endif
|
||||
preempt_disable_nested();
|
||||
VM_WARN_ON_IRQS_ENABLED();
|
||||
}
|
||||
|
||||
static void __memcg_stats_lock(void)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
preempt_disable();
|
||||
#endif
|
||||
preempt_disable_nested();
|
||||
}
|
||||
|
||||
static void memcg_stats_unlock(void)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
preempt_enable();
|
||||
#endif
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
|
||||
@@ -715,7 +708,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
|
||||
* interrupt context while other caller need to have disabled interrupt.
|
||||
*/
|
||||
__memcg_stats_lock();
|
||||
if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
||||
if (IS_ENABLED(CONFIG_DEBUG_VM)) {
|
||||
switch (idx) {
|
||||
case NR_ANON_MAPPED:
|
||||
case NR_FILE_MAPPED:
|
||||
@@ -725,7 +718,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
|
||||
WARN_ON_ONCE(!in_task());
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
VM_WARN_ON_IRQS_ENABLED();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
305
mm/slab.c
305
mm/slab.c
@@ -3181,84 +3181,46 @@ must_grow:
|
||||
}
|
||||
|
||||
static __always_inline void *
|
||||
slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
|
||||
unsigned long caller)
|
||||
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid)
|
||||
{
|
||||
unsigned long save_flags;
|
||||
void *ptr;
|
||||
void *objp = NULL;
|
||||
int slab_node = numa_mem_id();
|
||||
struct obj_cgroup *objcg = NULL;
|
||||
bool init = false;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
cachep = slab_pre_alloc_hook(cachep, NULL, &objcg, 1, flags);
|
||||
if (unlikely(!cachep))
|
||||
return NULL;
|
||||
|
||||
ptr = kfence_alloc(cachep, orig_size, flags);
|
||||
if (unlikely(ptr))
|
||||
goto out_hooks;
|
||||
|
||||
local_irq_save(save_flags);
|
||||
|
||||
if (nodeid == NUMA_NO_NODE)
|
||||
nodeid = slab_node;
|
||||
|
||||
if (unlikely(!get_node(cachep, nodeid))) {
|
||||
/* Node not bootstrapped yet */
|
||||
ptr = fallback_alloc(cachep, flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (nodeid == slab_node) {
|
||||
if (nodeid == NUMA_NO_NODE) {
|
||||
if (current->mempolicy || cpuset_do_slab_mem_spread()) {
|
||||
objp = alternate_node_alloc(cachep, flags);
|
||||
if (objp)
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* Use the locally cached objects if possible.
|
||||
* However ____cache_alloc does not allow fallback
|
||||
* to other nodes. It may fail while we still have
|
||||
* objects on other nodes available.
|
||||
*/
|
||||
ptr = ____cache_alloc(cachep, flags);
|
||||
if (ptr)
|
||||
goto out;
|
||||
objp = ____cache_alloc(cachep, flags);
|
||||
nodeid = slab_node;
|
||||
} else if (nodeid == slab_node) {
|
||||
objp = ____cache_alloc(cachep, flags);
|
||||
} else if (!get_node(cachep, nodeid)) {
|
||||
/* Node not bootstrapped yet */
|
||||
objp = fallback_alloc(cachep, flags);
|
||||
goto out;
|
||||
}
|
||||
/* ___cache_alloc_node can fall back to other nodes */
|
||||
ptr = ____cache_alloc_node(cachep, flags, nodeid);
|
||||
out:
|
||||
local_irq_restore(save_flags);
|
||||
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
|
||||
init = slab_want_init_on_alloc(flags, cachep);
|
||||
|
||||
out_hooks:
|
||||
slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr, init);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static __always_inline void *
|
||||
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
|
||||
{
|
||||
void *objp;
|
||||
|
||||
if (current->mempolicy || cpuset_do_slab_mem_spread()) {
|
||||
objp = alternate_node_alloc(cache, flags);
|
||||
if (objp)
|
||||
goto out;
|
||||
}
|
||||
objp = ____cache_alloc(cache, flags);
|
||||
|
||||
/*
|
||||
* We may just have run out of memory on the local node.
|
||||
* ____cache_alloc_node() knows how to locate memory on other nodes
|
||||
*/
|
||||
if (!objp)
|
||||
objp = ____cache_alloc_node(cache, flags, numa_mem_id());
|
||||
|
||||
objp = ____cache_alloc_node(cachep, flags, nodeid);
|
||||
out:
|
||||
return objp;
|
||||
}
|
||||
#else
|
||||
|
||||
static __always_inline void *
|
||||
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
|
||||
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unused)
|
||||
{
|
||||
return ____cache_alloc(cachep, flags);
|
||||
}
|
||||
@@ -3266,8 +3228,8 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
static __always_inline void *
|
||||
slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
|
||||
size_t orig_size, unsigned long caller)
|
||||
slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
|
||||
int nodeid, size_t orig_size, unsigned long caller)
|
||||
{
|
||||
unsigned long save_flags;
|
||||
void *objp;
|
||||
@@ -3284,7 +3246,7 @@ slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
|
||||
goto out;
|
||||
|
||||
local_irq_save(save_flags);
|
||||
objp = __do_cache_alloc(cachep, flags);
|
||||
objp = __do_cache_alloc(cachep, flags, nodeid);
|
||||
local_irq_restore(save_flags);
|
||||
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
|
||||
prefetchw(objp);
|
||||
@@ -3295,6 +3257,14 @@ out:
|
||||
return objp;
|
||||
}
|
||||
|
||||
static __always_inline void *
|
||||
slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
|
||||
size_t orig_size, unsigned long caller)
|
||||
{
|
||||
return slab_alloc_node(cachep, lru, flags, NUMA_NO_NODE, orig_size,
|
||||
caller);
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller needs to acquire correct kmem_cache_node's list_lock
|
||||
* @list: List of detached free slabs should be freed by caller
|
||||
@@ -3470,8 +3440,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
|
||||
{
|
||||
void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
|
||||
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret, cachep,
|
||||
cachep->object_size, cachep->size, flags);
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, NUMA_NO_NODE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -3521,7 +3490,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
|
||||
|
||||
local_irq_disable();
|
||||
for (i = 0; i < size; i++) {
|
||||
void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags);
|
||||
void *objp = kfence_alloc(s, s->object_size, flags) ?:
|
||||
__do_cache_alloc(s, flags, NUMA_NO_NODE);
|
||||
|
||||
if (unlikely(!objp))
|
||||
goto error;
|
||||
@@ -3548,23 +3518,6 @@ error:
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
void *
|
||||
kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_);
|
||||
|
||||
ret = kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc(_RET_IP_, ret, cachep,
|
||||
size, cachep->size, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_trace);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/**
|
||||
* kmem_cache_alloc_node - Allocate an object on the specified node
|
||||
* @cachep: The cache to allocate from.
|
||||
@@ -3580,65 +3533,21 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
|
||||
*/
|
||||
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
|
||||
{
|
||||
void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
|
||||
void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_);
|
||||
|
||||
trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep,
|
||||
cachep->object_size, cachep->size,
|
||||
flags, nodeid);
|
||||
trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, nodeid);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_node);
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
|
||||
gfp_t flags,
|
||||
int nodeid,
|
||||
size_t size)
|
||||
void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
|
||||
int nodeid, size_t orig_size,
|
||||
unsigned long caller)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
|
||||
|
||||
ret = kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc_node(_RET_IP_, ret, cachep,
|
||||
size, cachep->size,
|
||||
flags, nodeid);
|
||||
return ret;
|
||||
return slab_alloc_node(cachep, NULL, flags, nodeid,
|
||||
orig_size, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
|
||||
#endif
|
||||
|
||||
static __always_inline void *
|
||||
__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
void *ret;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
|
||||
return NULL;
|
||||
cachep = kmalloc_slab(size, flags);
|
||||
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
|
||||
return cachep;
|
||||
ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
|
||||
ret = kasan_kmalloc(cachep, ret, size, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node);
|
||||
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
|
||||
int node, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
|
||||
@@ -3662,45 +3571,25 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* __do_kmalloc - allocate memory
|
||||
* @size: how many bytes of memory are required.
|
||||
* @flags: the type of memory to allocate (see kmalloc).
|
||||
* @caller: function caller for debug tracking of the caller
|
||||
*
|
||||
* Return: pointer to the allocated memory or %NULL in case of error
|
||||
*/
|
||||
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
|
||||
unsigned long caller)
|
||||
static __always_inline
|
||||
void __do_kmem_cache_free(struct kmem_cache *cachep, void *objp,
|
||||
unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
void *ret;
|
||||
unsigned long flags;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
|
||||
return NULL;
|
||||
cachep = kmalloc_slab(size, flags);
|
||||
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
|
||||
return cachep;
|
||||
ret = slab_alloc(cachep, NULL, flags, size, caller);
|
||||
|
||||
ret = kasan_kmalloc(cachep, ret, size, flags);
|
||||
trace_kmalloc(caller, ret, cachep,
|
||||
size, cachep->size, flags);
|
||||
|
||||
return ret;
|
||||
local_irq_save(flags);
|
||||
debug_check_no_locks_freed(objp, cachep->object_size);
|
||||
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
|
||||
debug_check_no_obj_freed(objp, cachep->object_size);
|
||||
__cache_free(cachep, objp, caller);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void *__kmalloc(size_t size, gfp_t flags)
|
||||
void __kmem_cache_free(struct kmem_cache *cachep, void *objp,
|
||||
unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc(size, flags, _RET_IP_);
|
||||
__do_kmem_cache_free(cachep, objp, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc(size, flags, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_track_caller);
|
||||
|
||||
/**
|
||||
* kmem_cache_free - Deallocate an object
|
||||
@@ -3712,34 +3601,38 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
|
||||
*/
|
||||
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
|
||||
{
|
||||
unsigned long flags;
|
||||
cachep = cache_from_obj(cachep, objp);
|
||||
if (!cachep)
|
||||
return;
|
||||
|
||||
trace_kmem_cache_free(_RET_IP_, objp, cachep->name);
|
||||
local_irq_save(flags);
|
||||
debug_check_no_locks_freed(objp, cachep->object_size);
|
||||
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
|
||||
debug_check_no_obj_freed(objp, cachep->object_size);
|
||||
__cache_free(cachep, objp, _RET_IP_);
|
||||
local_irq_restore(flags);
|
||||
trace_kmem_cache_free(_RET_IP_, objp, cachep);
|
||||
__do_kmem_cache_free(cachep, objp, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_free);
|
||||
|
||||
void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
|
||||
{
|
||||
struct kmem_cache *s;
|
||||
size_t i;
|
||||
|
||||
local_irq_disable();
|
||||
for (i = 0; i < size; i++) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
void *objp = p[i];
|
||||
struct kmem_cache *s;
|
||||
|
||||
if (!orig_s) /* called via kfree_bulk */
|
||||
s = virt_to_cache(objp);
|
||||
else
|
||||
if (!orig_s) {
|
||||
struct folio *folio = virt_to_folio(objp);
|
||||
|
||||
/* called via kfree_bulk */
|
||||
if (!folio_test_slab(folio)) {
|
||||
local_irq_enable();
|
||||
free_large_kmalloc(folio, objp);
|
||||
local_irq_disable();
|
||||
continue;
|
||||
}
|
||||
s = folio_slab(folio)->slab_cache;
|
||||
} else {
|
||||
s = cache_from_obj(orig_s, objp);
|
||||
}
|
||||
|
||||
if (!s)
|
||||
continue;
|
||||
|
||||
@@ -3755,39 +3648,6 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_free_bulk);
|
||||
|
||||
/**
|
||||
* kfree - free previously allocated memory
|
||||
* @objp: pointer returned by kmalloc.
|
||||
*
|
||||
* If @objp is NULL, no operation is performed.
|
||||
*
|
||||
* Don't free memory not originally allocated by kmalloc()
|
||||
* or you will run into trouble.
|
||||
*/
|
||||
void kfree(const void *objp)
|
||||
{
|
||||
struct kmem_cache *c;
|
||||
unsigned long flags;
|
||||
|
||||
trace_kfree(_RET_IP_, objp);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(objp)))
|
||||
return;
|
||||
local_irq_save(flags);
|
||||
kfree_debugcheck(objp);
|
||||
c = virt_to_cache(objp);
|
||||
if (!c) {
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
debug_check_no_locks_freed(objp, c->object_size);
|
||||
|
||||
debug_check_no_obj_freed(objp, c->object_size);
|
||||
__cache_free(c, (void *)objp, _RET_IP_);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
/*
|
||||
* This initializes kmem_cache_node or resizes various caches for all nodes.
|
||||
*/
|
||||
@@ -4190,28 +4050,3 @@ void __check_heap_object(const void *ptr, unsigned long n,
|
||||
usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
|
||||
}
|
||||
#endif /* CONFIG_HARDENED_USERCOPY */
|
||||
|
||||
/**
|
||||
* __ksize -- Uninstrumented ksize.
|
||||
* @objp: pointer to the object
|
||||
*
|
||||
* Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
|
||||
* safety checks as ksize() with KASAN instrumentation enabled.
|
||||
*
|
||||
* Return: size of the actual memory used by @objp in bytes
|
||||
*/
|
||||
size_t __ksize(const void *objp)
|
||||
{
|
||||
struct kmem_cache *c;
|
||||
size_t size;
|
||||
|
||||
BUG_ON(!objp);
|
||||
if (unlikely(objp == ZERO_SIZE_PTR))
|
||||
return 0;
|
||||
|
||||
c = virt_to_cache(objp);
|
||||
size = c ? c->object_size : 0;
|
||||
|
||||
return size;
|
||||
}
|
||||
EXPORT_SYMBOL(__ksize);
|
||||
|
||||
10
mm/slab.h
10
mm/slab.h
@@ -273,6 +273,11 @@ void create_kmalloc_caches(slab_flags_t);
|
||||
|
||||
/* Find the kmalloc slab corresponding for a certain size */
|
||||
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
|
||||
|
||||
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t orig_size,
|
||||
unsigned long caller);
|
||||
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
|
||||
#endif
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags);
|
||||
@@ -658,8 +663,13 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
|
||||
print_tracking(cachep, x);
|
||||
return cachep;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object);
|
||||
|
||||
#endif /* CONFIG_SLOB */
|
||||
|
||||
size_t __ksize(const void *objp);
|
||||
|
||||
static inline size_t slab_ksize(const struct kmem_cache *s)
|
||||
{
|
||||
#ifndef CONFIG_SLUB
|
||||
|
||||
241
mm/slab_common.c
241
mm/slab_common.c
@@ -511,13 +511,9 @@ EXPORT_SYMBOL(kmem_cache_destroy);
|
||||
*/
|
||||
int kmem_cache_shrink(struct kmem_cache *cachep)
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
||||
kasan_cache_shrink(cachep);
|
||||
ret = __kmem_cache_shrink(cachep);
|
||||
|
||||
return ret;
|
||||
return __kmem_cache_shrink(cachep);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_shrink);
|
||||
|
||||
@@ -665,7 +661,8 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
|
||||
if (!s)
|
||||
panic("Out of memory when creating slab %s\n", name);
|
||||
|
||||
create_boot_cache(s, name, size, flags, useroffset, usersize);
|
||||
create_boot_cache(s, name, size, flags | SLAB_KMALLOC, useroffset,
|
||||
usersize);
|
||||
kasan_cache_create_kmalloc(s);
|
||||
list_add(&s->list, &slab_caches);
|
||||
s->refcount = 1;
|
||||
@@ -737,6 +734,26 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
|
||||
return kmalloc_caches[kmalloc_type(flags)][index];
|
||||
}
|
||||
|
||||
size_t kmalloc_size_roundup(size_t size)
|
||||
{
|
||||
struct kmem_cache *c;
|
||||
|
||||
/* Short-circuit the 0 size case. */
|
||||
if (unlikely(size == 0))
|
||||
return 0;
|
||||
/* Short-circuit saturated "too-large" case. */
|
||||
if (unlikely(size == SIZE_MAX))
|
||||
return SIZE_MAX;
|
||||
/* Above the smaller buckets, size is a multiple of page size. */
|
||||
if (size > KMALLOC_MAX_CACHE_SIZE)
|
||||
return PAGE_SIZE << get_order(size);
|
||||
|
||||
/* The flags don't matter since size_index is common to all. */
|
||||
c = kmalloc_slab(size, GFP_KERNEL);
|
||||
return c ? c->object_size : 0;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_size_roundup);
|
||||
|
||||
#ifdef CONFIG_ZONE_DMA
|
||||
#define KMALLOC_DMA_NAME(sz) .name[KMALLOC_DMA] = "dma-kmalloc-" #sz,
|
||||
#else
|
||||
@@ -760,8 +777,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
|
||||
|
||||
/*
|
||||
* kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
|
||||
* kmalloc_index() supports up to 2^25=32MB, so the final entry of the table is
|
||||
* kmalloc-32M.
|
||||
* kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
|
||||
* kmalloc-2M.
|
||||
*/
|
||||
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
|
||||
INIT_KMALLOC_INFO(0, 0),
|
||||
@@ -785,11 +802,7 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
|
||||
INIT_KMALLOC_INFO(262144, 256k),
|
||||
INIT_KMALLOC_INFO(524288, 512k),
|
||||
INIT_KMALLOC_INFO(1048576, 1M),
|
||||
INIT_KMALLOC_INFO(2097152, 2M),
|
||||
INIT_KMALLOC_INFO(4194304, 4M),
|
||||
INIT_KMALLOC_INFO(8388608, 8M),
|
||||
INIT_KMALLOC_INFO(16777216, 16M),
|
||||
INIT_KMALLOC_INFO(33554432, 32M)
|
||||
INIT_KMALLOC_INFO(2097152, 2M)
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -902,6 +915,155 @@ void __init create_kmalloc_caches(slab_flags_t flags)
|
||||
/* Kmalloc array is now usable */
|
||||
slab_state = UP;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object)
|
||||
{
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
if (WARN_ON_ONCE(order == 0))
|
||||
pr_warn_once("object pointer: 0x%p\n", object);
|
||||
|
||||
kmemleak_free(object);
|
||||
kasan_kfree_large(object);
|
||||
|
||||
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
|
||||
-(PAGE_SIZE << order));
|
||||
__free_pages(folio_page(folio, 0), order);
|
||||
}
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
|
||||
static __always_inline
|
||||
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *s;
|
||||
void *ret;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
|
||||
ret = __kmalloc_large_node(size, flags, node);
|
||||
trace_kmalloc(_RET_IP_, ret, size,
|
||||
PAGE_SIZE << get_order(size), flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
s = kmalloc_slab(size, flags);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(s)))
|
||||
return s;
|
||||
|
||||
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
|
||||
ret = kasan_kmalloc(s, ret, size, flags);
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node);
|
||||
|
||||
void *__kmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
|
||||
int node, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
|
||||
/**
|
||||
* kfree - free previously allocated memory
|
||||
* @object: pointer returned by kmalloc.
|
||||
*
|
||||
* If @object is NULL, no operation is performed.
|
||||
*
|
||||
* Don't free memory not originally allocated by kmalloc()
|
||||
* or you will run into trouble.
|
||||
*/
|
||||
void kfree(const void *object)
|
||||
{
|
||||
struct folio *folio;
|
||||
struct slab *slab;
|
||||
struct kmem_cache *s;
|
||||
|
||||
trace_kfree(_RET_IP_, object);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(object)))
|
||||
return;
|
||||
|
||||
folio = virt_to_folio(object);
|
||||
if (unlikely(!folio_test_slab(folio))) {
|
||||
free_large_kmalloc(folio, (void *)object);
|
||||
return;
|
||||
}
|
||||
|
||||
slab = folio_slab(folio);
|
||||
s = slab->slab_cache;
|
||||
__kmem_cache_free(s, (void *)object, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
/**
|
||||
* __ksize -- Report full size of underlying allocation
|
||||
* @objp: pointer to the object
|
||||
*
|
||||
* This should only be used internally to query the true size of allocations.
|
||||
* It is not meant to be a way to discover the usable size of an allocation
|
||||
* after the fact. Instead, use kmalloc_size_roundup(). Using memory beyond
|
||||
* the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS,
|
||||
* and/or FORTIFY_SOURCE.
|
||||
*
|
||||
* Return: size of the actual memory used by @objp in bytes
|
||||
*/
|
||||
size_t __ksize(const void *object)
|
||||
{
|
||||
struct folio *folio;
|
||||
|
||||
if (unlikely(object == ZERO_SIZE_PTR))
|
||||
return 0;
|
||||
|
||||
folio = virt_to_folio(object);
|
||||
|
||||
if (unlikely(!folio_test_slab(folio))) {
|
||||
if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE))
|
||||
return 0;
|
||||
if (WARN_ON(object != folio_address(folio)))
|
||||
return 0;
|
||||
return folio_size(folio);
|
||||
}
|
||||
|
||||
return slab_ksize(folio_slab(folio)->slab_cache);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
|
||||
{
|
||||
void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE,
|
||||
size, _RET_IP_);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE);
|
||||
|
||||
ret = kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_trace);
|
||||
|
||||
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size)
|
||||
{
|
||||
void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node);
|
||||
|
||||
ret = kasan_kmalloc(s, ret, size, gfpflags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_node_trace);
|
||||
#endif /* !CONFIG_TRACING */
|
||||
#endif /* !CONFIG_SLOB */
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
@@ -921,37 +1083,50 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
* directly to the page allocator. We use __GFP_COMP, because we will need to
|
||||
* know the allocation order to free the pages properly in kfree.
|
||||
*/
|
||||
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
void *ret = NULL;
|
||||
struct page *page;
|
||||
void *ptr = NULL;
|
||||
unsigned int order = get_order(size);
|
||||
|
||||
if (unlikely(flags & GFP_SLAB_BUG_MASK))
|
||||
flags = kmalloc_fix_flags(flags);
|
||||
|
||||
flags |= __GFP_COMP;
|
||||
page = alloc_pages(flags, order);
|
||||
if (likely(page)) {
|
||||
ret = page_address(page);
|
||||
page = alloc_pages_node(node, flags, order);
|
||||
if (page) {
|
||||
ptr = page_address(page);
|
||||
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
ret = kasan_kmalloc_large(ret, size, flags);
|
||||
/* As ret might get tagged, call kmemleak hook after KASAN. */
|
||||
kmemleak_alloc(ret, size, 1, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_order);
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
||||
ptr = kasan_kmalloc_large(ptr, size, flags);
|
||||
/* As ptr might get tagged, call kmemleak hook after KASAN. */
|
||||
kmemleak_alloc(ptr, size, 1, flags);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void *kmalloc_large(size_t size, gfp_t flags)
|
||||
{
|
||||
void *ret = kmalloc_order(size, flags, order);
|
||||
trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags);
|
||||
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
|
||||
flags, NUMA_NO_NODE);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_order_trace);
|
||||
#endif
|
||||
EXPORT_SYMBOL(kmalloc_large);
|
||||
|
||||
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
void *ret = __kmalloc_large_node(size, flags, node);
|
||||
|
||||
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
|
||||
flags, node);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmalloc_large_node);
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
/* Randomize a generic freelist */
|
||||
@@ -1150,8 +1325,8 @@ module_init(slab_proc_init);
|
||||
|
||||
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
|
||||
|
||||
static __always_inline void *__do_krealloc(const void *p, size_t new_size,
|
||||
gfp_t flags)
|
||||
static __always_inline __realloc_size(2) void *
|
||||
__do_krealloc(const void *p, size_t new_size, gfp_t flags)
|
||||
{
|
||||
void *ret;
|
||||
size_t ks;
|
||||
@@ -1283,8 +1458,6 @@ EXPORT_SYMBOL(ksize);
|
||||
/* Tracepoints definitions. */
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kfree);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
|
||||
|
||||
|
||||
45
mm/slob.c
45
mm/slob.c
@@ -507,8 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
|
||||
*m = size;
|
||||
ret = (void *)m + minalign;
|
||||
|
||||
trace_kmalloc_node(caller, ret, NULL,
|
||||
size, size + minalign, gfp, node);
|
||||
trace_kmalloc(caller, ret, size, size + minalign, gfp, node);
|
||||
} else {
|
||||
unsigned int order = get_order(size);
|
||||
|
||||
@@ -516,8 +515,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
|
||||
gfp |= __GFP_COMP;
|
||||
ret = slob_new_pages(gfp, order, node);
|
||||
|
||||
trace_kmalloc_node(caller, ret, NULL,
|
||||
size, PAGE_SIZE << order, gfp, node);
|
||||
trace_kmalloc(caller, ret, size, PAGE_SIZE << order, gfp, node);
|
||||
}
|
||||
|
||||
kmemleak_alloc(ret, size, 1, gfp);
|
||||
@@ -530,20 +528,12 @@ void *__kmalloc(size_t size, gfp_t gfp)
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_track_caller);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
|
||||
int node, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, gfp, node, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
#endif
|
||||
|
||||
void kfree(const void *block)
|
||||
{
|
||||
@@ -574,6 +564,20 @@ void kfree(const void *block)
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
size_t kmalloc_size_roundup(size_t size)
|
||||
{
|
||||
/* Short-circuit the 0 size case. */
|
||||
if (unlikely(size == 0))
|
||||
return 0;
|
||||
/* Short-circuit saturated "too-large" case. */
|
||||
if (unlikely(size == SIZE_MAX))
|
||||
return SIZE_MAX;
|
||||
|
||||
return ALIGN(size, ARCH_KMALLOC_MINALIGN);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(kmalloc_size_roundup);
|
||||
|
||||
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
|
||||
size_t __ksize(const void *block)
|
||||
{
|
||||
@@ -594,7 +598,6 @@ size_t __ksize(const void *block)
|
||||
m = (unsigned int *)(block - align);
|
||||
return SLOB_UNITS(*m) * SLOB_UNIT;
|
||||
}
|
||||
EXPORT_SYMBOL(__ksize);
|
||||
|
||||
int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
|
||||
{
|
||||
@@ -602,6 +605,9 @@ int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
|
||||
/* leave room for rcu footer at the end of object */
|
||||
c->size += sizeof(struct slob_rcu);
|
||||
}
|
||||
|
||||
/* Actual size allocated */
|
||||
c->size = SLOB_UNITS(c->size) * SLOB_UNIT;
|
||||
c->flags = flags;
|
||||
return 0;
|
||||
}
|
||||
@@ -616,14 +622,10 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
|
||||
|
||||
if (c->size < PAGE_SIZE) {
|
||||
b = slob_alloc(c->size, flags, c->align, node, 0);
|
||||
trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
|
||||
SLOB_UNITS(c->size) * SLOB_UNIT,
|
||||
flags, node);
|
||||
trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node);
|
||||
} else {
|
||||
b = slob_new_pages(flags, get_order(c->size), node);
|
||||
trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
|
||||
PAGE_SIZE << get_order(c->size),
|
||||
flags, node);
|
||||
trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node);
|
||||
}
|
||||
|
||||
if (b && c->ctor) {
|
||||
@@ -647,7 +649,7 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, gfp_
|
||||
return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_lru);
|
||||
#ifdef CONFIG_NUMA
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
|
||||
{
|
||||
return __do_kmalloc_node(size, gfp, node, _RET_IP_);
|
||||
@@ -659,7 +661,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
|
||||
return slob_alloc_node(cachep, gfp, node);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_alloc_node);
|
||||
#endif
|
||||
|
||||
static void __kmem_cache_free(void *b, int size)
|
||||
{
|
||||
@@ -680,7 +681,7 @@ static void kmem_rcu_free(struct rcu_head *head)
|
||||
void kmem_cache_free(struct kmem_cache *c, void *b)
|
||||
{
|
||||
kmemleak_free_recursive(b, c->flags);
|
||||
trace_kmem_cache_free(_RET_IP_, b, c->name);
|
||||
trace_kmem_cache_free(_RET_IP_, b, c);
|
||||
if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
|
||||
struct slob_rcu *slob_rcu;
|
||||
slob_rcu = b + (c->size - sizeof(struct slob_rcu));
|
||||
|
||||
36
mm/vmstat.c
36
mm/vmstat.c
@@ -355,8 +355,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
* CPU migrations and preemption potentially corrupts a counter so
|
||||
* disable preemption.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
@@ -368,8 +367,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_zone_page_state);
|
||||
|
||||
@@ -393,8 +391,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
}
|
||||
|
||||
/* See __mod_node_page_state */
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
@@ -406,8 +403,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_node_page_state);
|
||||
|
||||
@@ -441,8 +437,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
s8 v, t;
|
||||
|
||||
/* See __mod_node_page_state */
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -453,8 +448,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -466,8 +460,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
|
||||
|
||||
/* See __mod_node_page_state */
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -478,8 +471,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
@@ -501,8 +493,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
s8 v, t;
|
||||
|
||||
/* See __mod_node_page_state */
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -513,8 +504,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
@@ -526,8 +516,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
|
||||
|
||||
/* See __mod_node_page_state */
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_disable();
|
||||
preempt_disable_nested();
|
||||
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
@@ -538,8 +527,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
preempt_enable();
|
||||
preempt_enable_nested();
|
||||
}
|
||||
|
||||
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
|
||||
@@ -635,6 +635,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
|
||||
*type = INSN_CONTEXT_SWITCH;
|
||||
break;
|
||||
|
||||
case 0xe0: /* loopne */
|
||||
case 0xe1: /* loope */
|
||||
case 0xe2: /* loop */
|
||||
*type = INSN_JUMP_CONDITIONAL;
|
||||
break;
|
||||
|
||||
case 0xe8:
|
||||
*type = INSN_CALL;
|
||||
/*
|
||||
|
||||
@@ -2107,9 +2107,6 @@ static int read_noendbr_hints(struct objtool_file *file)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (insn->type == INSN_ENDBR)
|
||||
WARN_FUNC("ANNOTATE_NOENDBR on ENDBR", insn->sec, insn->offset);
|
||||
|
||||
insn->noendbr = 1;
|
||||
}
|
||||
|
||||
@@ -2238,7 +2235,7 @@ static int read_intra_function_calls(struct objtool_file *file)
|
||||
*/
|
||||
insn->type = INSN_JUMP_UNCONDITIONAL;
|
||||
|
||||
dest_off = insn->offset + insn->len + insn->immediate;
|
||||
dest_off = arch_jump_destination(insn);
|
||||
insn->jump_dest = find_insn(file, insn->sec, dest_off);
|
||||
if (!insn->jump_dest) {
|
||||
WARN_FUNC("can't find call dest at %s+0x%lx",
|
||||
|
||||
@@ -184,7 +184,7 @@ int main(int argc, char *argv[])
|
||||
/*
|
||||
* If res is non-zero, we either requeued the waiter or hit an
|
||||
* error, break out and handle it. If it is zero, then the
|
||||
* signal may have hit before the the waiter was blocked on f1.
|
||||
* signal may have hit before the waiter was blocked on f1.
|
||||
* Try again.
|
||||
*/
|
||||
if (res > 0) {
|
||||
|
||||
Reference in New Issue
Block a user