From 1d0c5f6f3d1387ec9c3a379fb232c078f5838d55 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:22 +0900 Subject: [PATCH 1/6] samples/bpf: Use kyscall instead of kprobe in syscall tracing program Syscall tracing using kprobe is quite unstable. Since it uses the exact name of the kernel function, the program might broke due to the rename of a function. The problem can also be caused by a changes in the arguments of the function to which the kprobe connects. In this commit, ksyscall is used instead of kprobe. By using ksyscall, libbpf will detect the appropriate kernel function name. (e.g. sys_write -> __s390_sys_write). This eliminates the need to worry about which wrapper function to attach in order to parse arguments. In addition, ksyscall provides more fine method with attaching system call, the coarse SYSCALL helper at trace_common.h can be removed. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-2-danieltimlee@gmail.com --- samples/bpf/map_perf_test_kern.c | 17 ++++++++--------- .../bpf/test_current_task_under_cgroup_kern.c | 3 +-- samples/bpf/test_map_in_map_kern.c | 1 - samples/bpf/test_probe_write_user_kern.c | 3 +-- samples/bpf/trace_common.h | 13 ------------- samples/bpf/trace_output_kern.c | 3 +-- samples/bpf/tracex2_kern.c | 3 +-- 7 files changed, 12 insertions(+), 31 deletions(-) delete mode 100644 samples/bpf/trace_common.h diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 7342c5b2f278..874e2f7e3d5d 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -11,7 +11,6 @@ #include #include #include -#include "trace_common.h" #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 @@ -102,7 +101,7 @@ struct { __uint(max_entries, MAX_ENTRIES); } lru_hash_lookup_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_getuid)) +SEC("ksyscall/getuid") int stress_hmap(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -120,7 +119,7 @@ int stress_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_geteuid)) +SEC("ksyscall/geteuid") int stress_percpu_hmap(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -137,7 +136,7 @@ int stress_percpu_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getgid)) +SEC("ksyscall/getgid") int stress_hmap_alloc(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -154,7 +153,7 @@ int stress_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getegid)) +SEC("ksyscall/getegid") int stress_percpu_hmap_alloc(struct pt_regs *ctx) { u32 key = bpf_get_current_pid_tgid(); @@ -171,7 +170,7 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_connect)) +SEC("ksyscall/connect") int stress_lru_hmap_alloc(struct pt_regs *ctx) { struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); @@ -251,7 +250,7 @@ done: return 0; } -SEC("kprobe/" SYSCALL(sys_gettid)) +SEC("ksyscall/gettid") int stress_lpm_trie_map_alloc(struct pt_regs *ctx) { union { @@ -273,7 +272,7 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getpgid)) +SEC("ksyscall/getpgid") int stress_hash_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; @@ -286,7 +285,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getppid)) +SEC("ksyscall/getppid") int stress_array_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c index fbd43e2bb4d3..541fc861b984 100644 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -10,7 +10,6 @@ #include #include #include -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); @@ -27,7 +26,7 @@ struct { } perf_map SEC(".maps"); /* Writes the last PID that called sync to a map at index 0 */ -SEC("kprobe/" SYSCALL(sys_sync)) +SEC("ksyscall/sync") int bpf_prog1(struct pt_regs *ctx) { u64 pid = bpf_get_current_pid_tgid(); diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index b0200c8eac09..0e17f9ade5c5 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -13,7 +13,6 @@ #include #include #include -#include "trace_common.h" #define MAX_NR_PORTS 65536 diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index 220a96438d75..d60cabaaf753 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -11,7 +11,6 @@ #include #include #include -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -28,7 +27,7 @@ struct { * This example sits on a syscall, and the syscall ABI is relatively stable * of course, across platforms, and over time, the ABI may change. */ -SEC("kprobe/" SYSCALL(sys_connect)) +SEC("ksyscall/connect") int bpf_prog1(struct pt_regs *ctx) { struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h deleted file mode 100644 index 8cb5400aed1f..000000000000 --- a/samples/bpf/trace_common.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef __TRACE_COMMON_H -#define __TRACE_COMMON_H - -#ifdef __x86_64__ -#define SYSCALL(SYS) "__x64_" __stringify(SYS) -#elif defined(__s390x__) -#define SYSCALL(SYS) "__s390x_" __stringify(SYS) -#else -#define SYSCALL(SYS) __stringify(SYS) -#endif - -#endif diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index b64815af0943..a481abf8c4c5 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -2,7 +2,6 @@ #include #include #include -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -11,7 +10,7 @@ struct { __uint(max_entries, 2); } my_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_write)) +SEC("ksyscall/write") int bpf_prog1(struct pt_regs *ctx) { struct S { diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 93e0b7680b4f..82091facb83c 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -10,7 +10,6 @@ #include #include #include -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -78,7 +77,7 @@ struct { __uint(max_entries, 1024); } my_hist_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_write)) +SEC("ksyscall/write") int bpf_prog3(struct pt_regs *ctx) { long write_size = PT_REGS_PARM3(ctx); From 8a4dd0bcbdfd5bdaa5d1a5b390f44a45b60e8aa9 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:23 +0900 Subject: [PATCH 2/6] samples/bpf: Use vmlinux.h instead of implicit headers in syscall tracing program This commit applies vmlinux.h to syscall tracing program. This change allows the bpf program to refer to the internal structure as a single "vmlinux.h" instead of including each header referenced by the bpf program. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-3-danieltimlee@gmail.com --- samples/bpf/map_perf_test_kern.c | 5 ++--- samples/bpf/test_current_task_under_cgroup_kern.c | 4 +--- samples/bpf/test_probe_write_user_kern.c | 5 ++--- samples/bpf/trace_output_kern.c | 3 +-- samples/bpf/tracex2_kern.c | 4 +--- 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 874e2f7e3d5d..0c7885057ffe 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -4,10 +4,9 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include +#include "vmlinux.h" +#include #include -#include #include #include #include diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c index 541fc861b984..0b059cee3cba 100644 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -5,11 +5,9 @@ * License as published by the Free Software Foundation. */ -#include -#include +#include "vmlinux.h" #include #include -#include struct { __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c index d60cabaaf753..a0f10c5ca273 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user_kern.c @@ -4,9 +4,8 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include -#include +#include "vmlinux.h" +#include #include #include #include diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index a481abf8c4c5..565a73b51b04 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -1,6 +1,5 @@ -#include +#include "vmlinux.h" #include -#include #include struct { diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 82091facb83c..a712eefc742e 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -4,10 +4,8 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include +#include "vmlinux.h" #include -#include #include #include From d4fffba4d04b8d605ff07f1ed987399f6af0ad5b Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:24 +0900 Subject: [PATCH 3/6] samples/bpf: Change _kern suffix to .bpf with syscall tracing program Currently old compile rule (CLANG-bpf) doesn't contains VMLINUX_H define flag which is essential for the bpf program that includes "vmlinux.h". Also old compile rule doesn't directly specify the compile target as bpf, instead it uses bunch of extra options with clang followed by long chain of commands. (e.g. clang | opt | llvm-dis | llc) In Makefile, there is already new compile rule which is more simple and neat. And it also has -D__VMLINUX_H__ option. By just changing the _kern suffix to .bpf will inherit the benefit of the new CLANG-BPF compile target. Also, this commit adds dummy gnu/stub.h to the samples/bpf directory. As commit 1c2dd16add7e ("selftests/bpf: get rid of -D__x86_64__") noted, compiling with 'clang -target bpf' will raise an error with stubs.h unless workaround (-D__x86_64) is used. This commit solves this problem by adding dummy stub.h to make /usr/include/features.h to follow the expected path as the same way selftests/bpf dealt with. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-4-danieltimlee@gmail.com --- samples/bpf/Makefile | 10 +++++----- samples/bpf/gnu/stubs.h | 1 + .../bpf/{map_perf_test_kern.c => map_perf_test.bpf.c} | 0 samples/bpf/map_perf_test_user.c | 2 +- ...oup_kern.c => test_current_task_under_cgroup.bpf.c} | 0 samples/bpf/test_current_task_under_cgroup_user.c | 2 +- ...e_write_user_kern.c => test_probe_write_user.bpf.c} | 0 samples/bpf/test_probe_write_user_user.c | 2 +- .../bpf/{trace_output_kern.c => trace_output.bpf.c} | 0 samples/bpf/trace_output_user.c | 2 +- samples/bpf/{tracex2_kern.c => tracex2.bpf.c} | 0 samples/bpf/tracex2_user.c | 2 +- 12 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 samples/bpf/gnu/stubs.h rename samples/bpf/{map_perf_test_kern.c => map_perf_test.bpf.c} (100%) rename samples/bpf/{test_current_task_under_cgroup_kern.c => test_current_task_under_cgroup.bpf.c} (100%) rename samples/bpf/{test_probe_write_user_kern.c => test_probe_write_user.bpf.c} (100%) rename samples/bpf/{trace_output_kern.c => trace_output.bpf.c} (100%) rename samples/bpf/{tracex2_kern.c => tracex2.bpf.c} (100%) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 727da3c5879b..22039a0a5b35 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -125,21 +125,21 @@ always-y += sockex1_kern.o always-y += sockex2_kern.o always-y += sockex3_kern.o always-y += tracex1_kern.o -always-y += tracex2_kern.o +always-y += tracex2.bpf.o always-y += tracex3_kern.o always-y += tracex4_kern.o always-y += tracex5_kern.o always-y += tracex6_kern.o always-y += tracex7_kern.o always-y += sock_flags_kern.o -always-y += test_probe_write_user_kern.o -always-y += trace_output_kern.o +always-y += test_probe_write_user.bpf.o +always-y += trace_output.bpf.o always-y += tcbpf1_kern.o always-y += tc_l2_redirect_kern.o always-y += lathist_kern.o always-y += offwaketime_kern.o always-y += spintest_kern.o -always-y += map_perf_test_kern.o +always-y += map_perf_test.bpf.o always-y += test_overhead_tp_kern.o always-y += test_overhead_raw_tp_kern.o always-y += test_overhead_kprobe_kern.o @@ -147,7 +147,7 @@ always-y += parse_varlen.o parse_simple.o parse_ldabs.o always-y += test_cgrp2_tc_kern.o always-y += xdp1_kern.o always-y += xdp2_kern.o -always-y += test_current_task_under_cgroup_kern.o +always-y += test_current_task_under_cgroup.bpf.o always-y += trace_event_kern.o always-y += sampleip_kern.o always-y += lwt_len_hist_kern.o diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h new file mode 100644 index 000000000000..719225b16626 --- /dev/null +++ b/samples/bpf/gnu/stubs.h @@ -0,0 +1 @@ +/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */ diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test.bpf.c similarity index 100% rename from samples/bpf/map_perf_test_kern.c rename to samples/bpf/map_perf_test.bpf.c diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 1bb53f4b29e1..d2fbcf963cdf 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -443,7 +443,7 @@ int main(int argc, char **argv) if (argc > 4) max_cnt = atoi(argv[4]); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup.bpf.c similarity index 100% rename from samples/bpf/test_current_task_under_cgroup_kern.c rename to samples/bpf/test_current_task_under_cgroup.bpf.c diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 6fb25906835e..9726ed2a8a8b 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -21,7 +21,7 @@ int main(int argc, char **argv) char filename[256]; int map_fd[2]; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user.bpf.c similarity index 100% rename from samples/bpf/test_probe_write_user_kern.c rename to samples/bpf/test_probe_write_user.bpf.c diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index 00ccfb834e45..2a539aec4116 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -24,7 +24,7 @@ int main(int ac, char **argv) mapped_addr_in = (struct sockaddr_in *)&mapped_addr; tmp_addr_in = (struct sockaddr_in *)&tmp_addr; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output.bpf.c similarity index 100% rename from samples/bpf/trace_output_kern.c rename to samples/bpf/trace_output.bpf.c diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 371732f9cf8e..d316fd2c8e24 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -51,7 +51,7 @@ int main(int argc, char **argv) char filename[256]; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2.bpf.c similarity index 100% rename from samples/bpf/tracex2_kern.c rename to samples/bpf/tracex2.bpf.c diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 089e408abd7a..2131f1648cf1 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -123,7 +123,7 @@ int main(int ac, char **argv) int i, j = 0; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); From 2e5c4dd7f81545a98e9f06317347e760749c020b Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:25 +0900 Subject: [PATCH 4/6] samples/bpf: Fix tracex2 by using BPF_KSYSCALL macro Currently, there is a problem with tracex2, as it doesn't print the histogram properly and the results are misleading. (all results report as 0) The problem is caused by a change in arguments of the function to which the kprobe connects. This tracex2 bpf program uses kprobe (attached to __x64_sys_write) to figure out the size of the write system call. In order to achieve this, the third argument 'count' must be intact. The following is a prototype of the sys_write variant. (checked with pfunct) ~/git/linux$ pfunct -P fs/read_write.o | grep sys_write ssize_t ksys_write(unsigned int fd, const char * buf, size_t count); long int __x64_sys_write(const struct pt_regs * regs); ... cross compile with s390x ... long int __s390_sys_write(struct pt_regs * regs); Since the nature of SYSCALL_WRAPPER function wraps the argument once, additional process of argument extraction is required to properly parse the argument. #define BPF_KSYSCALL(name, args...) ... snip ... struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ ? (struct pt_regs *)PT_REGS_PARM1(ctx) \ : ctx; \ In order to fix this problem, the BPF_SYSCALL macro has been used. This reduces the hassle of parsing arguments from pt_regs. Since the macro uses the CORE version of argument extraction, additional portability comes too. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-5-danieltimlee@gmail.com --- samples/bpf/tracex2.bpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/bpf/tracex2.bpf.c b/samples/bpf/tracex2.bpf.c index a712eefc742e..0a5c75b367be 100644 --- a/samples/bpf/tracex2.bpf.c +++ b/samples/bpf/tracex2.bpf.c @@ -8,6 +8,7 @@ #include #include #include +#include struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -76,14 +77,13 @@ struct { } my_hist_map SEC(".maps"); SEC("ksyscall/write") -int bpf_prog3(struct pt_regs *ctx) +int BPF_KSYSCALL(bpf_prog3, unsigned int fd, const char *buf, size_t count) { - long write_size = PT_REGS_PARM3(ctx); long init_val = 1; long *value; struct hist_key key; - key.index = log2l(write_size); + key.index = log2l(count); key.pid_tgid = bpf_get_current_pid_tgid(); key.uid_gid = bpf_get_current_uid_gid(); bpf_get_current_comm(&key.comm, sizeof(key.comm)); From c5ffb26375ad309c858453f17e777b716723d527 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:26 +0900 Subject: [PATCH 5/6] samples/bpf: Use BPF_KSYSCALL macro in syscall tracing programs This commit enhances the syscall tracing programs by using the BPF_SYSCALL macro to reduce the inconvenience of parsing arguments from pt_regs. By simplifying argument extraction, bpf program will become clear to understand. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-6-danieltimlee@gmail.com --- samples/bpf/map_perf_test.bpf.c | 26 ++++++++----------- .../bpf/test_current_task_under_cgroup.bpf.c | 4 ++- samples/bpf/test_probe_write_user.bpf.c | 12 ++++----- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/samples/bpf/map_perf_test.bpf.c b/samples/bpf/map_perf_test.bpf.c index 0c7885057ffe..3cdeba2afe12 100644 --- a/samples/bpf/map_perf_test.bpf.c +++ b/samples/bpf/map_perf_test.bpf.c @@ -101,7 +101,7 @@ struct { } lru_hash_lookup_map SEC(".maps"); SEC("ksyscall/getuid") -int stress_hmap(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -119,7 +119,7 @@ int stress_hmap(struct pt_regs *ctx) } SEC("ksyscall/geteuid") -int stress_percpu_hmap(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_percpu_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -136,7 +136,7 @@ int stress_percpu_hmap(struct pt_regs *ctx) } SEC("ksyscall/getgid") -int stress_hmap_alloc(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -153,7 +153,7 @@ int stress_hmap_alloc(struct pt_regs *ctx) } SEC("ksyscall/getegid") -int stress_percpu_hmap_alloc(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_percpu_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -168,11 +168,10 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) } return 0; } - SEC("ksyscall/connect") -int stress_lru_hmap_alloc(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_lru_hmap_alloc, int fd, struct sockaddr_in *uservaddr, + int addrlen) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; union { u16 dst6[8]; @@ -185,14 +184,11 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) u32 key; }; } test_params; - struct sockaddr_in6 *in6; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)uservaddr; u16 test_case; - int addrlen, ret; long val = 1; u32 key = 0; - - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); - addrlen = (int)PT_REGS_PARM3_CORE(real_regs); + int ret; if (addrlen != sizeof(*in6)) return 0; @@ -250,7 +246,7 @@ done: } SEC("ksyscall/gettid") -int stress_lpm_trie_map_alloc(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_lpm_trie_map_alloc) { union { u32 b32[2]; @@ -272,7 +268,7 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) } SEC("ksyscall/getpgid") -int stress_hash_map_lookup(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_hash_map_lookup) { u32 key = 1, i; long *value; @@ -285,7 +281,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) } SEC("ksyscall/getppid") -int stress_array_map_lookup(struct pt_regs *ctx) +int BPF_KSYSCALL(stress_array_map_lookup) { u32 key = 1, i; long *value; diff --git a/samples/bpf/test_current_task_under_cgroup.bpf.c b/samples/bpf/test_current_task_under_cgroup.bpf.c index 0b059cee3cba..58b9cf7ed659 100644 --- a/samples/bpf/test_current_task_under_cgroup.bpf.c +++ b/samples/bpf/test_current_task_under_cgroup.bpf.c @@ -8,6 +8,8 @@ #include "vmlinux.h" #include #include +#include +#include struct { __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); @@ -25,7 +27,7 @@ struct { /* Writes the last PID that called sync to a map at index 0 */ SEC("ksyscall/sync") -int bpf_prog1(struct pt_regs *ctx) +int BPF_KSYSCALL(bpf_prog1) { u64 pid = bpf_get_current_pid_tgid(); int idx = 0; diff --git a/samples/bpf/test_probe_write_user.bpf.c b/samples/bpf/test_probe_write_user.bpf.c index a0f10c5ca273..a4f3798b7fb0 100644 --- a/samples/bpf/test_probe_write_user.bpf.c +++ b/samples/bpf/test_probe_write_user.bpf.c @@ -27,24 +27,22 @@ struct { * of course, across platforms, and over time, the ABI may change. */ SEC("ksyscall/connect") -int bpf_prog1(struct pt_regs *ctx) +int BPF_KSYSCALL(bpf_prog1, int fd, struct sockaddr_in *uservaddr, + int addrlen) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); - void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs); - int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs); struct sockaddr_in new_addr, orig_addr = {}; struct sockaddr_in *mapped_addr; - if (sockaddr_len > sizeof(orig_addr)) + if (addrlen > sizeof(orig_addr)) return 0; - if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) + if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), uservaddr) != 0) return 0; mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); if (mapped_addr != NULL) { memcpy(&new_addr, mapped_addr, sizeof(new_addr)); - bpf_probe_write_user(sockaddr_arg, &new_addr, + bpf_probe_write_user(uservaddr, &new_addr, sizeof(new_addr)); } return 0; From 7244eb669397f309c3d014264823cdc9cb3f8e6b Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Sat, 24 Dec 2022 16:15:27 +0900 Subject: [PATCH 6/6] libbpf: Fix invalid return address register in s390 There is currently an invalid register mapping in the s390 return address register. As the manual[1] states, the return address can be found at r14. In bpf_tracing.h, the s390 registers were named gprs(general purpose registers). This commit fixes the problem by correcting the mistyped mapping. [1]: https://uclibc.org/docs/psABI-s390x.pdf#page=14 Fixes: 3cc31d794097 ("libbpf: Normalize PT_REGS_xxx() macro definitions") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221224071527.2292-7-danieltimlee@gmail.com --- tools/lib/bpf/bpf_tracing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 2972dc25ff72..9c1b1689068d 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -137,7 +137,7 @@ struct pt_regs___s390 { #define __PT_PARM3_REG gprs[4] #define __PT_PARM4_REG gprs[5] #define __PT_PARM5_REG gprs[6] -#define __PT_RET_REG grps[14] +#define __PT_RET_REG gprs[14] #define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15]