mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-11 05:17:10 +09:00
Merge tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni:
"Core:
- Allow live renaming when an interface is up
- Add retpoline wrappers for tc, considerably improving the
performance of complex queue discipline configurations
- Add inet drop monitor support
- A few GRO performance improvements
- Add infrastructure for atomic dev stats, addressing long standing
data races
- De-duplicate common code between OVS and conntrack offloading
infrastructure
- A bunch of UBSAN_BOUNDS/FORTIFY_SOURCE improvements
- Netfilter: introduce packet parser for tunneled packets
- Replace IPVS timer-based estimators with kthreads to scale up the
workload with the number of available CPUs
- Add the helper support for connection-tracking OVS offload
BPF:
- Support for user defined BPF objects: the use case is to allocate
own objects, build own object hierarchies and use the building
blocks to build own data structures flexibly, for example, linked
lists in BPF
- Make cgroup local storage available to non-cgroup attached BPF
programs
- Avoid unnecessary deadlock detection and failures wrt BPF task
storage helpers
- A relevant bunch of BPF verifier fixes and improvements
- Veristat tool improvements to support custom filtering, sorting,
and replay of results
- Add LLVM disassembler as default library for dumping JITed code
- Lots of new BPF documentation for various BPF maps
- Add bpf_rcu_read_{,un}lock() support for sleepable programs
- Add RCU grace period chaining to BPF to wait for the completion of
access from both sleepable and non-sleepable BPF programs
- Add support storing struct task_struct objects as kptrs in maps
- Improve helper UAPI by explicitly defining BPF_FUNC_xxx integer
values
- Add libbpf *_opts API-variants for bpf_*_get_fd_by_id() functions
Protocols:
- TCP: implement Protective Load Balancing across switch links
- TCP: allow dynamically disabling TCP-MD5 static key, reverting back
to fast[er]-path
- UDP: Introduce optional per-netns hash lookup table
- IPv6: simplify and cleanup sockets disposal
- Netlink: support different type policies for each generic netlink
operation
- MPTCP: add MSG_FASTOPEN and FastOpen listener side support
- MPTCP: add netlink notification support for listener sockets events
- SCTP: add VRF support, allowing sctp sockets binding to VRF devices
- Add bridging MAC Authentication Bypass (MAB) support
- Extensions for Ethernet VPN bridging implementation to better
support multicast scenarios
- More work for Wi-Fi 7 support, comprising conversion of all the
existing drivers to internal TX queue usage
- IPSec: introduce a new offload type (packet offload) allowing
complete header processing and crypto offloading
- IPSec: extended ack support for more descriptive XFRM error
reporting
- RXRPC: increase SACK table size and move processing into a
per-local endpoint kernel thread, reducing considerably the
required locking
- IEEE 802154: synchronous send frame and extended filtering support,
initial support for scanning available 15.4 networks
- Tun: bump the link speed from 10Mbps to 10Gbps
- Tun/VirtioNet: implement UDP segmentation offload support
Driver API:
- PHY/SFP: improve power level switching between standard level 1 and
the higher power levels
- New API for netdev <-> devlink_port linkage
- PTP: convert existing drivers to new frequency adjustment
implementation
- DSA: add support for rx offloading
- Autoload DSA tagging driver when dynamically changing protocol
- Add new PCP and APPTRUST attributes to Data Center Bridging
- Add configuration support for 800Gbps link speed
- Add devlink port function attribute to enable/disable RoCE and
migratable
- Extend devlink-rate to support strict priority and weighted fair
queuing
- Add devlink support to directly reading from region memory
- New device tree helper to fetch MAC address from nvmem
- New big TCP helper to simplify temporary header stripping
New hardware / drivers:
- Ethernet:
- Marvell Octeon CNF95N and CN10KB Ethernet Switches
- Marvell Prestera AC5X Ethernet Switch
- WangXun 10 Gigabit NIC
- Motorcomm yt8521 Gigabit Ethernet
- Microchip ksz9563 Gigabit Ethernet Switch
- Microsoft Azure Network Adapter
- Linux Automation 10Base-T1L adapter
- PHY:
- Aquantia AQR112 and AQR412
- Motorcomm YT8531S
- PTP:
- Orolia ART-CARD
- WiFi:
- MediaTek Wi-Fi 7 (802.11be) devices
- RealTek rtw8821cu, rtw8822bu, rtw8822cu and rtw8723du USB
devices
- Bluetooth:
- Broadcom BCM4377/4378/4387 Bluetooth chipsets
- Realtek RTL8852BE and RTL8723DS
- Cypress CYW4373A0 WiFi + Bluetooth combo device
Drivers:
- CAN:
- gs_usb: bus error reporting support
- kvaser_usb: listen only and bus error reporting support
- Ethernet NICs:
- Intel (100G):
- extend action skbedit to RX queue mapping
- implement devlink-rate support
- support direct read from memory
- nVidia/Mellanox (mlx5):
- SW steering improvements, increasing rules update rate
- Support for enhanced events compression
- extend H/W offload packet manipulation capabilities
- implement IPSec packet offload mode
- nVidia/Mellanox (mlx4):
- better big TCP support
- Netronome Ethernet NICs (nfp):
- IPsec offload support
- add support for multicast filter
- Broadcom:
- RSS and PTP support improvements
- AMD/SolarFlare:
- netlink extended ack improvements
- add basic flower matches to offload, and related stats
- Virtual NICs:
- ibmvnic: introduce affinity hint support
- small / embedded:
- FreeScale fec: add initial XDP support
- Marvell mv643xx_eth: support MII/GMII/RGMII modes for Kirkwood
- TI am65-cpsw: add suspend/resume support
- Mediatek MT7986: add RX Wireless Ethernet Dispatch support
- Realtek 8169: enable GRO software interrupt coalescing per
default
- Ethernet high-speed switches:
- Microchip (sparx5):
- add support for Sparx5 TC/flower H/W offload via VCAP
- Mellanox mlxsw:
- add 802.1X and MAC Authentication Bypass offload support
- add ip6gre support
- Embedded Ethernet switches:
- Mediatek (mtk_eth_soc):
- improve PCS implementation, add DSA untag support
- enable flow offload support
- Renesas:
- add rswitch R-Car Gen4 gPTP support
- Microchip (lan966x):
- add full XDP support
- add TC H/W offload via VCAP
- enable PTP on bridge interfaces
- Microchip (ksz8):
- add MTU support for KSZ8 series
- Qualcomm 802.11ax WiFi (ath11k):
- support configuring channel dwell time during scan
- MediaTek WiFi (mt76):
- enable Wireless Ethernet Dispatch (WED) offload support
- add ack signal support
- enable coredump support
- remain_on_channel support
- Intel WiFi (iwlwifi):
- enable Wi-Fi 7 Extremely High Throughput (EHT) PHY capabilities
- 320 MHz channels support
- RealTek WiFi (rtw89):
- new dynamic header firmware format support
- wake-over-WLAN support"
* tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2002 commits)
ipvs: fix type warning in do_div() on 32 bit
net: lan966x: Remove a useless test in lan966x_ptp_add_trap()
net: ipa: add IPA v4.7 support
dt-bindings: net: qcom,ipa: Add SM6350 compatible
bnxt: Use generic HBH removal helper in tx path
IPv6/GRO: generic helper to remove temporary HBH/jumbo header in driver
selftests: forwarding: Add bridge MDB test
selftests: forwarding: Rename bridge_mdb test
bridge: mcast: Support replacement of MDB port group entries
bridge: mcast: Allow user space to specify MDB entry routing protocol
bridge: mcast: Allow user space to add (*, G) with a source list and filter mode
bridge: mcast: Add support for (*, G) with a source list and filter mode
bridge: mcast: Avoid arming group timer when (S, G) corresponds to a source
bridge: mcast: Add a flag for user installed source entries
bridge: mcast: Expose __br_multicast_del_group_src()
bridge: mcast: Expose br_multicast_new_group_src()
bridge: mcast: Add a centralized error path
bridge: mcast: Place netlink policy before validation functions
bridge: mcast: Split (*, G) and (S, G) addition into different functions
bridge: mcast: Do not derive entry type from its filter mode
...
This commit is contained in:
@@ -25,7 +25,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
|
||||
endif
|
||||
ifeq ($(CONFIG_CGROUPS),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o bpf_cgrp_storage.o
|
||||
endif
|
||||
obj-$(CONFIG_CGROUP_BPF) += cgroup.o
|
||||
ifeq ($(CONFIG_INET),y)
|
||||
|
||||
@@ -306,14 +306,6 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void check_and_free_fields(struct bpf_array *arr, void *val)
|
||||
{
|
||||
if (map_value_has_timer(&arr->map))
|
||||
bpf_timer_cancel_and_free(val + arr->map.timer_off);
|
||||
if (map_value_has_kptrs(&arr->map))
|
||||
bpf_map_free_kptrs(&arr->map, val);
|
||||
}
|
||||
|
||||
/* Called from syscall or from eBPF program */
|
||||
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
u64 map_flags)
|
||||
@@ -335,13 +327,13 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
return -EEXIST;
|
||||
|
||||
if (unlikely((map_flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)))
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
|
||||
return -EINVAL;
|
||||
|
||||
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
|
||||
copy_map_value(map, val, value);
|
||||
check_and_free_fields(array, val);
|
||||
bpf_obj_free_fields(array->map.record, val);
|
||||
} else {
|
||||
val = array->value +
|
||||
(u64)array->elem_size * (index & array->index_mask);
|
||||
@@ -349,7 +341,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
copy_map_value_locked(map, val, value, false);
|
||||
else
|
||||
copy_map_value(map, val, value);
|
||||
check_and_free_fields(array, val);
|
||||
bpf_obj_free_fields(array->map.record, val);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -386,7 +378,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
|
||||
pptr = array->pptrs[index & array->index_mask];
|
||||
for_each_possible_cpu(cpu) {
|
||||
copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
|
||||
check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
|
||||
bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
|
||||
off += size;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@@ -409,12 +401,12 @@ static void array_map_free_timers(struct bpf_map *map)
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
int i;
|
||||
|
||||
/* We don't reset or free kptr on uref dropping to zero. */
|
||||
if (!map_value_has_timer(map))
|
||||
/* We don't reset or free fields other than timer on uref dropping to zero. */
|
||||
if (!btf_record_has_field(map->record, BPF_TIMER))
|
||||
return;
|
||||
|
||||
for (i = 0; i < array->map.max_entries; i++)
|
||||
bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
|
||||
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
|
||||
}
|
||||
|
||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||
@@ -423,22 +415,21 @@ static void array_map_free(struct bpf_map *map)
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
int i;
|
||||
|
||||
if (map_value_has_kptrs(map)) {
|
||||
if (!IS_ERR_OR_NULL(map->record)) {
|
||||
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
for (i = 0; i < array->map.max_entries; i++) {
|
||||
void __percpu *pptr = array->pptrs[i & array->index_mask];
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
|
||||
bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < array->map.max_entries; i++)
|
||||
bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
|
||||
bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
|
||||
}
|
||||
bpf_map_free_kptr_off_tab(map);
|
||||
}
|
||||
|
||||
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
|
||||
|
||||
246
kernel/bpf/bpf_cgrp_storage.c
Normal file
246
kernel/bpf/bpf_cgrp_storage.c
Normal file
@@ -0,0 +1,246 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_local_storage.h>
|
||||
#include <uapi/linux/btf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
|
||||
DEFINE_BPF_STORAGE_CACHE(cgroup_cache);
|
||||
|
||||
static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);
|
||||
|
||||
static void bpf_cgrp_storage_lock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
this_cpu_inc(bpf_cgrp_storage_busy);
|
||||
}
|
||||
|
||||
static void bpf_cgrp_storage_unlock(void)
|
||||
{
|
||||
this_cpu_dec(bpf_cgrp_storage_busy);
|
||||
migrate_enable();
|
||||
}
|
||||
|
||||
static bool bpf_cgrp_storage_trylock(void)
|
||||
{
|
||||
migrate_disable();
|
||||
if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
|
||||
this_cpu_dec(bpf_cgrp_storage_busy);
|
||||
migrate_enable();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return the address of the bpf_cgrp_storage pointer inside @owner, which is
 * interpreted as a struct cgroup.
 */
static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner)
{
	return &((struct cgroup *)owner)->bpf_cgrp_storage;
}
|
||||
|
||||
void bpf_cgrp_storage_free(struct cgroup *cgroup)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool free_cgroup_storage = false;
|
||||
unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
|
||||
if (!local_storage) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
bpf_cgrp_storage_lock();
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
bpf_cgrp_storage_unlock();
|
||||
rcu_read_unlock();
|
||||
|
||||
if (free_cgroup_storage)
|
||||
kfree_rcu(local_storage, rcu);
|
||||
}
|
||||
|
||||
static struct bpf_local_storage_data *
|
||||
cgroup_storage_lookup(struct cgroup *cgroup, struct bpf_map *map, bool cacheit_lockit)
|
||||
{
|
||||
struct bpf_local_storage *cgroup_storage;
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
cgroup_storage = rcu_dereference_check(cgroup->bpf_cgrp_storage,
|
||||
bpf_rcu_lock_held());
|
||||
if (!cgroup_storage)
|
||||
return NULL;
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
return bpf_local_storage_lookup(cgroup_storage, smap, cacheit_lockit);
|
||||
}
|
||||
|
||||
static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_local_storage_data *sdata;
|
||||
struct cgroup *cgroup;
|
||||
int fd;
|
||||
|
||||
fd = *(int *)key;
|
||||
cgroup = cgroup_get_from_fd(fd);
|
||||
if (IS_ERR(cgroup))
|
||||
return ERR_CAST(cgroup);
|
||||
|
||||
bpf_cgrp_storage_lock();
|
||||
sdata = cgroup_storage_lookup(cgroup, map, true);
|
||||
bpf_cgrp_storage_unlock();
|
||||
cgroup_put(cgroup);
|
||||
return sdata ? sdata->data : NULL;
|
||||
}
|
||||
|
||||
/* .map_update_elem handler: @key points to an int holding a cgroup fd.
 *
 * Updates the cgroup's storage for @map with @value using GFP_ATOMIC.
 * Returns 0 on success, the error from cgroup_get_from_fd(), or the error
 * encoded in the pointer returned by bpf_local_storage_update().
 */
static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags)
{
	struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
	struct bpf_local_storage_data *updated;
	struct cgroup *cgrp;

	cgrp = cgroup_get_from_fd(*(int *)key);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	bpf_cgrp_storage_lock();
	updated = bpf_local_storage_update(cgrp, smap, value, map_flags,
					   GFP_ATOMIC);
	bpf_cgrp_storage_unlock();

	/* Drop the reference taken by cgroup_get_from_fd(). */
	cgroup_put(cgrp);

	return PTR_ERR_OR_ZERO(updated);
}
|
||||
|
||||
static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
|
||||
{
|
||||
struct bpf_local_storage_data *sdata;
|
||||
|
||||
sdata = cgroup_storage_lookup(cgroup, map, false);
|
||||
if (!sdata)
|
||||
return -ENOENT;
|
||||
|
||||
bpf_selem_unlink(SELEM(sdata), true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* .map_delete_elem handler: @key points to an int holding a cgroup fd.
 *
 * Returns the result of cgroup_storage_delete(), or the error from
 * cgroup_get_from_fd() when the fd cannot be resolved.
 */
static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct cgroup *cgrp;
	int rc;

	cgrp = cgroup_get_from_fd(*(int *)key);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	bpf_cgrp_storage_lock();
	rc = cgroup_storage_delete(cgrp, map);
	bpf_cgrp_storage_unlock();

	/* Drop the reference taken by cgroup_get_from_fd(). */
	cgroup_put(cgrp);

	return rc;
}
|
||||
|
||||
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
return bpf_local_storage_map_alloc(attr, &cgroup_cache);
|
||||
}
|
||||
|
||||
static void cgroup_storage_map_free(struct bpf_map *map)
|
||||
{
|
||||
bpf_local_storage_map_free(map, &cgroup_cache, NULL);
|
||||
}
|
||||
|
||||
/* *gfp_flags* is a hidden argument provided by the verifier */
|
||||
/* BPF helper: fetch (and optionally create) @cgroup's storage for @map.
 *
 * Returns the value pointer cast to unsigned long, or NULL (as unsigned
 * long) when: @flags contains anything other than
 * BPF_LOCAL_STORAGE_GET_F_CREATE, @cgroup is NULL, the trylock fails, no
 * entry exists and creation was not requested or not possible, or the
 * update returned an error.
 */
BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if ((flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) || !cgroup)
		return (unsigned long)NULL;

	if (!bpf_cgrp_storage_trylock())
		return (unsigned long)NULL;

	sdata = cgroup_storage_lookup(cgroup, map, true);
	if (!sdata) {
		/* only allocate new storage, when the cgroup is refcounted */
		if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
		    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
			sdata = bpf_local_storage_update(cgroup,
							 (struct bpf_local_storage_map *)map,
							 value, BPF_NOEXIST, gfp_flags);
	}

	bpf_cgrp_storage_unlock();

	if (IS_ERR_OR_NULL(sdata))
		return (unsigned long)NULL;
	return (unsigned long)sdata->data;
}
|
||||
|
||||
/* BPF helper: delete @cgroup's storage element for @map.
 *
 * Returns -EINVAL for a NULL @cgroup, -EBUSY when the trylock fails, and
 * otherwise the result of cgroup_storage_delete().
 */
BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup)
{
	int rc;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	if (!cgroup)
		return -EINVAL;
	if (!bpf_cgrp_storage_trylock())
		return -EBUSY;

	rc = cgroup_storage_delete(cgroup, map);
	bpf_cgrp_storage_unlock();

	return rc;
}
|
||||
|
||||
const struct bpf_map_ops cgrp_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
.map_alloc = cgroup_storage_map_alloc,
|
||||
.map_free = cgroup_storage_map_free,
|
||||
.map_get_next_key = notsupp_get_next_key,
|
||||
.map_lookup_elem = bpf_cgrp_storage_lookup_elem,
|
||||
.map_update_elem = bpf_cgrp_storage_update_elem,
|
||||
.map_delete_elem = bpf_cgrp_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = cgroup_storage_ptr,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_cgrp_storage_get_proto = {
|
||||
.func = bpf_cgrp_storage_get,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg2_btf_id = &bpf_cgroup_btf_id[0],
|
||||
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_cgrp_storage_delete_proto = {
|
||||
.func = bpf_cgrp_storage_delete,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg2_btf_id = &bpf_cgroup_btf_id[0],
|
||||
};
|
||||
@@ -56,11 +56,9 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode,
|
||||
|
||||
void bpf_inode_storage_free(struct inode *inode)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool free_inode_storage = false;
|
||||
struct bpf_storage_blob *bsb;
|
||||
struct hlist_node *n;
|
||||
|
||||
bsb = bpf_inode(inode);
|
||||
if (!bsb)
|
||||
@@ -74,30 +72,11 @@ void bpf_inode_storage_free(struct inode *inode)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Neither the bpf_prog nor the bpf-map's syscall
|
||||
* could be modifying the local_storage->list now.
|
||||
* Thus, no elem can be added-to or deleted-from the
|
||||
* local_storage->list by the bpf_prog or by the bpf-map's syscall.
|
||||
*
|
||||
* It is racing with bpf_local_storage_map_free() alone
|
||||
* when unlinking elem from the local_storage->list and
|
||||
* the map's bucket->list.
|
||||
*/
|
||||
raw_spin_lock_bh(&local_storage->lock);
|
||||
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
|
||||
/* Always unlink from map before unlinking from
|
||||
* local_storage.
|
||||
*/
|
||||
bpf_selem_unlink_map(selem);
|
||||
free_inode_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, false, false);
|
||||
}
|
||||
free_inode_storage = bpf_local_storage_unlink_nolock(local_storage);
|
||||
raw_spin_unlock_bh(&local_storage->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* free_inode_storage should always be true as long as
|
||||
* local_storage->list was non-empty.
|
||||
*/
|
||||
if (free_inode_storage)
|
||||
kfree_rcu(local_storage, rcu);
|
||||
}
|
||||
@@ -226,27 +205,14 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
|
||||
|
||||
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = bpf_local_storage_map_alloc(attr);
|
||||
if (IS_ERR(smap))
|
||||
return ERR_CAST(smap);
|
||||
|
||||
smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
|
||||
return &smap->map;
|
||||
return bpf_local_storage_map_alloc(attr, &inode_cache);
|
||||
}
|
||||
|
||||
static void inode_storage_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
|
||||
bpf_local_storage_map_free(smap, NULL);
|
||||
bpf_local_storage_map_free(map, &inode_cache, NULL);
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
|
||||
bpf_local_storage_map)
|
||||
const struct bpf_map_ops inode_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@@ -257,7 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
|
||||
.map_update_elem = bpf_fd_inode_storage_update_elem,
|
||||
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &inode_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = inode_storage_ptr,
|
||||
};
|
||||
|
||||
|
||||
@@ -88,8 +88,14 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
|
||||
/* If RCU Tasks Trace grace period implies RCU grace period, do
|
||||
* kfree(), else do kfree_rcu().
|
||||
*/
|
||||
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
|
||||
kfree_rcu(local_storage, rcu);
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
kfree(local_storage);
|
||||
else
|
||||
kfree_rcu(local_storage, rcu);
|
||||
}
|
||||
|
||||
static void bpf_selem_free_rcu(struct rcu_head *rcu)
|
||||
@@ -97,16 +103,19 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
|
||||
struct bpf_local_storage_elem *selem;
|
||||
|
||||
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
|
||||
kfree_rcu(selem, rcu);
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
kfree(selem);
|
||||
else
|
||||
kfree_rcu(selem, rcu);
|
||||
}
|
||||
|
||||
/* local_storage->lock must be held and selem->local_storage == local_storage.
|
||||
* The caller must ensure selem->smap is still valid to be
|
||||
* dereferenced for its smap->elem_size and smap->cache_idx.
|
||||
*/
|
||||
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_elem *selem,
|
||||
bool uncharge_mem, bool use_trace_rcu)
|
||||
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_elem *selem,
|
||||
bool uncharge_mem, bool use_trace_rcu)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
bool free_local_storage;
|
||||
@@ -233,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
|
||||
__bpf_selem_unlink_storage(selem, use_trace_rcu);
|
||||
}
|
||||
|
||||
/* If cacheit_lockit is false, this lookup function is lockless */
|
||||
struct bpf_local_storage_data *
|
||||
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_map *smap,
|
||||
@@ -372,7 +382,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
|
||||
/* BPF_F_LOCK can only be used in a value with spin_lock */
|
||||
unlikely((map_flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(&smap->map)))
|
||||
!btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
|
||||
@@ -491,7 +501,7 @@ unlock_err:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
|
||||
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
|
||||
{
|
||||
u64 min_usage = U64_MAX;
|
||||
u16 i, res = 0;
|
||||
@@ -515,21 +525,143 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
|
||||
return res;
|
||||
}
|
||||
|
||||
void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
|
||||
u16 idx)
|
||||
static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
|
||||
u16 idx)
|
||||
{
|
||||
spin_lock(&cache->idx_lock);
|
||||
cache->idx_usage_counts[idx]--;
|
||||
spin_unlock(&cache->idx_lock);
|
||||
}
|
||||
|
||||
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
|
||||
int __percpu *busy_counter)
|
||||
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
|
||||
{
|
||||
if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
|
||||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
|
||||
attr->max_entries ||
|
||||
attr->key_size != sizeof(int) || !attr->value_size ||
|
||||
/* Enforce BTF for userspace sk dumping */
|
||||
!attr->btf_key_type_id || !attr->btf_value_type_id)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bpf_capable())
|
||||
return -EPERM;
|
||||
|
||||
if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
|
||||
return -E2BIG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
unsigned int i;
|
||||
u32 nbuckets;
|
||||
|
||||
smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
|
||||
if (!smap)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bpf_map_init_from_attr(&smap->map, attr);
|
||||
|
||||
nbuckets = roundup_pow_of_two(num_possible_cpus());
|
||||
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
|
||||
nbuckets = max_t(u32, 2, nbuckets);
|
||||
smap->bucket_log = ilog2(nbuckets);
|
||||
|
||||
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
|
||||
GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
|
||||
if (!smap->buckets) {
|
||||
bpf_map_area_free(smap);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
for (i = 0; i < nbuckets; i++) {
|
||||
INIT_HLIST_HEAD(&smap->buckets[i].list);
|
||||
raw_spin_lock_init(&smap->buckets[i].lock);
|
||||
}
|
||||
|
||||
smap->elem_size =
|
||||
sizeof(struct bpf_local_storage_elem) + attr->value_size;
|
||||
|
||||
return smap;
|
||||
}
|
||||
|
||||
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
u32 int_data;
|
||||
|
||||
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
|
||||
return -EINVAL;
|
||||
|
||||
int_data = *(u32 *)(key_type + 1);
|
||||
if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
bool free_storage = false;
|
||||
struct hlist_node *n;
|
||||
|
||||
/* Neither the bpf_prog nor the bpf_map's syscall
|
||||
* could be modifying the local_storage->list now.
|
||||
* Thus, no elem can be added to or deleted from the
|
||||
* local_storage->list by the bpf_prog or by the bpf_map's syscall.
|
||||
*
|
||||
* It is racing with bpf_local_storage_map_free() alone
|
||||
* when unlinking elem from the local_storage->list and
|
||||
* the map's bucket->list.
|
||||
*/
|
||||
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
|
||||
/* Always unlink from map before unlinking from
|
||||
* local_storage.
|
||||
*/
|
||||
bpf_selem_unlink_map(selem);
|
||||
/* If local_storage list has only one element, the
|
||||
* bpf_selem_unlink_storage_nolock() will return true.
|
||||
* Otherwise, it will return false. The current loop iteration
|
||||
* intends to remove all local storage. So the last iteration
|
||||
* of the loop will set the free_cgroup_storage to true.
|
||||
*/
|
||||
free_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, false, false);
|
||||
}
|
||||
|
||||
return free_storage;
|
||||
}
|
||||
|
||||
struct bpf_map *
|
||||
bpf_local_storage_map_alloc(union bpf_attr *attr,
|
||||
struct bpf_local_storage_cache *cache)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = __bpf_local_storage_map_alloc(attr);
|
||||
if (IS_ERR(smap))
|
||||
return ERR_CAST(smap);
|
||||
|
||||
smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
|
||||
return &smap->map;
|
||||
}
|
||||
|
||||
void bpf_local_storage_map_free(struct bpf_map *map,
|
||||
struct bpf_local_storage_cache *cache,
|
||||
int __percpu *busy_counter)
|
||||
{
|
||||
struct bpf_local_storage_map_bucket *b;
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage_map *smap;
|
||||
unsigned int i;
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(cache, smap->cache_idx);
|
||||
|
||||
/* Note that this map might be concurrently cloned from
|
||||
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
|
||||
* RCU read section to finish before proceeding. New RCU
|
||||
@@ -584,73 +716,3 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
|
||||
kvfree(smap->buckets);
|
||||
bpf_map_area_free(smap);
|
||||
}
|
||||
|
||||
int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
|
||||
{
|
||||
if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
|
||||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
|
||||
attr->max_entries ||
|
||||
attr->key_size != sizeof(int) || !attr->value_size ||
|
||||
/* Enforce BTF for userspace sk dumping */
|
||||
!attr->btf_key_type_id || !attr->btf_value_type_id)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bpf_capable())
|
||||
return -EPERM;
|
||||
|
||||
if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
|
||||
return -E2BIG;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
unsigned int i;
|
||||
u32 nbuckets;
|
||||
|
||||
smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
|
||||
if (!smap)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
bpf_map_init_from_attr(&smap->map, attr);
|
||||
|
||||
nbuckets = roundup_pow_of_two(num_possible_cpus());
|
||||
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
|
||||
nbuckets = max_t(u32, 2, nbuckets);
|
||||
smap->bucket_log = ilog2(nbuckets);
|
||||
|
||||
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
|
||||
GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
|
||||
if (!smap->buckets) {
|
||||
bpf_map_area_free(smap);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
for (i = 0; i < nbuckets; i++) {
|
||||
INIT_HLIST_HEAD(&smap->buckets[i].list);
|
||||
raw_spin_lock_init(&smap->buckets[i].lock);
|
||||
}
|
||||
|
||||
smap->elem_size =
|
||||
sizeof(struct bpf_local_storage_elem) + attr->value_size;
|
||||
|
||||
return smap;
|
||||
}
|
||||
|
||||
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
u32 int_data;
|
||||
|
||||
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
|
||||
return -EINVAL;
|
||||
|
||||
int_data = *(u32 *)(key_type + 1);
|
||||
if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
|
||||
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
|
||||
.func = bpf_ima_inode_hash,
|
||||
.gpl_only = false,
|
||||
.might_sleep = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
|
||||
@@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
|
||||
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
|
||||
.func = bpf_ima_file_hash,
|
||||
.gpl_only = false,
|
||||
.might_sleep = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
|
||||
@@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
case BPF_FUNC_bprm_opts_set:
|
||||
return &bpf_bprm_opts_set_proto;
|
||||
case BPF_FUNC_ima_inode_hash:
|
||||
return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
|
||||
return &bpf_ima_inode_hash_proto;
|
||||
case BPF_FUNC_ima_file_hash:
|
||||
return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
|
||||
return &bpf_ima_file_hash_proto;
|
||||
case BPF_FUNC_get_attach_cookie:
|
||||
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
|
||||
#ifdef CONFIG_NET
|
||||
@@ -343,11 +345,27 @@ BTF_ID(func, bpf_lsm_task_to_inode)
|
||||
BTF_ID(func, bpf_lsm_userns_create)
|
||||
BTF_SET_END(sleepable_lsm_hooks)
|
||||
|
||||
BTF_SET_START(untrusted_lsm_hooks)
|
||||
BTF_ID(func, bpf_lsm_bpf_map_free_security)
|
||||
BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_bpf_prog_free_security)
|
||||
BTF_ID(func, bpf_lsm_file_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_file_free_security)
|
||||
BTF_ID(func, bpf_lsm_sk_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_sk_free_security)
|
||||
BTF_ID(func, bpf_lsm_task_free)
|
||||
BTF_SET_END(untrusted_lsm_hooks)
|
||||
|
||||
/* Return true if @btf_id is a member of the sleepable_lsm_hooks BTF id set. */
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
{
	bool sleepable = btf_id_set_contains(&sleepable_lsm_hooks, btf_id);

	return sleepable;
}
|
||||
|
||||
bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
|
||||
{
|
||||
return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id);
|
||||
}
|
||||
|
||||
const struct bpf_prog_ops lsm_prog_ops = {
|
||||
};
|
||||
|
||||
|
||||
@@ -71,10 +71,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
|
||||
|
||||
void bpf_task_storage_free(struct task_struct *task)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool free_task_storage = false;
|
||||
struct hlist_node *n;
|
||||
unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -85,32 +83,13 @@ void bpf_task_storage_free(struct task_struct *task)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Neither the bpf_prog nor the bpf-map's syscall
|
||||
* could be modifying the local_storage->list now.
|
||||
* Thus, no elem can be added-to or deleted-from the
|
||||
* local_storage->list by the bpf_prog or by the bpf-map's syscall.
|
||||
*
|
||||
* It is racing with bpf_local_storage_map_free() alone
|
||||
* when unlinking elem from the local_storage->list and
|
||||
* the map's bucket->list.
|
||||
*/
|
||||
bpf_task_storage_lock();
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
|
||||
/* Always unlink from map before unlinking from
|
||||
* local_storage.
|
||||
*/
|
||||
bpf_selem_unlink_map(selem);
|
||||
free_task_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, false, false);
|
||||
}
|
||||
free_task_storage = bpf_local_storage_unlink_nolock(local_storage);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
bpf_task_storage_unlock();
|
||||
rcu_read_unlock();
|
||||
|
||||
/* free_task_storage should always be true as long as
|
||||
* local_storage->list was non-empty.
|
||||
*/
|
||||
if (free_task_storage)
|
||||
kfree_rcu(local_storage, rcu);
|
||||
}
|
||||
@@ -184,7 +163,8 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
|
||||
static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
|
||||
bool nobusy)
|
||||
{
|
||||
struct bpf_local_storage_data *sdata;
|
||||
|
||||
@@ -192,6 +172,9 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
|
||||
if (!sdata)
|
||||
return -ENOENT;
|
||||
|
||||
if (!nobusy)
|
||||
return -EBUSY;
|
||||
|
||||
bpf_selem_unlink(SELEM(sdata), true);
|
||||
|
||||
return 0;
|
||||
@@ -220,44 +203,91 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
|
||||
}
|
||||
|
||||
bpf_task_storage_lock();
|
||||
err = task_storage_delete(task, map);
|
||||
err = task_storage_delete(task, map, true);
|
||||
bpf_task_storage_unlock();
|
||||
out:
|
||||
put_pid(pid);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Called by bpf_task_storage_get*() helpers */
|
||||
static void *__bpf_task_storage_get(struct bpf_map *map,
|
||||
struct task_struct *task, void *value,
|
||||
u64 flags, gfp_t gfp_flags, bool nobusy)
|
||||
{
|
||||
struct bpf_local_storage_data *sdata;
|
||||
|
||||
sdata = task_storage_lookup(task, map, nobusy);
|
||||
if (sdata)
|
||||
return sdata->data;
|
||||
|
||||
/* only allocate new storage, when the task is refcounted */
|
||||
if (refcount_read(&task->usage) &&
|
||||
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value,
|
||||
BPF_NOEXIST, gfp_flags);
|
||||
return IS_ERR(sdata) ? NULL : sdata->data;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* *gfp_flags* is a hidden argument provided by the verifier */
|
||||
BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
|
||||
task, void *, value, u64, flags, gfp_t, gfp_flags)
|
||||
{
|
||||
bool nobusy;
|
||||
void *data;
|
||||
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
|
||||
return (unsigned long)NULL;
|
||||
|
||||
nobusy = bpf_task_storage_trylock();
|
||||
data = __bpf_task_storage_get(map, task, value, flags,
|
||||
gfp_flags, nobusy);
|
||||
if (nobusy)
|
||||
bpf_task_storage_unlock();
|
||||
return (unsigned long)data;
|
||||
}
|
||||
|
||||
/* *gfp_flags* is a hidden argument provided by the verifier */
|
||||
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
|
||||
task, void *, value, u64, flags, gfp_t, gfp_flags)
|
||||
{
|
||||
struct bpf_local_storage_data *sdata;
|
||||
void *data;
|
||||
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
|
||||
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
|
||||
return (unsigned long)NULL;
|
||||
|
||||
if (!task)
|
||||
return (unsigned long)NULL;
|
||||
|
||||
if (!bpf_task_storage_trylock())
|
||||
return (unsigned long)NULL;
|
||||
|
||||
sdata = task_storage_lookup(task, map, true);
|
||||
if (sdata)
|
||||
goto unlock;
|
||||
|
||||
/* only allocate new storage, when the task is refcounted */
|
||||
if (refcount_read(&task->usage) &&
|
||||
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
|
||||
sdata = bpf_local_storage_update(
|
||||
task, (struct bpf_local_storage_map *)map, value,
|
||||
BPF_NOEXIST, gfp_flags);
|
||||
|
||||
unlock:
|
||||
bpf_task_storage_lock();
|
||||
data = __bpf_task_storage_get(map, task, value, flags,
|
||||
gfp_flags, true);
|
||||
bpf_task_storage_unlock();
|
||||
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
|
||||
(unsigned long)sdata->data;
|
||||
return (unsigned long)data;
|
||||
}
|
||||
|
||||
/* Variant of the task-storage delete helper that still calls
 * task_storage_delete() when bpf_task_storage_trylock() fails, passing the
 * trylock result as the nobusy argument. The lock is released only if it
 * was taken. Returns -EINVAL for a NULL @task, otherwise whatever
 * task_storage_delete() returns.
 */
BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
	   task)
{
	bool locked;
	int err;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	if (!task)
		return -EINVAL;

	locked = bpf_task_storage_trylock();

	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	err = task_storage_delete(task, map, locked);

	if (locked)
		bpf_task_storage_unlock();

	return err;
}
|
||||
|
||||
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
|
||||
@@ -269,14 +299,12 @@ BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
|
||||
if (!task)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bpf_task_storage_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
bpf_task_storage_lock();
|
||||
/* This helper must only be called from places where the lifetime of the task
|
||||
* is guaranteed. Either by being refcounted or by being protected
|
||||
* by an RCU read-side critical section.
|
||||
*/
|
||||
ret = task_storage_delete(task, map);
|
||||
ret = task_storage_delete(task, map, true);
|
||||
bpf_task_storage_unlock();
|
||||
return ret;
|
||||
}
|
||||
@@ -288,26 +316,15 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
|
||||
static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = bpf_local_storage_map_alloc(attr);
|
||||
if (IS_ERR(smap))
|
||||
return ERR_CAST(smap);
|
||||
|
||||
smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
|
||||
return &smap->map;
|
||||
return bpf_local_storage_map_alloc(attr, &task_cache);
|
||||
}
|
||||
|
||||
static void task_storage_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = (struct bpf_local_storage_map *)map;
|
||||
bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
|
||||
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
|
||||
bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
|
||||
BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
|
||||
const struct bpf_map_ops task_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@@ -318,10 +335,21 @@ const struct bpf_map_ops task_storage_map_ops = {
|
||||
.map_update_elem = bpf_pid_task_storage_update_elem,
|
||||
.map_delete_elem = bpf_pid_task_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &task_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = task_storage_ptr,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
|
||||
.func = bpf_task_storage_get_recur,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
|
||||
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_task_storage_get_proto = {
|
||||
.func = bpf_task_storage_get,
|
||||
.gpl_only = false,
|
||||
@@ -333,6 +361,15 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
|
||||
.func = bpf_task_storage_delete_recur,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_BTF_ID,
|
||||
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
|
||||
};
|
||||
|
||||
const struct bpf_func_proto bpf_task_storage_delete_proto = {
|
||||
.func = bpf_task_storage_delete,
|
||||
.gpl_only = false,
|
||||
|
||||
1332
kernel/bpf/btf.c
1332
kernel/bpf/btf.c
File diff suppressed because it is too large
Load Diff
@@ -157,23 +157,37 @@ static const struct seq_operations cgroup_iter_seq_ops = {
|
||||
.show = cgroup_iter_seq_show,
|
||||
};
|
||||
|
||||
BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup)
|
||||
BTF_ID_LIST_GLOBAL_SINGLE(bpf_cgroup_btf_id, struct, cgroup)
|
||||
|
||||
static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
|
||||
{
|
||||
struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv;
|
||||
struct cgroup *cgrp = aux->cgroup.start;
|
||||
|
||||
/* bpf_iter_attach_cgroup() has already acquired an extra reference
|
||||
* for the start cgroup, but the reference may be released after
|
||||
* cgroup_iter_seq_init(), so acquire another reference for the
|
||||
* start cgroup.
|
||||
*/
|
||||
p->start_css = &cgrp->self;
|
||||
css_get(p->start_css);
|
||||
p->terminate = false;
|
||||
p->visited_all = false;
|
||||
p->order = aux->cgroup.order;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cgroup_iter_seq_fini(void *priv)
|
||||
{
|
||||
struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv;
|
||||
|
||||
css_put(p->start_css);
|
||||
}
|
||||
|
||||
static const struct bpf_iter_seq_info cgroup_iter_seq_info = {
|
||||
.seq_ops = &cgroup_iter_seq_ops,
|
||||
.init_seq_private = cgroup_iter_seq_init,
|
||||
.fini_seq_private = cgroup_iter_seq_fini,
|
||||
.seq_priv_size = sizeof(struct cgroup_iter_priv),
|
||||
};
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <linux/log2.h>
|
||||
#include <linux/bpf_verifier.h>
|
||||
#include <linux/nodemask.h>
|
||||
#include <linux/bpf_mem_alloc.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/unaligned.h>
|
||||
@@ -60,6 +61,9 @@
|
||||
#define CTX regs[BPF_REG_CTX]
|
||||
#define IMM insn->imm
|
||||
|
||||
struct bpf_mem_alloc bpf_global_ma;
|
||||
bool bpf_global_ma_set;
|
||||
|
||||
/* No hurry in this branch
|
||||
*
|
||||
* Exported for the bpf jit load helper.
|
||||
@@ -2251,8 +2255,14 @@ static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_prog_array *progs;
|
||||
|
||||
/* If RCU Tasks Trace grace period implies RCU grace period, there is
|
||||
* no need to call kfree_rcu(), just call kfree() directly.
|
||||
*/
|
||||
progs = container_of(rcu, struct bpf_prog_array, rcu);
|
||||
kfree_rcu(progs, rcu);
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
kfree(progs);
|
||||
else
|
||||
kfree_rcu(progs, rcu);
|
||||
}
|
||||
|
||||
void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
|
||||
@@ -2740,6 +2750,18 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len)
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
/* Late initcall that sets up the global allocator bpf_global_ma.
 *
 * bpf_global_ma_set records whether bpf_mem_alloc_init() succeeded
 * (returned 0); the init result itself is returned to the initcall core.
 */
static int __init bpf_global_ma_init(void)
{
	int err = bpf_mem_alloc_init(&bpf_global_ma, 0, false);

	bpf_global_ma_set = (err == 0);
	return err;
}
late_initcall(bpf_global_ma_init);
#endif
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
|
||||
EXPORT_SYMBOL(bpf_stats_enabled_key);
|
||||
|
||||
|
||||
@@ -4,13 +4,16 @@
|
||||
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
|
||||
*/
|
||||
|
||||
/* The 'cpumap' is primarily used as a backend map for XDP BPF helper
|
||||
/**
|
||||
* DOC: cpu map
|
||||
* The 'cpumap' is primarily used as a backend map for XDP BPF helper
|
||||
* call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'.
|
||||
*
|
||||
* Unlike devmap which redirects XDP frames out another NIC device,
|
||||
* Unlike devmap which redirects XDP frames out to another NIC device,
|
||||
* this map type redirects raw XDP frames to another CPU. The remote
|
||||
* CPU will do SKB-allocation and call the normal network stack.
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* This is a scalability and isolation mechanism, that allows
|
||||
* separating the early driver network XDP layer, from the rest of the
|
||||
* netstack, and assigning dedicated CPUs for this stage. This
|
||||
@@ -85,7 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
u32 value_size = attr->value_size;
|
||||
struct bpf_cpu_map *cmap;
|
||||
int err = -ENOMEM;
|
||||
|
||||
if (!bpf_capable())
|
||||
return ERR_PTR(-EPERM);
|
||||
@@ -97,29 +99,26 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
|
||||
attr->map_flags & ~BPF_F_NUMA_NODE)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* Pre-limit array size based on NR_CPUS, not final CPU check */
|
||||
if (attr->max_entries > NR_CPUS)
|
||||
return ERR_PTR(-E2BIG);
|
||||
|
||||
cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE);
|
||||
if (!cmap)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bpf_map_init_from_attr(&cmap->map, attr);
|
||||
|
||||
/* Pre-limit array size based on NR_CPUS, not final CPU check */
|
||||
if (cmap->map.max_entries > NR_CPUS) {
|
||||
err = -E2BIG;
|
||||
goto free_cmap;
|
||||
}
|
||||
|
||||
/* Alloc array for possible remote "destination" CPUs */
|
||||
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
|
||||
sizeof(struct bpf_cpu_map_entry *),
|
||||
cmap->map.numa_node);
|
||||
if (!cmap->cpu_map)
|
||||
goto free_cmap;
|
||||
if (!cmap->cpu_map) {
|
||||
bpf_map_area_free(cmap);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
return &cmap->map;
|
||||
free_cmap:
|
||||
bpf_map_area_free(cmap);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
@@ -668,9 +667,9 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
|
||||
return __bpf_xdp_redirect_map(map, index, flags, 0,
|
||||
__cpu_map_lookup_elem);
|
||||
}
|
||||
|
||||
|
||||
@@ -992,14 +992,14 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
map, key, value, map_flags);
|
||||
}
|
||||
|
||||
static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags,
|
||||
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
|
||||
__dev_map_lookup_elem);
|
||||
}
|
||||
|
||||
static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
|
||||
static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
|
||||
{
|
||||
return __bpf_xdp_redirect_map(map, ifindex, flags,
|
||||
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
|
||||
|
||||
@@ -222,7 +222,7 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
|
||||
u32 num_entries = htab->map.max_entries;
|
||||
int i;
|
||||
|
||||
if (!map_value_has_timer(&htab->map))
|
||||
if (!btf_record_has_field(htab->map.record, BPF_TIMER))
|
||||
return;
|
||||
if (htab_has_extra_elems(htab))
|
||||
num_entries += num_possible_cpus();
|
||||
@@ -231,28 +231,25 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
|
||||
struct htab_elem *elem;
|
||||
|
||||
elem = get_htab_elem(htab, i);
|
||||
bpf_timer_cancel_and_free(elem->key +
|
||||
round_up(htab->map.key_size, 8) +
|
||||
htab->map.timer_off);
|
||||
bpf_obj_free_timer(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
|
||||
static void htab_free_prealloced_fields(struct bpf_htab *htab)
|
||||
{
|
||||
u32 num_entries = htab->map.max_entries;
|
||||
int i;
|
||||
|
||||
if (!map_value_has_kptrs(&htab->map))
|
||||
if (IS_ERR_OR_NULL(htab->map.record))
|
||||
return;
|
||||
if (htab_has_extra_elems(htab))
|
||||
num_entries += num_possible_cpus();
|
||||
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
struct htab_elem *elem;
|
||||
|
||||
elem = get_htab_elem(htab, i);
|
||||
bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8));
|
||||
bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
@@ -764,10 +761,7 @@ static void check_and_free_fields(struct bpf_htab *htab,
|
||||
{
|
||||
void *map_value = elem->key + round_up(htab->map.key_size, 8);
|
||||
|
||||
if (map_value_has_timer(&htab->map))
|
||||
bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
|
||||
if (map_value_has_kptrs(&htab->map))
|
||||
bpf_map_free_kptrs(&htab->map, map_value);
|
||||
bpf_obj_free_fields(htab->map.record, map_value);
|
||||
}
|
||||
|
||||
/* It is called from the bpf_lru_list when the LRU needs to delete
|
||||
@@ -1091,7 +1085,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
head = &b->head;
|
||||
|
||||
if (unlikely(map_flags & BPF_F_LOCK)) {
|
||||
if (unlikely(!map_value_has_spin_lock(map)))
|
||||
if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
|
||||
return -EINVAL;
|
||||
/* find an element without taking the bucket lock */
|
||||
l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
|
||||
@@ -1474,12 +1468,8 @@ static void htab_free_malloced_timers(struct bpf_htab *htab)
|
||||
struct htab_elem *l;
|
||||
|
||||
hlist_nulls_for_each_entry(l, n, head, hash_node) {
|
||||
/* We don't reset or free kptr on uref dropping to zero,
|
||||
* hence just free timer.
|
||||
*/
|
||||
bpf_timer_cancel_and_free(l->key +
|
||||
round_up(htab->map.key_size, 8) +
|
||||
htab->map.timer_off);
|
||||
/* We only free timer on uref dropping to zero */
|
||||
bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8));
|
||||
}
|
||||
cond_resched_rcu();
|
||||
}
|
||||
@@ -1490,8 +1480,8 @@ static void htab_map_free_timers(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
|
||||
/* We don't reset or free kptr on uref dropping to zero. */
|
||||
if (!map_value_has_timer(&htab->map))
|
||||
/* We only free timer on uref dropping to zero */
|
||||
if (!btf_record_has_field(htab->map.record, BPF_TIMER))
|
||||
return;
|
||||
if (!htab_is_prealloc(htab))
|
||||
htab_free_malloced_timers(htab);
|
||||
@@ -1517,11 +1507,10 @@ static void htab_map_free(struct bpf_map *map)
|
||||
if (!htab_is_prealloc(htab)) {
|
||||
delete_all_elements(htab);
|
||||
} else {
|
||||
htab_free_prealloced_kptrs(htab);
|
||||
htab_free_prealloced_fields(htab);
|
||||
prealloc_destroy(htab);
|
||||
}
|
||||
|
||||
bpf_map_free_kptr_off_tab(map);
|
||||
free_percpu(htab->extra_elems);
|
||||
bpf_map_area_free(htab->buckets);
|
||||
bpf_mem_alloc_destroy(&htab->pcpu_ma);
|
||||
@@ -1675,7 +1664,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
|
||||
|
||||
elem_map_flags = attr->batch.elem_flags;
|
||||
if ((elem_map_flags & ~BPF_F_LOCK) ||
|
||||
((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
|
||||
((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
|
||||
return -EINVAL;
|
||||
|
||||
map_flags = attr->batch.flags;
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/bpf-cgroup.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/smp.h>
|
||||
@@ -19,6 +20,7 @@
|
||||
#include <linux/proc_ns.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/bpf_mem_alloc.h>
|
||||
|
||||
#include "../../lib/kstrtox.h"
|
||||
|
||||
@@ -336,6 +338,7 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
||||
.arg1_btf_id = BPF_PTR_POISON,
|
||||
};
|
||||
|
||||
static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
|
||||
@@ -358,6 +361,7 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
|
||||
.arg1_btf_id = BPF_PTR_POISON,
|
||||
};
|
||||
|
||||
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
|
||||
@@ -366,9 +370,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
|
||||
struct bpf_spin_lock *lock;
|
||||
|
||||
if (lock_src)
|
||||
lock = src + map->spin_lock_off;
|
||||
lock = src + map->record->spin_lock_off;
|
||||
else
|
||||
lock = dst + map->spin_lock_off;
|
||||
lock = dst + map->record->spin_lock_off;
|
||||
preempt_disable();
|
||||
__bpf_spin_lock_irqsave(lock);
|
||||
copy_map_value(map, dst, src);
|
||||
@@ -657,6 +661,7 @@ BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
|
||||
const struct bpf_func_proto bpf_copy_from_user_proto = {
|
||||
.func = bpf_copy_from_user,
|
||||
.gpl_only = false,
|
||||
.might_sleep = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
@@ -687,6 +692,7 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
|
||||
const struct bpf_func_proto bpf_copy_from_user_task_proto = {
|
||||
.func = bpf_copy_from_user_task,
|
||||
.gpl_only = true,
|
||||
.might_sleep = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
@@ -1169,7 +1175,7 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
t->value = (void *)timer - map->timer_off;
|
||||
t->value = (void *)timer - map->record->timer_off;
|
||||
t->map = map;
|
||||
t->prog = NULL;
|
||||
rcu_assign_pointer(t->callback_fn, NULL);
|
||||
@@ -1398,7 +1404,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
|
||||
#define DYNPTR_SIZE_MASK 0xFFFFFF
|
||||
#define DYNPTR_RDONLY_BIT BIT(31)
|
||||
|
||||
static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
|
||||
static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
return ptr->size & DYNPTR_RDONLY_BIT;
|
||||
}
|
||||
@@ -1408,7 +1414,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
|
||||
ptr->size |= type << DYNPTR_TYPE_SHIFT;
|
||||
}
|
||||
|
||||
u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
|
||||
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
return ptr->size & DYNPTR_SIZE_MASK;
|
||||
}
|
||||
@@ -1432,7 +1438,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
|
||||
memset(ptr, 0, sizeof(*ptr));
|
||||
}
|
||||
|
||||
static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
|
||||
static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
|
||||
{
|
||||
u32 size = bpf_dynptr_get_size(ptr);
|
||||
|
||||
@@ -1477,7 +1483,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
|
||||
u32, offset, u64, flags)
|
||||
{
|
||||
int err;
|
||||
@@ -1489,7 +1495,11 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(dst, src->data + src->offset + offset, len);
|
||||
/* Source and destination may possibly overlap, hence use memmove to
|
||||
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
|
||||
* pointing to overlapping PTR_TO_MAP_VALUE regions.
|
||||
*/
|
||||
memmove(dst, src->data + src->offset + offset, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1500,12 +1510,12 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
|
||||
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
|
||||
u32, len, u64, flags)
|
||||
{
|
||||
int err;
|
||||
@@ -1517,7 +1527,11 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(dst->data + dst->offset + offset, src, len);
|
||||
/* Source and destination may possibly overlap, hence use memmove to
|
||||
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
|
||||
* pointing to overlapping PTR_TO_MAP_VALUE regions.
|
||||
*/
|
||||
memmove(dst->data + dst->offset + offset, src, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1526,14 +1540,14 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
|
||||
.func = bpf_dynptr_write,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
||||
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
|
||||
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
|
||||
{
|
||||
int err;
|
||||
|
||||
@@ -1554,7 +1568,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
|
||||
.func = bpf_dynptr_data,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
|
||||
};
|
||||
@@ -1663,6 +1677,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
||||
return &bpf_dynptr_write_proto;
|
||||
case BPF_FUNC_dynptr_data:
|
||||
return &bpf_dynptr_data_proto;
|
||||
#ifdef CONFIG_CGROUPS
|
||||
case BPF_FUNC_cgrp_storage_get:
|
||||
return &bpf_cgrp_storage_get_proto;
|
||||
case BPF_FUNC_cgrp_storage_delete:
|
||||
return &bpf_cgrp_storage_delete_proto;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -1700,20 +1720,401 @@ bpf_base_func_proto(enum bpf_func_id func_id)
|
||||
}
|
||||
}
|
||||
|
||||
BTF_SET8_START(tracing_btf_ids)
|
||||
void bpf_list_head_free(const struct btf_field *field, void *list_head,
|
||||
struct bpf_spin_lock *spin_lock)
|
||||
{
|
||||
struct list_head *head = list_head, *orig_head = list_head;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
|
||||
BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
|
||||
|
||||
/* Do the actual list draining outside the lock to not hold the lock for
|
||||
* too long, and also prevent deadlocks if tracing programs end up
|
||||
* executing on entry/exit of functions called inside the critical
|
||||
* section, and end up doing map ops that call bpf_list_head_free for
|
||||
* the same map value again.
|
||||
*/
|
||||
__bpf_spin_lock_irqsave(spin_lock);
|
||||
if (!head->next || list_empty(head))
|
||||
goto unlock;
|
||||
head = head->next;
|
||||
unlock:
|
||||
INIT_LIST_HEAD(orig_head);
|
||||
__bpf_spin_unlock_irqrestore(spin_lock);
|
||||
|
||||
while (head != orig_head) {
|
||||
void *obj = head;
|
||||
|
||||
obj -= field->list_head.node_offset;
|
||||
head = head->next;
|
||||
/* The contained type can also have resources, including a
|
||||
* bpf_list_head which needs to be freed.
|
||||
*/
|
||||
bpf_obj_free_fields(field->list_head.value_rec, obj);
|
||||
/* bpf_mem_free requires migrate_disable(), since we can be
|
||||
* called from map free path as well apart from BPF program (as
|
||||
* part of map ops doing bpf_obj_free_fields).
|
||||
*/
|
||||
migrate_disable();
|
||||
bpf_mem_free(&bpf_global_ma, obj);
|
||||
migrate_enable();
|
||||
}
|
||||
}
|
||||
|
||||
__diag_push();
|
||||
__diag_ignore_all("-Wmissing-prototypes",
|
||||
"Global functions as their definitions will be in vmlinux BTF");
|
||||
|
||||
/* Allocate an object from bpf_global_ma and initialize its special fields.
 *
 * @local_type_id__k: Passed unchanged to bpf_mem_alloc() as the size.
 * @meta__ign:        Optional struct btf_struct_meta; when non-NULL its
 *                    field_offs are handed to bpf_obj_init().
 *
 * Returns the new object, or NULL if the allocation fails.
 */
void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
	struct btf_struct_meta *meta = meta__ign;
	void *obj = bpf_mem_alloc(&bpf_global_ma, local_type_id__k);

	if (obj && meta)
		bpf_obj_init(meta->field_offs, obj);
	return obj;
}
|
||||
|
||||
void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
|
||||
{
|
||||
struct btf_struct_meta *meta = meta__ign;
|
||||
void *p = p__alloc;
|
||||
|
||||
if (meta)
|
||||
bpf_obj_free_fields(meta->record, p);
|
||||
bpf_mem_free(&bpf_global_ma, p);
|
||||
}
|
||||
|
||||
static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
|
||||
{
|
||||
struct list_head *n = (void *)node, *h = (void *)head;
|
||||
|
||||
if (unlikely(!h->next))
|
||||
INIT_LIST_HEAD(h);
|
||||
if (unlikely(!n->next))
|
||||
INIT_LIST_HEAD(n);
|
||||
tail ? list_add_tail(n, h) : list_add(n, h);
|
||||
}
|
||||
|
||||
/* Insert @node at the front of @head.
 *
 * The helper is called as a plain statement: ISO C does not allow
 * "return <expression>;" in a function returning void, even when the
 * expression itself has type void.
 */
void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
{
	__bpf_list_add(node, head, false);
}
|
||||
|
||||
/* Insert @node at the back of @head.
 *
 * The helper is called as a plain statement: ISO C does not allow
 * "return <expression>;" in a function returning void, even when the
 * expression itself has type void.
 */
void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
{
	__bpf_list_add(node, head, true);
}
|
||||
|
||||
static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
|
||||
{
|
||||
struct list_head *n, *h = (void *)head;
|
||||
|
||||
if (unlikely(!h->next))
|
||||
INIT_LIST_HEAD(h);
|
||||
if (list_empty(h))
|
||||
return NULL;
|
||||
n = tail ? h->prev : h->next;
|
||||
list_del_init(n);
|
||||
return (struct bpf_list_node *)n;
|
||||
}
|
||||
|
||||
/* Remove and return the first node of @head, or NULL if the list is empty. */
struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
{
	struct bpf_list_node *first = __bpf_list_del(head, false);

	return first;
}
|
||||
|
||||
/* Remove and return the last node of @head, or NULL if the list is empty. */
struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
{
	struct bpf_list_node *last = __bpf_list_del(head, true);

	return last;
}
|
||||
|
||||
/**
 * bpf_task_acquire - Take a reference on a task. Unless the acquired task is
 * stored in a map as a kptr, the reference must be dropped again with
 * bpf_task_release().
 * @p: The task on which a reference is being acquired.
 */
struct task_struct *bpf_task_acquire(struct task_struct *p)
{
	struct task_struct *acquired = get_task_struct(p);

	return acquired;
}
|
||||
|
||||
/**
 * bpf_task_acquire_not_zero - Try to take a reference on an RCU task object.
 * Unless the acquired task is stored in a map as a kptr, the reference must be
 * dropped again with bpf_task_release().
 * @p: The task on which a reference is being acquired.
 */
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
{
	/* For now this kfunc always returns NULL. It is not currently possible
	 * to safely acquire a reference to a task under RCU protection using
	 * get_task_struct() and put_task_struct(), because of the slightly
	 * odd mechanics of p->rcu_users and of task RCU protection.
	 *
	 * A struct task_struct is refcounted through two refcount_t fields:
	 *
	 * 1. p->usage:     The "true" refcount, tracking the task's lifetime.
	 *                  The task is freed as soon as it drops to 0.
	 *
	 * 2. p->rcu_users: An "RCU users" refcount, statically initialized to
	 *                  2 and placed in a union with a struct rcu_head
	 *                  (p->rcu). It essentially stands for a single
	 *                  p->usage reference: when p->rcu_users reaches 0, an
	 *                  RCU callback is scheduled on the rcu_head, and that
	 *                  callback decrements p->usage.
	 *
	 * Two consequences follow from this scheme:
	 *
	 * - refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere.
	 *   Once the count reaches 0, scheduling the RCU callback makes the
	 *   memory backing the refcount nonzero again, because the two fields
	 *   share a union.
	 *
	 * - RCU cannot be relied on to keep a task valid in a BPF program. The
	 *   task may already have moved to TASK_DEAD, had rcu_users drop to
	 *   0, and had its RCU callback run and drop its single p->usage
	 *   reference. From that point the task is freed as soon as the last
	 *   p->usage reference goes to 0, without waiting for another RCU
	 *   grace period. In this scenario the only way for a BPF program to
	 *   guarantee that the task is valid is to hold a p->usage reference
	 *   itself.
	 *
	 * Until this is resolved, either by pulling p->rcu_users and p->rcu
	 * out of the union or by getting rid of p->usage and refcounting with
	 * p->rcu_users alone, just return NULL here.
	 */
	return NULL;
}
|
||||
|
||||
/**
 * bpf_task_kptr_get - Take a reference on a struct task_struct kptr. Unless
 * the acquired task kptr is subsequently stored in a map, the reference must
 * be dropped again with bpf_task_release().
 * @pp: A pointer to a task kptr on which a reference is being acquired.
 */
struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
{
	/* Always NULL until it is clear how to properly leverage RCU for
	 * ensuring a task's lifetime; the comment in
	 * bpf_task_acquire_not_zero() has the details.
	 */
	return NULL;
}
|
||||
|
||||
/**
 * bpf_task_release - Drop a reference previously acquired on a task.
 * @p: The task on which a reference is being released. NULL is ignored.
 */
void bpf_task_release(struct task_struct *p)
{
	if (p)
		put_task_struct(p);
}
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
/**
 * bpf_cgroup_acquire - Take a reference on a cgroup. Unless the acquired
 * cgroup is stored in a map as a kptr, the reference must be dropped again
 * with bpf_cgroup_release().
 * @cgrp: The cgroup on which a reference is being acquired.
 */
struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
	struct cgroup *held = cgrp;

	cgroup_get(held);
	return held;
}
|
||||
|
||||
/**
 * bpf_cgroup_kptr_get - Take a reference on a struct cgroup kptr. Unless the
 * acquired cgroup kptr is subsequently stored in a map, the reference must be
 * dropped again with bpf_cgroup_release().
 * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired.
 */
struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
{
	struct cgroup *candidate;
	struct cgroup *held = NULL;

	rcu_read_lock();
	/* Another context can remove the cgroup from the map and release it
	 * at any time, including right after the load below. That is safe
	 * because we are inside an RCU read region, so the cgroup is
	 * guaranteed to remain valid until at least the rcu_read_unlock().
	 */
	candidate = READ_ONCE(*cgrpp);

	/* If the cgroup was removed from the map and freed as described
	 * above, cgroup_tryget() returns false. The cgroup will be freed at
	 * some point after the current RCU gp has ended, so NULL is returned
	 * to the user in that case.
	 */
	if (candidate && cgroup_tryget(candidate))
		held = candidate;
	rcu_read_unlock();

	return held;
}
|
||||
|
||||
/**
 * bpf_cgroup_release - Drop a reference previously acquired on a cgroup.
 * When this kfunc is invoked in an RCU read region, the cgroup is guaranteed
 * not to be freed until the current grace period has ended, even if its
 * refcount drops to 0.
 * @cgrp: The cgroup on which a reference is being released. NULL is ignored.
 */
void bpf_cgroup_release(struct cgroup *cgrp)
{
	if (cgrp)
		cgroup_put(cgrp);
}
|
||||
|
||||
/**
|
||||
* bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
|
||||
* array. A cgroup returned by this kfunc which is not subsequently stored in a
|
||||
* map, must be released by calling bpf_cgroup_release().
|
||||
* @cgrp: The cgroup for which we're performing a lookup.
|
||||
* @level: The level of ancestor to look up.
|
||||
*/
|
||||
struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
|
||||
{
|
||||
struct cgroup *ancestor;
|
||||
|
||||
if (level > cgrp->level || level < 0)
|
||||
return NULL;
|
||||
|
||||
ancestor = cgrp->ancestors[level];
|
||||
cgroup_get(ancestor);
|
||||
return ancestor;
|
||||
}
|
||||
#endif /* CONFIG_CGROUPS */
|
||||
|
||||
/**
|
||||
* bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
|
||||
* in the root pid namespace idr. If a task is returned, it must either be
|
||||
* stored in a map, or released with bpf_task_release().
|
||||
* @pid: The pid of the task being looked up.
|
||||
*/
|
||||
struct task_struct *bpf_task_from_pid(s32 pid)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
rcu_read_lock();
|
||||
p = find_task_by_pid_ns(pid, &init_pid_ns);
|
||||
if (p)
|
||||
bpf_task_acquire(p);
|
||||
rcu_read_unlock();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Return @obj unchanged; the function body performs no conversion. */
void *bpf_cast_to_kern_ctx(void *obj)
{
	void *kern_ctx = obj;

	return kern_ctx;
}
|
||||
|
||||
/* Return @obj__ign unchanged; @btf_id__k is not used by the function body. */
void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
{
	void *casted = obj__ign;

	return casted;
}
|
||||
|
||||
/* Kfunc wrapper that calls rcu_read_lock(). */
void bpf_rcu_read_lock(void)
{
	rcu_read_lock();
}
|
||||
|
||||
/* Kfunc wrapper that calls rcu_read_unlock(). */
void bpf_rcu_read_unlock(void)
{
	rcu_read_unlock();
}
|
||||
|
||||
__diag_pop();
|
||||
|
||||
BTF_SET8_START(generic_btf_ids)
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
|
||||
#endif
|
||||
BTF_SET8_END(tracing_btf_ids)
|
||||
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
|
||||
BTF_ID_FLAGS(func, bpf_list_push_front)
|
||||
BTF_ID_FLAGS(func, bpf_list_push_back)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
|
||||
#ifdef CONFIG_CGROUPS
|
||||
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
|
||||
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
|
||||
#endif
|
||||
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_SET8_END(generic_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set tracing_kfunc_set = {
|
||||
static const struct btf_kfunc_id_set generic_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &tracing_btf_ids,
|
||||
.set = &generic_btf_ids,
|
||||
};
|
||||
|
||||
|
||||
BTF_ID_LIST(generic_dtor_ids)
|
||||
BTF_ID(struct, task_struct)
|
||||
BTF_ID(func, bpf_task_release)
|
||||
#ifdef CONFIG_CGROUPS
|
||||
BTF_ID(struct, cgroup)
|
||||
BTF_ID(func, bpf_cgroup_release)
|
||||
#endif
|
||||
|
||||
BTF_SET8_START(common_btf_ids)
|
||||
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
|
||||
BTF_ID_FLAGS(func, bpf_rdonly_cast)
|
||||
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
|
||||
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
|
||||
BTF_SET8_END(common_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set common_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &common_btf_ids,
|
||||
};
|
||||
|
||||
static int __init kfunc_init(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set);
|
||||
int ret;
|
||||
const struct btf_id_dtor_kfunc generic_dtors[] = {
|
||||
{
|
||||
.btf_id = generic_dtor_ids[0],
|
||||
.kfunc_btf_id = generic_dtor_ids[1]
|
||||
},
|
||||
#ifdef CONFIG_CGROUPS
|
||||
{
|
||||
.btf_id = generic_dtor_ids[2],
|
||||
.kfunc_btf_id = generic_dtor_ids[3]
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
|
||||
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
|
||||
ARRAY_SIZE(generic_dtors),
|
||||
THIS_MODULE);
|
||||
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
|
||||
}
|
||||
|
||||
late_initcall(kfunc_init);
|
||||
|
||||
@@ -151,7 +151,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely((flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)))
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
|
||||
return -EINVAL;
|
||||
|
||||
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
|
||||
|
||||
@@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
struct bpf_map *inner_map, *inner_map_meta;
|
||||
u32 inner_map_meta_size;
|
||||
struct fd f;
|
||||
int ret;
|
||||
|
||||
f = fdget(inner_map_ufd);
|
||||
inner_map = __bpf_map_get(f);
|
||||
@@ -20,18 +21,13 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
|
||||
/* Does not support >1 level map-in-map */
|
||||
if (inner_map->inner_map_meta) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-EINVAL);
|
||||
ret = -EINVAL;
|
||||
goto put;
|
||||
}
|
||||
|
||||
if (!inner_map->ops->map_meta_equal) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
}
|
||||
|
||||
if (map_value_has_spin_lock(inner_map)) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
ret = -ENOTSUPP;
|
||||
goto put;
|
||||
}
|
||||
|
||||
inner_map_meta_size = sizeof(*inner_map_meta);
|
||||
@@ -41,8 +37,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
|
||||
inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
|
||||
if (!inner_map_meta) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
ret = -ENOMEM;
|
||||
goto put;
|
||||
}
|
||||
|
||||
inner_map_meta->map_type = inner_map->map_type;
|
||||
@@ -50,9 +46,33 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
inner_map_meta->value_size = inner_map->value_size;
|
||||
inner_map_meta->map_flags = inner_map->map_flags;
|
||||
inner_map_meta->max_entries = inner_map->max_entries;
|
||||
inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
|
||||
inner_map_meta->timer_off = inner_map->timer_off;
|
||||
inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map);
|
||||
|
||||
inner_map_meta->record = btf_record_dup(inner_map->record);
|
||||
if (IS_ERR(inner_map_meta->record)) {
|
||||
/* btf_record_dup returns NULL or valid pointer in case of
|
||||
* invalid/empty/valid, but ERR_PTR in case of errors. During
|
||||
* equality NULL or IS_ERR is equivalent.
|
||||
*/
|
||||
ret = PTR_ERR(inner_map_meta->record);
|
||||
goto free;
|
||||
}
|
||||
if (inner_map_meta->record) {
|
||||
struct btf_field_offs *field_offs;
|
||||
/* If btf_record is !IS_ERR_OR_NULL, then field_offs is always
|
||||
* valid.
|
||||
*/
|
||||
field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!field_offs) {
|
||||
ret = -ENOMEM;
|
||||
goto free_rec;
|
||||
}
|
||||
inner_map_meta->field_offs = field_offs;
|
||||
}
|
||||
/* Note: We must use the same BTF, as we also used btf_record_dup above
|
||||
* which relies on BTF being same for both maps, as some members like
|
||||
* record->fields.list_head have pointers like value_rec pointing into
|
||||
* inner_map->btf.
|
||||
*/
|
||||
if (inner_map->btf) {
|
||||
btf_get(inner_map->btf);
|
||||
inner_map_meta->btf = inner_map->btf;
|
||||
@@ -68,11 +88,19 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
|
||||
|
||||
fdput(f);
|
||||
return inner_map_meta;
|
||||
free_rec:
|
||||
btf_record_free(inner_map_meta->record);
|
||||
free:
|
||||
kfree(inner_map_meta);
|
||||
put:
|
||||
fdput(f);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
void bpf_map_meta_free(struct bpf_map *map_meta)
|
||||
{
|
||||
bpf_map_free_kptr_off_tab(map_meta);
|
||||
kfree(map_meta->field_offs);
|
||||
bpf_map_free_record(map_meta);
|
||||
btf_put(map_meta->btf);
|
||||
kfree(map_meta);
|
||||
}
|
||||
@@ -84,9 +112,8 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
|
||||
return meta0->map_type == meta1->map_type &&
|
||||
meta0->key_size == meta1->key_size &&
|
||||
meta0->value_size == meta1->value_size &&
|
||||
meta0->timer_off == meta1->timer_off &&
|
||||
meta0->map_flags == meta1->map_flags &&
|
||||
bpf_map_equal_kptr_off_tab(meta0, meta1);
|
||||
btf_record_equal(meta0->record, meta1->record);
|
||||
}
|
||||
|
||||
void *bpf_map_fd_get_ptr(struct bpf_map *map,
|
||||
|
||||
@@ -171,9 +171,24 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
|
||||
memcg = get_memcg(c);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
for (i = 0; i < cnt; i++) {
|
||||
obj = __alloc(c, node);
|
||||
if (!obj)
|
||||
break;
|
||||
/*
|
||||
* free_by_rcu is only manipulated by irq work refill_work().
|
||||
* IRQ works on the same CPU are called sequentially, so it is
|
||||
* safe to use __llist_del_first() here. If alloc_bulk() is
|
||||
* invoked by the initial prefill, there will be no running
|
||||
* refill_work(), so __llist_del_first() is fine as well.
|
||||
*
|
||||
* In most cases, objects on free_by_rcu are from the same CPU.
|
||||
* If some objects come from other CPUs, it doesn't incur any
|
||||
* harm because NUMA_NO_NODE means the preference for current
|
||||
* numa node and it is not a guarantee.
|
||||
*/
|
||||
obj = __llist_del_first(&c->free_by_rcu);
|
||||
if (!obj) {
|
||||
obj = __alloc(c, node);
|
||||
if (!obj)
|
||||
break;
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
/* In RT irq_work runs in per-cpu kthread, so disable
|
||||
* interrupts to avoid preemption and interrupts and
|
||||
@@ -222,9 +237,13 @@ static void __free_rcu(struct rcu_head *head)
|
||||
|
||||
static void __free_rcu_tasks_trace(struct rcu_head *head)
|
||||
{
|
||||
struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
|
||||
|
||||
call_rcu(&c->rcu, __free_rcu);
|
||||
/* If RCU Tasks Trace grace period implies RCU grace period,
|
||||
* there is no need to invoke call_rcu().
|
||||
*/
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
__free_rcu(head);
|
||||
else
|
||||
call_rcu(head, __free_rcu);
|
||||
}
|
||||
|
||||
static void enque_to_free(struct bpf_mem_cache *c, void *obj)
|
||||
@@ -253,8 +272,9 @@ static void do_call_rcu(struct bpf_mem_cache *c)
|
||||
*/
|
||||
__llist_add(llnode, &c->waiting_for_gp);
|
||||
/* Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
|
||||
* Then use call_rcu() to wait for normal progs to finish
|
||||
* and finally do free_one() on each element.
|
||||
* If RCU Tasks Trace grace period implies RCU grace period, free
|
||||
* these elements directly, else use call_rcu() to wait for normal
|
||||
* progs to finish and finally do free_one() on each element.
|
||||
*/
|
||||
call_rcu_tasks_trace(&c->rcu, __free_rcu_tasks_trace);
|
||||
}
|
||||
@@ -444,9 +464,17 @@ static void free_mem_alloc(struct bpf_mem_alloc *ma)
|
||||
{
|
||||
/* waiting_for_gp lists were drained, but __free_rcu might
|
||||
* still execute. Wait for it now before freeing percpu caches.
|
||||
*
|
||||
* rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
|
||||
* but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
|
||||
* to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
|
||||
* so if call_rcu(head, __free_rcu) is skipped due to
|
||||
* rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
|
||||
* using rcu_trace_implies_rcu_gp() as well.
|
||||
*/
|
||||
rcu_barrier_tasks_trace();
|
||||
rcu_barrier();
|
||||
if (!rcu_trace_implies_rcu_gp())
|
||||
rcu_barrier();
|
||||
free_mem_alloc_no_barrier(ma);
|
||||
}
|
||||
|
||||
|
||||
@@ -447,7 +447,7 @@ BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags)
|
||||
|
||||
const struct bpf_func_proto bpf_ringbuf_reserve_proto = {
|
||||
.func = bpf_ringbuf_reserve,
|
||||
.ret_type = RET_PTR_TO_ALLOC_MEM_OR_NULL,
|
||||
.ret_type = RET_PTR_TO_RINGBUF_MEM_OR_NULL,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
@@ -490,7 +490,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
|
||||
const struct bpf_func_proto bpf_ringbuf_submit_proto = {
|
||||
.func = bpf_ringbuf_submit,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
|
||||
.arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
@@ -503,7 +503,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
|
||||
const struct bpf_func_proto bpf_ringbuf_discard_proto = {
|
||||
.func = bpf_ringbuf_discard,
|
||||
.ret_type = RET_VOID,
|
||||
.arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
|
||||
.arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
|
||||
@@ -175,8 +175,8 @@ static void maybe_wait_bpf_programs(struct bpf_map *map)
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
|
||||
void *value, __u64 flags)
|
||||
static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
|
||||
void *key, void *value, __u64 flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
@@ -190,7 +190,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
|
||||
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
|
||||
return sock_map_update_elem_sys(map, key, value, flags);
|
||||
} else if (IS_FD_PROG_ARRAY(map)) {
|
||||
return bpf_fd_array_map_update_elem(map, f.file, key, value,
|
||||
return bpf_fd_array_map_update_elem(map, map_file, key, value,
|
||||
flags);
|
||||
}
|
||||
|
||||
@@ -205,12 +205,12 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
|
||||
flags);
|
||||
} else if (IS_FD_ARRAY(map)) {
|
||||
rcu_read_lock();
|
||||
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
|
||||
err = bpf_fd_array_map_update_elem(map, map_file, key, value,
|
||||
flags);
|
||||
rcu_read_unlock();
|
||||
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
|
||||
rcu_read_lock();
|
||||
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
|
||||
err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
|
||||
flags);
|
||||
rcu_read_unlock();
|
||||
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
|
||||
@@ -495,114 +495,181 @@ static void bpf_map_release_memcg(struct bpf_map *map)
|
||||
}
|
||||
#endif
|
||||
|
||||
static int bpf_map_kptr_off_cmp(const void *a, const void *b)
|
||||
static int btf_field_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b;
|
||||
const struct btf_field *f1 = a, *f2 = b;
|
||||
|
||||
if (off_desc1->offset < off_desc2->offset)
|
||||
if (f1->offset < f2->offset)
|
||||
return -1;
|
||||
else if (off_desc1->offset > off_desc2->offset)
|
||||
else if (f1->offset > f2->offset)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset)
|
||||
struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
|
||||
enum btf_field_type type)
|
||||
{
|
||||
/* Since members are iterated in btf_find_field in increasing order,
|
||||
* offsets appended to kptr_off_tab are in increasing order, so we can
|
||||
* do bsearch to find exact match.
|
||||
*/
|
||||
struct bpf_map_value_off *tab;
|
||||
struct btf_field *field;
|
||||
|
||||
if (!map_value_has_kptrs(map))
|
||||
if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & type))
|
||||
return NULL;
|
||||
tab = map->kptr_off_tab;
|
||||
return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp);
|
||||
field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
|
||||
if (!field || !(field->type & type))
|
||||
return NULL;
|
||||
return field;
|
||||
}
|
||||
|
||||
void bpf_map_free_kptr_off_tab(struct bpf_map *map)
|
||||
void btf_record_free(struct btf_record *rec)
|
||||
{
|
||||
struct bpf_map_value_off *tab = map->kptr_off_tab;
|
||||
int i;
|
||||
|
||||
if (!map_value_has_kptrs(map))
|
||||
if (IS_ERR_OR_NULL(rec))
|
||||
return;
|
||||
for (i = 0; i < tab->nr_off; i++) {
|
||||
if (tab->off[i].kptr.module)
|
||||
module_put(tab->off[i].kptr.module);
|
||||
btf_put(tab->off[i].kptr.btf);
|
||||
}
|
||||
kfree(tab);
|
||||
map->kptr_off_tab = NULL;
|
||||
}
|
||||
|
||||
struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map)
|
||||
{
|
||||
struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab;
|
||||
int size, i;
|
||||
|
||||
if (!map_value_has_kptrs(map))
|
||||
return ERR_PTR(-ENOENT);
|
||||
size = offsetof(struct bpf_map_value_off, off[tab->nr_off]);
|
||||
new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!new_tab)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
/* Do a deep copy of the kptr_off_tab */
|
||||
for (i = 0; i < tab->nr_off; i++) {
|
||||
btf_get(tab->off[i].kptr.btf);
|
||||
if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) {
|
||||
while (i--) {
|
||||
if (tab->off[i].kptr.module)
|
||||
module_put(tab->off[i].kptr.module);
|
||||
btf_put(tab->off[i].kptr.btf);
|
||||
}
|
||||
kfree(new_tab);
|
||||
return ERR_PTR(-ENXIO);
|
||||
}
|
||||
}
|
||||
return new_tab;
|
||||
}
|
||||
|
||||
bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b)
|
||||
{
|
||||
struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab;
|
||||
bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b);
|
||||
int size;
|
||||
|
||||
if (!a_has_kptr && !b_has_kptr)
|
||||
return true;
|
||||
if (a_has_kptr != b_has_kptr)
|
||||
return false;
|
||||
if (tab_a->nr_off != tab_b->nr_off)
|
||||
return false;
|
||||
size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]);
|
||||
return !memcmp(tab_a, tab_b, size);
|
||||
}
|
||||
|
||||
/* Caller must ensure map_value_has_kptrs is true. Note that this function can
|
||||
* be called on a map value while the map_value is visible to BPF programs, as
|
||||
* it ensures the correct synchronization, and we already enforce the same using
|
||||
* the bpf_kptr_xchg helper on the BPF program side for referenced kptrs.
|
||||
*/
|
||||
void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
|
||||
{
|
||||
struct bpf_map_value_off *tab = map->kptr_off_tab;
|
||||
unsigned long *btf_id_ptr;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < tab->nr_off; i++) {
|
||||
struct bpf_map_value_off_desc *off_desc = &tab->off[i];
|
||||
unsigned long old_ptr;
|
||||
|
||||
btf_id_ptr = map_value + off_desc->offset;
|
||||
if (off_desc->type == BPF_KPTR_UNREF) {
|
||||
u64 *p = (u64 *)btf_id_ptr;
|
||||
|
||||
WRITE_ONCE(*p, 0);
|
||||
for (i = 0; i < rec->cnt; i++) {
|
||||
switch (rec->fields[i].type) {
|
||||
case BPF_SPIN_LOCK:
|
||||
case BPF_TIMER:
|
||||
break;
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
if (rec->fields[i].kptr.module)
|
||||
module_put(rec->fields[i].kptr.module);
|
||||
btf_put(rec->fields[i].kptr.btf);
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
case BPF_LIST_NODE:
|
||||
/* Nothing to release for bpf_list_head */
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
kfree(rec);
|
||||
}
|
||||
|
||||
void bpf_map_free_record(struct bpf_map *map)
|
||||
{
|
||||
btf_record_free(map->record);
|
||||
map->record = NULL;
|
||||
}
|
||||
|
||||
struct btf_record *btf_record_dup(const struct btf_record *rec)
|
||||
{
|
||||
const struct btf_field *fields;
|
||||
struct btf_record *new_rec;
|
||||
int ret, size, i;
|
||||
|
||||
if (IS_ERR_OR_NULL(rec))
|
||||
return NULL;
|
||||
size = offsetof(struct btf_record, fields[rec->cnt]);
|
||||
new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!new_rec)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
/* Do a deep copy of the btf_record */
|
||||
fields = rec->fields;
|
||||
new_rec->cnt = 0;
|
||||
for (i = 0; i < rec->cnt; i++) {
|
||||
switch (fields[i].type) {
|
||||
case BPF_SPIN_LOCK:
|
||||
case BPF_TIMER:
|
||||
break;
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
btf_get(fields[i].kptr.btf);
|
||||
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
|
||||
ret = -ENXIO;
|
||||
goto free;
|
||||
}
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
case BPF_LIST_NODE:
|
||||
/* Nothing to acquire for bpf_list_head */
|
||||
break;
|
||||
default:
|
||||
ret = -EFAULT;
|
||||
WARN_ON_ONCE(1);
|
||||
goto free;
|
||||
}
|
||||
new_rec->cnt++;
|
||||
}
|
||||
return new_rec;
|
||||
free:
|
||||
btf_record_free(new_rec);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
|
||||
{
|
||||
bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
|
||||
int size;
|
||||
|
||||
if (!a_has_fields && !b_has_fields)
|
||||
return true;
|
||||
if (a_has_fields != b_has_fields)
|
||||
return false;
|
||||
if (rec_a->cnt != rec_b->cnt)
|
||||
return false;
|
||||
size = offsetof(struct btf_record, fields[rec_a->cnt]);
|
||||
/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
|
||||
* members are zeroed out. So memcmp is safe to do without worrying
|
||||
* about padding/unused fields.
|
||||
*
|
||||
* While spin_lock, timer, and kptr have no relation to map BTF,
|
||||
* list_head metadata is specific to map BTF, the btf and value_rec
|
||||
* members in particular. btf is the map BTF, while value_rec points to
|
||||
* btf_record in that map BTF.
|
||||
*
|
||||
* So while by default, we don't rely on the map BTF (which the records
|
||||
* were parsed from) matching for both records, which is not backwards
|
||||
* compatible, in case list_head is part of it, we implicitly rely on
|
||||
* that by way of depending on memcmp succeeding for it.
|
||||
*/
|
||||
return !memcmp(rec_a, rec_b, size);
|
||||
}
|
||||
|
||||
void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
|
||||
{
|
||||
if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
|
||||
return;
|
||||
bpf_timer_cancel_and_free(obj + rec->timer_off);
|
||||
}
|
||||
|
||||
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
|
||||
{
|
||||
const struct btf_field *fields;
|
||||
int i;
|
||||
|
||||
if (IS_ERR_OR_NULL(rec))
|
||||
return;
|
||||
fields = rec->fields;
|
||||
for (i = 0; i < rec->cnt; i++) {
|
||||
const struct btf_field *field = &fields[i];
|
||||
void *field_ptr = obj + field->offset;
|
||||
|
||||
switch (fields[i].type) {
|
||||
case BPF_SPIN_LOCK:
|
||||
break;
|
||||
case BPF_TIMER:
|
||||
bpf_timer_cancel_and_free(field_ptr);
|
||||
break;
|
||||
case BPF_KPTR_UNREF:
|
||||
WRITE_ONCE(*(u64 *)field_ptr, 0);
|
||||
break;
|
||||
case BPF_KPTR_REF:
|
||||
field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
if (WARN_ON_ONCE(rec->spin_lock_off < 0))
|
||||
continue;
|
||||
bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
|
||||
break;
|
||||
case BPF_LIST_NODE:
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
continue;
|
||||
}
|
||||
old_ptr = xchg(btf_id_ptr, 0);
|
||||
off_desc->kptr.dtor((void *)old_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -610,14 +677,24 @@ void bpf_map_free_kptrs(struct bpf_map *map, void *map_value)
|
||||
static void bpf_map_free_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_map *map = container_of(work, struct bpf_map, work);
|
||||
struct btf_field_offs *foffs = map->field_offs;
|
||||
struct btf_record *rec = map->record;
|
||||
|
||||
security_bpf_map_free(map);
|
||||
kfree(map->off_arr);
|
||||
bpf_map_release_memcg(map);
|
||||
/* implementation dependent freeing, map_free callback also does
|
||||
* bpf_map_free_kptr_off_tab, if needed.
|
||||
*/
|
||||
/* implementation dependent freeing */
|
||||
map->ops->map_free(map);
|
||||
/* Delay freeing of field_offs and btf_record for maps, as map_free
|
||||
* callback usually needs access to them. It is better to do it here
|
||||
* than require each callback to do the free itself manually.
|
||||
*
|
||||
* Note that the btf_record stashed in map->inner_map_meta->record was
|
||||
* already freed using the map_free callback for map in map case which
|
||||
* eventually calls bpf_map_free_meta, since inner_map_meta is only a
|
||||
* template bpf_map struct used during verification.
|
||||
*/
|
||||
kfree(foffs);
|
||||
btf_record_free(rec);
|
||||
}
|
||||
|
||||
static void bpf_map_put_uref(struct bpf_map *map)
|
||||
@@ -778,8 +855,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
struct bpf_map *map = filp->private_data;
|
||||
int err;
|
||||
|
||||
if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
|
||||
map_value_has_timer(map) || map_value_has_kptrs(map))
|
||||
if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (!(vma->vm_flags & VM_SHARED))
|
||||
@@ -906,84 +982,6 @@ int map_check_no_btf(const struct bpf_map *map,
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv)
|
||||
{
|
||||
const u32 a = *(const u32 *)_a;
|
||||
const u32 b = *(const u32 *)_b;
|
||||
|
||||
if (a < b)
|
||||
return -1;
|
||||
else if (a > b)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv)
|
||||
{
|
||||
struct bpf_map *map = (struct bpf_map *)priv;
|
||||
u32 *off_base = map->off_arr->field_off;
|
||||
u32 *a = _a, *b = _b;
|
||||
u8 *sz_a, *sz_b;
|
||||
|
||||
sz_a = map->off_arr->field_sz + (a - off_base);
|
||||
sz_b = map->off_arr->field_sz + (b - off_base);
|
||||
|
||||
swap(*a, *b);
|
||||
swap(*sz_a, *sz_b);
|
||||
}
|
||||
|
||||
static int bpf_map_alloc_off_arr(struct bpf_map *map)
|
||||
{
|
||||
bool has_spin_lock = map_value_has_spin_lock(map);
|
||||
bool has_timer = map_value_has_timer(map);
|
||||
bool has_kptrs = map_value_has_kptrs(map);
|
||||
struct bpf_map_off_arr *off_arr;
|
||||
u32 i;
|
||||
|
||||
if (!has_spin_lock && !has_timer && !has_kptrs) {
|
||||
map->off_arr = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!off_arr)
|
||||
return -ENOMEM;
|
||||
map->off_arr = off_arr;
|
||||
|
||||
off_arr->cnt = 0;
|
||||
if (has_spin_lock) {
|
||||
i = off_arr->cnt;
|
||||
|
||||
off_arr->field_off[i] = map->spin_lock_off;
|
||||
off_arr->field_sz[i] = sizeof(struct bpf_spin_lock);
|
||||
off_arr->cnt++;
|
||||
}
|
||||
if (has_timer) {
|
||||
i = off_arr->cnt;
|
||||
|
||||
off_arr->field_off[i] = map->timer_off;
|
||||
off_arr->field_sz[i] = sizeof(struct bpf_timer);
|
||||
off_arr->cnt++;
|
||||
}
|
||||
if (has_kptrs) {
|
||||
struct bpf_map_value_off *tab = map->kptr_off_tab;
|
||||
u32 *off = &off_arr->field_off[off_arr->cnt];
|
||||
u8 *sz = &off_arr->field_sz[off_arr->cnt];
|
||||
|
||||
for (i = 0; i < tab->nr_off; i++) {
|
||||
*off++ = tab->off[i].offset;
|
||||
*sz++ = sizeof(u64);
|
||||
}
|
||||
off_arr->cnt += tab->nr_off;
|
||||
}
|
||||
|
||||
if (off_arr->cnt == 1)
|
||||
return 0;
|
||||
sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]),
|
||||
map_off_arr_cmp, map_off_arr_swap, map);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
||||
u32 btf_key_id, u32 btf_value_id)
|
||||
{
|
||||
@@ -1006,39 +1004,12 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
||||
if (!value_type || value_size != map->value_size)
|
||||
return -EINVAL;
|
||||
|
||||
map->spin_lock_off = btf_find_spin_lock(btf, value_type);
|
||||
map->record = btf_parse_fields(btf, value_type,
|
||||
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
|
||||
map->value_size);
|
||||
if (!IS_ERR_OR_NULL(map->record)) {
|
||||
int i;
|
||||
|
||||
if (map_value_has_spin_lock(map)) {
|
||||
if (map->map_flags & BPF_F_RDONLY_PROG)
|
||||
return -EACCES;
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY &&
|
||||
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
|
||||
return -ENOTSUPP;
|
||||
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
|
||||
map->value_size) {
|
||||
WARN_ONCE(1,
|
||||
"verifier bug spin_lock_off %d value_size %d\n",
|
||||
map->spin_lock_off, map->value_size);
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
map->timer_off = btf_find_timer(btf, value_type);
|
||||
if (map_value_has_timer(map)) {
|
||||
if (map->map_flags & BPF_F_RDONLY_PROG)
|
||||
return -EACCES;
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
|
||||
if (map_value_has_kptrs(map)) {
|
||||
if (!bpf_capable()) {
|
||||
ret = -EPERM;
|
||||
goto free_map_tab;
|
||||
@@ -1047,15 +1018,60 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
||||
ret = -EACCES;
|
||||
goto free_map_tab;
|
||||
}
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY &&
|
||||
map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
|
||||
switch (map->record->field_mask & (1 << i)) {
|
||||
case 0:
|
||||
continue;
|
||||
case BPF_SPIN_LOCK:
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY &&
|
||||
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
|
||||
map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
case BPF_TIMER:
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
case BPF_KPTR_UNREF:
|
||||
case BPF_KPTR_REF:
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY &&
|
||||
map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
case BPF_LIST_HEAD:
|
||||
if (map->map_type != BPF_MAP_TYPE_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
|
||||
map->map_type != BPF_MAP_TYPE_ARRAY) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* Fail if map_type checks are missing for a field type */
|
||||
ret = -EOPNOTSUPP;
|
||||
goto free_map_tab;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = btf_check_and_fixup_fields(btf, map->record);
|
||||
if (ret < 0)
|
||||
goto free_map_tab;
|
||||
|
||||
if (map->ops->map_check_btf) {
|
||||
ret = map->ops->map_check_btf(map, btf, key_type, value_type);
|
||||
if (ret < 0)
|
||||
@@ -1064,7 +1080,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
|
||||
|
||||
return ret;
|
||||
free_map_tab:
|
||||
bpf_map_free_kptr_off_tab(map);
|
||||
bpf_map_free_record(map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1073,6 +1089,7 @@ free_map_tab:
|
||||
static int map_create(union bpf_attr *attr)
|
||||
{
|
||||
int numa_node = bpf_map_attr_numa_node(attr);
|
||||
struct btf_field_offs *foffs;
|
||||
struct bpf_map *map;
|
||||
int f_flags;
|
||||
int err;
|
||||
@@ -1117,8 +1134,6 @@ static int map_create(union bpf_attr *attr)
|
||||
mutex_init(&map->freeze_mutex);
|
||||
spin_lock_init(&map->owner.lock);
|
||||
|
||||
map->spin_lock_off = -EINVAL;
|
||||
map->timer_off = -EINVAL;
|
||||
if (attr->btf_key_type_id || attr->btf_value_type_id ||
|
||||
/* Even the map's value is a kernel's struct,
|
||||
* the bpf_prog.o must have BTF to begin with
|
||||
@@ -1154,13 +1169,17 @@ static int map_create(union bpf_attr *attr)
|
||||
attr->btf_vmlinux_value_type_id;
|
||||
}
|
||||
|
||||
err = bpf_map_alloc_off_arr(map);
|
||||
if (err)
|
||||
|
||||
foffs = btf_parse_field_offs(map->record);
|
||||
if (IS_ERR(foffs)) {
|
||||
err = PTR_ERR(foffs);
|
||||
goto free_map;
|
||||
}
|
||||
map->field_offs = foffs;
|
||||
|
||||
err = security_bpf_map_alloc(map);
|
||||
if (err)
|
||||
goto free_map_off_arr;
|
||||
goto free_map_field_offs;
|
||||
|
||||
err = bpf_map_alloc_id(map);
|
||||
if (err)
|
||||
@@ -1184,8 +1203,8 @@ static int map_create(union bpf_attr *attr)
|
||||
|
||||
free_map_sec:
|
||||
security_bpf_map_free(map);
|
||||
free_map_off_arr:
|
||||
kfree(map->off_arr);
|
||||
free_map_field_offs:
|
||||
kfree(map->field_offs);
|
||||
free_map:
|
||||
btf_put(map->btf);
|
||||
map->ops->map_free(map);
|
||||
@@ -1332,7 +1351,7 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
@@ -1405,7 +1424,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
@@ -1423,7 +1442,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
|
||||
goto free_key;
|
||||
}
|
||||
|
||||
err = bpf_map_update_value(map, f, key, value, attr->flags);
|
||||
err = bpf_map_update_value(map, f.file, key, value, attr->flags);
|
||||
|
||||
kvfree(value);
|
||||
free_key:
|
||||
@@ -1568,7 +1587,7 @@ int generic_map_delete_batch(struct bpf_map *map,
|
||||
return -EINVAL;
|
||||
|
||||
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -1609,23 +1628,21 @@ int generic_map_delete_batch(struct bpf_map *map,
|
||||
return err;
|
||||
}
|
||||
|
||||
int generic_map_update_batch(struct bpf_map *map,
|
||||
int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
|
||||
const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
void __user *values = u64_to_user_ptr(attr->batch.values);
|
||||
void __user *keys = u64_to_user_ptr(attr->batch.keys);
|
||||
u32 value_size, cp, max_count;
|
||||
int ufd = attr->batch.map_fd;
|
||||
void *key, *value;
|
||||
struct fd f;
|
||||
int err = 0;
|
||||
|
||||
if (attr->batch.elem_flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -1645,7 +1662,6 @@ int generic_map_update_batch(struct bpf_map *map,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
|
||||
for (cp = 0; cp < max_count; cp++) {
|
||||
err = -EFAULT;
|
||||
if (copy_from_user(key, keys + cp * map->key_size,
|
||||
@@ -1653,7 +1669,7 @@ int generic_map_update_batch(struct bpf_map *map,
|
||||
copy_from_user(value, values + cp * value_size, value_size))
|
||||
break;
|
||||
|
||||
err = bpf_map_update_value(map, f, key, value,
|
||||
err = bpf_map_update_value(map, map_file, key, value,
|
||||
attr->batch.elem_flags);
|
||||
|
||||
if (err)
|
||||
@@ -1666,7 +1682,6 @@ int generic_map_update_batch(struct bpf_map *map,
|
||||
|
||||
kvfree(value);
|
||||
kvfree(key);
|
||||
fdput(f);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1688,7 +1703,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
|
||||
return -EINVAL;
|
||||
|
||||
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map))
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK))
|
||||
return -EINVAL;
|
||||
|
||||
value_size = bpf_map_value_size(map);
|
||||
@@ -1810,7 +1825,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
@@ -1881,8 +1896,7 @@ static int map_freeze(const union bpf_attr *attr)
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
|
||||
map_value_has_timer(map) || map_value_has_kptrs(map)) {
|
||||
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
|
||||
fdput(f);
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
@@ -2117,11 +2131,11 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
|
||||
|
||||
st = per_cpu_ptr(prog->stats, cpu);
|
||||
do {
|
||||
start = u64_stats_fetch_begin_irq(&st->syncp);
|
||||
start = u64_stats_fetch_begin(&st->syncp);
|
||||
tnsecs = u64_stats_read(&st->nsecs);
|
||||
tcnt = u64_stats_read(&st->cnt);
|
||||
tmisses = u64_stats_read(&st->misses);
|
||||
} while (u64_stats_fetch_retry_irq(&st->syncp, start));
|
||||
} while (u64_stats_fetch_retry(&st->syncp, start));
|
||||
nsecs += tnsecs;
|
||||
cnt += tcnt;
|
||||
misses += tmisses;
|
||||
@@ -4460,13 +4474,13 @@ put_file:
|
||||
|
||||
#define BPF_MAP_BATCH_LAST_FIELD batch.flags
|
||||
|
||||
#define BPF_DO_BATCH(fn) \
|
||||
#define BPF_DO_BATCH(fn, ...) \
|
||||
do { \
|
||||
if (!fn) { \
|
||||
err = -ENOTSUPP; \
|
||||
goto err_put; \
|
||||
} \
|
||||
err = fn(map, attr, uattr); \
|
||||
err = fn(__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
static int bpf_map_do_batch(const union bpf_attr *attr,
|
||||
@@ -4500,13 +4514,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
|
||||
}
|
||||
|
||||
if (cmd == BPF_MAP_LOOKUP_BATCH)
|
||||
BPF_DO_BATCH(map->ops->map_lookup_batch);
|
||||
BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
|
||||
else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
|
||||
BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
|
||||
BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
|
||||
else if (cmd == BPF_MAP_UPDATE_BATCH)
|
||||
BPF_DO_BATCH(map->ops->map_update_batch);
|
||||
BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr);
|
||||
else
|
||||
BPF_DO_BATCH(map->ops->map_delete_batch);
|
||||
BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
|
||||
err_put:
|
||||
if (has_write)
|
||||
bpf_map_write_active_dec(map);
|
||||
@@ -5133,13 +5147,14 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
|
||||
|
||||
run_ctx.bpf_cookie = 0;
|
||||
run_ctx.saved_run_ctx = NULL;
|
||||
if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
|
||||
if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
|
||||
/* recursion detected */
|
||||
bpf_prog_put(prog);
|
||||
return -EBUSY;
|
||||
}
|
||||
attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
|
||||
__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
|
||||
__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
|
||||
&run_ctx);
|
||||
bpf_prog_put(prog);
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
@@ -864,7 +864,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
|
||||
* [2..MAX_U64] - execute bpf prog and record execution time.
|
||||
* This is start time.
|
||||
*/
|
||||
u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
|
||||
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
|
||||
__acquires(RCU)
|
||||
{
|
||||
rcu_read_lock();
|
||||
@@ -901,7 +901,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
|
||||
}
|
||||
}
|
||||
|
||||
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
|
||||
static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
__releases(RCU)
|
||||
{
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
@@ -912,8 +913,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
__acquires(RCU)
|
||||
{
|
||||
/* Runtime stats are exported via actual BPF_LSM_CGROUP
|
||||
@@ -927,8 +928,8 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
|
||||
return NO_START_TIME;
|
||||
}
|
||||
|
||||
void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
__releases(RCU)
|
||||
{
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
@@ -937,7 +938,8 @@ void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
|
||||
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
{
|
||||
rcu_read_lock_trace();
|
||||
migrate_disable();
|
||||
@@ -953,8 +955,8 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r
|
||||
return bpf_prog_start_time();
|
||||
}
|
||||
|
||||
void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
{
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
|
||||
@@ -964,8 +966,30 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
/* Entry hook for a sleepable program.
 *
 * Enters an RCU-tasks-trace read-side section, disables migration, annotates
 * that the caller may fault, installs @run_ctx as the current run context
 * (remembering the previous one in run_ctx->saved_run_ctx) and returns the
 * value of bpf_prog_start_time().
 */
static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	u64 start;

	rcu_read_lock_trace();
	migrate_disable();
	might_fault();

	/* Remember the outer context so the exit hook can restore it */
	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

	start = bpf_prog_start_time();
	return start;
}
|
||||
|
||||
/* Exit hook matching __bpf_prog_enter_sleepable().
 *
 * Restores the run context saved on entry, accounts the run via
 * update_prog_stats() using @start, then re-enables migration and leaves the
 * RCU-tasks-trace read-side section, in reverse order of the entry hook.
 */
static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
					      struct bpf_tramp_run_ctx *run_ctx)
{
	/* Put back the context that was active before this program ran */
	bpf_reset_run_ctx(run_ctx->saved_run_ctx);

	update_prog_stats(prog, start);

	/* Undo the entry hook, innermost first */
	migrate_enable();
	rcu_read_unlock_trace();
}
|
||||
|
||||
static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
__acquires(RCU)
|
||||
{
|
||||
rcu_read_lock();
|
||||
@@ -976,8 +1000,8 @@ u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
|
||||
return bpf_prog_start_time();
|
||||
}
|
||||
|
||||
void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
|
||||
struct bpf_tramp_run_ctx *run_ctx)
|
||||
__releases(RCU)
|
||||
{
|
||||
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
|
||||
@@ -997,6 +1021,36 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
|
||||
percpu_ref_put(&tr->pcref);
|
||||
}
|
||||
|
||||
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
|
||||
{
|
||||
bool sleepable = prog->aux->sleepable;
|
||||
|
||||
if (bpf_prog_check_recur(prog))
|
||||
return sleepable ? __bpf_prog_enter_sleepable_recur :
|
||||
__bpf_prog_enter_recur;
|
||||
|
||||
if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
|
||||
prog->expected_attach_type == BPF_LSM_CGROUP)
|
||||
return __bpf_prog_enter_lsm_cgroup;
|
||||
|
||||
return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter;
|
||||
}
|
||||
|
||||
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
|
||||
{
|
||||
bool sleepable = prog->aux->sleepable;
|
||||
|
||||
if (bpf_prog_check_recur(prog))
|
||||
return sleepable ? __bpf_prog_exit_sleepable_recur :
|
||||
__bpf_prog_exit_recur;
|
||||
|
||||
if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM &&
|
||||
prog->expected_attach_type == BPF_LSM_CGROUP)
|
||||
return __bpf_prog_exit_lsm_cgroup;
|
||||
|
||||
return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit;
|
||||
}
|
||||
|
||||
int __weak
|
||||
arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
|
||||
const struct btf_func_model *m, u32 flags,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5353,6 +5353,7 @@ static void css_free_rwork_fn(struct work_struct *work)
|
||||
atomic_dec(&cgrp->root->nr_cgrps);
|
||||
cgroup1_pidlist_destroy_all(cgrp);
|
||||
cancel_work_sync(&cgrp->release_agent_work);
|
||||
bpf_cgrp_storage_free(cgrp);
|
||||
|
||||
if (cgroup_parent(cgrp)) {
|
||||
/*
|
||||
|
||||
@@ -113,9 +113,40 @@ int static_key_count(struct static_key *key)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(static_key_count);
|
||||
|
||||
void static_key_slow_inc_cpuslocked(struct static_key *key)
|
||||
/*
|
||||
* static_key_fast_inc_not_disabled - adds a user for a static key
|
||||
* @key: static key that must be already enabled
|
||||
*
|
||||
* The caller must make sure that the static key can't get disabled while
|
||||
* in this function. It doesn't patch jump labels, only adds a user to
|
||||
* an already enabled static key.
|
||||
*
|
||||
* Returns true if the increment was done. Unlike refcount_t the ref counter
|
||||
* is not saturated, but will fail to increment on overflow.
|
||||
*/
|
||||
bool static_key_fast_inc_not_disabled(struct static_key *key)
|
||||
{
|
||||
int v;
|
||||
|
||||
STATIC_KEY_CHECK_USE(key);
|
||||
/*
|
||||
* Negative key->enabled has a special meaning: it sends
|
||||
* static_key_slow_inc() down the slow path, and it is non-zero
|
||||
* so it counts as "enabled" in jump_label_update(). Note that
|
||||
* atomic_inc_unless_negative() checks >= 0, so roll our own.
|
||||
*/
|
||||
v = atomic_read(&key->enabled);
|
||||
do {
|
||||
if (v <= 0 || (v + 1) < 0)
|
||||
return false;
|
||||
} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1)));
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(static_key_fast_inc_not_disabled);
|
||||
|
||||
bool static_key_slow_inc_cpuslocked(struct static_key *key)
|
||||
{
|
||||
lockdep_assert_cpus_held();
|
||||
|
||||
/*
|
||||
@@ -124,15 +155,9 @@ void static_key_slow_inc_cpuslocked(struct static_key *key)
|
||||
* jump_label_update() process. At the same time, however,
|
||||
* the jump_label_update() call below wants to see
|
||||
* static_key_enabled(&key) for jumps to be updated properly.
|
||||
*
|
||||
* So give a special meaning to negative key->enabled: it sends
|
||||
* static_key_slow_inc() down the slow path, and it is non-zero
|
||||
* so it counts as "enabled" in jump_label_update(). Note that
|
||||
* atomic_inc_unless_negative() checks >= 0, so roll our own.
|
||||
*/
|
||||
for (int v = atomic_read(&key->enabled); v > 0; )
|
||||
if (likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1)))
|
||||
return;
|
||||
if (static_key_fast_inc_not_disabled(key))
|
||||
return true;
|
||||
|
||||
jump_label_lock();
|
||||
if (atomic_read(&key->enabled) == 0) {
|
||||
@@ -144,16 +169,23 @@ void static_key_slow_inc_cpuslocked(struct static_key *key)
|
||||
*/
|
||||
atomic_set_release(&key->enabled, 1);
|
||||
} else {
|
||||
atomic_inc(&key->enabled);
|
||||
if (WARN_ON_ONCE(!static_key_fast_inc_not_disabled(key))) {
|
||||
jump_label_unlock();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
jump_label_unlock();
|
||||
return true;
|
||||
}
|
||||
|
||||
void static_key_slow_inc(struct static_key *key)
|
||||
bool static_key_slow_inc(struct static_key *key)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
cpus_read_lock();
|
||||
static_key_slow_inc_cpuslocked(key);
|
||||
ret = static_key_slow_inc_cpuslocked(key);
|
||||
cpus_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(static_key_slow_inc);
|
||||
|
||||
|
||||
@@ -494,7 +494,6 @@ unsigned long module_kallsyms_lookup_name(const char *name)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LIVEPATCH
|
||||
int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
|
||||
struct module *, unsigned long),
|
||||
void *data)
|
||||
@@ -531,4 +530,3 @@ out:
|
||||
mutex_unlock(&module_mutex);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_LIVEPATCH */
|
||||
|
||||
@@ -1535,6 +1535,8 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
|
||||
{
|
||||
// Wait for late-stage exiting tasks to finish exiting.
|
||||
// These might have passed the call to exit_tasks_rcu_finish().
|
||||
|
||||
// If you remove the following line, update rcu_trace_implies_rcu_gp()!!!
|
||||
synchronize_rcu();
|
||||
// Any tasks that exit after this point will set
|
||||
// TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs.
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_verifier.h>
|
||||
#include <linux/bpf_perf_event.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/filter.h>
|
||||
@@ -773,7 +774,7 @@ BPF_CALL_0(bpf_get_current_task_btf)
|
||||
const struct bpf_func_proto bpf_get_current_task_btf_proto = {
|
||||
.func = bpf_get_current_task_btf,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_PTR_TO_BTF_ID,
|
||||
.ret_type = RET_PTR_TO_BTF_ID_TRUSTED,
|
||||
.ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
|
||||
};
|
||||
|
||||
@@ -1456,6 +1457,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_get_current_cgroup_id_proto;
|
||||
case BPF_FUNC_get_current_ancestor_cgroup_id:
|
||||
return &bpf_get_current_ancestor_cgroup_id_proto;
|
||||
case BPF_FUNC_cgrp_storage_get:
|
||||
return &bpf_cgrp_storage_get_proto;
|
||||
case BPF_FUNC_cgrp_storage_delete:
|
||||
return &bpf_cgrp_storage_delete_proto;
|
||||
#endif
|
||||
case BPF_FUNC_send_signal:
|
||||
return &bpf_send_signal_proto;
|
||||
@@ -1480,9 +1485,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
case BPF_FUNC_get_task_stack:
|
||||
return &bpf_get_task_stack_proto;
|
||||
case BPF_FUNC_copy_from_user:
|
||||
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
|
||||
return &bpf_copy_from_user_proto;
|
||||
case BPF_FUNC_copy_from_user_task:
|
||||
return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
|
||||
return &bpf_copy_from_user_task_proto;
|
||||
case BPF_FUNC_snprintf_btf:
|
||||
return &bpf_snprintf_btf_proto;
|
||||
case BPF_FUNC_per_cpu_ptr:
|
||||
@@ -1490,8 +1495,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
case BPF_FUNC_this_cpu_ptr:
|
||||
return &bpf_this_cpu_ptr_proto;
|
||||
case BPF_FUNC_task_storage_get:
|
||||
if (bpf_prog_check_recur(prog))
|
||||
return &bpf_task_storage_get_recur_proto;
|
||||
return &bpf_task_storage_get_proto;
|
||||
case BPF_FUNC_task_storage_delete:
|
||||
if (bpf_prog_check_recur(prog))
|
||||
return &bpf_task_storage_delete_recur_proto;
|
||||
return &bpf_task_storage_delete_proto;
|
||||
case BPF_FUNC_for_each_map_elem:
|
||||
return &bpf_for_each_map_elem_proto;
|
||||
@@ -2452,6 +2461,8 @@ struct bpf_kprobe_multi_link {
|
||||
unsigned long *addrs;
|
||||
u64 *cookies;
|
||||
u32 cnt;
|
||||
u32 mods_cnt;
|
||||
struct module **mods;
|
||||
};
|
||||
|
||||
struct bpf_kprobe_multi_run_ctx {
|
||||
@@ -2507,6 +2518,14 @@ error:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Call module_put() on each of the first @cnt entries of @mods, in array
 * order. Nothing is touched when @cnt is zero. The array itself is not
 * freed here.
 */
static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
{
	u32 idx = 0;

	while (idx != cnt) {
		module_put(mods[idx]);
		idx++;
	}
}
|
||||
|
||||
static void free_user_syms(struct user_syms *us)
|
||||
{
|
||||
kvfree(us->syms);
|
||||
@@ -2519,6 +2538,7 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link)
|
||||
|
||||
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
|
||||
unregister_fprobe(&kmulti_link->fp);
|
||||
kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
|
||||
}
|
||||
|
||||
static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
|
||||
@@ -2528,6 +2548,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
|
||||
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
|
||||
kvfree(kmulti_link->addrs);
|
||||
kvfree(kmulti_link->cookies);
|
||||
kfree(kmulti_link->mods);
|
||||
kfree(kmulti_link);
|
||||
}
|
||||
|
||||
@@ -2550,7 +2571,7 @@ static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void
|
||||
swap(*cookie_a, *cookie_b);
|
||||
}
|
||||
|
||||
static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b)
|
||||
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
|
||||
{
|
||||
const unsigned long *addr_a = a, *addr_b = b;
|
||||
|
||||
@@ -2561,7 +2582,7 @@ static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b)
|
||||
|
||||
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
|
||||
{
|
||||
return __bpf_kprobe_multi_cookie_cmp(a, b);
|
||||
return bpf_kprobe_multi_addrs_cmp(a, b);
|
||||
}
|
||||
|
||||
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
|
||||
@@ -2579,7 +2600,7 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
|
||||
return 0;
|
||||
entry_ip = run_ctx->entry_ip;
|
||||
addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
|
||||
__bpf_kprobe_multi_cookie_cmp);
|
||||
bpf_kprobe_multi_addrs_cmp);
|
||||
if (!addr)
|
||||
return 0;
|
||||
cookie = link->cookies + (addr - link->addrs);
|
||||
@@ -2663,6 +2684,71 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv)
|
||||
}
|
||||
}
|
||||
|
||||
struct module_addr_args {
|
||||
unsigned long *addrs;
|
||||
u32 addrs_cnt;
|
||||
struct module **mods;
|
||||
int mods_cnt;
|
||||
int mods_cap;
|
||||
};
|
||||
|
||||
static int module_callback(void *data, const char *name,
|
||||
struct module *mod, unsigned long addr)
|
||||
{
|
||||
struct module_addr_args *args = data;
|
||||
struct module **mods;
|
||||
|
||||
/* We iterate all modules symbols and for each we:
|
||||
* - search for it in provided addresses array
|
||||
* - if found we check if we already have the module pointer stored
|
||||
* (we iterate modules sequentially, so we can check just the last
|
||||
* module pointer)
|
||||
* - take module reference and store it
|
||||
*/
|
||||
if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr),
|
||||
bpf_kprobe_multi_addrs_cmp))
|
||||
return 0;
|
||||
|
||||
if (args->mods && args->mods[args->mods_cnt - 1] == mod)
|
||||
return 0;
|
||||
|
||||
if (args->mods_cnt == args->mods_cap) {
|
||||
args->mods_cap = max(16, args->mods_cap * 3 / 2);
|
||||
mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL);
|
||||
if (!mods)
|
||||
return -ENOMEM;
|
||||
args->mods = mods;
|
||||
}
|
||||
|
||||
if (!try_module_get(mod))
|
||||
return -EINVAL;
|
||||
|
||||
args->mods[args->mods_cnt] = mod;
|
||||
args->mods_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Build the array of modules that own any of the @addrs_cnt addresses in
 * @addrs, by running module_callback() over module symbols via
 * module_kallsyms_on_each_symbol(). @addrs is searched with bsearch() by
 * the callback, so it has to be sorted.
 *
 * On success the array is stored in *@mods and the number of entries in it
 * is returned; each stored module had a reference taken by the callback.
 * If the walk returns non-zero, the references gathered so far are dropped,
 * the array is freed, *@mods is left untouched and that value is returned.
 */
static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
{
	struct module_addr_args walk = {
		.addrs		= addrs,
		.addrs_cnt	= addrs_cnt,
	};
	int ret;

	ret = module_kallsyms_on_each_symbol(module_callback, &walk);
	if (!ret) {
		/* Everything went fine: hand the array over, report its size. */
		*mods = walk.mods;
		return walk.mods_cnt;
	}

	/* The walk was aborted: undo whatever was collected before bailing out. */
	kprobe_multi_put_modules(walk.mods, walk.mods_cnt);
	kfree(walk.mods);
	return ret;
}
|
||||
|
||||
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_kprobe_multi_link *link = NULL;
|
||||
@@ -2773,10 +2859,25 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
|
||||
bpf_kprobe_multi_cookie_cmp,
|
||||
bpf_kprobe_multi_cookie_swap,
|
||||
link);
|
||||
} else {
|
||||
/*
|
||||
* We need to sort addrs array even if there are no cookies
|
||||
* provided, to allow bsearch in get_modules_for_addrs.
|
||||
*/
|
||||
sort(addrs, cnt, sizeof(*addrs),
|
||||
bpf_kprobe_multi_addrs_cmp, NULL);
|
||||
}
|
||||
|
||||
err = get_modules_for_addrs(&link->mods, addrs, cnt);
|
||||
if (err < 0) {
|
||||
bpf_link_cleanup(&link_primer);
|
||||
return err;
|
||||
}
|
||||
link->mods_cnt = err;
|
||||
|
||||
err = register_fprobe_ips(&link->fp, addrs, cnt);
|
||||
if (err) {
|
||||
kprobe_multi_put_modules(link->mods, link->mods_cnt);
|
||||
bpf_link_cleanup(&link_primer);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -8257,6 +8257,10 @@ struct kallsyms_data {
|
||||
size_t found;
|
||||
};
|
||||
|
||||
/* This function gets called for all kernel and module symbols
|
||||
* and returns 1 in case we resolved all the requested symbols,
|
||||
* 0 otherwise.
|
||||
*/
|
||||
static int kallsyms_callback(void *data, const char *name,
|
||||
struct module *mod, unsigned long addr)
|
||||
{
|
||||
@@ -8299,17 +8303,19 @@ static int kallsyms_callback(void *data, const char *name,
|
||||
int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs)
|
||||
{
|
||||
struct kallsyms_data args;
|
||||
int err;
|
||||
int found_all;
|
||||
|
||||
memset(addrs, 0, sizeof(*addrs) * cnt);
|
||||
args.addrs = addrs;
|
||||
args.syms = sorted_syms;
|
||||
args.cnt = cnt;
|
||||
args.found = 0;
|
||||
err = kallsyms_on_each_symbol(kallsyms_callback, &args);
|
||||
if (err < 0)
|
||||
return err;
|
||||
return args.found == args.cnt ? 0 : -ESRCH;
|
||||
|
||||
found_all = kallsyms_on_each_symbol(kallsyms_callback, &args);
|
||||
if (found_all)
|
||||
return 0;
|
||||
found_all = module_kallsyms_on_each_symbol(kallsyms_callback, &args);
|
||||
return found_all ? 0 : -ESRCH;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
|
||||
Reference in New Issue
Block a user