Merge 6.1.116 into android14-6.1-lts

Changes in 6.1.116
	cpufreq: Generalize of_perf_domain_get_sharing_cpumask phandle format
	cpufreq: Avoid a bad reference count on CPU node
	selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test
	mm: remove kern_addr_valid() completely
	fs/proc/kcore: avoid bounce buffer for ktext data
	fs/proc/kcore: convert read_kcore() to read_kcore_iter()
	fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
	fs/proc/kcore.c: allow translation of physical memory addresses
	cgroup: Fix potential overflow issue when checking max_depth
	wifi: iwlegacy: Fix "field-spanning write" warning in il_enqueue_hcmd()
	mac80211: MAC80211_MESSAGE_TRACING should depend on TRACING
	wifi: mac80211: skip non-uploaded keys in ieee80211_iter_keys
	wifi: ath11k: Fix invalid ring usage in full monitor mode
	wifi: brcm80211: BRCM_TRACING should depend on TRACING
	RDMA/cxgb4: Dump vendor specific QP details
	RDMA/mlx5: Round max_rd_atomic/max_dest_rd_atomic up instead of down
	RDMA/bnxt_re: synchronize the qp-handle table array
	wifi: iwlwifi: mvm: disconnect station vifs if recovery failed
	wifi: iwlwifi: mvm: Fix response handling in iwl_mvm_send_recovery_cmd()
	ASoC: cs42l51: Fix some error handling paths in cs42l51_probe()
	macsec: Fix use-after-free while sending the offloading packet
	net: stmmac: TSO: Fix unbalanced DMA map/unmap for non-paged SKB data
	ipv4: ip_tunnel: Fix suspicious RCU usage warning in ip_tunnel_init_flow()
	gtp: allow -1 to be specified as file description from userspace
	net/sched: stop qdisc_tree_reduce_backlog on TC_H_ROOT
	netdevsim: Add trailing zero to terminate the string in nsim_nexthop_bucket_activity_write()
	bpf: Fix out-of-bounds write in trie_get_next_key()
	netfilter: Fix use-after-free in get_info()
	netfilter: nf_reject_ipv6: fix potential crash in nf_send_reset6()
	Bluetooth: hci: fix null-ptr-deref in hci_read_supported_codecs
	net: skip offload for NETIF_F_IPV6_CSUM if ipv6 header contains extension
	mlxsw: spectrum_ptp: Add missing verification before pushing Tx header
	mlxsw: spectrum_router: Add support for double entry RIFs
	mlxsw: spectrum_ipip: Rename Spectrum-2 ip6gre operations
	mlxsw: spectrum_ipip: Fix memory leak when changing remote IPv6 address
	netfilter: nft_payload: sanitize offset and length before calling skb_checksum()
	iomap: convert iomap_unshare_iter to use large folios
	iomap: improve shared block detection in iomap_unshare_iter
	iomap: don't bother unsharing delalloc extents
	iomap: share iomap_unshare_iter predicate code with fsdax
	fsdax: remove zeroing code from dax_unshare_iter
	fsdax: dax_unshare_iter needs to copy entire blocks
	iomap: turn iomap_want_unshare_iter into an inline function
	compiler-gcc: be consistent with underscores use for `no_sanitize`
	compiler-gcc: remove attribute support check for `__no_sanitize_address__`
	kasan: Fix Software Tag-Based KASAN with GCC
	firmware: arm_sdei: Fix the input parameter of cpuhp_remove_state()
	afs: Automatically generate trace tag enums
	afs: Fix missing subdir edit when renamed between parent dirs
	ACPI: CPPC: Make rmw_lock a raw_spin_lock
	fs/ntfs3: Check if more than chunk-size bytes are written
	fs/ntfs3: Fix warning possible deadlock in ntfs_set_state
	fs/ntfs3: Stale inode instead of bad
	fs/ntfs3: Fix possible deadlock in mi_read
	fs/ntfs3: Additional check in ni_clear()
	scsi: scsi_transport_fc: Allow setting rport state to current state
	net: amd: mvme147: Fix probe banner message
	NFS: remove revoked delegation from server's delegation list
	misc: sgi-gru: Don't disable preemption in GRU driver
	usb: gadget: dummy_hcd: Switch to hrtimer transfer scheduler
	usb: gadget: dummy_hcd: Set transfer interval to 1 microframe
	usb: gadget: dummy_hcd: execute hrtimer callback in softirq context
	USB: gadget: dummy-hcd: Fix "task hung" problem
	ALSA: usb-audio: Add quirks for Dell WD19 dock
	usbip: tools: Fix detach_port() invalid port error path
	usb: phy: Fix API devm_usb_put_phy() can not release the phy
	usb: typec: fix unreleased fwnode_handle in typec_port_register_altmodes()
	xhci: Fix Link TRB DMA in command ring stopped completion event
	xhci: Use pm_runtime_get to prevent RPM on unsupported systems
	Revert "driver core: Fix uevent_show() vs driver detach race"
	wifi: mac80211: do not pass a stopped vif to the driver in .get_txpower
	wifi: ath10k: Fix memory leak in management tx
	wifi: cfg80211: clear wdev->cqm_config pointer on free
	wifi: iwlegacy: Clear stale interrupts before resuming device
	staging: iio: frequency: ad9832: fix division by zero in ad9832_calc_freqreg()
	iio: adc: ad7124: fix division by zero in ad7124_set_channel_odr()
	iio: light: veml6030: fix microlux value calculation
	nilfs2: fix potential deadlock with newly created symlinks
	block: fix sanity checks in blk_rq_map_user_bvec
	cgroup/bpf: use a dedicated workqueue for cgroup bpf destruction
	riscv: vdso: Prevent the compiler from inserting calls to memset()
	ALSA: hda/realtek: Limit internal Mic boost on Dell platform
	riscv: efi: Set NX compat flag in PE/COFF header
	riscv: Use '%u' to format the output of 'cpu'
	riscv: Remove unused GENERATING_ASM_OFFSETS
	riscv: Remove duplicated GET_RM
	cxl/acpi: Move rescan to the workqueue
	cxl/port: Fix cxl_bus_rescan() vs bus_rescan_devices()
	mm/page_alloc: rename ALLOC_HIGH to ALLOC_MIN_RESERVE
	mm/page_alloc: treat RT tasks similar to __GFP_HIGH
	mm/page_alloc: explicitly record high-order atomic allocations in alloc_flags
	mm/page_alloc: explicitly define what alloc flags deplete min reserves
	mm/page_alloc: explicitly define how __GFP_HIGH non-blocking allocations accesses reserves
	mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves
	ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow
	mctp i2c: handle NULL header address
	ALSA: hda/realtek: Fix headset mic on TUXEDO Stellaris 16 Gen6 mb1
	nvmet-auth: assign dh_key to NULL after kfree_sensitive
	kasan: remove vmalloc_percpu test
	io_uring: rename kiocb_end_write() local helper
	fs: create kiocb_{start,end}_write() helpers
	io_uring: use kiocb_{start,end}_write() helpers
	io_uring/rw: fix missing NOWAIT check for O_DIRECT start write
	mm: migrate: try again if THP split is failed due to page refcnt
	migrate: convert unmap_and_move() to use folios
	migrate: convert migrate_pages() to use folios
	mm/migrate.c: stop using 0 as NULL pointer
	migrate_pages: organize stats with struct migrate_pages_stats
	migrate_pages: separate hugetlb folios migration
	migrate_pages: restrict number of pages to migrate in batch
	migrate_pages: split unmap_and_move() to _unmap() and _move()
	vmscan,migrate: fix page count imbalance on node stats when demoting pages
	io_uring: always lock __io_cqring_overflow_flush
	x86/bugs: Use code segment selector for VERW operand
	wifi: mac80211: fix NULL dereference at band check in starting tx ba session
	nilfs2: fix kernel bug due to missing clearing of checked flag
	wifi: iwlwifi: mvm: fix 6 GHz scan construction
	mm: shmem: fix data-race in shmem_getattr()
	LoongArch: Fix build errors due to backported TIMENS
	mtd: spi-nor: winbond: fix w25q128 regression
	drm/amd/display: Add null checks for 'stream' and 'plane' before dereferencing
	drm/amd/display: Skip on writeback when it's not applicable
	vt: prevent kernel-infoleak in con_font_get()
	mm: avoid gcc complaint about pointer casting
	migrate_pages_batch: fix statistics for longterm pin retry
	Linux 6.1.116

Change-Id: Iaffbf84fc3f7e545b5a8d2956b3c57df84abdab4
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
Greg Kroah-Hartman
2024-11-28 18:38:58 +00:00
24 changed files with 677 additions and 308 deletions

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
VERSION = 6 VERSION = 6
PATCHLEVEL = 1 PATCHLEVEL = 1
SUBLEVEL = 115 SUBLEVEL = 116
EXTRAVERSION = EXTRAVERSION =
NAME = Curry Ramen NAME = Curry Ramen

View File

@@ -40,6 +40,8 @@ static struct page *vdso_pages[] = { NULL };
struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_data *vdso_data = generic_vdso_data.data;
struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata;
static struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{ {
current->mm->context.vdso = (void *)(new_vma->vm_start); current->mm->context.vdso = (void *)(new_vma->vm_start);
@@ -139,13 +141,37 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm); mmap_read_lock(mm);
for_each_vma(vmi, vma) { for_each_vma(vmi, vma) {
unsigned long size = vma->vm_end - vma->vm_start;
if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) if (vma_is_special_mapping(vma, &vdso_info.data_mapping))
zap_vma_pages(vma); zap_page_range(vma, vma->vm_start, size);
} }
mmap_read_unlock(mm); mmap_read_unlock(mm);
return 0; return 0;
} }
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
if (likely(vma->vm_mm == current->mm))
return current->nsproxy->time_ns->vvar_page;
/*
* VM_PFNMAP | VM_IO protect .fault() handler from being called
* through interfaces like /proc/$pid/mem or
* process_vm_{readv,writev}() as long as there's no .access()
* in special_mapping_vmops.
* For more details see check_vma_flags() and __access_remote_vm().
*/
WARN(1, "vvar_page accessed remotely");
return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
return NULL;
}
#endif #endif
static unsigned long vdso_base(void) static unsigned long vdso_base(void)

View File

@@ -211,7 +211,16 @@
*/ */
.macro CLEAR_CPU_BUFFERS .macro CLEAR_CPU_BUFFERS
ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
verw _ASM_RIP(mds_verw_sel) #ifdef CONFIG_X86_64
verw mds_verw_sel(%rip)
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
verw %cs:mds_verw_sel
#endif
.Lskip_verw_\@: .Lskip_verw_\@:
.endm .endm

View File

@@ -2990,6 +2990,10 @@ static int dm_resume(void *handle)
/* Do mst topology probing after resuming cached state*/ /* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter); drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) { drm_for_each_connector_iter(connector, &iter) {
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
aconnector = to_amdgpu_dm_connector(connector); aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch || if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_port) aconnector->mst_port)
@@ -5722,6 +5726,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
&aconnector->base.probed_modes : &aconnector->base.probed_modes :
&aconnector->base.modes; &aconnector->base.modes;
if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
return NULL;
if (aconnector->freesync_vid_base.clock != 0) if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base; return &aconnector->freesync_vid_base;
@@ -8242,6 +8249,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
continue; continue;
notify: notify:
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
aconnector = to_amdgpu_dm_connector(connector); aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock); mutex_lock(&adev->dm.audio_lock);

View File

@@ -762,6 +762,9 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
stream = dc->current_state->streams[0]; stream = dc->current_state->streams[0];
plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL); plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL);
if (!stream || !plane)
return false;
if (stream && plane) { if (stream && plane) {
cursor_cache_enable = stream->cursor_position.enable && cursor_cache_enable = stream->cursor_position.enable &&
plane->address.grph.cursor_cache_addr.quad_part; plane->address.grph.cursor_cache_addr.quad_part;

View File

@@ -120,9 +120,10 @@ static const struct flash_info winbond_nor_parts[] = {
NO_SFDP_FLAGS(SECT_4K) }, NO_SFDP_FLAGS(SECT_4K) },
{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16) { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16)
NO_SFDP_FLAGS(SECT_4K) }, NO_SFDP_FLAGS(SECT_4K) },
{ "w25q128", INFO(0xef4018, 0, 0, 0) { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256)
PARSE_SFDP FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
SPI_NOR_QUAD_READ) },
{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512) { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512)
NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)
.fixups = &w25q256_fixups }, .fixups = &w25q256_fixups },

View File

@@ -547,6 +547,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
if (len > MCTP_I2C_MAXMTU) if (len > MCTP_I2C_MAXMTU)
return -EMSGSIZE; return -EMSGSIZE;
if (!daddr || !saddr)
return -EINVAL;
lldst = *((u8 *)daddr); lldst = *((u8 *)daddr);
llsrc = *((u8 *)saddr); llsrc = *((u8 *)saddr);

View File

@@ -1739,7 +1739,8 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm *mvm,
&cp->channel_config[ch_cnt]; &cp->channel_config[ch_cnt];
u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0; u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0;
u8 j, k, s_max = 0, b_max = 0, n_used_bssid_entries; u8 k, s_max = 0, b_max = 0, n_used_bssid_entries;
u32 j;
bool force_passive, found = false, allow_passive = true, bool force_passive, found = false, allow_passive = true,
unsolicited_probe_on_chan = false, psc_no_listen = false; unsolicited_probe_on_chan = false, psc_no_listen = false;

View File

@@ -101,6 +101,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
pr_debug("%s: ctrl %d failed to generate private key, err %d\n", pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
__func__, ctrl->cntlid, ret); __func__, ctrl->cntlid, ret);
kfree_sensitive(ctrl->dh_key); kfree_sensitive(ctrl->dh_key);
ctrl->dh_key = NULL;
return ret; return ret;
} }
ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm); ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);

View File

@@ -4593,7 +4593,7 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op)
int c; int c;
if (op->data) { if (op->data) {
font.data = kmalloc(max_font_size, GFP_KERNEL); font.data = kzalloc(max_font_size, GFP_KERNEL);
if (!font.data) if (!font.data)
return -ENOMEM; return -ENOMEM;
} else } else

View File

@@ -404,6 +404,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
ClearPageUptodate(page); ClearPageUptodate(page);
ClearPageMappedToDisk(page); ClearPageMappedToDisk(page);
ClearPageChecked(page);
if (page_has_buffers(page)) { if (page_has_buffers(page)) {
struct buffer_head *bh, *head; struct buffer_head *bh, *head;

View File

@@ -1784,6 +1784,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
return 0; return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
if (byte_start > id_count || byte_start + byte_len > id_count) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
}
ret = ocfs2_truncate_inline(inode, di_bh, byte_start, ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 0); byte_start + byte_len, 0);
if (ret) { if (ret) {

View File

@@ -3039,6 +3039,42 @@ static inline void file_end_write(struct file *file)
__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
} }
/**
* kiocb_start_write - get write access to a superblock for async file io
* @iocb: the io context we want to submit the write with
*
* This is a variant of sb_start_write() for async io submission.
* Should be matched with a call to kiocb_end_write().
*/
static inline void kiocb_start_write(struct kiocb *iocb)
{
struct inode *inode = file_inode(iocb->ki_filp);
sb_start_write(inode->i_sb);
/*
* Fool lockdep by telling it the lock got released so that it
* doesn't complain about the held lock when we return to userspace.
*/
__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
}
/**
* kiocb_end_write - drop write access to a superblock after async file io
* @iocb: the io context we submitted the write with
*
* Should be matched with a call to kiocb_start_write().
*/
static inline void kiocb_end_write(struct kiocb *iocb)
{
struct inode *inode = file_inode(iocb->ki_filp);
/*
* Tell lockdep we inherited freeze protection from submission thread.
*/
__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
sb_end_write(inode->i_sb);
}
/* /*
* This is used for regular files where some users -- especially the * This is used for regular files where some users -- especially the
* currently executed binary in a process, previously handled via * currently executed binary in a process, previously handled via

View File

@@ -18,6 +18,7 @@ struct migration_target_control;
* - zero on page migration success; * - zero on page migration success;
*/ */
#define MIGRATEPAGE_SUCCESS 0 #define MIGRATEPAGE_SUCCESS 0
#define MIGRATEPAGE_UNMAP 1
/** /**
* struct movable_operations - Driver page migration * struct movable_operations - Driver page migration

View File

@@ -593,6 +593,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
bool all_flushed; bool all_flushed;
size_t cqe_size = sizeof(struct io_uring_cqe); size_t cqe_size = sizeof(struct io_uring_cqe);
lockdep_assert_held(&ctx->uring_lock);
if (!force && __io_cqring_events(ctx) == ctx->cq_entries) if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
return false; return false;
@@ -647,12 +649,9 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
bool ret = true; bool ret = true;
if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
/* iopoll syncs against uring_lock, not completion_lock */ mutex_lock(&ctx->uring_lock);
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
ret = __io_cqring_overflow_flush(ctx, false); ret = __io_cqring_overflow_flush(ctx, false);
if (ctx->flags & IORING_SETUP_IOPOLL) mutex_unlock(&ctx->uring_lock);
mutex_unlock(&ctx->uring_lock);
} }
return ret; return ret;
@@ -1405,6 +1404,8 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
int ret = 0; int ret = 0;
unsigned long check_cq; unsigned long check_cq;
lockdep_assert_held(&ctx->uring_lock);
if (!io_allowed_run_tw(ctx)) if (!io_allowed_run_tw(ctx))
return -EEXIST; return -EEXIST;

View File

@@ -220,17 +220,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
} }
#endif #endif
static void kiocb_end_write(struct io_kiocb *req) static void io_req_end_write(struct io_kiocb *req)
{ {
/*
* Tell lockdep we inherited freeze protection from submission
* thread.
*/
if (req->flags & REQ_F_ISREG) { if (req->flags & REQ_F_ISREG) {
struct super_block *sb = file_inode(req->file)->i_sb; struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
__sb_writers_acquired(sb, SB_FREEZE_WRITE); kiocb_end_write(&rw->kiocb);
sb_end_write(sb);
} }
} }
@@ -243,7 +238,7 @@ static void io_req_io_end(struct io_kiocb *req)
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
if (rw->kiocb.ki_flags & IOCB_WRITE) { if (rw->kiocb.ki_flags & IOCB_WRITE) {
kiocb_end_write(req); io_req_end_write(req);
fsnotify_modify(req->file); fsnotify_modify(req->file);
} else { } else {
fsnotify_access(req->file); fsnotify_access(req->file);
@@ -307,7 +302,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
struct io_kiocb *req = cmd_to_io_kiocb(rw); struct io_kiocb *req = cmd_to_io_kiocb(rw);
if (kiocb->ki_flags & IOCB_WRITE) if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req); io_req_end_write(req);
if (unlikely(res != req->cqe.res)) { if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) { if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -844,6 +839,25 @@ done:
return kiocb_done(req, ret, issue_flags); return kiocb_done(req, ret, issue_flags);
} }
static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
{
struct inode *inode;
bool ret;
if (!(req->flags & REQ_F_ISREG))
return true;
if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
kiocb_start_write(kiocb);
return true;
}
inode = file_inode(kiocb->ki_filp);
ret = sb_start_write_trylock(inode->i_sb);
if (ret)
__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
return ret;
}
int io_write(struct io_kiocb *req, unsigned int issue_flags) int io_write(struct io_kiocb *req, unsigned int issue_flags)
{ {
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
@@ -897,18 +911,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
return ret; return ret;
} }
/* if (unlikely(!io_kiocb_start_write(req, kiocb)))
* Open-code file_start_write here to grab freeze protection, return -EAGAIN;
* which will be released by another thread in
* io_complete_rw(). Fool lockdep by telling it the lock got
* released so that it doesn't complain about the held lock when
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
sb_start_write(file_inode(req->file)->i_sb);
__sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE; kiocb->ki_flags |= IOCB_WRITE;
if (likely(req->file->f_op->write_iter)) if (likely(req->file->f_op->write_iter))
@@ -956,7 +960,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
io->bytes_done += ret2; io->bytes_done += ret2;
if (kiocb->ki_flags & IOCB_WRITE) if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req); io_req_end_write(req);
return ret ? ret : -EAGAIN; return ret ? ret : -EAGAIN;
} }
done: done:
@@ -967,7 +971,7 @@ copy_iov:
ret = io_setup_async_rw(req, iovec, s, false); ret = io_setup_async_rw(req, iovec, s, false);
if (!ret) { if (!ret) {
if (kiocb->ki_flags & IOCB_WRITE) if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req); io_req_end_write(req);
return -EAGAIN; return -EAGAIN;
} }
return ret; return ret;

View File

@@ -2868,7 +2868,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
* split PMDs * split PMDs
*/ */
if (!can_split_folio(folio, &extra_pins)) { if (!can_split_folio(folio, &extra_pins)) {
ret = -EBUSY; ret = -EAGAIN;
goto out_unlock; goto out_unlock;
} }
@@ -2920,7 +2920,7 @@ fail:
xas_unlock(&xas); xas_unlock(&xas);
local_irq_enable(); local_irq_enable();
remap_page(folio, folio_nr_pages(folio)); remap_page(folio, folio_nr_pages(folio));
ret = -EBUSY; ret = -EAGAIN;
} }
out_unlock: out_unlock:

View File

@@ -757,7 +757,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_OOM ALLOC_NO_WATERMARKS #define ALLOC_OOM ALLOC_NO_WATERMARKS
#endif #endif
#define ALLOC_HARDER 0x10 /* try to alloc harder */ #define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
* to 25% of the min watermark or
* 62.5% if __GFP_HIGH is set.
*/
#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
* of the min watermark. * of the min watermark.
*/ */
@@ -771,6 +774,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
#define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
/* Flags that allow allocations below the min watermark. */
#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
enum ttu_flags; enum ttu_flags;
struct tlbflush_unmap_batch; struct tlbflush_unmap_batch;

View File

@@ -1260,32 +1260,6 @@ static void vm_map_ram_tags(struct kunit *test)
free_pages((unsigned long)p_ptr, 1); free_pages((unsigned long)p_ptr, 1);
} }
static void vmalloc_percpu(struct kunit *test)
{
char __percpu *ptr;
int cpu;
/*
* This test is specifically crafted for the software tag-based mode,
* the only tag-based mode that poisons percpu mappings.
*/
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
for_each_possible_cpu(cpu) {
char *c_ptr = per_cpu_ptr(ptr, cpu);
KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
/* Make sure that in-bounds accesses don't crash the kernel. */
*c_ptr = 0;
}
free_percpu(ptr);
}
/* /*
* Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
* KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
@@ -1439,7 +1413,6 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmalloc_oob),
KUNIT_CASE(vmap_tags), KUNIT_CASE(vmap_tags),
KUNIT_CASE(vm_map_ram_tags), KUNIT_CASE(vm_map_ram_tags),
KUNIT_CASE(vmalloc_percpu),
KUNIT_CASE(match_all_not_assigned), KUNIT_CASE(match_all_not_assigned),
KUNIT_CASE(match_all_ptr_tag), KUNIT_CASE(match_all_ptr_tag),
KUNIT_CASE(match_all_mem_tag), KUNIT_CASE(match_all_mem_tag),

View File

@@ -1018,11 +1018,59 @@ out:
return rc; return rc;
} }
static int __unmap_and_move(struct folio *src, struct folio *dst, /*
* To record some information during migration, we use some unused
* fields (mapping and private) of struct folio of the newly allocated
* destination folio. This is safe because nobody is using them
* except us.
*/
union migration_ptr {
struct anon_vma *anon_vma;
struct address_space *mapping;
};
static void __migrate_folio_record(struct folio *dst,
unsigned long page_was_mapped,
struct anon_vma *anon_vma)
{
union migration_ptr ptr = { .anon_vma = anon_vma };
dst->mapping = ptr.mapping;
dst->private = (void *)page_was_mapped;
}
static void __migrate_folio_extract(struct folio *dst,
int *page_was_mappedp,
struct anon_vma **anon_vmap)
{
union migration_ptr ptr = { .mapping = dst->mapping };
*anon_vmap = ptr.anon_vma;
*page_was_mappedp = (unsigned long)dst->private;
dst->mapping = NULL;
dst->private = NULL;
}
/* Cleanup src folio upon migration success */
static void migrate_folio_done(struct folio *src,
enum migrate_reason reason)
{
/*
* Compaction can also migrate non-LRU pages, which are
* not accounted to NR_ISOLATED_*. They can be recognized
* as __PageMovable.
*/
if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION)
mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
folio_is_file_lru(src), -folio_nr_pages(src));
if (reason != MR_MEMORY_FAILURE)
/* We release the page in page_handle_poison. */
folio_put(src);
}
static int __migrate_folio_unmap(struct folio *src, struct folio *dst,
int force, enum migrate_mode mode) int force, enum migrate_mode mode)
{ {
int rc = -EAGAIN; int rc = -EAGAIN;
bool page_was_mapped = false; int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL; struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(&src->page); bool is_lru = !__PageMovable(&src->page);
@@ -1098,8 +1146,8 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
goto out_unlock; goto out_unlock;
if (unlikely(!is_lru)) { if (unlikely(!is_lru)) {
rc = move_to_new_folio(dst, src, mode); __migrate_folio_record(dst, page_was_mapped, anon_vma);
goto out_unlock_both; return MIGRATEPAGE_UNMAP;
} }
/* /*
@@ -1124,11 +1172,42 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
VM_BUG_ON_FOLIO(folio_test_anon(src) && VM_BUG_ON_FOLIO(folio_test_anon(src) &&
!folio_test_ksm(src) && !anon_vma, src); !folio_test_ksm(src) && !anon_vma, src);
try_to_migrate(src, 0); try_to_migrate(src, 0);
page_was_mapped = true; page_was_mapped = 1;
} }
if (!folio_mapped(src)) if (!folio_mapped(src)) {
rc = move_to_new_folio(dst, src, mode); __migrate_folio_record(dst, page_was_mapped, anon_vma);
return MIGRATEPAGE_UNMAP;
}
if (page_was_mapped)
remove_migration_ptes(src, src, false);
out_unlock_both:
folio_unlock(dst);
out_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma)
put_anon_vma(anon_vma);
folio_unlock(src);
out:
return rc;
}
static int __migrate_folio_move(struct folio *src, struct folio *dst,
enum migrate_mode mode)
{
int rc;
int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(&src->page);
__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
rc = move_to_new_folio(dst, src, mode);
if (unlikely(!is_lru))
goto out_unlock_both;
/* /*
* When successful, push dst to LRU immediately: so that if it * When successful, push dst to LRU immediately: so that if it
@@ -1151,12 +1230,10 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
out_unlock_both: out_unlock_both:
folio_unlock(dst); folio_unlock(dst);
out_unlock:
/* Drop an anon_vma reference if we took one */ /* Drop an anon_vma reference if we took one */
if (anon_vma) if (anon_vma)
put_anon_vma(anon_vma); put_anon_vma(anon_vma);
folio_unlock(src); folio_unlock(src);
out:
/* /*
* If migration is successful, decrease refcount of dst, * If migration is successful, decrease refcount of dst,
* which will not free the page because new page owner increased * which will not free the page because new page owner increased
@@ -1168,80 +1245,92 @@ out:
return rc; return rc;
} }
/* /* Obtain the lock on page, remove all ptes. */
* Obtain the lock on page, remove all ptes and migrate the page static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
* to the newly allocated page in newpage. unsigned long private, struct folio *src,
*/ struct folio **dstp, int force,
static int unmap_and_move(new_page_t get_new_page, enum migrate_mode mode, enum migrate_reason reason,
free_page_t put_new_page, struct list_head *ret)
unsigned long private, struct page *page,
int force, enum migrate_mode mode,
enum migrate_reason reason,
struct list_head *ret)
{ {
struct folio *dst, *src = page_folio(page); struct folio *dst;
int rc = MIGRATEPAGE_SUCCESS; int rc = MIGRATEPAGE_UNMAP;
struct page *newpage = NULL; struct page *newpage = NULL;
if (!thp_migration_supported() && PageTransHuge(page)) if (!thp_migration_supported() && folio_test_transhuge(src))
return -ENOSYS; return -ENOSYS;
if (page_count(page) == 1) { if (folio_ref_count(src) == 1) {
/* Page was freed from under us. So we are done. */ /* Folio was freed from under us. So we are done. */
ClearPageActive(page); folio_clear_active(src);
ClearPageUnevictable(page); folio_clear_unevictable(src);
/* free_pages_prepare() will clear PG_isolated. */ /* free_pages_prepare() will clear PG_isolated. */
goto out; list_del(&src->lru);
migrate_folio_done(src, reason);
return MIGRATEPAGE_SUCCESS;
} }
newpage = get_new_page(page, private); newpage = get_new_page(&src->page, private);
if (!newpage) if (!newpage)
return -ENOMEM; return -ENOMEM;
dst = page_folio(newpage); dst = page_folio(newpage);
*dstp = dst;
newpage->private = 0; dst->private = NULL;
rc = __unmap_and_move(src, dst, force, mode); rc = __migrate_folio_unmap(src, dst, force, mode);
if (rc == MIGRATEPAGE_UNMAP)
return rc;
/*
* A folio that has not been unmapped will be restored to
* right list unless we want to retry.
*/
if (rc != -EAGAIN)
list_move_tail(&src->lru, ret);
if (put_new_page)
put_new_page(&dst->page, private);
else
folio_put(dst);
return rc;
}
/* Migrate the folio to the newly allocated folio in dst. */
static int migrate_folio_move(free_page_t put_new_page, unsigned long private,
struct folio *src, struct folio *dst,
enum migrate_mode mode, enum migrate_reason reason,
struct list_head *ret)
{
int rc;
rc = __migrate_folio_move(src, dst, mode);
if (rc == MIGRATEPAGE_SUCCESS) if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason); set_page_owner_migrate_reason(&dst->page, reason);
out:
if (rc != -EAGAIN) { if (rc != -EAGAIN) {
/* /*
* A page that has been migrated has all references * A folio that has been migrated has all references
* removed and will be freed. A page that has not been * removed and will be freed. A folio that has not been
* migrated will have kept its references and be restored. * migrated will have kept its references and be restored.
*/ */
list_del(&page->lru); list_del(&src->lru);
} }
/* /*
* If migration is successful, releases reference grabbed during * If migration is successful, releases reference grabbed during
* isolation. Otherwise, restore the page to right list unless * isolation. Otherwise, restore the folio to right list unless
* we want to retry. * we want to retry.
*/ */
if (rc == MIGRATEPAGE_SUCCESS) { if (rc == MIGRATEPAGE_SUCCESS) {
/* migrate_folio_done(src, reason);
* Compaction can migrate also non-LRU pages which are
* not accounted to NR_ISOLATED_*. They can be recognized
* as __PageMovable
*/
if (likely(!__PageMovable(page)))
mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
page_is_file_lru(page), -thp_nr_pages(page));
if (reason != MR_MEMORY_FAILURE)
/*
* We release the page in page_handle_poison.
*/
put_page(page);
} else { } else {
if (rc != -EAGAIN) if (rc != -EAGAIN)
list_add_tail(&page->lru, ret); list_add_tail(&src->lru, ret);
if (put_new_page) if (put_new_page)
put_new_page(newpage, private); put_new_page(&dst->page, private);
else else
put_page(newpage); folio_put(dst);
} }
return rc; return rc;
@@ -1392,234 +1481,411 @@ out:
return rc; return rc;
} }
static inline int try_split_thp(struct page *page, struct list_head *split_pages) static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
{ {
int rc; int rc;
lock_page(page); folio_lock(folio);
rc = split_huge_page_to_list(page, split_pages); rc = split_folio_to_list(folio, split_folios);
unlock_page(page); folio_unlock(folio);
if (!rc) if (!rc)
list_move_tail(&page->lru, split_pages); list_move_tail(&folio->lru, split_folios);
return rc; return rc;
} }
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR
#else
#define NR_MAX_BATCHED_MIGRATION 512
#endif
#define NR_MAX_MIGRATE_PAGES_RETRY 10
struct migrate_pages_stats {
int nr_succeeded; /* Normal and large folios migrated successfully, in
units of base pages */
int nr_failed_pages; /* Normal and large folios failed to be migrated, in
units of base pages. Untried folios aren't counted */
int nr_thp_succeeded; /* THP migrated successfully */
int nr_thp_failed; /* THP failed to be migrated */
int nr_thp_split; /* THP split before migrating */
};
/* /*
* migrate_pages - migrate the pages specified in a list, to the free pages * Returns the number of hugetlb folios that were not migrated, or an error code
* supplied as the target for the page migration * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable
* * any more because the list has become empty or no retryable hugetlb folios
* @from: The list of pages to be migrated. * exist any more. It is caller's responsibility to call putback_movable_pages()
* @get_new_page: The function used to allocate free pages to be used * only if ret != 0.
* as the target of the page migration.
* @put_new_page: The function used to free target pages if migration
* fails, or NULL if no special handling is necessary.
* @private: Private data to be passed on to get_new_page()
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
* @reason: The reason for page migration.
* @ret_succeeded: Set to the number of normal pages migrated successfully if
* the caller passes a non-NULL pointer.
*
* The function returns after 10 attempts or if no pages are movable any more
* because the list has become empty or no retryable pages exist any more.
* It is caller's responsibility to call putback_movable_pages() to return pages
* to the LRU or free list only if ret != 0.
*
* Returns the number of {normal page, THP, hugetlb} that were not migrated, or
* an error code. The number of THP splits will be considered as the number of
* non-migrated THP, no matter how many subpages of the THP are migrated successfully.
*/ */
int migrate_pages(struct list_head *from, new_page_t get_new_page, static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private, free_page_t put_new_page, unsigned long private,
enum migrate_mode mode, int reason, unsigned int *ret_succeeded) enum migrate_mode mode, int reason,
struct migrate_pages_stats *stats,
struct list_head *ret_folios)
{ {
int retry = 1; int retry = 1;
int thp_retry = 1;
int nr_failed = 0; int nr_failed = 0;
int nr_failed_pages = 0;
int nr_retry_pages = 0; int nr_retry_pages = 0;
int nr_succeeded = 0;
int nr_thp_succeeded = 0;
int nr_thp_failed = 0;
int nr_thp_split = 0;
int pass = 0; int pass = 0;
bool is_thp = false; struct folio *folio, *folio2;
struct page *page; int rc, nr_pages;
struct page *page2;
int rc, nr_subpages;
LIST_HEAD(ret_pages);
LIST_HEAD(thp_split_pages);
bool nosplit = (reason == MR_NUMA_MISPLACED);
bool no_subpage_counting = false;
trace_mm_migrate_pages_start(mode, reason); for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) {
thp_subpage_migration:
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0; retry = 0;
thp_retry = 0;
nr_retry_pages = 0; nr_retry_pages = 0;
list_for_each_entry_safe(page, page2, from, lru) { list_for_each_entry_safe(folio, folio2, from, lru) {
/* if (!folio_test_hugetlb(folio))
* THP statistics is based on the source huge page. continue;
* Capture required information that might get lost
* during migration. nr_pages = folio_nr_pages(folio);
*/
is_thp = PageTransHuge(page) && !PageHuge(page);
nr_subpages = compound_nr(page);
cond_resched(); cond_resched();
if (PageHuge(page)) rc = unmap_and_move_huge_page(get_new_page,
rc = unmap_and_move_huge_page(get_new_page, put_new_page, private,
put_new_page, private, page, &folio->page, pass > 2, mode,
pass > 2, mode, reason, reason, ret_folios);
&ret_pages);
else
rc = unmap_and_move(get_new_page, put_new_page,
private, page, pass > 2, mode,
reason, &ret_pages);
/* /*
* The rules are: * The rules are:
* Success: non hugetlb page will be freed, hugetlb * Success: hugetlb folio will be put back
* page will be put back
* -EAGAIN: stay on the from list * -EAGAIN: stay on the from list
* -ENOMEM: stay on the from list * -ENOMEM: stay on the from list
* -ENOSYS: stay on the from list * -ENOSYS: stay on the from list
* Other errno: put on ret_pages list then splice to * Other errno: put on ret_folios list
* from list
*/ */
switch(rc) { switch(rc) {
/*
* THP migration might be unsupported or the
* allocation could've failed so we should
* retry on the same page with the THP split
* to base pages.
*
* Sub-pages are put in thp_split_pages, and
* we will migrate them after the rest of the
* list is processed.
*/
case -ENOSYS: case -ENOSYS:
/* THP migration is unsupported */
if (is_thp) {
nr_thp_failed++;
if (!try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
break;
}
/* Hugetlb migration is unsupported */ /* Hugetlb migration is unsupported */
} else if (!no_subpage_counting) { nr_failed++;
nr_failed++; stats->nr_failed_pages += nr_pages;
} list_move_tail(&folio->lru, ret_folios);
nr_failed_pages += nr_subpages;
list_move_tail(&page->lru, &ret_pages);
break; break;
case -ENOMEM: case -ENOMEM:
/* /*
* When memory is low, don't bother to try to migrate * When memory is low, don't bother to try to migrate
* other pages, just exit. * other folios, just exit.
*/ */
if (is_thp) { stats->nr_failed_pages += nr_pages + nr_retry_pages;
nr_thp_failed++; return -ENOMEM;
/* THP NUMA faulting doesn't split THP to retry. */
if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
break;
}
} else if (!no_subpage_counting) {
nr_failed++;
}
nr_failed_pages += nr_subpages + nr_retry_pages;
/*
* There might be some subpages of fail-to-migrate THPs
* left in thp_split_pages list. Move them back to migration
* list so that they could be put back to the right list by
* the caller otherwise the page refcnt will be leaked.
*/
list_splice_init(&thp_split_pages, from);
/* nr_failed isn't updated for not used */
nr_thp_failed += thp_retry;
goto out;
case -EAGAIN: case -EAGAIN:
if (is_thp) retry++;
thp_retry++; nr_retry_pages += nr_pages;
else if (!no_subpage_counting)
retry++;
nr_retry_pages += nr_subpages;
break; break;
case MIGRATEPAGE_SUCCESS: case MIGRATEPAGE_SUCCESS:
nr_succeeded += nr_subpages; stats->nr_succeeded += nr_pages;
if (is_thp)
nr_thp_succeeded++;
break; break;
default: default:
/* /*
* Permanent failure (-EBUSY, etc.): * Permanent failure (-EBUSY, etc.):
* unlike -EAGAIN case, the failed page is * unlike -EAGAIN case, the failed folio is
* removed from migration page list and not * removed from migration folio list and not
* retried in the next outer loop. * retried in the next outer loop.
*/ */
if (is_thp) nr_failed++;
nr_thp_failed++; stats->nr_failed_pages += nr_pages;
else if (!no_subpage_counting) break;
nr_failed++; }
}
}
/*
* nr_failed is number of hugetlb folios failed to be migrated. After
* NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb
* folios as failed.
*/
nr_failed += retry;
stats->nr_failed_pages += nr_retry_pages;
nr_failed_pages += nr_subpages; return nr_failed;
}
static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private,
enum migrate_mode mode, int reason, struct list_head *ret_folios,
struct migrate_pages_stats *stats)
{
int retry = 1;
int large_retry = 1;
int thp_retry = 1;
int nr_failed = 0;
int nr_retry_pages = 0;
int nr_large_failed = 0;
int pass = 0;
bool is_large = false;
bool is_thp = false;
struct folio *folio, *folio2, *dst = NULL;
int rc, nr_pages;
LIST_HEAD(split_folios);
bool nosplit = (reason == MR_NUMA_MISPLACED);
bool no_split_folio_counting = false;
split_folio_migration:
for (pass = 0;
pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
pass++) {
retry = 0;
large_retry = 0;
thp_retry = 0;
nr_retry_pages = 0;
list_for_each_entry_safe(folio, folio2, from, lru) {
/*
* Large folio statistics is based on the source large
* folio. Capture required information that might get
* lost during migration.
*/
is_large = folio_test_large(folio);
is_thp = is_large && folio_test_pmd_mappable(folio);
nr_pages = folio_nr_pages(folio);
cond_resched();
rc = migrate_folio_unmap(get_new_page, put_new_page, private,
folio, &dst, pass > 2, mode,
reason, ret_folios);
if (rc == MIGRATEPAGE_UNMAP)
rc = migrate_folio_move(put_new_page, private,
folio, dst, mode,
reason, ret_folios);
/*
* The rules are:
* Success: folio will be freed
* -EAGAIN: stay on the from list
* -ENOMEM: stay on the from list
* -ENOSYS: stay on the from list
* Other errno: put on ret_folios list
*/
switch(rc) {
/*
* Large folio migration might be unsupported or
* the allocation could've failed so we should retry
* on the same folio with the large folio split
* to normal folios.
*
* Split folios are put in split_folios, and
* we will migrate them after the rest of the
* list is processed.
*/
case -ENOSYS:
/* Large folio migration is unsupported */
if (is_large) {
nr_large_failed++;
stats->nr_thp_failed += is_thp;
if (!try_split_folio(folio, &split_folios)) {
stats->nr_thp_split += is_thp;
break;
}
} else if (!no_split_folio_counting) {
nr_failed++;
}
stats->nr_failed_pages += nr_pages;
list_move_tail(&folio->lru, ret_folios);
break;
case -ENOMEM:
/*
* When memory is low, don't bother to try to migrate
* other folios, just exit.
*/
if (is_large) {
nr_large_failed++;
stats->nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */
if (!nosplit) {
int ret = try_split_folio(folio, &split_folios);
if (!ret) {
stats->nr_thp_split += is_thp;
break;
} else if (reason == MR_LONGTERM_PIN &&
ret == -EAGAIN) {
/*
* Try again to split large folio to
* mitigate the failure of longterm pinning.
*/
large_retry++;
thp_retry += is_thp;
nr_retry_pages += nr_pages;
/* Undo duplicated failure counting. */
nr_large_failed--;
stats->nr_thp_failed -= is_thp;
break;
}
}
} else if (!no_split_folio_counting) {
nr_failed++;
}
stats->nr_failed_pages += nr_pages + nr_retry_pages;
/*
* There might be some split folios of fail-to-migrate large
* folios left in split_folios list. Move them to ret_folios
* list so that they could be put back to the right list by
* the caller otherwise the folio refcnt will be leaked.
*/
list_splice_init(&split_folios, ret_folios);
/* nr_failed isn't updated for not used */
nr_large_failed += large_retry;
stats->nr_thp_failed += thp_retry;
goto out;
case -EAGAIN:
if (is_large) {
large_retry++;
thp_retry += is_thp;
} else if (!no_split_folio_counting) {
retry++;
}
nr_retry_pages += nr_pages;
break;
case MIGRATEPAGE_SUCCESS:
stats->nr_succeeded += nr_pages;
stats->nr_thp_succeeded += is_thp;
break;
default:
/*
* Permanent failure (-EBUSY, etc.):
* unlike -EAGAIN case, the failed folio is
* removed from migration folio list and not
* retried in the next outer loop.
*/
if (is_large) {
nr_large_failed++;
stats->nr_thp_failed += is_thp;
} else if (!no_split_folio_counting) {
nr_failed++;
}
stats->nr_failed_pages += nr_pages;
break; break;
} }
} }
} }
nr_failed += retry; nr_failed += retry;
nr_thp_failed += thp_retry; nr_large_failed += large_retry;
nr_failed_pages += nr_retry_pages; stats->nr_thp_failed += thp_retry;
stats->nr_failed_pages += nr_retry_pages;
/* /*
* Try to migrate subpages of fail-to-migrate THPs, no nr_failed * Try to migrate split folios of fail-to-migrate large folios, no
* counting in this round, since all subpages of a THP is counted * nr_failed counting in this round, since all split folios of a
* as 1 failure in the first round. * large folio is counted as 1 failure in the first round.
*/ */
if (!list_empty(&thp_split_pages)) { if (!list_empty(&split_folios)) {
/* /*
* Move non-migrated pages (after 10 retries) to ret_pages * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
* to avoid migrating them again. * retries) to ret_folios to avoid migrating them again.
*/ */
list_splice_init(from, &ret_pages); list_splice_init(from, ret_folios);
list_splice_init(&thp_split_pages, from); list_splice_init(&split_folios, from);
no_subpage_counting = true; no_split_folio_counting = true;
retry = 1; retry = 1;
goto thp_subpage_migration; goto split_folio_migration;
} }
rc = nr_failed + nr_thp_failed; rc = nr_failed + nr_large_failed;
out:
return rc;
}
/*
* migrate_pages - migrate the folios specified in a list, to the free folios
* supplied as the target for the page migration
*
* @from: The list of folios to be migrated.
* @get_new_page: The function used to allocate free folios to be used
* as the target of the folio migration.
* @put_new_page: The function used to free target folios if migration
* fails, or NULL if no special handling is necessary.
* @private: Private data to be passed on to get_new_page()
* @mode: The migration mode that specifies the constraints for
* folio migration, if any.
* @reason: The reason for folio migration.
* @ret_succeeded: Set to the number of folios migrated successfully if
* the caller passes a non-NULL pointer.
*
* The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios
* are movable any more because the list has become empty or no retryable folios
* exist any more. It is caller's responsibility to call putback_movable_pages()
* only if ret != 0.
*
* Returns the number of {normal folio, large folio, hugetlb} that were not
* migrated, or an error code. The number of large folio splits will be
* considered as the number of non-migrated large folio, no matter how many
* split folios of the large folio are migrated successfully.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private,
enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
{
int rc, rc_gather;
int nr_pages;
struct folio *folio, *folio2;
LIST_HEAD(folios);
LIST_HEAD(ret_folios);
struct migrate_pages_stats stats;
trace_mm_migrate_pages_start(mode, reason);
memset(&stats, 0, sizeof(stats));
rc_gather = migrate_hugetlbs(from, get_new_page, put_new_page, private,
mode, reason, &stats, &ret_folios);
if (rc_gather < 0)
goto out;
again:
nr_pages = 0;
list_for_each_entry_safe(folio, folio2, from, lru) {
/* Retried hugetlb folios will be kept in list */
if (folio_test_hugetlb(folio)) {
list_move_tail(&folio->lru, &ret_folios);
continue;
}
nr_pages += folio_nr_pages(folio);
if (nr_pages > NR_MAX_BATCHED_MIGRATION)
break;
}
if (nr_pages > NR_MAX_BATCHED_MIGRATION)
list_cut_before(&folios, from, &folio->lru);
else
list_splice_init(from, &folios);
rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
mode, reason, &ret_folios, &stats);
list_splice_tail_init(&folios, &ret_folios);
if (rc < 0) {
rc_gather = rc;
goto out;
}
rc_gather += rc;
if (!list_empty(from))
goto again;
out: out:
/* /*
* Put the permanent failure page back to migration list, they * Put the permanent failure folio back to migration list, they
* will be put back to the right list by the caller. * will be put back to the right list by the caller.
*/ */
list_splice(&ret_pages, from); list_splice(&ret_folios, from);
/* /*
* Return 0 in case all subpages of fail-to-migrate THPs are * Return 0 in case all split folios of fail-to-migrate large folios
* migrated successfully. * are migrated successfully.
*/ */
if (list_empty(from)) if (list_empty(from))
rc = 0; rc_gather = 0;
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
count_vm_events(PGMIGRATE_FAIL, nr_failed_pages); count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded, trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
nr_thp_failed, nr_thp_split, mode, reason); stats.nr_thp_succeeded, stats.nr_thp_failed,
stats.nr_thp_split, mode, reason);
if (ret_succeeded) if (ret_succeeded)
*ret_succeeded = nr_succeeded; *ret_succeeded = stats.nr_succeeded;
return rc; return rc_gather;
} }
EXPORT_SYMBOL_GPL(migrate_pages); EXPORT_SYMBOL_GPL(migrate_pages);

View File

@@ -3873,12 +3873,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
alloc_flags); alloc_flags);
/* /*
* If the allocation fails, allow OOM handling access * If the allocation fails, allow OOM handling and
* to HIGHATOMIC reserves as failing now is worse than * order-0 (atomic) allocs access to HIGHATOMIC
* failing a high-order atomic allocation in the * reserves as failing now is worse than failing a
* future. * high-order atomic allocation in the future.
*/ */
if (!page && (alloc_flags & ALLOC_OOM)) if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) { if (!page) {
@@ -4095,15 +4095,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
static inline long __zone_watermark_unusable_free(struct zone *z, static inline long __zone_watermark_unusable_free(struct zone *z,
unsigned int order, unsigned int alloc_flags) unsigned int order, unsigned int alloc_flags)
{ {
const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
long unusable_free = (1 << order) - 1; long unusable_free = (1 << order) - 1;
/* /*
* If the caller does not have rights to ALLOC_HARDER then subtract * If the caller does not have rights to reserves below the min
* the high-atomic reserves. This will over-estimate the size of the * watermark then subtract the high-atomic reserves. This will
* atomic reserve but it avoids a search. * over-estimate the size of the atomic reserve but it avoids a search.
*/ */
if (likely(!alloc_harder)) if (likely(!(alloc_flags & ALLOC_RESERVES)))
unusable_free += z->nr_reserved_highatomic; unusable_free += z->nr_reserved_highatomic;
#ifdef CONFIG_CMA #ifdef CONFIG_CMA
@@ -4127,25 +4126,37 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
{ {
long min = mark; long min = mark;
int o; int o;
const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
/* free_pages may go negative - that's OK */ /* free_pages may go negative - that's OK */
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
if (alloc_flags & ALLOC_MIN_RESERVE) if (unlikely(alloc_flags & ALLOC_RESERVES)) {
min -= min / 2;
if (unlikely(alloc_harder)) {
/* /*
* OOM victims can try even harder than normal ALLOC_HARDER * __GFP_HIGH allows access to 50% of the min reserve as well
* as OOM.
*/
if (alloc_flags & ALLOC_MIN_RESERVE) {
min -= min / 2;
/*
* Non-blocking allocations (e.g. GFP_ATOMIC) can
* access more reserves than just __GFP_HIGH. Other
* non-blocking allocations requests such as GFP_NOWAIT
* or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
* access to the min reserve.
*/
if (alloc_flags & ALLOC_NON_BLOCK)
min -= min / 4;
}
/*
* OOM victims can try even harder than the normal reserve
* users on the grounds that it's definitely going to be in * users on the grounds that it's definitely going to be in
* the exit path shortly and free memory. Any allocation it * the exit path shortly and free memory. Any allocation it
* makes during the free path will be small and short-lived. * makes during the free path will be small and short-lived.
*/ */
if (alloc_flags & ALLOC_OOM) if (alloc_flags & ALLOC_OOM)
min -= min / 2; min -= min / 2;
else
min -= min / 4;
} }
/* /*
@@ -5002,28 +5013,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
* The caller may dip into page reserves a bit more if the caller * The caller may dip into page reserves a bit more if the caller
* cannot run direct reclaim, or if the caller has realtime scheduling * cannot run direct reclaim, or if the caller has realtime scheduling
* policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
* set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH). * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
*/ */
alloc_flags |= (__force int) alloc_flags |= (__force int)
(gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
if (gfp_mask & __GFP_ATOMIC) { if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
/* /*
* Not worth trying to allocate harder for __GFP_NOMEMALLOC even * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
* if it can't schedule. * if it can't schedule.
*/ */
if (!(gfp_mask & __GFP_NOMEMALLOC)) { if (!(gfp_mask & __GFP_NOMEMALLOC)) {
alloc_flags |= ALLOC_HARDER; alloc_flags |= ALLOC_NON_BLOCK;
if (order > 0) if (order > 0)
alloc_flags |= ALLOC_HIGHATOMIC; alloc_flags |= ALLOC_HIGHATOMIC;
} }
/* /*
* Ignore cpuset mems for GFP_ATOMIC rather than fail, see the * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
* comment for __cpuset_node_allowed(). * GFP_ATOMIC) rather than fail, see the comment for
* __cpuset_node_allowed().
*/ */
alloc_flags &= ~ALLOC_CPUSET; if (alloc_flags & ALLOC_MIN_RESERVE)
alloc_flags &= ~ALLOC_CPUSET;
} else if (unlikely(rt_task(current)) && in_task()) } else if (unlikely(rt_task(current)) && in_task())
alloc_flags |= ALLOC_MIN_RESERVE; alloc_flags |= ALLOC_MIN_RESERVE;
@@ -5468,12 +5481,13 @@ nopage:
WARN_ON_ONCE_GFP(costly_order, gfp_mask); WARN_ON_ONCE_GFP(costly_order, gfp_mask);
/* /*
* Help non-failing allocations by giving them access to memory * Help non-failing allocations by giving some access to memory
* reserves but do not use ALLOC_NO_WATERMARKS because this * reserves normally used for high priority non-blocking
* allocations but do not use ALLOC_NO_WATERMARKS because this
* could deplete whole memory reserves which would just make * could deplete whole memory reserves which would just make
* the situation worse * the situation worse.
*/ */
page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
if (page) if (page)
goto got_pg; goto got_pg;

View File

@@ -1091,7 +1091,9 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
stat->attributes_mask |= (STATX_ATTR_APPEND | stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_IMMUTABLE | STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP); STATX_ATTR_NODUMP);
inode_lock_shared(inode);
generic_fillattr(&init_user_ns, inode, stat); generic_fillattr(&init_user_ns, inode, stat);
inode_unlock_shared(inode);
if (shmem_is_huge(NULL, inode, 0, false)) if (shmem_is_huge(NULL, inode, 0, false))
stat->blksize = HPAGE_PMD_SIZE; stat->blksize = HPAGE_PMD_SIZE;

View File

@@ -593,7 +593,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
return -EINVAL; return -EINVAL;
if (!pubsta->deflink.ht_cap.ht_supported && if (!pubsta->deflink.ht_cap.ht_supported &&
sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) !pubsta->deflink.vht_cap.vht_supported &&
!pubsta->deflink.he_cap.has_he &&
!pubsta->deflink.eht_cap.has_eht)
return -EINVAL; return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action)) if (WARN_ON_ONCE(!local->ops->ampdu_action))

View File

@@ -10214,6 +10214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),