diff --git a/Makefile b/Makefile index d750bfb591bc..45889b8fdf0c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 115 +SUBLEVEL = 116 EXTRAVERSION = NAME = Curry Ramen diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 59aa9dd466e8..64eb5386e7b2 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -40,6 +40,8 @@ static struct page *vdso_pages[] = { NULL }; struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; +static struct page *find_timens_vvar_page(struct vm_area_struct *vma); + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { current->mm->context.vdso = (void *)(new_vma->vm_start); @@ -139,13 +141,37 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { + unsigned long size = vma->vm_end - vma->vm_start; + if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) - zap_vma_pages(vma); + zap_page_range(vma, vma->vm_start, size); } mmap_read_unlock(mm); return 0; } + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} #endif static unsigned long vdso_base(void) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1e481d308e18..daf58a96e0a7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -211,7 +211,16 @@ */ .macro CLEAR_CPU_BUFFERS ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF - verw _ASM_RIP(mds_verw_sel) +#ifdef CONFIG_X86_64 + verw mds_verw_sel(%rip) +#else + /* + * In 32bit mode, the memory operand must be a %cs reference. The data + * segments may not be usable (vm86 mode), and the stack segment may not + * be flat (ESPFIX32). + */ + verw %cs:mds_verw_sel +#endif .Lskip_verw_\@: .endm diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8f7130f7d8c6..8dc0f70df24f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2990,6 +2990,10 @@ static int dm_resume(void *handle) /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); if (aconnector->dc_link->type != dc_connection_mst_branch || aconnector->mst_port) @@ -5722,6 +5726,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, &aconnector->base.probed_modes : &aconnector->base.modes; + if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + return NULL; + if (aconnector->freesync_vid_base.clock != 0) return &aconnector->freesync_vid_base; @@ -8242,6 +8249,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, continue; notify: + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + continue; + aconnector = to_amdgpu_dm_connector(connector); mutex_lock(&adev->dm.audio_lock); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 407f7889e8fd..7a643690fdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -762,6 +762,9 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) stream = dc->current_state->streams[0]; plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL); + if (!stream || !plane) + return false; + if (stream && plane) { cursor_cache_enable = stream->cursor_position.enable && plane->address.grph.cursor_cache_addr.quad_part; diff --git a/drivers/mtd/spi-nor/winbond.c b/drivers/mtd/spi-nor/winbond.c index b7c775b615e8..58aba52022bf 100644 --- a/drivers/mtd/spi-nor/winbond.c +++ b/drivers/mtd/spi-nor/winbond.c @@ -120,9 +120,10 @@ static const struct flash_info winbond_nor_parts[] = { NO_SFDP_FLAGS(SECT_4K) }, { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16) NO_SFDP_FLAGS(SECT_4K) }, - { "w25q128", INFO(0xef4018, 0, 0, 0) - PARSE_SFDP - FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, + { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256) + FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) + NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | + SPI_NOR_QUAD_READ) }, { "w25q256", INFO(0xef4019, 0, 64 * 1024, 512) NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) .fixups = &w25q256_fixups }, diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 1d67a3ca1fd1..7635a8b3c35c 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -547,6 +547,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; + if (!daddr || !saddr) + return -EINVAL; + lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 1785fded6290..2a4c59c71448 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1739,7 +1739,8 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm *mvm, &cp->channel_config[ch_cnt]; u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0; - u8 j, k, s_max = 0, b_max = 0, n_used_bssid_entries; + u8 k, s_max = 0, b_max = 0, n_used_bssid_entries; + u32 j; bool force_passive, found = false, allow_passive = true, unsolicited_probe_on_chan = false, psc_no_listen = false; diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index aacc05ec00c2..74791078fdeb 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -101,6 +101,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) pr_debug("%s: ctrl %d failed to generate private key, err %d\n", __func__, ctrl->cntlid, ret); kfree_sensitive(ctrl->dh_key); + ctrl->dh_key = NULL; return ret; } ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 48a9ed7c93c9..f9b9cb6144b9 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4593,7 +4593,7 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) int c; if (op->data) { - font.data = kmalloc(max_font_size, GFP_KERNEL); + font.data = kzalloc(max_font_size, GFP_KERNEL); if (!font.data) return -ENOMEM; } else diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 19bc8eea2b35..6bc4cda804e1 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -404,6 +404,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) ClearPageUptodate(page); ClearPageMappedToDisk(page); + ClearPageChecked(page); if (page_has_buffers(page)) { struct buffer_head *bh, *head; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f502bb2ce2ea..ea7c79e8ce42 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1784,6 +1784,14 @@ int ocfs2_remove_inode_range(struct inode *inode, return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di); + + if (byte_start > id_count || byte_start + byte_len > id_count) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + ret = ocfs2_truncate_inline(inode, di_bh, byte_start, byte_start + byte_len, 0); if (ret) { diff --git a/include/linux/fs.h b/include/linux/fs.h index b5ae6668528b..1a193f28f91e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3039,6 +3039,42 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +/** + * kiocb_start_write - get write access to a superblock for async file io + * @iocb: the io context we want to submit the write with + * + * This is a variant of sb_start_write() for async io submission. + * Should be matched with a call to kiocb_end_write(). + */ +static inline void kiocb_start_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + sb_start_write(inode->i_sb); + /* + * Fool lockdep by telling it the lock got released so that it + * doesn't complain about the held lock when we return to userspace. + */ + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); +} + +/** + * kiocb_end_write - drop write access to a superblock after async file io + * @iocb: the io context we sumbitted the write with + * + * Should be matched with a call to kiocb_start_write(). + */ +static inline void kiocb_end_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission thread. + */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + sb_end_write(inode->i_sb); +} + /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3ef77f52a4f0..7376074f2e1e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,6 +18,7 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 +#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 38ed0af255cc..1f3f5afb8219 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -593,6 +593,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) bool all_flushed; size_t cqe_size = sizeof(struct io_uring_cqe); + lockdep_assert_held(&ctx->uring_lock); + if (!force && __io_cqring_events(ctx) == ctx->cq_entries) return false; @@ -647,12 +649,9 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) bool ret = true; if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); + mutex_lock(&ctx->uring_lock); ret = __io_cqring_overflow_flush(ctx, false); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); + mutex_unlock(&ctx->uring_lock); } return ret; @@ -1405,6 +1404,8 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) int ret = 0; unsigned long check_cq; + lockdep_assert_held(&ctx->uring_lock); + if (!io_allowed_run_tw(ctx)) return -EEXIST; diff --git a/io_uring/rw.c b/io_uring/rw.c index 038e6b13a749..9d6e17a244ae 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -220,17 +220,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) } #endif -static void kiocb_end_write(struct io_kiocb *req) +static void io_req_end_write(struct io_kiocb *req) { - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ if (req->flags & REQ_F_ISREG) { - struct super_block *sb = file_inode(req->file)->i_sb; + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); - __sb_writers_acquired(sb, SB_FREEZE_WRITE); - sb_end_write(sb); + kiocb_end_write(&rw->kiocb); } } @@ -243,7 +238,7 @@ static void io_req_io_end(struct io_kiocb *req) struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); if (rw->kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); + io_req_end_write(req); fsnotify_modify(req->file); } else { fsnotify_access(req->file); @@ -307,7 +302,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) struct io_kiocb *req = cmd_to_io_kiocb(rw); if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); if (unlikely(res != req->cqe.res)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; @@ -844,6 +839,25 @@ done: return kiocb_done(req, ret, issue_flags); } +static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb) +{ + struct inode *inode; + bool ret; + + if (!(req->flags & REQ_F_ISREG)) + return true; + if (!(kiocb->ki_flags & IOCB_NOWAIT)) { + kiocb_start_write(kiocb); + return true; + } + + inode = file_inode(kiocb->ki_filp); + ret = sb_start_write_trylock(inode->i_sb); + if (ret) + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + return ret; +} + int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); @@ -897,18 +911,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) return ret; } - /* - * Open-code file_start_write here to grab freeze protection, - * which will be released by another thread in - * io_complete_rw(). Fool lockdep by telling it the lock got - * released so that it doesn't complain about the held lock when - * we return to userspace. - */ - if (req->flags & REQ_F_ISREG) { - sb_start_write(file_inode(req->file)->i_sb); - __sb_writers_release(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE); - } + if (unlikely(!io_kiocb_start_write(req, kiocb))) + return -EAGAIN; kiocb->ki_flags |= IOCB_WRITE; if (likely(req->file->f_op->write_iter)) @@ -956,7 +960,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) io->bytes_done += ret2; if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); return ret ? ret : -EAGAIN; } done: @@ -967,7 +971,7 @@ copy_iov: ret = io_setup_async_rw(req, iovec, s, false); if (!ret) { if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); return -EAGAIN; } return ret; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 77bee3b4fadd..15f1d01e56f7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2868,7 +2868,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) * split PMDs */ if (!can_split_folio(folio, &extra_pins)) { - ret = -EBUSY; + ret = -EAGAIN; goto out_unlock; } @@ -2920,7 +2920,7 @@ fail: xas_unlock(&xas); local_irq_enable(); remap_page(folio, folio_nr_pages(folio)); - ret = -EBUSY; + ret = -EAGAIN; } out_unlock: diff --git a/mm/internal.h b/mm/internal.h index 16d643b362df..4b3eb742c3de 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -757,7 +757,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_OOM ALLOC_NO_WATERMARKS #endif -#define ALLOC_HARDER 0x10 /* try to alloc harder */ +#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access + * to 25% of the min watermark or + * 62.5% if __GFP_HIGH is set. + */ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% * of the min watermark. */ @@ -771,6 +774,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ +/* Flags that allow allocations below the min watermark. */ +#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) + enum ttu_flags; struct tlbflush_unmap_batch; diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 35aa8d09d955..20d515d9bf65 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1260,32 +1260,6 @@ static void vm_map_ram_tags(struct kunit *test) free_pages((unsigned long)p_ptr, 1); } -static void vmalloc_percpu(struct kunit *test) -{ - char __percpu *ptr; - int cpu; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons percpu mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); - - for_each_possible_cpu(cpu) { - char *c_ptr = per_cpu_ptr(ptr, cpu); - - KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses don't crash the kernel. */ - *c_ptr = 0; - } - - free_percpu(ptr); -} - /* * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based @@ -1439,7 +1413,6 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmap_tags), KUNIT_CASE(vm_map_ram_tags), - KUNIT_CASE(vmalloc_percpu), KUNIT_CASE(match_all_not_assigned), KUNIT_CASE(match_all_ptr_tag), KUNIT_CASE(match_all_mem_tag), diff --git a/mm/migrate.c b/mm/migrate.c index 954c04f28eb9..e567ea2807df 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1018,11 +1018,59 @@ out: return rc; } -static int __unmap_and_move(struct folio *src, struct folio *dst, +/* + * To record some information during migration, we use some unused + * fields (mapping and private) of struct folio of the newly allocated + * destination folio. This is safe because nobody is using them + * except us. + */ +union migration_ptr { + struct anon_vma *anon_vma; + struct address_space *mapping; +}; +static void __migrate_folio_record(struct folio *dst, + unsigned long page_was_mapped, + struct anon_vma *anon_vma) +{ + union migration_ptr ptr = { .anon_vma = anon_vma }; + dst->mapping = ptr.mapping; + dst->private = (void *)page_was_mapped; +} + +static void __migrate_folio_extract(struct folio *dst, + int *page_was_mappedp, + struct anon_vma **anon_vmap) +{ + union migration_ptr ptr = { .mapping = dst->mapping }; + *anon_vmap = ptr.anon_vma; + *page_was_mappedp = (unsigned long)dst->private; + dst->mapping = NULL; + dst->private = NULL; +} + +/* Cleanup src folio upon migration success */ +static void migrate_folio_done(struct folio *src, + enum migrate_reason reason) +{ + /* + * Compaction can migrate also non-LRU pages which are + * not accounted to NR_ISOLATED_*. They can be recognized + * as __PageMovable + */ + if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION) + mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + + folio_is_file_lru(src), -folio_nr_pages(src)); + + if (reason != MR_MEMORY_FAILURE) + /* We release the page in page_handle_poison. */ + folio_put(src); +} + +static int __migrate_folio_unmap(struct folio *src, struct folio *dst, int force, enum migrate_mode mode) { int rc = -EAGAIN; - bool page_was_mapped = false; + int page_was_mapped = 0; struct anon_vma *anon_vma = NULL; bool is_lru = !__PageMovable(&src->page); @@ -1098,8 +1146,8 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, goto out_unlock; if (unlikely(!is_lru)) { - rc = move_to_new_folio(dst, src, mode); - goto out_unlock_both; + __migrate_folio_record(dst, page_was_mapped, anon_vma); + return MIGRATEPAGE_UNMAP; } /* @@ -1124,11 +1172,42 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, VM_BUG_ON_FOLIO(folio_test_anon(src) && !folio_test_ksm(src) && !anon_vma, src); try_to_migrate(src, 0); - page_was_mapped = true; + page_was_mapped = 1; } - if (!folio_mapped(src)) - rc = move_to_new_folio(dst, src, mode); + if (!folio_mapped(src)) { + __migrate_folio_record(dst, page_was_mapped, anon_vma); + return MIGRATEPAGE_UNMAP; + } + + if (page_was_mapped) + remove_migration_ptes(src, src, false); + +out_unlock_both: + folio_unlock(dst); +out_unlock: + /* Drop an anon_vma reference if we took one */ + if (anon_vma) + put_anon_vma(anon_vma); + folio_unlock(src); +out: + + return rc; +} + +static int __migrate_folio_move(struct folio *src, struct folio *dst, + enum migrate_mode mode) +{ + int rc; + int page_was_mapped = 0; + struct anon_vma *anon_vma = NULL; + bool is_lru = !__PageMovable(&src->page); + + __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); + + rc = move_to_new_folio(dst, src, mode); + if (unlikely(!is_lru)) + goto out_unlock_both; /* * When successful, push dst to LRU immediately: so that if it @@ -1151,12 +1230,10 @@ static int __unmap_and_move(struct folio *src, struct folio *dst, out_unlock_both: folio_unlock(dst); -out_unlock: /* Drop an anon_vma reference if we took one */ if (anon_vma) put_anon_vma(anon_vma); folio_unlock(src); -out: /* * If migration is successful, decrease refcount of dst, * which will not free the page because new page owner increased @@ -1168,80 +1245,92 @@ out: return rc; } -/* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. - */ -static int unmap_and_move(new_page_t get_new_page, - free_page_t put_new_page, - unsigned long private, struct page *page, - int force, enum migrate_mode mode, - enum migrate_reason reason, - struct list_head *ret) +/* Obtain the lock on page, remove all ptes. */ +static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page, + unsigned long private, struct folio *src, + struct folio **dstp, int force, + enum migrate_mode mode, enum migrate_reason reason, + struct list_head *ret) { - struct folio *dst, *src = page_folio(page); - int rc = MIGRATEPAGE_SUCCESS; + struct folio *dst; + int rc = MIGRATEPAGE_UNMAP; struct page *newpage = NULL; - if (!thp_migration_supported() && PageTransHuge(page)) + if (!thp_migration_supported() && folio_test_transhuge(src)) return -ENOSYS; - if (page_count(page) == 1) { - /* Page was freed from under us. So we are done. */ - ClearPageActive(page); - ClearPageUnevictable(page); + if (folio_ref_count(src) == 1) { + /* Folio was freed from under us. So we are done. */ + folio_clear_active(src); + folio_clear_unevictable(src); /* free_pages_prepare() will clear PG_isolated. */ - goto out; + list_del(&src->lru); + migrate_folio_done(src, reason); + return MIGRATEPAGE_SUCCESS; } - newpage = get_new_page(page, private); + newpage = get_new_page(&src->page, private); if (!newpage) return -ENOMEM; dst = page_folio(newpage); + *dstp = dst; - newpage->private = 0; - rc = __unmap_and_move(src, dst, force, mode); + dst->private = NULL; + rc = __migrate_folio_unmap(src, dst, force, mode); + if (rc == MIGRATEPAGE_UNMAP) + return rc; + + /* + * A folio that has not been unmapped will be restored to + * right list unless we want to retry. + */ + if (rc != -EAGAIN) + list_move_tail(&src->lru, ret); + + if (put_new_page) + put_new_page(&dst->page, private); + else + folio_put(dst); + + return rc; +} + +/* Migrate the folio to the newly allocated folio in dst. */ +static int migrate_folio_move(free_page_t put_new_page, unsigned long private, + struct folio *src, struct folio *dst, + enum migrate_mode mode, enum migrate_reason reason, + struct list_head *ret) +{ + int rc; + + rc = __migrate_folio_move(src, dst, mode); if (rc == MIGRATEPAGE_SUCCESS) - set_page_owner_migrate_reason(newpage, reason); + set_page_owner_migrate_reason(&dst->page, reason); -out: if (rc != -EAGAIN) { /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been + * A folio that has been migrated has all references + * removed and will be freed. A folio that has not been * migrated will have kept its references and be restored. */ - list_del(&page->lru); + list_del(&src->lru); } /* * If migration is successful, releases reference grabbed during - * isolation. Otherwise, restore the page to right list unless + * isolation. Otherwise, restore the folio to right list unless * we want to retry. */ if (rc == MIGRATEPAGE_SUCCESS) { - /* - * Compaction can migrate also non-LRU pages which are - * not accounted to NR_ISOLATED_*. They can be recognized - * as __PageMovable - */ - if (likely(!__PageMovable(page))) - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -thp_nr_pages(page)); - - if (reason != MR_MEMORY_FAILURE) - /* - * We release the page in page_handle_poison. - */ - put_page(page); + migrate_folio_done(src, reason); } else { if (rc != -EAGAIN) - list_add_tail(&page->lru, ret); + list_add_tail(&src->lru, ret); if (put_new_page) - put_new_page(newpage, private); + put_new_page(&dst->page, private); else - put_page(newpage); + folio_put(dst); } return rc; @@ -1392,234 +1481,411 @@ out: return rc; } -static inline int try_split_thp(struct page *page, struct list_head *split_pages) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) { int rc; - lock_page(page); - rc = split_huge_page_to_list(page, split_pages); - unlock_page(page); + folio_lock(folio); + rc = split_folio_to_list(folio, split_folios); + folio_unlock(folio); if (!rc) - list_move_tail(&page->lru, split_pages); + list_move_tail(&folio->lru, split_folios); return rc; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR +#else +#define NR_MAX_BATCHED_MIGRATION 512 +#endif +#define NR_MAX_MIGRATE_PAGES_RETRY 10 + +struct migrate_pages_stats { + int nr_succeeded; /* Normal and large folios migrated successfully, in + units of base pages */ + int nr_failed_pages; /* Normal and large folios failed to be migrated, in + units of base pages. Untried folios aren't counted */ + int nr_thp_succeeded; /* THP migrated successfully */ + int nr_thp_failed; /* THP failed to be migrated */ + int nr_thp_split; /* THP split before migrating */ +}; + /* - * migrate_pages - migrate the pages specified in a list, to the free pages - * supplied as the target for the page migration - * - * @from: The list of pages to be migrated. - * @get_new_page: The function used to allocate free pages to be used - * as the target of the page migration. - * @put_new_page: The function used to free target pages if migration - * fails, or NULL if no special handling is necessary. - * @private: Private data to be passed on to get_new_page() - * @mode: The migration mode that specifies the constraints for - * page migration, if any. - * @reason: The reason for page migration. - * @ret_succeeded: Set to the number of normal pages migrated successfully if - * the caller passes a non-NULL pointer. - * - * The function returns after 10 attempts or if no pages are movable any more - * because the list has become empty or no retryable pages exist any more. - * It is caller's responsibility to call putback_movable_pages() to return pages - * to the LRU or free list only if ret != 0. - * - * Returns the number of {normal page, THP, hugetlb} that were not migrated, or - * an error code. The number of THP splits will be considered as the number of - * non-migrated THP, no matter how many subpages of the THP are migrated successfully. + * Returns the number of hugetlb folios that were not migrated, or an error code + * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable + * any more because the list has become empty or no retryable hugetlb folios + * exist any more. It is caller's responsibility to call putback_movable_pages() + * only if ret != 0. */ -int migrate_pages(struct list_head *from, new_page_t get_new_page, - free_page_t put_new_page, unsigned long private, - enum migrate_mode mode, int reason, unsigned int *ret_succeeded) +static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, + struct migrate_pages_stats *stats, + struct list_head *ret_folios) { int retry = 1; - int thp_retry = 1; int nr_failed = 0; - int nr_failed_pages = 0; int nr_retry_pages = 0; - int nr_succeeded = 0; - int nr_thp_succeeded = 0; - int nr_thp_failed = 0; - int nr_thp_split = 0; int pass = 0; - bool is_thp = false; - struct page *page; - struct page *page2; - int rc, nr_subpages; - LIST_HEAD(ret_pages); - LIST_HEAD(thp_split_pages); - bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_subpage_counting = false; + struct folio *folio, *folio2; + int rc, nr_pages; - trace_mm_migrate_pages_start(mode, reason); - -thp_subpage_migration: - for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { + for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) { retry = 0; - thp_retry = 0; nr_retry_pages = 0; - list_for_each_entry_safe(page, page2, from, lru) { - /* - * THP statistics is based on the source huge page. - * Capture required information that might get lost - * during migration. - */ - is_thp = PageTransHuge(page) && !PageHuge(page); - nr_subpages = compound_nr(page); + list_for_each_entry_safe(folio, folio2, from, lru) { + if (!folio_test_hugetlb(folio)) + continue; + + nr_pages = folio_nr_pages(folio); + cond_resched(); - if (PageHuge(page)) - rc = unmap_and_move_huge_page(get_new_page, - put_new_page, private, page, - pass > 2, mode, reason, - &ret_pages); - else - rc = unmap_and_move(get_new_page, put_new_page, - private, page, pass > 2, mode, - reason, &ret_pages); + rc = unmap_and_move_huge_page(get_new_page, + put_new_page, private, + &folio->page, pass > 2, mode, + reason, ret_folios); /* * The rules are: - * Success: non hugetlb page will be freed, hugetlb - * page will be put back + * Success: hugetlb folio will be put back * -EAGAIN: stay on the from list * -ENOMEM: stay on the from list * -ENOSYS: stay on the from list - * Other errno: put on ret_pages list then splice to - * from list + * Other errno: put on ret_folios list */ switch(rc) { - /* - * THP migration might be unsupported or the - * allocation could've failed so we should - * retry on the same page with the THP split - * to base pages. - * - * Sub-pages are put in thp_split_pages, and - * we will migrate them after the rest of the - * list is processed. - */ case -ENOSYS: - /* THP migration is unsupported */ - if (is_thp) { - nr_thp_failed++; - if (!try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; - break; - } /* Hugetlb migration is unsupported */ - } else if (!no_subpage_counting) { - nr_failed++; - } - - nr_failed_pages += nr_subpages; - list_move_tail(&page->lru, &ret_pages); + nr_failed++; + stats->nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, ret_folios); break; case -ENOMEM: /* * When memory is low, don't bother to try to migrate - * other pages, just exit. + * other folios, just exit. */ - if (is_thp) { - nr_thp_failed++; - /* THP NUMA faulting doesn't split THP to retry. */ - if (!nosplit && !try_split_thp(page, &thp_split_pages)) { - nr_thp_split++; - break; - } - } else if (!no_subpage_counting) { - nr_failed++; - } - - nr_failed_pages += nr_subpages + nr_retry_pages; - /* - * There might be some subpages of fail-to-migrate THPs - * left in thp_split_pages list. Move them back to migration - * list so that they could be put back to the right list by - * the caller otherwise the page refcnt will be leaked. - */ - list_splice_init(&thp_split_pages, from); - /* nr_failed isn't updated for not used */ - nr_thp_failed += thp_retry; - goto out; + stats->nr_failed_pages += nr_pages + nr_retry_pages; + return -ENOMEM; case -EAGAIN: - if (is_thp) - thp_retry++; - else if (!no_subpage_counting) - retry++; - nr_retry_pages += nr_subpages; + retry++; + nr_retry_pages += nr_pages; break; case MIGRATEPAGE_SUCCESS: - nr_succeeded += nr_subpages; - if (is_thp) - nr_thp_succeeded++; + stats->nr_succeeded += nr_pages; break; default: /* * Permanent failure (-EBUSY, etc.): - * unlike -EAGAIN case, the failed page is - * removed from migration page list and not + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not * retried in the next outer loop. */ - if (is_thp) - nr_thp_failed++; - else if (!no_subpage_counting) - nr_failed++; + nr_failed++; + stats->nr_failed_pages += nr_pages; + break; + } + } + } + /* + * nr_failed is number of hugetlb folios failed to be migrated. After + * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb + * folios as failed. + */ + nr_failed += retry; + stats->nr_failed_pages += nr_retry_pages; - nr_failed_pages += nr_subpages; + return nr_failed; +} + +static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, struct list_head *ret_folios, + struct migrate_pages_stats *stats) +{ + int retry = 1; + int large_retry = 1; + int thp_retry = 1; + int nr_failed = 0; + int nr_retry_pages = 0; + int nr_large_failed = 0; + int pass = 0; + bool is_large = false; + bool is_thp = false; + struct folio *folio, *folio2, *dst = NULL; + int rc, nr_pages; + LIST_HEAD(split_folios); + bool nosplit = (reason == MR_NUMA_MISPLACED); + bool no_split_folio_counting = false; + +split_folio_migration: + for (pass = 0; + pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); + pass++) { + retry = 0; + large_retry = 0; + thp_retry = 0; + nr_retry_pages = 0; + + list_for_each_entry_safe(folio, folio2, from, lru) { + /* + * Large folio statistics is based on the source large + * folio. Capture required information that might get + * lost during migration. + */ + is_large = folio_test_large(folio); + is_thp = is_large && folio_test_pmd_mappable(folio); + nr_pages = folio_nr_pages(folio); + + cond_resched(); + + rc = migrate_folio_unmap(get_new_page, put_new_page, private, + folio, &dst, pass > 2, mode, + reason, ret_folios); + if (rc == MIGRATEPAGE_UNMAP) + rc = migrate_folio_move(put_new_page, private, + folio, dst, mode, + reason, ret_folios); + /* + * The rules are: + * Success: folio will be freed + * -EAGAIN: stay on the from list + * -ENOMEM: stay on the from list + * -ENOSYS: stay on the from list + * Other errno: put on ret_folios list + */ + switch(rc) { + /* + * Large folio migration might be unsupported or + * the allocation could've failed so we should retry + * on the same folio with the large folio split + * to normal folios. + * + * Split folios are put in split_folios, and + * we will migrate them after the rest of the + * list is processed. + */ + case -ENOSYS: + /* Large folio migration is unsupported */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + if (!try_split_folio(folio, &split_folios)) { + stats->nr_thp_split += is_thp; + break; + } + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages; + list_move_tail(&folio->lru, ret_folios); + break; + case -ENOMEM: + /* + * When memory is low, don't bother to try to migrate + * other folios, just exit. + */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + /* Large folio NUMA faulting doesn't split to retry. */ + if (!nosplit) { + int ret = try_split_folio(folio, &split_folios); + + if (!ret) { + stats->nr_thp_split += is_thp; + break; + } else if (reason == MR_LONGTERM_PIN && + ret == -EAGAIN) { + /* + * Try again to split large folio to + * mitigate the failure of longterm pinning. + */ + large_retry++; + thp_retry += is_thp; + nr_retry_pages += nr_pages; + /* Undo duplicated failure counting. */ + nr_large_failed--; + stats->nr_thp_failed -= is_thp; + break; + } + } + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages + nr_retry_pages; + /* + * There might be some split folios of fail-to-migrate large + * folios left in split_folios list. Move them to ret_folios + * list so that they could be put back to the right list by + * the caller otherwise the folio refcnt will be leaked. + */ + list_splice_init(&split_folios, ret_folios); + /* nr_failed isn't updated for not used */ + nr_large_failed += large_retry; + stats->nr_thp_failed += thp_retry; + goto out; + case -EAGAIN: + if (is_large) { + large_retry++; + thp_retry += is_thp; + } else if (!no_split_folio_counting) { + retry++; + } + nr_retry_pages += nr_pages; + break; + case MIGRATEPAGE_SUCCESS: + stats->nr_succeeded += nr_pages; + stats->nr_thp_succeeded += is_thp; + break; + default: + /* + * Permanent failure (-EBUSY, etc.): + * unlike -EAGAIN case, the failed folio is + * removed from migration folio list and not + * retried in the next outer loop. + */ + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages; break; } } } nr_failed += retry; - nr_thp_failed += thp_retry; - nr_failed_pages += nr_retry_pages; + nr_large_failed += large_retry; + stats->nr_thp_failed += thp_retry; + stats->nr_failed_pages += nr_retry_pages; /* - * Try to migrate subpages of fail-to-migrate THPs, no nr_failed - * counting in this round, since all subpages of a THP is counted - * as 1 failure in the first round. + * Try to migrate split folios of fail-to-migrate large folios, no + * nr_failed counting in this round, since all split folios of a + * large folio is counted as 1 failure in the first round. */ - if (!list_empty(&thp_split_pages)) { + if (!list_empty(&split_folios)) { /* - * Move non-migrated pages (after 10 retries) to ret_pages - * to avoid migrating them again. + * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY + * retries) to ret_folios to avoid migrating them again. */ - list_splice_init(from, &ret_pages); - list_splice_init(&thp_split_pages, from); - no_subpage_counting = true; + list_splice_init(from, ret_folios); + list_splice_init(&split_folios, from); + no_split_folio_counting = true; retry = 1; - goto thp_subpage_migration; + goto split_folio_migration; } - rc = nr_failed + nr_thp_failed; + rc = nr_failed + nr_large_failed; +out: + return rc; +} + +/* + * migrate_pages - migrate the folios specified in a list, to the free folios + * supplied as the target for the page migration + * + * @from: The list of folios to be migrated. + * @get_new_page: The function used to allocate free folios to be used + * as the target of the folio migration. + * @put_new_page: The function used to free target folios if migration + * fails, or NULL if no special handling is necessary. + * @private: Private data to be passed on to get_new_page() + * @mode: The migration mode that specifies the constraints for + * folio migration, if any. + * @reason: The reason for folio migration. + * @ret_succeeded: Set to the number of folios migrated successfully if + * the caller passes a non-NULL pointer. + * + * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios + * are movable any more because the list has become empty or no retryable folios + * exist any more. It is caller's responsibility to call putback_movable_pages() + * only if ret != 0. + * + * Returns the number of {normal folio, large folio, hugetlb} that were not + * migrated, or an error code. The number of large folio splits will be + * considered as the number of non-migrated large folio, no matter how many + * split folios of the large folio are migrated successfully. + */ +int migrate_pages(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, unsigned int *ret_succeeded) +{ + int rc, rc_gather; + int nr_pages; + struct folio *folio, *folio2; + LIST_HEAD(folios); + LIST_HEAD(ret_folios); + struct migrate_pages_stats stats; + + trace_mm_migrate_pages_start(mode, reason); + + memset(&stats, 0, sizeof(stats)); + + rc_gather = migrate_hugetlbs(from, get_new_page, put_new_page, private, + mode, reason, &stats, &ret_folios); + if (rc_gather < 0) + goto out; +again: + nr_pages = 0; + list_for_each_entry_safe(folio, folio2, from, lru) { + /* Retried hugetlb folios will be kept in list */ + if (folio_test_hugetlb(folio)) { + list_move_tail(&folio->lru, &ret_folios); + continue; + } + + nr_pages += folio_nr_pages(folio); + if (nr_pages > NR_MAX_BATCHED_MIGRATION) + break; + } + if (nr_pages > NR_MAX_BATCHED_MIGRATION) + list_cut_before(&folios, from, &folio->lru); + else + list_splice_init(from, &folios); + rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, + mode, reason, &ret_folios, &stats); + list_splice_tail_init(&folios, &ret_folios); + if (rc < 0) { + rc_gather = rc; + goto out; + } + rc_gather += rc; + if (!list_empty(from)) + goto again; out: /* - * Put the permanent failure page back to migration list, they + * Put the permanent failure folio back to migration list, they * will be put back to the right list by the caller. */ - list_splice(&ret_pages, from); + list_splice(&ret_folios, from); /* - * Return 0 in case all subpages of fail-to-migrate THPs are - * migrated successfully. + * Return 0 in case all split folios of fail-to-migrate large folios + * are migrated successfully. */ if (list_empty(from)) - rc = 0; + rc_gather = 0; - count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); - count_vm_events(PGMIGRATE_FAIL, nr_failed_pages); - count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); - count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); - count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); - trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded, - nr_thp_failed, nr_thp_split, mode, reason); + count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded); + count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages); + count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded); + count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed); + count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split); + trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages, + stats.nr_thp_succeeded, stats.nr_thp_failed, + stats.nr_thp_split, mode, reason); if (ret_succeeded) - *ret_succeeded = nr_succeeded; + *ret_succeeded = stats.nr_succeeded; - return rc; + return rc_gather; } EXPORT_SYMBOL_GPL(migrate_pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 559f6d4d87ec..d8e7266fb283 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3873,12 +3873,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, alloc_flags); /* - * If the allocation fails, allow OOM handling access - * to HIGHATOMIC reserves as failing now is worse than - * failing a high-order atomic allocation in the - * future. + * If the allocation fails, allow OOM handling and + * order-0 (atomic) allocs access to HIGHATOMIC + * reserves as failing now is worse than failing a + * high-order atomic allocation in the future. */ - if (!page && (alloc_flags & ALLOC_OOM)) + if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK))) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { @@ -4095,15 +4095,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); static inline long __zone_watermark_unusable_free(struct zone *z, unsigned int order, unsigned int alloc_flags) { - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); long unusable_free = (1 << order) - 1; /* - * If the caller does not have rights to ALLOC_HARDER then subtract - * the high-atomic reserves. This will over-estimate the size of the - * atomic reserve but it avoids a search. + * If the caller does not have rights to reserves below the min + * watermark then subtract the high-atomic reserves. This will + * over-estimate the size of the atomic reserve but it avoids a search. */ - if (likely(!alloc_harder)) + if (likely(!(alloc_flags & ALLOC_RESERVES))) unusable_free += z->nr_reserved_highatomic; #ifdef CONFIG_CMA @@ -4127,25 +4126,37 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, { long min = mark; int o; - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); /* free_pages may go negative - that's OK */ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); - if (alloc_flags & ALLOC_MIN_RESERVE) - min -= min / 2; - - if (unlikely(alloc_harder)) { + if (unlikely(alloc_flags & ALLOC_RESERVES)) { /* - * OOM victims can try even harder than normal ALLOC_HARDER + * __GFP_HIGH allows access to 50% of the min reserve as well + * as OOM. + */ + if (alloc_flags & ALLOC_MIN_RESERVE) { + min -= min / 2; + + /* + * Non-blocking allocations (e.g. GFP_ATOMIC) can + * access more reserves than just __GFP_HIGH. Other + * non-blocking allocations requests such as GFP_NOWAIT + * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get + * access to the min reserve. + */ + if (alloc_flags & ALLOC_NON_BLOCK) + min -= min / 4; + } + + /* + * OOM victims can try even harder than the normal reserve * users on the grounds that it's definitely going to be in * the exit path shortly and free memory. Any allocation it * makes during the free path will be small and short-lived. */ if (alloc_flags & ALLOC_OOM) min -= min / 2; - else - min -= min / 4; } /* @@ -5002,28 +5013,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will - * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH). + * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH). */ alloc_flags |= (__force int) (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); - if (gfp_mask & __GFP_ATOMIC) { + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { /* * Not worth trying to allocate harder for __GFP_NOMEMALLOC even * if it can't schedule. */ if (!(gfp_mask & __GFP_NOMEMALLOC)) { - alloc_flags |= ALLOC_HARDER; + alloc_flags |= ALLOC_NON_BLOCK; if (order > 0) alloc_flags |= ALLOC_HIGHATOMIC; } /* - * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the - * comment for __cpuset_node_allowed(). + * Ignore cpuset mems for non-blocking __GFP_HIGH (probably + * GFP_ATOMIC) rather than fail, see the comment for + * __cpuset_node_allowed(). */ - alloc_flags &= ~ALLOC_CPUSET; + if (alloc_flags & ALLOC_MIN_RESERVE) + alloc_flags &= ~ALLOC_CPUSET; } else if (unlikely(rt_task(current)) && in_task()) alloc_flags |= ALLOC_MIN_RESERVE; @@ -5468,12 +5481,13 @@ nopage: WARN_ON_ONCE_GFP(costly_order, gfp_mask); /* - * Help non-failing allocations by giving them access to memory - * reserves but do not use ALLOC_NO_WATERMARKS because this + * Help non-failing allocations by giving some access to memory + * reserves normally used for high priority non-blocking + * allocations but do not use ALLOC_NO_WATERMARKS because this * could deplete whole memory reserves which would just make - * the situation worse + * the situation worse. */ - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac); if (page) goto got_pg; diff --git a/mm/shmem.c b/mm/shmem.c index 1bf06946e7a8..8f1a870821b7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1091,7 +1091,9 @@ static int shmem_getattr(struct user_namespace *mnt_userns, stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); + inode_lock_shared(inode); generic_fillattr(&init_user_ns, inode, stat); + inode_unlock_shared(inode); if (shmem_is_huge(NULL, inode, 0, false)) stat->blksize = HPAGE_PMD_SIZE; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index e26a72f3a104..1241ab7a86bb 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -593,7 +593,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (!pubsta->deflink.ht_cap.ht_supported && - sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) + !pubsta->deflink.vht_cap.vht_supported && + !pubsta->deflink.he_cap.has_he && + !pubsta->deflink.eht_cap.has_eht) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3cbd9cf80be9..d750c6e6eb98 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10214,6 +10214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),