Merge 6d2453c3db ("drivers/block/zram/zram_drv.c: do not keep dangling zcomp pointer after zram reset") into android-mainline

Steps on the way to 6.1-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I6564be3323a42b38af91f7d78e551dd17b6f242c
This commit is contained in:
Greg Kroah-Hartman
2022-10-20 11:55:55 +02:00
19 changed files with 419 additions and 214 deletions

View File

@@ -94,6 +94,11 @@ Usage
Page allocated via order XXX, ...
PFN XXX ...
// Detailed stack
By default, it will do a full pfn dump. To start the dump at a given pfn,
use fseek, which page_owner supports.
FILE *fp = fopen("/sys/kernel/debug/page_owner", "r");
fseek(fp, pfn_start, SEEK_SET);
The ``page_owner_sort`` tool ignores ``PFN`` rows, puts the remaining rows
in buf, uses regexp to extract the page order value, counts the times

View File

@@ -1710,9 +1710,6 @@ out:
static void zram_reset_device(struct zram *zram)
{
struct zcomp *comp;
u64 disksize;
down_write(&zram->init_lock);
zram->limit_pages = 0;
@@ -1722,17 +1719,15 @@ static void zram_reset_device(struct zram *zram)
return;
}
comp = zram->comp;
disksize = zram->disksize;
zram->disksize = 0;
set_capacity_and_notify(zram->disk, 0);
part_stat_set_all(zram->disk->part0, 0);
/* I/O operation under all of CPU are done so let's free */
zram_meta_free(zram, disksize);
zram_meta_free(zram, zram->disksize);
zram->disksize = 0;
memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
zcomp_destroy(zram->comp);
zram->comp = NULL;
reset_bdev(zram);
up_write(&zram->init_lock);

View File

@@ -2520,7 +2520,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
#ifndef CONFIG_NUMA
static inline int early_pfn_to_nid(unsigned long pfn)

View File

@@ -55,7 +55,8 @@ static inline void page_ext_init(void)
}
#endif
struct page_ext *lookup_page_ext(const struct page *page);
extern struct page_ext *page_ext_get(struct page *page);
extern void page_ext_put(struct page_ext *page_ext);
static inline struct page_ext *page_ext_next(struct page_ext *curr)
{
@@ -71,11 +72,6 @@ static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
{
}
static inline struct page_ext *lookup_page_ext(const struct page *page)
{
return NULL;
}
static inline void page_ext_init(void)
{
}
@@ -87,5 +83,14 @@ static inline void page_ext_init_flatmem_late(void)
static inline void page_ext_init_flatmem(void)
{
}
/*
 * Stub variant of page_ext_get(): performs no lookup and always reports
 * that no page_ext exists by returning NULL.
 * NOTE(review): presumably the !CONFIG_PAGE_EXTENSION branch, judging by
 * the #endif that follows -- confirm against the #if above this hunk.
 */
static inline struct page_ext *page_ext_get(struct page *page)
{
return NULL;
}
/*
 * Stub variant of page_ext_put(): intentionally empty, so callers can pair
 * it with the stub page_ext_get() without any conditional compilation.
 */
static inline void page_ext_put(struct page_ext *page_ext)
{
}
#endif /* CONFIG_PAGE_EXTENSION */
#endif /* __LINUX_PAGE_EXT_H */

View File

@@ -13,65 +13,79 @@
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
static inline bool folio_test_young(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
bool page_young;
if (unlikely(!page_ext))
return false;
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
page_young = test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
page_ext_put(page_ext);
return page_young;
}
static inline void folio_set_young(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
page_ext_put(page_ext);
}
static inline bool folio_test_clear_young(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
bool page_young;
if (unlikely(!page_ext))
return false;
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
page_young = test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
page_ext_put(page_ext);
return page_young;
}
static inline bool folio_test_idle(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
bool page_idle;
if (unlikely(!page_ext))
return false;
return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_ext_put(page_ext);
return page_idle;
}
static inline void folio_set_idle(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_ext_put(page_ext);
}
static inline void folio_clear_idle(struct folio *folio)
{
struct page_ext *page_ext = lookup_page_ext(&folio->page);
struct page_ext *page_ext = page_ext_get(&folio->page);
if (unlikely(!page_ext))
return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_ext_put(page_ext);
}
#endif /* !CONFIG_64BIT */

View File

@@ -15,12 +15,12 @@ struct mm_walk;
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
* split_huge_page() instead of handling it explicitly.
* @pte_entry: if set, called for each non-empty PTE (lowest-level)
* entry
* @pte_entry: if set, called for each PTE (lowest-level) entry,
* including empty ones
* @pte_hole: if set, called for each hole at all levels,
* depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD
* 4:PTE. Any folded depths (where PTRS_PER_P?D is equal
* to 1) are skipped.
* depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD.
* Any folded depths (where PTRS_PER_P?D is equal to 1)
* are skipped.
* @hugetlb_entry: if set, called for each hugetlb entry
* @test_walk: caller specific callback function to determine whether
* we walk over the current vma or not. Returning 0 means

View File

@@ -302,9 +302,14 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
pte_t *pte;
spinlock_t *ptl;
if (pmd_huge(*pmd)) {
if (pmd_trans_huge(*pmd)) {
ptl = pmd_lock(walk->mm, pmd);
if (pmd_huge(*pmd)) {
if (!pmd_present(*pmd)) {
spin_unlock(ptl);
return 0;
}
if (pmd_trans_huge(*pmd)) {
damon_pmdp_mkold(pmd, walk->mm, addr);
spin_unlock(ptl);
return 0;
@@ -429,9 +434,14 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
struct damon_young_walk_private *priv = walk->private;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_huge(*pmd)) {
if (pmd_trans_huge(*pmd)) {
ptl = pmd_lock(walk->mm, pmd);
if (!pmd_huge(*pmd)) {
if (!pmd_present(*pmd)) {
spin_unlock(ptl);
return 0;
}
if (!pmd_trans_huge(*pmd)) {
spin_unlock(ptl);
goto regular_page;
}

200
mm/gup.c
View File

@@ -1927,20 +1927,16 @@ struct page *get_dump_page(unsigned long addr)
#ifdef CONFIG_MIGRATION
/*
* Check whether all pages are pinnable. If some pages are not pinnable migrate
* them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
* if all pages are pinnable and in the right zone. Other errors indicate
* migration failure.
* Returns the number of collected pages. Return value is always >= 0.
*/
static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages,
unsigned int gup_flags)
static unsigned long collect_longterm_unpinnable_pages(
struct list_head *movable_page_list,
unsigned long nr_pages,
struct page **pages)
{
unsigned long i;
unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
LIST_HEAD(movable_page_list);
bool drain_allow = true, coherent_pages = false;
int ret = 0;
bool drain_allow = true;
for (i = 0; i < nr_pages; i++) {
struct folio *folio = page_folio(pages[i]);
@@ -1949,45 +1945,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
continue;
prev_folio = folio;
/*
* Device coherent pages are managed by a driver and should not
* be pinned indefinitely as it prevents the driver moving the
* page. So when trying to pin with FOLL_LONGTERM instead try
* to migrate the page out of device memory.
*/
if (folio_is_device_coherent(folio)) {
/*
* We always want a new GUP lookup with device coherent
* pages.
*/
pages[i] = 0;
coherent_pages = true;
/*
* Migration will fail if the page is pinned, so convert
* the pin on the source page to a normal reference.
*/
if (gup_flags & FOLL_PIN) {
get_page(&folio->page);
unpin_user_page(&folio->page);
}
if (migrate_device_coherent_page(&folio->page)) {
ret = -EBUSY;
break;
}
continue;
}
if (folio_is_longterm_pinnable(folio))
continue;
/*
* Try to move out any movable page before pinning the range.
*/
collected++;
if (folio_is_device_coherent(folio))
continue;
if (folio_test_hugetlb(folio)) {
if (isolate_hugetlb(&folio->page,
&movable_page_list))
ret = -EBUSY;
isolate_hugetlb(&folio->page, movable_page_list);
continue;
}
@@ -1996,61 +1963,122 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
drain_allow = false;
}
if (folio_isolate_lru(folio)) {
ret = -EBUSY;
if (!folio_isolate_lru(folio))
continue;
}
list_add_tail(&folio->lru, &movable_page_list);
list_add_tail(&folio->lru, movable_page_list);
node_stat_mod_folio(folio,
NR_ISOLATED_ANON + folio_is_file_lru(folio),
folio_nr_pages(folio));
}
/*
* If list is empty, and no isolation errors, means that all pages are
* in the correct zone. If there were device coherent pages some pages
* have been unpinned.
*/
if (list_empty(&movable_page_list) && !ret && !coherent_pages)
return 0;
return collected;
}
/*
* Unpins all pages and migrates device coherent pages and movable_page_list.
* Returns -EAGAIN if all pages were successfully migrated or -errno for failure
* (or partial success).
*/
static int migrate_longterm_unpinnable_pages(
struct list_head *movable_page_list,
unsigned long nr_pages,
struct page **pages)
{
int ret;
unsigned long i;
/*
* Unpin all pages. If device coherent pages were found
* migrate_device_coherent_page() will have dropped the pin and set
* pages[i] == NULL.
*/
for (i = 0; i < nr_pages; i++) {
if (!pages[i])
continue;
struct folio *folio = page_folio(pages[i]);
if (gup_flags & FOLL_PIN)
unpin_user_page(pages[i]);
else
put_page(pages[i]);
if (folio_is_device_coherent(folio)) {
/*
* Migration will fail if the page is pinned, so convert
* the pin on the source page to a normal reference.
*/
pages[i] = NULL;
folio_get(folio);
gup_put_folio(folio, 1, FOLL_PIN);
if (migrate_device_coherent_page(&folio->page)) {
ret = -EBUSY;
goto err;
}
continue;
}
/*
* We can't migrate pages with unexpected references, so drop
* the reference obtained by __get_user_pages_locked().
* Migrating pages have been added to movable_page_list after
* calling folio_isolate_lru() which takes a reference so the
* page won't be freed if it's migrating.
*/
unpin_user_page(pages[i]);
pages[i] = NULL;
}
if (!list_empty(&movable_page_list)) {
if (!list_empty(movable_page_list)) {
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
.gfp_mask = GFP_USER | __GFP_NOWARN,
};
ret = migrate_pages(&movable_page_list, alloc_migration_target,
NULL, (unsigned long)&mtc, MIGRATE_SYNC,
MR_LONGTERM_PIN, NULL);
if (ret > 0) /* number of pages not migrated */
if (migrate_pages(movable_page_list, alloc_migration_target,
NULL, (unsigned long)&mtc, MIGRATE_SYNC,
MR_LONGTERM_PIN, NULL)) {
ret = -ENOMEM;
goto err;
}
}
if (ret && !list_empty(&movable_page_list))
putback_movable_pages(&movable_page_list);
putback_movable_pages(movable_page_list);
return ret ? ret : -EAGAIN;
return -EAGAIN;
err:
for (i = 0; i < nr_pages; i++)
if (pages[i])
unpin_user_page(pages[i]);
putback_movable_pages(movable_page_list);
return ret;
}
/*
 * Verify that every page in the range is *allowed* to stay pinned.
 *
 * Precondition (somewhat counter-intuitive): the caller must already have
 * pinned all pages in the range via FOLL_PIN before calling this routine.
 *
 * Outcomes:
 *  - 0: every page may be pinned; all pages are left pinned.
 *  - -EAGAIN: some pages were not allowed to be pinned, so they were migrated
 *    away and the whole range was unpinned. The caller should re-pin the
 *    entire range with FOLL_PIN and call this routine again.
 *  - any other error: migration failed. The caller should give up and
 *    propagate the error back up the call stack.
 */
static long check_and_migrate_movable_pages(unsigned long nr_pages,
					    struct page **pages)
{
	LIST_HEAD(movable_page_list);

	/* Nothing unpinnable was collected: keep the pins and report success. */
	if (!collect_longterm_unpinnable_pages(&movable_page_list, nr_pages,
					       pages))
		return 0;

	return migrate_longterm_unpinnable_pages(&movable_page_list, nr_pages,
						 pages);
}
#else
static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages,
unsigned int gup_flags)
struct page **pages)
{
return 0;
}
@@ -2073,6 +2101,17 @@ static long __gup_longterm_locked(struct mm_struct *mm,
if (!(gup_flags & FOLL_LONGTERM))
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
NULL, gup_flags);
/*
* If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
* implies FOLL_PIN (although the reverse is not true). Therefore it is
* correct to unconditionally call check_and_migrate_movable_pages()
* which assumes pages have been pinned via FOLL_PIN.
*
* Enforce the above reasoning by asserting that FOLL_PIN is set.
*/
if (WARN_ON(!(gup_flags & FOLL_PIN)))
return -EINVAL;
flags = memalloc_pin_save();
do {
nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
@@ -2082,8 +2121,7 @@ static long __gup_longterm_locked(struct mm_struct *mm,
rc = nr_pinned_pages;
break;
}
rc = check_and_migrate_movable_pages(nr_pinned_pages, pages,
gup_flags);
rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
} while (rc == -EAGAIN);
memalloc_pin_restore(flags);

View File

@@ -770,8 +770,7 @@ static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
return;
entry = mk_pmd(zero_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
if (pgtable)
pgtable_trans_huge_deposit(mm, pmd, pgtable);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
mm_inc_nr_ptes(mm);
}
@@ -2644,6 +2643,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
gfp_t gfp;
mapping = head->mapping;
/* Truncated ? */
@@ -2652,8 +2653,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
goto out;
}
xas_split_alloc(&xas, head, compound_order(head),
mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
gfp = current_gfp_context(mapping_gfp_mask(mapping) &
GFP_RECLAIM_MASK);
if (folio_test_private(folio) &&
!filemap_release_folio(folio, gfp)) {
ret = -EBUSY;
goto out;
}
xas_split_alloc(&xas, head, compound_order(head), gfp);
if (xas_error(&xas)) {
ret = xas_error(&xas);
goto out;

View File

@@ -265,11 +265,10 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
static inline void reset_struct_pages(struct page *start)
{
int i;
struct page *from = start + NR_RESET_STRUCT_PAGE;
for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
memcpy(start + i, from, sizeof(*from));
BUILD_BUG_ON(NR_RESET_STRUCT_PAGE * 2 > PAGE_SIZE / sizeof(struct page));
memcpy(start, from, sizeof(*from) * NR_RESET_STRUCT_PAGE);
}
static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,

View File

@@ -1136,7 +1136,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
} while ((memcg = parent_mem_cgroup(memcg)));
/*
* When cgruop1 non-hierarchy mode is used,
* When cgroup1 non-hierarchy mode is used,
* parent_mem_cgroup() does not walk all the way up to the
* cgroup root (root_mem_cgroup). So we have to handle
* dead_memcg from cgroup root separately.
@@ -3969,6 +3969,8 @@ static const unsigned int memcg1_stats[] = {
NR_FILE_MAPPED,
NR_FILE_DIRTY,
NR_WRITEBACK,
WORKINGSET_REFAULT_ANON,
WORKINGSET_REFAULT_FILE,
MEMCG_SWAP,
};
@@ -3982,6 +3984,8 @@ static const char *const memcg1_stat_names[] = {
"mapped_file",
"dirty",
"writeback",
"workingset_refault_anon",
"workingset_refault_file",
"swap",
};
@@ -4010,7 +4014,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
nr = memcg_page_state_local(memcg, memcg1_stats[i]);
seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE);
seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
nr * memcg_page_state_unit(memcg1_stats[i]));
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
@@ -4041,7 +4046,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
continue;
nr = memcg_page_state(memcg, memcg1_stats[i]);
seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
(u64)nr * PAGE_SIZE);
(u64)nr * memcg_page_state_unit(memcg1_stats[i]));
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)

View File

@@ -413,7 +413,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
{
struct to_kill *tk, *next;
list_for_each_entry_safe (tk, next, to_kill, nd) {
list_for_each_entry_safe(tk, next, to_kill, nd) {
if (forcekill) {
/*
* In case something went wrong with munmapping
@@ -437,6 +437,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
pr_err("%#lx: Cannot send advisory machine check signal to %s:%d\n",
pfn, tk->tsk->comm, tk->tsk->pid);
}
list_del(&tk->nd);
put_task_struct(tk->tsk);
kfree(tk);
}
@@ -1401,7 +1402,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
struct address_space *mapping;
LIST_HEAD(tokill);
bool unmap_success;
int kill = 1, forcekill;
int forcekill;
bool mlocked = PageMlocked(hpage);
/*
@@ -1442,7 +1443,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
if (page_mkclean(hpage)) {
SetPageDirty(hpage);
} else {
kill = 0;
ttu |= TTU_IGNORE_HWPOISON;
pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
pfn);
@@ -1453,12 +1453,8 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* First collect all the processes that have the page
* mapped in dirty form. This has to be done before try_to_unmap,
* because ttu takes the rmap data structures down.
*
* Error handling: We ignore errors here because
* there's nothing that can be done.
*/
if (kill)
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
if (PageHuge(hpage) && !PageAnon(hpage)) {
/*
@@ -1500,7 +1496,8 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* use a more force-full uncatchable kill to prevent
* any accesses to the poisoned memory.
*/
forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL) ||
!unmap_success;
kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
return unmap_success;
@@ -1865,8 +1862,10 @@ retry:
if (hwpoison_filter(p)) {
hugetlb_clear_page_hwpoison(head);
res = -EOPNOTSUPP;
goto out;
unlock_page(head);
if (res == 1)
put_page(head);
return -EOPNOTSUPP;
}
/*
@@ -2357,7 +2356,7 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
if (PageSlab(page) || PageTable(page))
if (PageSlab(page) || PageTable(page) || PageReserved(page))
goto unlock_mutex;
ret = get_hwpoison_page(p, MF_UNPOISON);
@@ -2381,13 +2380,14 @@ int unpoison_memory(unsigned long pfn)
count = free_raw_hwp_pages(page, false);
if (count == 0) {
ret = -EBUSY;
put_page(page);
goto unlock_mutex;
}
}
freeit = !!TestClearPageHWPoison(p);
put_page(page);
if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
if (freeit) {
put_page(page);
ret = 0;
}
@@ -2437,11 +2437,11 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
}
/*
* __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
* soft_offline_in_use_page handles hugetlb-pages and non-hugetlb pages.
* If the page is a non-dirty unmapped page-cache page, it simply invalidates.
* If the page is mapped, it migrates the contents over.
*/
static int __soft_offline_page(struct page *page)
static int soft_offline_in_use_page(struct page *page)
{
long ret = 0;
unsigned long pfn = page_to_pfn(page);
@@ -2454,6 +2454,14 @@ static int __soft_offline_page(struct page *page)
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
if (!huge && PageTransHuge(hpage)) {
if (try_to_split_thp_page(page)) {
pr_info("soft offline: %#lx: thp split failed\n", pfn);
return -EBUSY;
}
hpage = page;
}
lock_page(page);
if (!PageHuge(page))
wait_on_page_writeback(page);
@@ -2503,29 +2511,6 @@ static int __soft_offline_page(struct page *page)
return ret;
}
static int soft_offline_in_use_page(struct page *page)
{
struct page *hpage = compound_head(page);
if (!PageHuge(page) && PageTransHuge(hpage))
if (try_to_split_thp_page(page) < 0) {
pr_info("soft offline: %#lx: thp split failed\n",
page_to_pfn(page));
return -EBUSY;
}
return __soft_offline_page(page);
}
static int soft_offline_free_page(struct page *page)
{
int rc = 0;
if (!page_handle_poison(page, true, false))
rc = -EBUSY;
return rc;
}
static void put_ref_page(struct page *page)
{
if (page)
@@ -2593,8 +2578,6 @@ retry:
if (hwpoison_filter(page)) {
if (ret > 0)
put_page(page);
else
put_ref_page(ref_page);
mutex_unlock(&mf_mutex);
return -EOPNOTSUPP;
@@ -2603,7 +2586,7 @@ retry:
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
if (soft_offline_free_page(page) && try_again) {
if (!page_handle_poison(page, true, false) && try_again) {
try_again = false;
flags &= ~MF_COUNT_INCREASED;
goto retry;

View File

@@ -7957,17 +7957,6 @@ unsigned long __init node_map_pfn_alignment(void)
return ~accl_mask + 1;
}
/**
* find_min_pfn_with_active_regions - Find the minimum PFN registered
*
* Return: the minimum PFN based on information provided via
* memblock_set_node().
*/
unsigned long __init find_min_pfn_with_active_regions(void)
{
return PHYS_PFN(memblock_start_of_DRAM());
}
/*
* early_calculate_totalpages()
* Sum pages in active regions for movable zone.
@@ -8260,7 +8249,7 @@ void __init free_area_init(unsigned long *max_zone_pfn)
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
start_pfn = find_min_pfn_with_active_regions();
start_pfn = PHYS_PFN(memblock_start_of_DRAM());
descending = arch_has_descending_max_zone_pfns();
for (i = 0; i < MAX_NR_ZONES; i++) {

View File

@@ -193,7 +193,7 @@ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
old = xchg(&counter->max, nr_pages);
if (page_counter_read(counter) <= usage)
if (page_counter_read(counter) <= usage || nr_pages >= old)
return 0;
counter->max = old;

View File

@@ -9,6 +9,7 @@
#include <linux/page_owner.h>
#include <linux/page_idle.h>
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
/*
* struct page extension
@@ -59,6 +60,10 @@
* can utilize this callback to initialize the state of it correctly.
*/
#ifdef CONFIG_SPARSEMEM
#define PAGE_EXT_INVALID (0x1)
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
static bool need_page_idle(void)
{
@@ -84,6 +89,7 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
unsigned long page_ext_size = sizeof(struct page_ext);
static unsigned long total_usage;
static struct page_ext *lookup_page_ext(const struct page *page);
static bool __init invoke_need_callbacks(void)
{
@@ -125,6 +131,48 @@ static inline struct page_ext *get_entry(void *base, unsigned long index)
return base + page_ext_size * index;
}
/**
 * page_ext_get() - Look up the extended information for a page.
 * @page: The page whose page_ext is wanted.
 *
 * When a page_ext is found, the RCU read lock taken here is left held so
 * that the returned page_ext remains valid until page_ext_put() is called.
 *
 * Return: The page_ext of @page, or NULL if no page_ext exists for it (the
 * RCU read lock has already been dropped in that case).
 * Context: Any context.  Caller may not sleep until they have called
 * page_ext_put().
 */
struct page_ext *page_ext_get(struct page *page)
{
	struct page_ext *ext;

	rcu_read_lock();
	ext = lookup_page_ext(page);
	if (ext)
		return ext;	/* RCU read lock stays held for the caller */

	rcu_read_unlock();
	return NULL;
}
/**
 * page_ext_put() - Working with page extended information is done.
 * @page_ext: Page extended information received from page_ext_get().
 *
 * The page extended information of the page may not be valid after this
 * function is called.
 *
 * Return: None.
 * Context: Any context in which the corresponding page_ext_get() was called.
 */
void page_ext_put(struct page_ext *page_ext)
{
/* page_ext_get() already dropped the RCU read lock when it returned NULL. */
if (unlikely(!page_ext))
return;
/* Pairs with the rcu_read_lock() taken in page_ext_get(). */
rcu_read_unlock();
}
#ifndef CONFIG_SPARSEMEM
@@ -133,12 +181,13 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
pgdat->node_page_ext = NULL;
}
struct page_ext *lookup_page_ext(const struct page *page)
static struct page_ext *lookup_page_ext(const struct page *page)
{
unsigned long pfn = page_to_pfn(page);
unsigned long index;
struct page_ext *base;
WARN_ON_ONCE(!rcu_read_lock_held());
base = NODE_DATA(page_to_nid(page))->node_page_ext;
/*
* The sanity checks the page allocator does upon freeing a
@@ -206,20 +255,27 @@ fail:
}
#else /* CONFIG_SPARSEMEM */
/*
 * Tell whether a section's page_ext pointer must not be used: it is either
 * NULL or carries the PAGE_EXT_INVALID marker bit.
 */
static bool page_ext_invalid(struct page_ext *page_ext)
{
	unsigned long raw;

	if (!page_ext)
		return true;

	raw = (unsigned long)page_ext;
	return (raw & PAGE_EXT_INVALID) == PAGE_EXT_INVALID;
}
struct page_ext *lookup_page_ext(const struct page *page)
static struct page_ext *lookup_page_ext(const struct page *page)
{
unsigned long pfn = page_to_pfn(page);
struct mem_section *section = __pfn_to_section(pfn);
struct page_ext *page_ext = READ_ONCE(section->page_ext);
WARN_ON_ONCE(!rcu_read_lock_held());
/*
* The sanity checks the page allocator does upon freeing a
* page can reach here before the page_ext arrays are
* allocated when feeding a range of pages to the allocator
* for the first time during bootup or memory hotplug.
*/
if (!section->page_ext)
if (page_ext_invalid(page_ext))
return NULL;
return get_entry(section->page_ext, pfn);
return get_entry(page_ext, pfn);
}
static void *__meminit alloc_page_ext(size_t size, int nid)
@@ -298,9 +354,30 @@ static void __free_page_ext(unsigned long pfn)
ms = __pfn_to_section(pfn);
if (!ms || !ms->page_ext)
return;
base = get_entry(ms->page_ext, pfn);
base = READ_ONCE(ms->page_ext);
/*
* page_ext here can be valid while doing the roll back
* operation in online_page_ext().
*/
if (page_ext_invalid(base))
base = (void *)base - PAGE_EXT_INVALID;
WRITE_ONCE(ms->page_ext, NULL);
base = get_entry(base, pfn);
free_page_ext(base);
ms->page_ext = NULL;
}
/*
 * Mark the page_ext of the memory section containing @pfn as invalid by
 * adding PAGE_EXT_INVALID to the stored pointer value.  Sections without a
 * page_ext are left untouched.
 */
static void __invalidate_page_ext(unsigned long pfn)
{
	struct mem_section *section = __pfn_to_section(pfn);
	void *tagged;

	if (section && section->page_ext) {
		tagged = (void *)section->page_ext + PAGE_EXT_INVALID;
		WRITE_ONCE(section->page_ext, tagged);
	}
}
static int __meminit online_page_ext(unsigned long start_pfn,
@@ -343,6 +420,20 @@ static int __meminit offline_page_ext(unsigned long start_pfn,
start = SECTION_ALIGN_DOWN(start_pfn);
end = SECTION_ALIGN_UP(start_pfn + nr_pages);
/*
* Freeing of page_ext is done in 3 steps to avoid
* use-after-free of it:
* 1) Traverse all the sections and mark their page_ext
* as invalid.
* 2) Wait for all the existing users of page_ext who
* started before invalidation to finish.
* 3) Free the page_ext.
*/
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
__invalidate_page_ext(pfn);
synchronize_rcu();
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
__free_page_ext(pfn);
return 0;

View File

@@ -141,7 +141,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
struct page_owner *page_owner;
u64 free_ts_nsec = local_clock();
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
@@ -153,6 +153,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
page_owner->free_ts_nsec = free_ts_nsec;
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
}
static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -183,19 +184,21 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
struct page_ext *page_ext = lookup_page_ext(page);
struct page_ext *page_ext;
depot_stack_handle_t handle;
handle = save_stack(gfp_mask);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
handle = save_stack(gfp_mask);
__set_page_owner_handle(page_ext, handle, order, gfp_mask);
page_ext_put(page_ext);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
{
struct page_ext *page_ext = lookup_page_ext(page);
struct page_ext *page_ext = page_ext_get(page);
struct page_owner *page_owner;
if (unlikely(!page_ext))
@@ -203,12 +206,13 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
page_owner = get_page_owner(page_ext);
page_owner->last_migrate_reason = reason;
page_ext_put(page_ext);
}
void __split_page_owner(struct page *page, unsigned int nr)
{
int i;
struct page_ext *page_ext = lookup_page_ext(page);
struct page_ext *page_ext = page_ext_get(page);
struct page_owner *page_owner;
if (unlikely(!page_ext))
@@ -219,17 +223,25 @@ void __split_page_owner(struct page *page, unsigned int nr)
page_owner->order = 0;
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
}
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
struct page_ext *old_ext = lookup_page_ext(&old->page);
struct page_ext *new_ext = lookup_page_ext(&newfolio->page);
struct page_ext *old_ext;
struct page_ext *new_ext;
struct page_owner *old_page_owner, *new_page_owner;
if (unlikely(!old_ext || !new_ext))
old_ext = page_ext_get(&old->page);
if (unlikely(!old_ext))
return;
new_ext = page_ext_get(&newfolio->page);
if (unlikely(!new_ext)) {
page_ext_put(old_ext);
return;
}
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
new_page_owner->order = old_page_owner->order;
@@ -254,6 +266,8 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
page_ext_put(new_ext);
page_ext_put(old_ext);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -307,12 +321,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (PageReserved(page))
continue;
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
goto ext_put_continue;
page_owner = get_page_owner(page_ext);
page_mt = gfp_migratetype(page_owner->gfp_mask);
@@ -323,9 +337,12 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
count[pageblock_mt]++;
pfn = block_end_pfn;
page_ext_put(page_ext);
break;
}
pfn += (1UL << page_owner->order) - 1;
ext_put_continue:
page_ext_put(page_ext);
}
}
@@ -435,7 +452,7 @@ err:
void __dump_page_owner(const struct page *page)
{
struct page_ext *page_ext = lookup_page_ext(page);
struct page_ext *page_ext = page_ext_get((void *)page);
struct page_owner *page_owner;
depot_stack_handle_t handle;
gfp_t gfp_mask;
@@ -452,6 +469,7 @@ void __dump_page_owner(const struct page *page)
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
pr_alert("page_owner info is not present (never set?)\n");
page_ext_put(page_ext);
return;
}
@@ -482,6 +500,7 @@ void __dump_page_owner(const struct page *page)
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
migrate_reason_names[page_owner->last_migrate_reason]);
page_ext_put(page_ext);
}
static ssize_t
@@ -497,8 +516,10 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
return -EINVAL;
page = NULL;
pfn = min_low_pfn + *ppos;
if (*ppos == 0)
pfn = min_low_pfn;
else
pfn = *ppos;
/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
pfn++;
@@ -507,6 +528,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
/* Find an allocated page */
for (; pfn < max_pfn; pfn++) {
/*
* This temporary page_owner is required so
* that we can avoid the context switches while holding
* the rcu lock and copying the page owner information to
* user through copy_to_user() or GFP_KERNEL allocations.
*/
struct page_owner page_owner_tmp;
/*
* If the new page is in a new MAX_ORDER_NR_PAGES area,
* validate the area as existing, skip it if not
@@ -525,7 +554,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
continue;
}
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
@@ -534,14 +563,14 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
* because we don't hold the zone lock.
*/
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
continue;
goto ext_put_continue;
/*
* Although we do have the info about past allocation of free
* pages, it's not relevant for current memory usage.
*/
if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
goto ext_put_continue;
page_owner = get_page_owner(page_ext);
@@ -550,7 +579,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
* would inflate the stats.
*/
if (!IS_ALIGNED(pfn, 1 << page_owner->order))
continue;
goto ext_put_continue;
/*
* Access to page_ext->handle isn't synchronous so we should
@@ -558,18 +587,37 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
*/
handle = READ_ONCE(page_owner->handle);
if (!handle)
continue;
goto ext_put_continue;
/* Record the next PFN to read in the file offset */
*ppos = (pfn - min_low_pfn) + 1;
*ppos = pfn + 1;
page_owner_tmp = *page_owner;
page_ext_put(page_ext);
return print_page_owner(buf, count, pfn, page,
page_owner, handle);
&page_owner_tmp, handle);
ext_put_continue:
page_ext_put(page_ext);
}
return 0;
}
/*
 * lseek_page_owner - reposition the file offset of the page_owner file
 * @file:   open file whose f_pos is updated
 * @offset: absolute position (SEEK_SET) or delta from the current
 *          position (SEEK_CUR)
 * @orig:   seek origin; only SEEK_SET and SEEK_CUR are supported
 *
 * Returns the resulting file position on success. Returns -EINVAL for an
 * unsupported origin or when the resulting position would be negative; in
 * both cases f_pos is left unchanged.
 */
static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig)
{
	loff_t pos;

	switch (orig) {
	case SEEK_SET:
		pos = offset;
		break;
	case SEEK_CUR:
		pos = file->f_pos + offset;
		break;
	default:
		return -EINVAL;
	}

	/*
	 * Negative return values are error codes, so a negative position
	 * can neither be reported back nor meaningfully stored. Reject it
	 * before touching f_pos so a failed seek has no side effect.
	 */
	if (pos < 0)
		return -EINVAL;

	file->f_pos = pos;
	return pos;
}
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
unsigned long pfn = zone->zone_start_pfn;
@@ -617,18 +665,20 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
if (PageReserved(page))
continue;
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
continue;
/* Maybe overlapping zone */
if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
continue;
goto ext_put_continue;
/* Found early allocated page */
__set_page_owner_handle(page_ext, early_handle,
0, 0);
count++;
ext_put_continue:
page_ext_put(page_ext);
}
cond_resched();
}
@@ -660,6 +710,7 @@ static void init_early_allocated_pages(void)
/*
 * File operations for the page_owner file: reads are served by
 * read_page_owner() and seeking by lseek_page_owner().
 */
static const struct file_operations proc_page_owner_operations = {
.read = read_page_owner,
.llseek = lseek_page_owner,
};
static int __init pageowner_init(void)

View File

@@ -68,7 +68,7 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
return;
page = pfn_to_page(pfn);
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
anon = PageAnon(page);
for (i = 0; i < pgcnt; i++) {
@@ -83,6 +83,7 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
}
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
}
/*
@@ -103,7 +104,7 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
return;
page = pfn_to_page(pfn);
page_ext = lookup_page_ext(page);
page_ext = page_ext_get(page);
anon = PageAnon(page);
for (i = 0; i < pgcnt; i++) {
@@ -118,6 +119,7 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
}
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
}
/*
@@ -126,9 +128,10 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
*/
void __page_table_check_zero(struct page *page, unsigned int order)
{
struct page_ext *page_ext = lookup_page_ext(page);
struct page_ext *page_ext;
unsigned long i;
page_ext = page_ext_get(page);
BUG_ON(!page_ext);
for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);
@@ -137,6 +140,7 @@ void __page_table_check_zero(struct page *page, unsigned int order)
BUG_ON(atomic_read(&ptc->file_map_count));
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
}
void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,

View File

@@ -482,7 +482,15 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
return err;
}
/*
/**
* walk_page_range_novma - walk a range of pagetables not backed by a vma
* @mm: mm_struct representing the target process of page table walk
* @start: start address of the virtual address range
* @end: end address of the virtual address range
* @ops: operation to call during the walk
* @pgd: pgd to walk if different from mm->pgd
* @private: private data for callbacks' usage
*
* Similar to walk_page_range() but can walk any page tables even if they are
* not backed by VMAs. Because 'unusual' entries may be walked this function
* will also not lock the PTEs for the pte_entry() callback. This is useful for

View File

@@ -590,7 +590,7 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
int err;
err = vmap_range_noflush(addr, addr + (1UL << page_shift),
__pa(page_address(pages[i])), prot,
page_to_phys(pages[i]), prot,
page_shift);
if (err)
return err;