From c970afd4549e679bce32bae57381a2f3c2c06369 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 24 Jun 2022 13:54:17 +0100 Subject: [PATCH] BACKPORT: mm/page_alloc: add page->buddy_list and page->pcp_list Patch series "Drain remote per-cpu directly", v5. Some setups, notably NOHZ_FULL CPUs, may be running realtime or latency-sensitive applications that cannot tolerate interference due to per-cpu drain work queued by __drain_all_pages(). Introduce a new mechanism to remotely drain the per-cpu lists. It is made possible by remotely locking 'struct per_cpu_pages' new per-cpu spinlocks. This has two advantages, the time to drain is more predictable and other unrelated tasks are not interrupted. This series has the same intent as Nicolas' series "mm/page_alloc: Remote per-cpu lists drain support" -- avoid interference of a high priority task due to a workqueue item draining per-cpu page lists. While many workloads can tolerate a brief interruption, it may cause a real-time task running on a NOHZ_FULL CPU to miss a deadline and at minimum, the draining is non-deterministic. Currently an IRQ-safe local_lock protects the page allocator per-cpu lists. The local_lock on its own prevents migration and the IRQ disabling protects from corruption due to an interrupt arriving while a page allocation is in progress. This series adjusts the locking. A spinlock is added to struct per_cpu_pages to protect the list contents while local_lock_irq is ultimately replaced by just the spinlock in the final patch. This allows a remote CPU to safely. Follow-on work should allow the spin_lock_irqsave to be converted to spin_lock to avoid IRQs being disabled/enabled in most cases. The follow-on patch will be one kernel release later as it is relatively high risk and it'll make bisections more clear if there are any problems. Patch 1 is a cosmetic patch to clarify when page->lru is storing buddy pages and when it is storing per-cpu pages. Patch 2 shrinks per_cpu_pages to make room for a spin lock. Strictly speaking this is not necessary but it avoids per_cpu_pages consuming another cache line. Patch 3 is a preparation patch to avoid code duplication. Patch 4 is a minor correction. Patch 5 uses a spin_lock to protect the per_cpu_pages contents while still relying on local_lock to prevent migration, stabilise the pcp lookup and prevent IRQ reentrancy. Patch 6 remote drains per-cpu pages directly instead of using a workqueue. Patch 7 uses a normal spinlock instead of local_lock for remote draining This patch (of 7): The page allocator uses page->lru for storing pages on either buddy or PCP lists. Create page->buddy_list and page->pcp_list as a union with page->lru. This is simply to clarify what type of list a page is on in the page allocator. No functional change intended. [minchan@kernel.org: fix page lru fields in macros] Link: https://lkml.kernel.org/r/20220624125423.6126-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Tested-by: Minchan Kim Acked-by: Minchan Kim Reviewed-by: Nicolas Saenz Julienne Acked-by: Vlastimil Babka Tested-by: Yu Zhao Cc: Marcelo Tosatti Cc: Michal Hocko Cc: Hugh Dickins Cc: Marek Szyprowski Signed-off-by: Andrew Morton Bug: 230899966 (cherry picked from commit bf75f200569dd05ac2112797f44548beb6b4be26) [surenb: fixed trivial merge conflicts] Change-Id: Ieef253fa28c2a411008da64b38716f6401a66961 Signed-off-by: Suren Baghdasaryan --- include/linux/mm_types.h | 18 ++++++++++++------ mm/page_alloc.c | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d290cba826fc..42786e6364ef 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -80,12 +80,18 @@ struct page { */ union { struct { /* Page cache and anonymous pages */ - /** - * @lru: Pageout list, eg. active_list protected by - * lruvec->lru_lock. Sometimes used as a generic list - * by the page owner. - */ - struct list_head lru; + union { + /** + * @lru: Pageout list, eg. active_list protected by + * lruvec->lru_lock. Sometimes used as a generic list + * by the page owner. + */ + struct list_head lru; + + /* Or, free page */ + struct list_head buddy_list; + struct list_head pcp_list; + }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; pgoff_t index; /* Our offset within mapping. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7c73d54f5c66..73143b079418 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -794,7 +794,7 @@ static inline bool set_page_guard(struct zone *zone, struct page *page, return false; __SetPageGuard(page); - INIT_LIST_HEAD(&page->lru); + INIT_LIST_HEAD(&page->buddy_list); set_page_private(page, order); /* Guard pages are not available for any usage */ __mod_zone_freepage_state(zone, -(1 << order), migratetype); @@ -971,7 +971,7 @@ static inline void add_to_free_list(struct page *page, struct zone *zone, { struct free_area *area = &zone->free_area[order]; - list_add(&page->lru, &area->free_list[migratetype]); + list_add(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; } @@ -981,7 +981,7 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone, { struct free_area *area = &zone->free_area[order]; - list_add_tail(&page->lru, &area->free_list[migratetype]); + list_add_tail(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; } @@ -995,7 +995,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone, { struct free_area *area = &zone->free_area[order]; - list_move_tail(&page->lru, &area->free_list[migratetype]); + list_move_tail(&page->buddy_list, &area->free_list[migratetype]); } static inline void del_page_from_free_list(struct page *page, struct zone *zone, @@ -1005,7 +1005,7 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, if (page_reported(page)) __ClearPageReported(page); - list_del(&page->lru); + list_del(&page->buddy_list); __ClearPageBuddy(page); set_page_private(page, 0); zone->free_area[order].nr_free--; @@ -1508,9 +1508,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, order = pindex_to_order(pindex); BUILD_BUG_ON(MAX_ORDER >= (1<lru); + list_del(&page->pcp_list); nr_freed += 1 << order; count -= 1 << order; @@ -3150,7 +3150,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * for IO devices that can merge IO requests if the physical * pages are ordered properly. */ - list_add_tail(&page->lru, list); + list_add_tail(&page->pcp_list, list); allocated++; if (is_migrate_cma(get_pcppage_migratetype(page))) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, @@ -3433,7 +3433,7 @@ void mark_free_pages(struct zone *zone) for_each_migratetype_order(order, t) { list_for_each_entry(page, - &zone->free_area[order].free_list[t], lru) { + &zone->free_area[order].free_list[t], buddy_list) { unsigned long i; pfn = page_to_pfn(page); @@ -3515,7 +3515,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn, __count_vm_event(PGFREE); pcp = this_cpu_ptr(zone->per_cpu_pageset); pindex = order_to_pindex(migratetype, order); - list_add(&page->lru, &pcp->lists[pindex]); + list_add(&page->pcp_list, &pcp->lists[pindex]); pcp->count += 1 << order; high = nr_pcp_high(pcp, zone); if (pcp->count >= high) { @@ -3774,8 +3774,8 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order, return NULL; } - page = list_first_entry(list, struct page, lru); - list_del(&page->lru); + page = list_first_entry(list, struct page, pcp_list); + list_del(&page->pcp_list); pcp->count -= 1 << order; } while (check_new_pcp(page));