mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 02:50:49 +09:00
BACKPORT: mm/page_alloc: add page->buddy_list and page->pcp_list
Patch series "Drain remote per-cpu directly", v5.
Some setups, notably NOHZ_FULL CPUs, may be running realtime or
latency-sensitive applications that cannot tolerate interference due to
per-cpu drain work queued by __drain_all_pages(). Introduce a new
mechanism to remotely drain the per-cpu lists. It is made possible by
remotely locking the new per-cpu spinlocks in 'struct per_cpu_pages'. This has
two advantages: the time to drain is more predictable and other unrelated
tasks are not interrupted.
This series has the same intent as Nicolas' series "mm/page_alloc: Remote
per-cpu lists drain support" -- avoid interference of a high priority task
due to a workqueue item draining per-cpu page lists. While many workloads
can tolerate a brief interruption, it may cause a real-time task running
on a NOHZ_FULL CPU to miss a deadline and at minimum, the draining is
non-deterministic.
Currently an IRQ-safe local_lock protects the page allocator per-cpu
lists. The local_lock on its own prevents migration and the IRQ disabling
protects from corruption due to an interrupt arriving while a page
allocation is in progress.
This series adjusts the locking. A spinlock is added to struct
per_cpu_pages to protect the list contents while local_lock_irq is
ultimately replaced by just the spinlock in the final patch. This allows
a remote CPU to safely drain another CPU's per-cpu lists. Follow-on work should allow the spin_lock_irqsave
to be converted to spin_lock to avoid IRQs being disabled/enabled in most
cases. The follow-on patch will be one kernel release later as it is
relatively high risk and it'll make bisections more clear if there are any
problems.
Patch 1 is a cosmetic patch to clarify when page->lru is storing buddy pages
and when it is storing per-cpu pages.
Patch 2 shrinks per_cpu_pages to make room for a spin lock. Strictly speaking
this is not necessary but it avoids per_cpu_pages consuming another
cache line.
Patch 3 is a preparation patch to avoid code duplication.
Patch 4 is a minor correction.
Patch 5 uses a spin_lock to protect the per_cpu_pages contents while still
relying on local_lock to prevent migration, stabilise the pcp
lookup and prevent IRQ reentrancy.
Patch 6 remotely drains per-cpu pages directly instead of using a workqueue.
Patch 7 uses a normal spinlock instead of local_lock for remote draining.
This patch (of 7):
The page allocator uses page->lru for storing pages on either buddy or PCP
lists. Create page->buddy_list and page->pcp_list as a union with
page->lru. This is simply to clarify what type of list a page is on in
the page allocator.
No functional change intended.
[minchan@kernel.org: fix page lru fields in macros]
Link: https://lkml.kernel.org/r/20220624125423.6126-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Minchan Kim <minchan@kernel.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 230899966
(cherry picked from commit bf75f20056)
[surenb: fixed trivial merge conflicts]
Change-Id: Ieef253fa28c2a411008da64b38716f6401a66961
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
This commit is contained in:
committed by
Carlos Llamas
parent
e4d3bd98c3
commit
c970afd454
@@ -80,12 +80,18 @@ struct page {
|
||||
*/
|
||||
union {
|
||||
struct { /* Page cache and anonymous pages */
|
||||
/**
|
||||
* @lru: Pageout list, eg. active_list protected by
|
||||
* lruvec->lru_lock. Sometimes used as a generic list
|
||||
* by the page owner.
|
||||
*/
|
||||
struct list_head lru;
|
||||
union {
|
||||
/**
|
||||
* @lru: Pageout list, eg. active_list protected by
|
||||
* lruvec->lru_lock. Sometimes used as a generic list
|
||||
* by the page owner.
|
||||
*/
|
||||
struct list_head lru;
|
||||
|
||||
/* Or, free page */
|
||||
struct list_head buddy_list;
|
||||
struct list_head pcp_list;
|
||||
};
|
||||
/* See page-flags.h for PAGE_MAPPING_FLAGS */
|
||||
struct address_space *mapping;
|
||||
pgoff_t index; /* Our offset within mapping. */
|
||||
|
||||
@@ -794,7 +794,7 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
|
||||
return false;
|
||||
|
||||
__SetPageGuard(page);
|
||||
INIT_LIST_HEAD(&page->lru);
|
||||
INIT_LIST_HEAD(&page->buddy_list);
|
||||
set_page_private(page, order);
|
||||
/* Guard pages are not available for any usage */
|
||||
__mod_zone_freepage_state(zone, -(1 << order), migratetype);
|
||||
@@ -971,7 +971,7 @@ static inline void add_to_free_list(struct page *page, struct zone *zone,
|
||||
{
|
||||
struct free_area *area = &zone->free_area[order];
|
||||
|
||||
list_add(&page->lru, &area->free_list[migratetype]);
|
||||
list_add(&page->buddy_list, &area->free_list[migratetype]);
|
||||
area->nr_free++;
|
||||
}
|
||||
|
||||
@@ -981,7 +981,7 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
|
||||
{
|
||||
struct free_area *area = &zone->free_area[order];
|
||||
|
||||
list_add_tail(&page->lru, &area->free_list[migratetype]);
|
||||
list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
|
||||
area->nr_free++;
|
||||
}
|
||||
|
||||
@@ -995,7 +995,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
|
||||
{
|
||||
struct free_area *area = &zone->free_area[order];
|
||||
|
||||
list_move_tail(&page->lru, &area->free_list[migratetype]);
|
||||
list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
|
||||
}
|
||||
|
||||
static inline void del_page_from_free_list(struct page *page, struct zone *zone,
|
||||
@@ -1005,7 +1005,7 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
|
||||
if (page_reported(page))
|
||||
__ClearPageReported(page);
|
||||
|
||||
list_del(&page->lru);
|
||||
list_del(&page->buddy_list);
|
||||
__ClearPageBuddy(page);
|
||||
set_page_private(page, 0);
|
||||
zone->free_area[order].nr_free--;
|
||||
@@ -1508,9 +1508,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
order = pindex_to_order(pindex);
|
||||
BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
|
||||
do {
|
||||
page = list_last_entry(list, struct page, lru);
|
||||
page = list_last_entry(list, struct page, pcp_list);
|
||||
/* must delete to avoid corrupting pcp list */
|
||||
list_del(&page->lru);
|
||||
list_del(&page->pcp_list);
|
||||
nr_freed += 1 << order;
|
||||
count -= 1 << order;
|
||||
|
||||
@@ -3150,7 +3150,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
|
||||
* for IO devices that can merge IO requests if the physical
|
||||
* pages are ordered properly.
|
||||
*/
|
||||
list_add_tail(&page->lru, list);
|
||||
list_add_tail(&page->pcp_list, list);
|
||||
allocated++;
|
||||
if (is_migrate_cma(get_pcppage_migratetype(page)))
|
||||
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
|
||||
@@ -3433,7 +3433,7 @@ void mark_free_pages(struct zone *zone)
|
||||
|
||||
for_each_migratetype_order(order, t) {
|
||||
list_for_each_entry(page,
|
||||
&zone->free_area[order].free_list[t], lru) {
|
||||
&zone->free_area[order].free_list[t], buddy_list) {
|
||||
unsigned long i;
|
||||
|
||||
pfn = page_to_pfn(page);
|
||||
@@ -3515,7 +3515,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
|
||||
__count_vm_event(PGFREE);
|
||||
pcp = this_cpu_ptr(zone->per_cpu_pageset);
|
||||
pindex = order_to_pindex(migratetype, order);
|
||||
list_add(&page->lru, &pcp->lists[pindex]);
|
||||
list_add(&page->pcp_list, &pcp->lists[pindex]);
|
||||
pcp->count += 1 << order;
|
||||
high = nr_pcp_high(pcp, zone);
|
||||
if (pcp->count >= high) {
|
||||
@@ -3774,8 +3774,8 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
page = list_first_entry(list, struct page, lru);
|
||||
list_del(&page->lru);
|
||||
page = list_first_entry(list, struct page, pcp_list);
|
||||
list_del(&page->pcp_list);
|
||||
pcp->count -= 1 << order;
|
||||
} while (check_new_pcp(page));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user