From 71c7092b68fbeacad40d777cebd705621924e8a9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 12 Apr 2023 16:03:03 -0700 Subject: [PATCH] ANDROID: Revert "mm: remove cleancache" This reverts commit 0a4ee518185e902758191d968600399f3bc2be31. Conflicts: Documentation/mm/cleancache.rst Documentation/vm/index.rst arch/arm/configs/bcm2835_defconfig arch/arm/configs/qcom_defconfig arch/m68k/configs/amiga_defconfig arch/m68k/configs/apollo_defconfig arch/m68k/configs/atari_defconfig arch/m68k/configs/bvme6000_defconfig arch/m68k/configs/hp300_defconfig arch/m68k/configs/mac_defconfig arch/m68k/configs/multi_defconfig arch/m68k/configs/mvme147_defconfig arch/m68k/configs/mvme16x_defconfig arch/m68k/configs/q40_defconfig arch/m68k/configs/sun3_defconfig arch/m68k/configs/sun3x_defconfig arch/s390/configs/debug_defconfig arch/s390/configs/defconfig fs/f2fs/data.c fs/mpage.c 1. Skip documentation which was refactored. 2. Skip defconfigs unused in Android. 3. Replaced deprecated __submit_bio() with f2fs_submit_read_bio() 4. Replaced PageUptodate() with folio_test_uptodate() 5. Replaced SetPageUptodate() with folio_mark_uptodate() 6. 
Changed cleancache_get_page() call to use folio->page Bug: 271544708 Change-Id: I93359509f7799de72f31b002a2539565d1bda9d6 Signed-off-by: Suren Baghdasaryan --- Documentation/mm/frontswap.rst | 12 +- MAINTAINERS | 7 + block/bdev.c | 5 + fs/btrfs/extent_io.c | 10 ++ fs/btrfs/super.c | 2 + fs/ext4/readpage.c | 6 + fs/ext4/super.c | 3 + fs/f2fs/data.c | 13 ++ fs/mpage.c | 7 + fs/ntfs3/ntfs_fs.h | 1 + fs/ocfs2/super.c | 2 + fs/super.c | 3 + include/linux/cleancache.h | 124 +++++++++++++ include/linux/fs.h | 5 + mm/Kconfig | 22 +++ mm/Makefile | 1 + mm/cleancache.c | 315 +++++++++++++++++++++++++++++++++ mm/filemap.c | 11 ++ mm/truncate.c | 15 +- 19 files changed, 560 insertions(+), 4 deletions(-) create mode 100644 include/linux/cleancache.h create mode 100644 mm/cleancache.c diff --git a/Documentation/mm/frontswap.rst b/Documentation/mm/frontswap.rst index feecc5e24477..613716d359fb 100644 --- a/Documentation/mm/frontswap.rst +++ b/Documentation/mm/frontswap.rst @@ -8,6 +8,12 @@ Frontswap provides a "transcendent memory" interface for swap pages. In some environments, dramatic performance savings may be obtained because swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. +(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends" +and the only necessary changes to the core kernel for transcendent memory; +all other supporting code -- the "backends" -- is implemented as drivers. +See the LWN.net article `Transcendent memory in a nutshell`_ +for a detailed overview of frontswap and related kernel parts) + .. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/ Frontswap is so named because it can be thought of as the opposite of @@ -75,9 +81,11 @@ This interface is ideal when data is transformed to a different form and size (such as with compression) or secretly moved (as might be useful for write-balancing for some RAM-like devices). 
Swap pages (and evicted page-cache pages) are a great use for this kind of slower-than-RAM- -but-much-faster-than-disk "pseudo-RAM device". +but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and +cleancache) interface to transcendent memory provides a nice way to read +and write -- and indirectly "name" -- the pages. -Frontswap with a fairly small impact on the kernel, +Frontswap -- and cleancache -- with a fairly small impact on the kernel, provides a huge amount of flexibility for more dynamic, flexible RAM utilization in various system configurations: diff --git a/MAINTAINERS b/MAINTAINERS index d6556c059a4d..2e0a79e6f4ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5083,6 +5083,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: include/linux/cfi.h F: kernel/cfi.c +CLEANCACHE API +M: Konrad Rzeszutek Wilk +L: linux-kernel@vger.kernel.org +S: Maintained +F: include/linux/cleancache.h +F: mm/cleancache.c + CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/block/bdev.c b/block/bdev.c index d699ecdb3260..87d66d16b666 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,10 @@ void invalidate_bdev(struct block_device *bdev) lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); } + /* 99% of the time, we don't need to flush the cleancache on the bdev. 
+ * But, for the strange corners, let's be cautious + */ + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 58785dc7080a..fe99865ca987 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "misc.h" #include "extent_io.h" @@ -1764,6 +1765,15 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, goto out; } + if (!PageUptodate(page)) { + if (cleancache_get_page(page) == 0) { + BUG_ON(blocksize != PAGE_SIZE); + unlock_extent(tree, start, end, NULL); + unlock_page(page); + goto out; + } + } + if (page->index == last_byte >> PAGE_SHIFT) { size_t zero_offset = offset_in_page(last_byte); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e43b16199e22..2a2c2e679cd8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1476,6 +1477,7 @@ static int btrfs_fill_super(struct super_block *sb, goto fail_close; } + cleancache_init_fs(sb); sb->s_flags |= SB_ACTIVE; return 0; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index babaa7160c55..ef456cd48e03 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ext4.h" @@ -346,6 +347,11 @@ int ext4_mpage_readpages(struct inode *inode, } else if (fully_mapped) { SetPageMappedToDisk(page); } + if (fully_mapped && blocks_per_page == 1 && + !PageUptodate(page) && cleancache_get_page(page) == 0) { + SetPageUptodate(page); + goto confused; + } /* * This page will go to BIO. 
Do we need to send this diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8cafd09c8c98..6dabe76f49d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -3064,6 +3065,8 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); + + cleancache_init_fs(sb); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 10d5468b8f43..2cdefb3ab7af 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2119,6 +2120,12 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; SetPageMappedToDisk(page); + if (!PageUptodate(page) && (!PageSwapCache(page) && + !cleancache_get_page(page))) { + SetPageUptodate(page); + goto confused; + } + if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; @@ -2176,6 +2183,12 @@ submit_and_realloc: F2FS_BLKSIZE); *last_block_in_bio = block_nr; goto out; +confused: + if (bio) { + f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); + bio = NULL; + } + unlock_page(page); out: *bio_ret = bio; return ret; diff --git a/fs/mpage.c b/fs/mpage.c index 0f8ae954a579..f24c3f1fe77b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "internal.h" /* @@ -261,6 +262,12 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) folio_set_mappedtodisk(folio); } + if (fully_mapped && blocks_per_page == 1 && !folio_test_uptodate(folio) && + cleancache_get_page(&folio->page) == 0) { + folio_mark_uptodate(folio); + goto confused; + } + /* * This folio will go to BIO. Do we need to send this BIO off first? 
*/ diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 2c791222c4e2..ca8b4d273feb 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0b0e6a132101..0adc1e053486 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -2274,6 +2275,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto out_system_inodes; } + cleancache_init_shared_fs(sb); osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM); if (!osb->ocfs2_wq) { diff --git a/fs/super.c b/fs/super.c index 6dc56055b8c6..25345c156d82 100644 --- a/fs/super.c +++ b/fs/super.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -259,6 +260,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_gran = 1000000000; s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; + s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; @@ -327,6 +329,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { + cleancache_invalidate_fs(s); unregister_shrinker(&s->s_shrink); fs->kill_sb(s); diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100644 index 000000000000..5f5730c1d324 --- /dev/null +++ b/include/linux/cleancache.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CLEANCACHE_H +#define _LINUX_CLEANCACHE_H + +#include +#include +#include + +#define CLEANCACHE_NO_POOL -1 +#define CLEANCACHE_NO_BACKEND -2 +#define CLEANCACHE_NO_BACKEND_SHARED -3 + +#define CLEANCACHE_KEY_MAX 6 + +/* + * cleancache requires every file with a page in cleancache to have a + * unique key 
unless/until the file is removed/truncated. For some + * filesystems, the inode number is unique, but for "modern" filesystems + * an exportable filehandle is required (see exportfs.h) + */ +struct cleancache_filekey { + union { + ino_t ino; + __u32 fh[CLEANCACHE_KEY_MAX]; + u32 key[CLEANCACHE_KEY_MAX]; + } u; +}; + +struct cleancache_ops { + int (*init_fs)(size_t); + int (*init_shared_fs)(uuid_t *uuid, size_t); + int (*get_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*put_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t); + void (*invalidate_inode)(int, struct cleancache_filekey); + void (*invalidate_fs)(int); +}; + +extern int cleancache_register_ops(const struct cleancache_ops *ops); +extern void __cleancache_init_fs(struct super_block *); +extern void __cleancache_init_shared_fs(struct super_block *); +extern int __cleancache_get_page(struct page *); +extern void __cleancache_put_page(struct page *); +extern void __cleancache_invalidate_page(struct address_space *, struct page *); +extern void __cleancache_invalidate_inode(struct address_space *); +extern void __cleancache_invalidate_fs(struct super_block *); + +#ifdef CONFIG_CLEANCACHE +#define cleancache_enabled (1) +static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) +{ + return mapping->host->i_sb->cleancache_poolid >= 0; +} +static inline bool cleancache_fs_enabled(struct page *page) +{ + return cleancache_fs_enabled_mapping(page->mapping); +} +#else +#define cleancache_enabled (0) +#define cleancache_fs_enabled(_page) (0) +#define cleancache_fs_enabled_mapping(_page) (0) +#endif + +/* + * The shim layer provided by these inline functions allows the compiler + * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE + * is disabled, to a single global variable check if CONFIG_CLEANCACHE + * is enabled but no cleancache "backend" has dynamically enabled it, + 
* and, for the most frequent cleancache ops, to a single global variable + * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled + * and a cleancache backend has dynamically enabled cleancache, but the + * filesystem referenced by that cleancache op has not enabled cleancache. + * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially + * no measurable performance impact. + */ + +static inline void cleancache_init_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_fs(sb); +} + +static inline void cleancache_init_shared_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_shared_fs(sb); +} + +static inline int cleancache_get_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + return __cleancache_get_page(page); + return -1; +} + +static inline void cleancache_put_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + __cleancache_put_page(page); +} + +static inline void cleancache_invalidate_page(struct address_space *mapping, + struct page *page) +{ + /* careful... 
page->mapping is NULL sometimes when this is called */ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_invalidate_page(mapping, page); +} + +static inline void cleancache_invalidate_inode(struct address_space *mapping) +{ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_invalidate_inode(mapping); +} + +static inline void cleancache_invalidate_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_invalidate_fs(sb); +} + +#endif /* _LINUX_CLEANCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 521a6f1c2418..9e307111e944 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1537,6 +1537,11 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; + struct shrinker s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ diff --git a/mm/Kconfig b/mm/Kconfig index f41b9630fe32..6889bb51cd9a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -823,6 +823,28 @@ config USE_PERCPU_NUMA_NODE_ID config HAVE_SETUP_PER_CPU_AREA bool +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancache code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. 
And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. + + If unsure, say Y to enable cleancache + config FRONTSWAP bool diff --git a/mm/Makefile b/mm/Makefile index 8e105e5b3e29..5ef58b2081d4 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100644 index 000000000000..db7eee9c0886 --- /dev/null +++ b/mm/cleancache.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Cleancache frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of cleancache. See + * Documentation/vm/cleancache.rst for more information. + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + */ + +#include +#include +#include +#include +#include +#include + +/* + * cleancache_ops is set by cleancache_register_ops to contain the pointers + * to the cleancache "backend" implementation functions. + */ +static const struct cleancache_ops *cleancache_ops __read_mostly; + +/* + * Counters available via /sys/kernel/debug/cleancache (if debugfs is + * properly configured. 
These are for information only so are not protected + * against increment races. + */ +static u64 cleancache_succ_gets; +static u64 cleancache_failed_gets; +static u64 cleancache_puts; +static u64 cleancache_invalidates; + +static void cleancache_register_ops_sb(struct super_block *sb, void *unused) +{ + switch (sb->cleancache_poolid) { + case CLEANCACHE_NO_BACKEND: + __cleancache_init_fs(sb); + break; + case CLEANCACHE_NO_BACKEND_SHARED: + __cleancache_init_shared_fs(sb); + break; + } +} + +/* + * Register operations for cleancache. Returns 0 on success. + */ +int cleancache_register_ops(const struct cleancache_ops *ops) +{ + if (cmpxchg(&cleancache_ops, NULL, ops)) + return -EBUSY; + + /* + * A cleancache backend can be built as a module and hence loaded after + * a cleancache enabled filesystem has called cleancache_init_fs. To + * handle such a scenario, here we call ->init_fs or ->init_shared_fs + * for each active super block. To differentiate between local and + * shared filesystems, we temporarily initialize sb->cleancache_poolid + * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED + * respectively in case there is no backend registered at the time + * cleancache_init_fs or cleancache_init_shared_fs is called. + * + * Since filesystems can be mounted concurrently with cleancache + * backend registration, we have to be careful to guarantee that all + * cleancache enabled filesystems that has been mounted by the time + * cleancache_register_ops is called has got and all mounted later will + * get cleancache_poolid. 
This is assured by the following statements + * tied together: + * + * a) iterate_supers skips only those super blocks that have started + * ->kill_sb + * + * b) if iterate_supers encounters a super block that has not finished + * ->mount yet, it waits until it is finished + * + * c) cleancache_init_fs is called from ->mount and + * cleancache_invalidate_fs is called from ->kill_sb + * + * d) we call iterate_supers after cleancache_ops has been set + * + * From a) it follows that if iterate_supers skips a super block, then + * either the super block is already dead, in which case we do not need + * to bother initializing cleancache for it, or it was mounted after we + * initiated iterate_supers. In the latter case, it must have seen + * cleancache_ops set according to d) and initialized cleancache from + * ->mount by itself according to c). This proves that we call + * ->init_fs at least once for each active super block. + * + * From b) and c) it follows that if iterate_supers encounters a super + * block that has already started ->init_fs, it will wait until ->mount + * and hence ->init_fs has finished, then check cleancache_poolid, see + * that it has already been set and therefore do nothing. This proves + * that we call ->init_fs no more than once for each super block. + * + * Combined together, the last two paragraphs prove the function + * correctness. + * + * Note that various cleancache callbacks may proceed before this + * function is called or even concurrently with it, but since + * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop + * until the corresponding ->init_fs has been actually called and + * cleancache_ops has been set. 
+ */ + iterate_supers(cleancache_register_ops_sb, NULL); + return 0; +} +EXPORT_SYMBOL(cleancache_register_ops); + +/* Called by a cleancache-enabled filesystem at time of mount */ +void __cleancache_init_fs(struct super_block *sb) +{ + int pool_id = CLEANCACHE_NO_BACKEND; + + if (cleancache_ops) { + pool_id = cleancache_ops->init_fs(PAGE_SIZE); + if (pool_id < 0) + pool_id = CLEANCACHE_NO_POOL; + } + sb->cleancache_poolid = pool_id; +} +EXPORT_SYMBOL(__cleancache_init_fs); + +/* Called by a cleancache-enabled clustered filesystem at time of mount */ +void __cleancache_init_shared_fs(struct super_block *sb) +{ + int pool_id = CLEANCACHE_NO_BACKEND_SHARED; + + if (cleancache_ops) { + pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE); + if (pool_id < 0) + pool_id = CLEANCACHE_NO_POOL; + } + sb->cleancache_poolid = pool_id; +} +EXPORT_SYMBOL(__cleancache_init_shared_fs); + +/* + * If the filesystem uses exportable filehandles, use the filehandle as + * the key, else use the inode number. + */ +static int cleancache_get_key(struct inode *inode, + struct cleancache_filekey *key) +{ + int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); + int len = 0, maxlen = CLEANCACHE_KEY_MAX; + struct super_block *sb = inode->i_sb; + + key->u.ino = inode->i_ino; + if (sb->s_export_op != NULL) { + fhfn = sb->s_export_op->encode_fh; + if (fhfn) { + len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); + if (len <= FILEID_ROOT || len == FILEID_INVALID) + return -1; + if (maxlen > CLEANCACHE_KEY_MAX) + return -1; + } + } + return 0; +} + +/* + * "Get" data from cleancache associated with the poolid/inode/index + * that were specified when the data was put to cleancache and, if + * successful, use it to fill the specified page with data and return 0. + * The pageframe is unchanged and returns -1 if the get fails. + * Page must be locked by caller. 
+ * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +int __cleancache_get_page(struct page *page) +{ + int ret = -1; + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) { + cleancache_failed_gets++; + goto out; + } + + VM_BUG_ON_PAGE(!PageLocked(page), page); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id < 0) + goto out; + + if (cleancache_get_key(page->mapping->host, &key) < 0) + goto out; + + ret = cleancache_ops->get_page(pool_id, key, page->index, page); + if (ret == 0) + cleancache_succ_gets++; + else + cleancache_failed_gets++; +out: + return ret; +} +EXPORT_SYMBOL(__cleancache_get_page); + +/* + * "Put" data from a page to cleancache and associate it with the + * (previously-obtained per-filesystem) poolid and the page's, + * inode and page index. Page must be locked. Note that a put_page + * always "succeeds", though a subsequent get_page may succeed or fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_put_page(struct page *page) +{ + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) { + cleancache_puts++; + return; + } + + VM_BUG_ON_PAGE(!PageLocked(page), page); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id >= 0 && + cleancache_get_key(page->mapping->host, &key) >= 0) { + cleancache_ops->put_page(pool_id, key, page->index, page); + cleancache_puts++; + } +} +EXPORT_SYMBOL(__cleancache_put_page); + +/* + * Invalidate any data from cleancache associated with the poolid and the + * page's inode and page index so that a subsequent "get" will fail. 
+ * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_invalidate_page(struct address_space *mapping, + struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) + return; + + if (pool_id >= 0) { + VM_BUG_ON_PAGE(!PageLocked(page), page); + if (cleancache_get_key(mapping->host, &key) >= 0) { + cleancache_ops->invalidate_page(pool_id, + key, page->index); + cleancache_invalidates++; + } + } +} +EXPORT_SYMBOL(__cleancache_invalidate_page); + +/* + * Invalidate all data from cleancache associated with the poolid and the + * mappings's inode so that all subsequent gets to this poolid/inode + * will fail. + * + * The function has two checks before any action is taken - whether + * a backend is registered and whether the sb->cleancache_poolid + * is correct. + */ +void __cleancache_invalidate_inode(struct address_space *mapping) +{ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (!cleancache_ops) + return; + + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) + cleancache_ops->invalidate_inode(pool_id, key); +} +EXPORT_SYMBOL(__cleancache_invalidate_inode); + +/* + * Called by any cleancache-enabled filesystem at time of unmount; + * note that pool_id is surrendered and may be returned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs. 
+ */ +void __cleancache_invalidate_fs(struct super_block *sb) +{ + int pool_id; + + pool_id = sb->cleancache_poolid; + sb->cleancache_poolid = CLEANCACHE_NO_POOL; + + if (cleancache_ops && pool_id >= 0) + cleancache_ops->invalidate_fs(pool_id); +} +EXPORT_SYMBOL(__cleancache_invalidate_fs); + +static int __init init_cleancache(void) +{ +#ifdef CONFIG_DEBUG_FS + struct dentry *root = debugfs_create_dir("cleancache", NULL); + + debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets); + debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets); + debugfs_create_u64("puts", 0444, root, &cleancache_puts); + debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates); +#endif + return 0; +} +module_init(init_cleancache) diff --git a/mm/filemap.c b/mm/filemap.c index 322aea78058a..695d92428173 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,16 @@ static void filemap_unaccount_folio(struct address_space *mapping, { long nr; + /* + * if we're uptodate, flush out into the cleancache, otherwise + * invalidate any existing cleancache entries. We can't leave + * stale data around in the cleancache once our page is gone + */ + if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) + cleancache_put_page(&folio->page); + else + cleancache_invalidate_page(mapping, &folio->page); + VM_BUG_ON_FOLIO(folio_mapped(folio), folio); if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", diff --git a/mm/truncate.c b/mm/truncate.c index c0be77e5c008..81da63d9ee0b 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -21,6 +21,7 @@ #include #include /* grr. 
try_to_release_page */ #include +#include #include #include "internal.h" @@ -236,6 +237,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) */ folio_zero_range(folio, offset, length); + cleancache_invalidate_page(folio->mapping, &folio->page); if (folio_has_private(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) @@ -340,7 +342,7 @@ void truncate_inode_pages_range(struct address_space *mapping, bool same_folio; if (mapping_empty(mapping)) - return; + goto out; /* * 'start' and 'end' always covers the range of pages to be fully @@ -431,6 +433,9 @@ void truncate_inode_pages_range(struct address_space *mapping, folio_batch_release(&fbatch); index++; } + +out: + cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -484,6 +489,10 @@ void truncate_inode_pages_final(struct address_space *mapping) xa_unlock_irq(&mapping->i_pages); } + /* + * Cleancache needs notification even if there are no pages or shadow + * entries. + */ truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); @@ -637,7 +646,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; if (mapping_empty(mapping)) - return 0; + goto out; folio_batch_init(&fbatch); index = start; @@ -701,6 +710,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } +out: + cleancache_invalidate_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);