From cd333a037cfa887baa9a824e765f56def9e7664b Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <michel@lespinasse.org>
Date: Thu, 29 Apr 2021 10:28:25 -0700
Subject: [PATCH] BACKPORT: FROMLIST: mm: implement speculative handling in
 filemap_fault()

Extend filemap_fault() to handle speculative faults.

In the speculative case, we will only be fishing existing pages out of
the page cache. The logic we use mirrors what is done in the
non-speculative case, assuming that pages are found in the page cache,
are up to date and not already locked, and that readahead is not
necessary at this time. In all other cases, the fault is aborted to be
handled non-speculatively.

Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
Link: https://lore.kernel.org/all/20210407014502.24091-26-michel@lespinasse.org/

Conflicts:
    mm/filemap.c

1. Added back file_ra_state variable used by SPF path.

Bug: 161210518
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I82eba7fcfc81876245c2e65bc5ae3d33ddfcc368
---
 mm/filemap.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 2c4c722eefb6..a5d7898ed9db 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3025,7 +3025,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  *
- * vma->vm_mm->mmap_lock must be held on entry.
+ * If FAULT_FLAG_SPECULATIVE is set, this function runs inside an RCU
+ * read-side critical section, without the mmap_lock held.
+ * Otherwise, vma->vm_mm->mmap_lock must be held on entry.
  *
  * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
  * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
@@ -3043,6 +3045,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	struct file *file = vmf->vma->vm_file;
 	struct file *fpin = NULL;
 	struct address_space *mapping = file->f_mapping;
+	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	pgoff_t max_off;
@@ -3050,6 +3053,47 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	vm_fault_t ret = 0;
 	bool mapping_locked = false;
 
+	if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+		/* Retry paths must drop the find_get_page() reference. */
+		page = find_get_page(mapping, offset);
+		if (unlikely(!page))
+			return VM_FAULT_RETRY;
+		if (unlikely(PageReadahead(page)) || !trylock_page(page))
+			goto page_put;
+
+		if (unlikely(compound_head(page)->mapping != mapping))
+			goto page_unlock;
+		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+		if (unlikely(!PageUptodate(page)))
+			goto page_unlock;
+
+		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+		if (unlikely(offset >= max_off))
+			goto page_unlock;
+
+		/*
+		 * Update readahead mmap_miss statistic.
+		 *
+		 * finish_fault() may still fail to complete the fault; the
+		 * non-speculative retry then updates mmap_miss a second
+		 * time. Ideally the update would happen exactly once.
+		 */
+		if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) {
+			unsigned int mmap_miss = READ_ONCE(ra->mmap_miss);
+			if (mmap_miss)
+				WRITE_ONCE(ra->mmap_miss, --mmap_miss);
+		}
+
+		/* Return the page locked, still holding its reference. */
+		vmf->page = page;
+		return VM_FAULT_LOCKED;
+page_unlock:
+		unlock_page(page);
+page_put:
+		put_page(page);
+		return VM_FAULT_RETRY;
+	}
+
 	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (unlikely(offset >= max_off))
 		return VM_FAULT_SIGBUS;