mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 02:21:52 +09:00
KVM: x86/mmu: Ensure that kvm_release_pfn_clean() takes exact pfn from kvm_faultin_pfn()
Since 5.16 and prior to 6.13 KVM can't be used with FSDAX guest memory (PMD pages). To reproduce the issue you need to reserve guest memory with `memmap=` cmdline, create and mount FS in DAX mode (tested both XFS and ext4), see doc link below. ndctl command for test: ndctl create-namespace -v -e namespace1.0 --map=dev --mode=fsdax -a 2M Then pass memory object to qemu like: -m 8G -object memory-backend-file,id=ram0,size=8G,\ mem-path=/mnt/pmem/guestmem,share=on,prealloc=on,dump=off,align=2097152 \ -numa node,memdev=ram0,cpus=0-1 QEMU fails to run guest with error: kvm run failed Bad address and there are two warnings in dmesg: WARN_ON_ONCE(!page_count(page)) in kvm_is_zone_device_page() and WARN_ON_ONCE(folio_ref_count(folio) <= 0) in try_grab_folio() (v6.6.63) It looks like in the past assumption was made that pfn won't change from faultin_pfn() to release_pfn_clean(), e.g. see commit4cd071d13c("KVM: x86/mmu: Move calls to thp_adjust() down a level") But kvm_page_fault structure made pfn part of mutable state, so now release_pfn_clean() can take hugepage-adjusted pfn. And it works for all cases (/dev/shm, hugetlb, devdax) except fsdax. Apparently in fsdax mode faultin-pfn and adjusted-pfn may refer to different folios, so we're getting get_page/put_page imbalance. To solve this preserve faultin pfn in separate local variable and pass it in kvm_release_pfn_clean(). Patch tested for all mentioned guest memory backends with tdp_mmu={0,1}. No bug in upstream as it was solved fundamentally by commit 8dd861cc07e2 ("KVM: x86/mmu: Put refcounted pages instead of blindly releasing pfns") and related patch series. Link: https://nvdimm.docs.kernel.org/2mib_fs_dax.html Fixes:2f6305dd56("KVM: MMU: change kvm_tdp_mmu_map() arguments to kvm_page_fault") Co-developed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Reviewed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
21bc72eef0
commit
4118bd1834
@@ -4245,6 +4245,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
|
||||
|
||||
unsigned long mmu_seq;
|
||||
kvm_pfn_t orig_pfn;
|
||||
int r;
|
||||
|
||||
fault->gfn = fault->addr >> PAGE_SHIFT;
|
||||
@@ -4272,6 +4273,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
if (r != RET_PF_CONTINUE)
|
||||
return r;
|
||||
|
||||
orig_pfn = fault->pfn;
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
|
||||
if (is_tdp_mmu_fault)
|
||||
@@ -4296,7 +4299,7 @@ out_unlock:
|
||||
read_unlock(&vcpu->kvm->mmu_lock);
|
||||
else
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(fault->pfn);
|
||||
kvm_release_pfn_clean(orig_pfn);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@@ -790,6 +790,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
|
||||
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
|
||||
{
|
||||
struct guest_walker walker;
|
||||
kvm_pfn_t orig_pfn;
|
||||
int r;
|
||||
unsigned long mmu_seq;
|
||||
bool is_self_change_mapping;
|
||||
@@ -868,6 +869,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
walker.pte_access &= ~ACC_EXEC_MASK;
|
||||
}
|
||||
|
||||
orig_pfn = fault->pfn;
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
@@ -881,7 +884,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
|
||||
|
||||
out_unlock:
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(fault->pfn);
|
||||
kvm_release_pfn_clean(orig_pfn);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user