From 1441ca1494d74a2c9d6c9bd8dd633d9ebff1daba Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Fri, 22 Jul 2022 19:43:16 -0700 Subject: [PATCH 01/22] kvm: x86: mmu: Drop the need_remote_flush() function This is only used by kvm_mmu_pte_write(), which no longer actually creates the new SPTE and instead just clears the old SPTE. So we just need to check if the old SPTE was shadow-present instead of calling need_remote_flush(). Hence we can drop this function. It was incomplete anyway as it didn't take access-tracking into account. This patch should not result in any functional change. Signed-off-by: Junaid Shahid Reviewed-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220723024316.2725328-1-junaids@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 126fa9aec64c..fae2c0863e45 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } -static bool need_remote_flush(u64 old, u64 new) -{ - if (!is_shadow_present_pte(old)) - return false; - if (!is_shadow_present_pte(new)) - return true; - if ((old ^ new) & SPTE_BASE_ADDR_MASK) - return true; - old ^= shadow_nx_mask; - new ^= shadow_nx_mask; - return (old & ~new & SPTE_PERM_MASK) != 0; -} - static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) { @@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); if (gentry && sp->role.level != PG_LEVEL_4K) ++vcpu->kvm->stat.mmu_pde_zapped; - if (need_remote_flush(entry, *spte)) + if (is_shadow_present_pte(entry)) flush = true; ++spte; } From b64d740ea7ddc929d97b28de4c0665f7d5db9e2a Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 10 Aug 2022 15:49:39 -0700 Subject: [PATCH 02/22] kvm: x86: mmu: Always flush TLBs when enabling dirty logging When A/D bits are not available, KVM uses a software access tracking mechanism, which involves making the SPTEs inaccessible. However, the clear_young() MMU notifier does not flush TLBs. So it is possible that there may still be stale, potentially writable, TLB entries. This is usually fine, but can be problematic when enabling dirty logging, because it currently only does a TLB flush if any SPTEs were modified. But if all SPTEs are in access-tracked state, then there won't be a TLB flush, which means that the guest could still possibly write to memory and not have it reflected in the dirty bitmap. So just unconditionally flush the TLBs when enabling dirty logging. As an alternative, KVM could explicitly check the MMU-Writable bit when write-protecting SPTEs to decide if a flush is needed (instead of checking the Writable bit), but given that a flush almost always happens anyway, so just making it unconditional seems simpler. Signed-off-by: Junaid Shahid Message-Id: <20220810224939.2611160-1-junaids@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 45 +++++++---------------------------------- arch/x86/kvm/mmu/spte.h | 14 +++++++++---- arch/x86/kvm/x86.c | 44 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fae2c0863e45..e418ef3ecfcb 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6072,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); - flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, KVM_MAX_HUGEPAGE_LEVEL, - false); + slot_handle_level(kvm, memslot, slot_rmap_write_protect, + start_level, KVM_MAX_HUGEPAGE_LEVEL, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); + kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); } - - /* - * Flush TLBs if any SPTEs had to be write-protected to ensure that - * guest writes are reflected in the dirty bitmap before the memslot - * update completes, i.e. before enabling dirty logging is visible to - * userspace. - * - * Perform the TLB flush outside the mmu_lock to reduce the amount of - * time the lock is held. However, this does mean that another CPU can - * now grab mmu_lock and encounter a write-protected SPTE while CPUs - * still have a writable mapping for the associated GFN in their TLB. - * - * This is safe but requires KVM to be careful when making decisions - * based on the write-protection status of an SPTE. Specifically, KVM - * also write-protects SPTEs to monitor changes to guest page tables - * during shadow paging, and must guarantee no CPUs can write to those - * page before the lock is dropped. As mentioned in the previous - * paragraph, a write-protected SPTE is no guarantee that CPU cannot - * perform writes. So to determine if a TLB flush is truly required, KVM - * will clear a separate software-only bit (MMU-writable) and skip the - * flush if-and-only-if this bit was already clear. - * - * See is_writable_pte() for more details. - */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min) @@ -6480,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { - bool flush = false; - if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); /* * Clear dirty bits only on 4k SPTEs since the legacy MMU only * support dirty logging at a 4k granularity. */ - flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); + slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { read_lock(&kvm->mmu_lock); - flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); + kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); } /* + * The caller will flush the TLBs after this function returns. + * * It's also safe to flush TLBs out of mmu lock here as currently this * function is only used for dirty logging, in which case flushing TLB * out of mmu lock also guarantees no dirty pages will be lost in * dirty_bitmap. */ - if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); } void kvm_mmu_zap_all(struct kvm *kvm) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f3744eea45f5..7670c13ce251 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, } /* - * An shadow-present leaf SPTE may be non-writable for 3 possible reasons: + * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: * * 1. To intercept writes for dirty logging. KVM write-protects huge pages * so that they can be split be split down into the dirty logging @@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * read-only memslot or guest memory backed by a read-only VMA. Writes to * such pages are disallowed entirely. * - * To keep track of why a given SPTE is write-protected, KVM uses 2 - * software-only bits in the SPTE: + * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this + * case, the SPTE is access-protected, not just write-protected! + * + * For cases #1 and #4, KVM can safely make such SPTEs writable without taking + * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it. + * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits + * in the SPTE: * * shadow_mmu_writable_mask, aka MMU-writable - * Cleared on SPTEs that KVM is currently write-protecting for shadow paging @@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging * (which does not clear the MMU-writable bit), does not flush TLBs before * dropping the lock, as it only needs to synchronize guest writes with the - * dirty bitmap. + * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for + * access-tracking via the clear_young() MMU notifier also does not flush TLBs. * * So, there is the problem: clearing the MMU-writable bit can encounter a * write-protected SPTE while CPUs still have writable mappings for that SPTE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 205ebdc2b11b..d7374d768296 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12473,6 +12473,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } else { kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); } + + /* + * Unconditionally flush the TLBs after enabling dirty logging. + * A flush is almost always going to be necessary (see below), + * and unconditionally flushing allows the helpers to omit + * the subtly complex checks when removing write access. + * + * Do the flush outside of mmu_lock to reduce the amount of + * time mmu_lock is held. Flushing after dropping mmu_lock is + * safe as KVM only needs to guarantee the slot is fully + * write-protected before returning to userspace, i.e. before + * userspace can consume the dirty status. + * + * Flushing outside of mmu_lock requires KVM to be careful when + * making decisions based on writable status of an SPTE, e.g. a + * !writable SPTE doesn't guarantee a CPU can't perform writes. + * + * Specifically, KVM also write-protects guest page tables to + * monitor changes when using shadow paging, and must guarantee + * no CPUs can write to those page before mmu_lock is dropped. + * Because CPUs may have stale TLB entries at this point, a + * !writable SPTE doesn't guarantee CPUs can't perform writes. + * + * KVM also allows making SPTES writable outside of mmu_lock, + * e.g. to allow dirty logging without taking mmu_lock. + * + * To handle these scenarios, KVM uses a separate software-only + * bit (MMU-writable) to track if a SPTE is !writable due to + * a guest page table being write-protected (KVM clears the + * MMU-writable flag when write-protecting for shadow paging). + * + * The use of MMU-writable is also the primary motivation for + * the unconditional flush. Because KVM must guarantee that a + * CPU doesn't contain stale, writable TLB entries for a + * !MMU-writable SPTE, KVM must flush if it encounters any + * MMU-writable SPTE regardless of whether the actual hardware + * writable bit was set. I.e. KVM is almost guaranteed to need + * to flush, while unconditionally flushing allows the "remove + * write access" helpers to ignore MMU-writable entirely. + * + * See is_writable_pte() for more details (the case involving + * access-tracked SPTEs is particularly relevant). + */ + kvm_arch_flush_remote_tlbs_memslot(kvm, new); } } From 020dac4187968535f089f83f376a72beb3451311 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 10 Aug 2022 14:30:50 -0700 Subject: [PATCH 03/22] KVM: VMX: Heed the 'msr' argument in msr_write_intercepted() Regardless of the 'msr' argument passed to the VMX version of msr_write_intercepted(), the function always checks to see if a specific MSR (IA32_SPEC_CTRL) is intercepted for write. This behavior seems unintentional and unexpected. Modify the function so that it checks to see if the provided 'msr' index is intercepted for write. Fixes: 67f4b9969c30 ("KVM: nVMX: Handle dynamic MSR intercept toggling") Cc: Sean Christopherson Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Message-Id: <20220810213050.2655000-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d7f8331d6f7e..c9b49a09e6b5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, - MSR_IA32_SPEC_CTRL); + return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr); } unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) From 67ef8664cc5b113f6c49b01d2a0e4cbc589623dd Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Jul 2022 23:48:37 +0000 Subject: [PATCH 04/22] KVM: selftests: Fix KVM_EXCEPTION_MAGIC build with Clang Change KVM_EXCEPTION_MAGIC to use the all-caps "ULL", rather than lower case. This fixes a build failure with Clang: In file included from x86_64/hyperv_features.c:13: include/x86_64/processor.h:825:9: error: unexpected token in argument list return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); ^ include/x86_64/processor.h:802:15: note: expanded from macro 'kvm_asm_safe' asm volatile(KVM_ASM_SAFE(insn) \ ^ include/x86_64/processor.h:785:2: note: expanded from macro 'KVM_ASM_SAFE' "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ ^ :1:18: note: instantiated into assembly here mov $0xabacadabaull, %r9 ^ Fixes: 3b23054cd3f5 ("KVM: selftests: Add x86-64 support for exception fixup") Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220722234838.2160385-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 45edf45821d0..51c6661aca77 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -754,7 +754,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); /* If a toddler were to say "abracadabra". */ -#define KVM_EXCEPTION_MAGIC 0xabacadabaull +#define KVM_EXCEPTION_MAGIC 0xabacadabaULL /* * KVM selftest exception fixup uses registers to coordinate with the exception From 372d07084593dc7a399bf9bee815711b1fb1bcf2 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Jul 2022 23:48:38 +0000 Subject: [PATCH 05/22] KVM: selftests: Fix ambiguous mov in KVM_ASM_SAFE() Change the mov in KVM_ASM_SAFE() that zeroes @vector to a movb to make it unambiguous. This fixes a build failure with Clang since, unlike the GNU assembler, the LLVM integrated assembler rejects ambiguous X86 instructions that don't have suffixes: In file included from x86_64/hyperv_features.c:13: include/x86_64/processor.h:825:9: error: ambiguous instructions require an explicit suffix (could be 'movb', 'movw', 'movl', or 'movq') return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); ^ include/x86_64/processor.h:802:15: note: expanded from macro 'kvm_asm_safe' asm volatile(KVM_ASM_SAFE(insn) \ ^ include/x86_64/processor.h:788:16: note: expanded from macro 'KVM_ASM_SAFE' "1: " insn "\n\t" \ ^ :5:2: note: instantiated into assembly here mov $0, 15(%rsp) ^ It seems like this change could introduce undesirable behavior in the future, e.g. if someone used a type larger than a u8 for @vector, since KVM_ASM_SAFE() will only zero the bottom byte. I tried changing the type of @vector to an int to see what would happen. GCC failed to compile due to a size mismatch between `movb` and `%eax`. Clang succeeded in compiling, but the generated code looked correct, so perhaps it will not be an issue. That being said it seems like there could be a better solution to this issue that does not assume @vector is a u8. Fixes: 3b23054cd3f5 ("KVM: selftests: Add x86-64 support for exception fixup") Signed-off-by: David Matlack Reviewed-by: Sean Christopherson Message-Id: <20220722234838.2160385-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 51c6661aca77..0cbc71b7af50 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -786,7 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "mov $0, %[vector]\n\t" \ + "movb $0, %[vector]\n\t" \ "jmp 3f\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ From fd0cd59f322b1919bfc68cd245d51906f7f1ba2a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:35 +0100 Subject: [PATCH 06/22] riscv: kvm: vcpu_timer: fix unused variable warnings In two places, csr is set but never used: arch/riscv/kvm/vcpu_timer.c:302:23: warning: variable 'csr' set but not used [-Wunused-but-set-variable] struct kvm_vcpu_csr *csr; ^ arch/riscv/kvm/vcpu_timer.c:327:23: warning: variable 'csr' set but not used [-Wunused-but-set-variable] struct kvm_vcpu_csr *csr; ^ Remove the variable. Fixes: 8f5cb44b1bae ("RISC-V: KVM: Support sstc extension") Reviewed-by: Palmer Dabbelt Signed-off-by: Conor Dooley Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_timer.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 16f50c46ba39..185f2386a747 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -299,7 +299,6 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; kvm_riscv_vcpu_update_timedelta(vcpu); @@ -307,7 +306,6 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; #if defined(CONFIG_32BIT) csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); @@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu) { - struct kvm_vcpu_csr *csr; struct kvm_vcpu_timer *t = &vcpu->arch.timer; if (!t->sstc_enabled) return; - csr = &vcpu->arch.guest_csr; t = &vcpu->arch.timer; #if defined(CONFIG_32BIT) t->next_cycles = csr_read(CSR_VSTIMECMP); From 3e5e56c60a14776e2a49837b55b03bc193fd91f7 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 14 Aug 2022 15:12:36 +0100 Subject: [PATCH 07/22] riscv: kvm: move extern sbi_ext declarations to a header Sparse complains about missing statics in the declarations of several variables: arch/riscv/kvm/vcpu_sbi_replace.c:38:37: warning: symbol 'vcpu_sbi_ext_time' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:73:37: warning: symbol 'vcpu_sbi_ext_ipi' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:126:37: warning: symbol 'vcpu_sbi_ext_rfence' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_replace.c:170:37: warning: symbol 'vcpu_sbi_ext_srst' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:69:37: warning: symbol 'vcpu_sbi_ext_base' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:90:37: warning: symbol 'vcpu_sbi_ext_experimental' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_base.c:96:37: warning: symbol 'vcpu_sbi_ext_vendor' was not declared. Should it be static? arch/riscv/kvm/vcpu_sbi_hsm.c:115:37: warning: symbol 'vcpu_sbi_ext_hsm' was not declared. Should it be static? These variables are however used in vcpu_sbi.c where they are declared as extern. Move them to kvm_vcpu_sbi.h which is handily already included by the three other files. Fixes: a046c2d8578c ("RISC-V: KVM: Reorganize SBI code by moving SBI v0.1 to its own file") Fixes: 5f862df5585c ("RISC-V: KVM: Add v0.1 replacement SBI extensions defined in v0.2") Fixes: 3e1d86569c21 ("RISC-V: KVM: Add SBI HSM extension in KVM") Reviewed-by: Palmer Dabbelt Signed-off-by: Conor Dooley Signed-off-by: Anup Patel --- arch/riscv/include/asm/kvm_vcpu_sbi.h | 12 ++++++++++++ arch/riscv/kvm/vcpu_sbi.c | 12 +----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 83d6d4d2b1df..26a446a34057 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, u32 type, u64 flags); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); +#ifdef CONFIG_RISCV_SBI_V01 +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; +#endif +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; + #endif /* __RISCV_KVM_VCPU_SBI_H__ */ diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d45e7da3f0d3..f96991d230bf 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err) }; } -#ifdef CONFIG_RISCV_SBI_V01 -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; -#else +#ifndef CONFIG_RISCV_SBI_V01 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = { .extid_start = -1UL, .extid_end = -1UL, .handler = NULL, }; #endif -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; -extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { &vcpu_sbi_ext_v01, From 9b1ac04698a4bfec146322502cdcd9904c1777fa Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 4 Aug 2022 13:18:52 +0530 Subject: [PATCH 08/22] powerpc/papr_scm: Fix nvdimm event mappings Commit 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") added performance monitoring support for papr-scm nvdimm devices via perf interface. Commit also added an array in papr_scm_priv structure called "nvdimm_events_map", which got filled based on the result of H_SCM_PERFORMANCE_STATS hcall. Currently there is an assumption that the order of events in the stats buffer, returned by the hypervisor is same. And order also happens to matches with the events specified in nvdimm driver code. But this assumption is not documented in Power Architecture Platform Requirements (PAPR) document. Although the order of events happens to be same on current generation od system, but it might not be true in future generation systems. Fix the issue, by adding a static mapping for nvdimm events to corresponding stat-id, and removing the dynamic map from papr_scm_priv structure. Also remove the function papr_scm_pmu_check_events from papr_scm.c file, as we no longer need to copy stat-ids dynamically. Fixes: 4c08d4bbc089 ("powerpc/papr_scm: Add perf interface support") Reported-by: Aneesh Kumar K.V Signed-off-by: Kajol Jain Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220804074852.55157-1-kjain@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 88 +++++++---------------- 1 file changed, 27 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 20f6ed813bff..54740af21557 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -124,9 +124,6 @@ struct papr_scm_priv { /* The bits which needs to be overridden */ u64 health_bitmap_inject_mask; - - /* array to have event_code and stat_id mappings */ - u8 *nvdimm_events_map; }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -350,6 +347,25 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, #ifdef CONFIG_PERF_EVENTS #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) +static const char * const nvdimm_events_map[] = { + [1] = "CtlResCt", + [2] = "CtlResTm", + [3] = "PonSecs ", + [4] = "MemLife ", + [5] = "CritRscU", + [6] = "HostLCnt", + [7] = "HostSCnt", + [8] = "HostSDur", + [9] = "HostLDur", + [10] = "MedRCnt ", + [11] = "MedWCnt ", + [12] = "MedRDur ", + [13] = "MedWDur ", + [14] = "CchRHCnt", + [15] = "CchWHCnt", + [16] = "FastWCnt", +}; + static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) { struct papr_scm_perf_stat *stat; @@ -357,11 +373,15 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, struct papr_scm_priv *p = dev_get_drvdata(dev); int rc, size; + /* Invalid eventcode */ + if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map)) + return -EINVAL; + /* Allocate request buffer enough to hold single performance stat */ size = sizeof(struct papr_scm_perf_stats) + sizeof(struct papr_scm_perf_stat); - if (!p || !p->nvdimm_events_map) + if (!p) return -EINVAL; stats = kzalloc(size, GFP_KERNEL); @@ -370,7 +390,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, stat = &stats->scm_statistic[0]; memcpy(&stat->stat_id, - &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], + nvdimm_events_map[event->attr.config], sizeof(stat->stat_id)); stat->stat_val = 0; @@ -458,56 +478,6 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags) papr_scm_pmu_read(event); } -static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) -{ - struct papr_scm_perf_stat *stat; - struct papr_scm_perf_stats *stats; - u32 available_events; - int index, rc = 0; - - if (!p->stat_buffer_len) - return -ENOENT; - - available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) - / sizeof(struct papr_scm_perf_stat); - if (available_events == 0) - return -EOPNOTSUPP; - - /* Allocate the buffer for phyp where stats are written */ - stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); - if (!stats) { - rc = -ENOMEM; - return rc; - } - - /* Called to get list of events supported */ - rc = drc_pmem_query_stats(p, stats, 0); - if (rc) - goto out; - - /* - * Allocate memory and populate nvdimm_event_map. - * Allocate an extra element for NULL entry - */ - p->nvdimm_events_map = kcalloc(available_events + 1, - sizeof(stat->stat_id), - GFP_KERNEL); - if (!p->nvdimm_events_map) { - rc = -ENOMEM; - goto out; - } - - /* Copy all stat_ids to event map */ - for (index = 0, stat = stats->scm_statistic; - index < available_events; index++, ++stat) { - memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], - &stat->stat_id, sizeof(stat->stat_id)); - } -out: - kfree(stats); - return rc; -} - static void papr_scm_pmu_register(struct papr_scm_priv *p) { struct nvdimm_pmu *nd_pmu; @@ -519,8 +489,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) goto pmu_err_print; } - rc = papr_scm_pmu_check_events(p, nd_pmu); - if (rc) + if (!p->stat_buffer_len) goto pmu_check_events_err; nd_pmu->pmu.task_ctx_nr = perf_invalid_context; @@ -539,7 +508,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) rc = register_nvdimm_pmu(nd_pmu, p->pdev); if (rc) - goto pmu_register_err; + goto pmu_check_events_err; /* * Set archdata.priv value to nvdimm_pmu structure, to handle the @@ -548,8 +517,6 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) p->pdev->archdata.priv = nd_pmu; return; -pmu_register_err: - kfree(p->nvdimm_events_map); pmu_check_events_err: kfree(nd_pmu); pmu_err_print: @@ -1560,7 +1527,6 @@ static int papr_scm_remove(struct platform_device *pdev) unregister_nvdimm_pmu(pdev->archdata.priv); pdev->archdata.priv = NULL; - kfree(p->nvdimm_events_map); kfree(p->bus_desc.provider_name); kfree(p); From 0382a35bef70ecc074db67192ff8d37737d02b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 20 Aug 2022 13:51:13 +0200 Subject: [PATCH 09/22] powerpc/pci: Enable PCI domains in /proc when PCI bus numbers are not unique MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit powerpc systems with more PCIe controllers and more PCI domains, where on more PCI domains are same PCI numbers, when kernel is compiled with CONFIG_PROC_FS=y and CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT=y options, kernel prints "proc_dir_entry 'pci/01' already registered" error message. proc_dir_entry 'pci/01' already registered WARNING: CPU: 0 PID: 1 at fs/proc/generic.c:377 proc_register+0x1a8/0x1ac ... NIP proc_register+0x1a8/0x1ac LR proc_register+0x1a8/0x1ac Call Trace: proc_register+0x1a8/0x1ac (unreliable) _proc_mkdir+0x78/0xa4 pci_proc_attach_device+0x11c/0x168 pci_proc_init+0x80/0x98 do_one_initcall+0x80/0x284 kernel_init_freeable+0x1f4/0x2a0 kernel_init+0x24/0x150 ret_from_kernel_thread+0x5c/0x64 This regression started appearing after commit 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses") in case in each mPCIe slot is connected PCIe card and therefore PCI bus 1 is populated in for every PCIe controller / PCI domain. The reason is that PCI procfs code expects that when PCI bus numbers are not unique across all PCI domains, function pci_proc_domain() returns true for domain dependent buses. Fix this issue by setting PCI_ENABLE_PROC_DOMAINS and PCI_COMPAT_DOMAIN_0 flags for 32-bit powerpc code when CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT is enabled. Same approach is already implemented for 64-bit powerpc code (where PCI bus numbers are always domain dependent). Fixes: 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses") Signed-off-by: Pali Rohár [mpe: Trim change log oops message] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220820115113.30581-1-pali@kernel.org --- arch/powerpc/kernel/pci_32.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 433965bf37b4..855b59892c5c 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -245,6 +245,15 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); +#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + /* + * Enable PCI domains in /proc when PCI bus numbers are not unique + * across all PCI domains to prevent conflicts. And keep PCI domain 0 + * backward compatible in /proc for video cards. + */ + pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); +#endif + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) pci_assign_all_buses = 1; From c7acee3d2f128a38b68fb7af85dbbd91bfd0b4ad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 21 Aug 2022 01:51:29 +0900 Subject: [PATCH 10/22] powerpc: align syscall table for ppc32 Christophe Leroy reported that commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing CONFIG_MODULE_REL_CRCS") broke mpc85xx_defconfig + CONFIG_RELOCATABLE=y. LD vmlinux SYSMAP System.map SORTTAB vmlinux CHKREL vmlinux WARNING: 451 bad relocations c0b312a9 R_PPC_UADDR32 .head.text-0x3ff9ed54 c0b312ad R_PPC_UADDR32 .head.text-0x3ffac224 c0b312b1 R_PPC_UADDR32 .head.text-0x3ffb09f4 c0b312b5 R_PPC_UADDR32 .head.text-0x3fe184dc c0b312b9 R_PPC_UADDR32 .head.text-0x3fe183a8 ... The compiler emits a bunch of R_PPC_UADDR32, which is not supported by arch/powerpc/kernel/reloc_32.S. The reason is there exists an unaligned symbol. $ powerpc-linux-gnu-nm -n vmlinux ... c0b31258 d spe_aligninfo c0b31298 d __func__.0 c0b312a9 D sys_call_table c0b319b8 d __func__.0 Commit 7b4537199a4a is not the root cause. Even before that, I can reproduce the same issue for mpc85xx_defconfig + CONFIG_RELOCATABLE=y + CONFIG_MODVERSIONS=n. It is just that nobody noticed because when CONFIG_MODVERSIONS is enabled, a __crc_* symbol inserted before sys_call_table was hiding the unalignment issue. Adding alignment to the syscall table for ppc32 fixes the issue. Cc: stable@vger.kernel.org Reported-by: Christophe Leroy Signed-off-by: Masahiro Yamada Tested-by: Christophe Leroy [mpe: Trim change log discussion, add Cc stable] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/lkml/38605f6a-a568-f884-f06f-ea4da5b214f0@csgroup.eu/ Link: https://lore.kernel.org/r/20220820165129.1147589-1-masahiroy@kernel.org --- arch/powerpc/kernel/systbl.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index cb3358886203..6c1db3b6de2d 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -18,6 +18,7 @@ .p2align 3 #define __SYSCALL(nr, entry) .8byte entry #else + .p2align 2 #define __SYSCALL(nr, entry) .long entry #endif From 310d1344e3c58cc2d625aa4e52cfcb7d8a26fcbf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 23 Aug 2022 21:59:51 +1000 Subject: [PATCH 11/22] Revert "powerpc: Remove unused FW_FEATURE_NATIVE references" This reverts commit 79b74a68486765a4fe685ac4069bc71366c538f5. It broke booting on IBM Cell machines when the kernel is also built with CONFIG_PPC_PS3=y. That's because FW_FEATURE_NATIVE_ALWAYS = 0 does have an important effect, which is to clear the PS3 ALWAYS features from FW_FEATURE_ALWAYS. Note that CONFIG_PPC_NATIVE has since been renamed CONFIG_PPC_HASH_MMU_NATIVE. Fixes: 79b74a684867 ("powerpc: Remove unused FW_FEATURE_NATIVE references") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220823115952.1203106-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/firmware.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 398e0b5e485f..ed6db13a1d7c 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -83,6 +83,8 @@ enum { FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, + FW_FEATURE_NATIVE_POSSIBLE = 0, + FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | @@ -92,6 +94,9 @@ enum { #endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | +#endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS | #endif 0, FW_FEATURE_ALWAYS = @@ -103,6 +108,9 @@ enum { #endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & +#endif +#ifdef CONFIG_PPC_HASH_MMU_NATIVE + FW_FEATURE_NATIVE_ALWAYS & #endif FW_FEATURE_POSSIBLE, From 91926d8b7e71aaf5f84f0cf208fc5a8b7a761050 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 23 Aug 2022 21:59:52 +1000 Subject: [PATCH 12/22] powerpc/rtas: Fix RTAS MSR[HV] handling for Cell The semi-recent changes to MSR handling when entering RTAS (firmware) cause crashes on IBM Cell machines. An example trace: kernel tried to execute user page (2fff01a8) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0x2fff01a8 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=4 NUMA Cell Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.0.0-rc2-00433-gede0a8d3307a #207 NIP: 000000002fff01a8 LR: 0000000000032608 CTR: 0000000000000000 REGS: c0000000015236b0 TRAP: 0400 Tainted: G W (6.0.0-rc2-00433-gede0a8d3307a) MSR: 0000000008001002 CR: 00000000 XER: 20000000 ... NIP 0x2fff01a8 LR 0x32608 Call Trace: 0xc00000000143c5f8 (unreliable) .rtas_call+0x224/0x320 .rtas_get_boot_time+0x70/0x150 .read_persistent_clock64+0x114/0x140 .read_persistent_wall_and_boot_offset+0x24/0x80 .timekeeping_init+0x40/0x29c .start_kernel+0x674/0x8f0 start_here_common+0x1c/0x50 Unlike PAPR platforms where RTAS is only used in guests, on the IBM Cell machines Linux runs with MSR[HV] set but also uses RTAS, provided by SLOF. Fix it by copying the MSR[HV] bit from the MSR value we've just read using mfmsr into the value used for RTAS. It seems like we could also fix it using an #ifdef CELL to set MSR[HV], but that doesn't work because it's possible to build a single kernel image that runs on both Cell native and pseries. Fixes: b6b1c3ce06ca ("powerpc/rtas: Keep MSR[RI] set when calling RTAS") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Michael Ellerman Reviewed-by: Jordan Niethe Link: https://lore.kernel.org/r/20220823115952.1203106-2-mpe@ellerman.id.au --- arch/powerpc/kernel/rtas_entry.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S index 9a434d42e660..6ce95ddadbcd 100644 --- a/arch/powerpc/kernel/rtas_entry.S +++ b/arch/powerpc/kernel/rtas_entry.S @@ -109,8 +109,12 @@ __enter_rtas: * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if * MSR[S] is set, it will remain when entering RTAS. + * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV + * from the saved MSR value and insert into the value RTAS will use. */ + extrdi r0, r6, 1, 63 - MSR_HV_LG LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + insrdi r6, r0, 1, 63 - MSR_HV_LG li r0,0 mtmsrd r0,1 /* disable RI before using SRR0/1 */ From ca922fecda6caa5162409406dc3b663062d75089 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Fri, 19 Aug 2022 14:29:45 +0200 Subject: [PATCH 13/22] KVM: s390: pci: Hook to access KVM lowlevel from VFIO We have a cross dependency between KVM and VFIO when using s390 vfio_pci_zdev extensions for PCI passthrough To be able to keep both subsystem modular we add a registering hook inside the S390 core code. This fixes a build problem when VFIO is built-in and KVM is built as a module. Reported-by: Randy Dunlap Reported-by: kernel test robot Reviewed-by: Matthew Rosato Reviewed-by: Niklas Schnelle Signed-off-by: Pierre Morel Fixes: 09340b2fca007 ("KVM: s390: pci: add routines to start/stop interpretive execution") Cc: Acked-by: Janosch Frank Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20220819122945.9309-1-pmorel@linux.ibm.com Message-Id: <20220819122945.9309-1-pmorel@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 17 ++++++----------- arch/s390/kvm/pci.c | 12 ++++++++---- arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci_kvm_hook.c | 11 +++++++++++ drivers/vfio/pci/vfio_pci_zdev.c | 8 ++++++-- 5 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 arch/s390/pci/pci_kvm_hook.c diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f39092e0ceaa..b1e98a9ed152 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev); -#else -static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev, - struct kvm *kvm) -{ - return -EPERM; -} -static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {} -#endif +struct zpci_kvm_hook { + int (*kvm_register)(void *opaque, struct kvm *kvm); + void (*kvm_unregister)(void *opaque); +}; + +extern struct zpci_kvm_hook zpci_kvm_hook; #endif diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 4946fb7757d6..bb8c335d17b9 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) * available, enable them and let userspace indicate whether or not they will * be used (specify SHM bit to disable). */ -int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm) +static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { + struct zpci_dev *zdev = opaque; int rc; if (!zdev) @@ -510,10 +511,10 @@ err: kvm_put_kvm(kvm); return rc; } -EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm); -void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev) +static void kvm_s390_pci_unregister_kvm(void *opaque) { + struct zpci_dev *zdev = opaque; struct kvm *kvm; if (!zdev) @@ -566,7 +567,6 @@ out: kvm_put_kvm(kvm); } -EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm); void kvm_s390_pci_init_list(struct kvm *kvm) { @@ -678,6 +678,8 @@ int kvm_s390_pci_init(void) spin_lock_init(&aift->gait_lock); mutex_init(&aift->aift_lock); + zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; + zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; return 0; } @@ -685,6 +687,8 @@ int kvm_s390_pci_init(void) void kvm_s390_pci_exit(void) { mutex_destroy(&aift->aift_lock); + zpci_kvm_hook.kvm_register = NULL; + zpci_kvm_hook.kvm_unregister = NULL; kfree(aift); } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index bf557a1b789c..5ae31ca9dd44 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o + pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c new file mode 100644 index 000000000000..ff34baf50a3e --- /dev/null +++ b/arch/s390/pci/pci_kvm_hook.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VFIO ZPCI devices support + * + * Copyright (C) IBM Corp. 2022. All rights reserved. + * Author(s): Pierre Morel + */ +#include + +struct zpci_kvm_hook zpci_kvm_hook; +EXPORT_SYMBOL_GPL(zpci_kvm_hook); diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index e163aa9f6144..0cbdcd14f1c8 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -151,7 +151,10 @@ int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) if (!vdev->vdev.kvm) return 0; - return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm); + if (zpci_kvm_hook.kvm_register) + return zpci_kvm_hook.kvm_register(zdev, vdev->vdev.kvm); + + return -ENOENT; } void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) @@ -161,5 +164,6 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) if (!zdev || !vdev->vdev.kvm) return; - kvm_s390_pci_unregister_kvm(zdev); + if (zpci_kvm_hook.kvm_unregister) + zpci_kvm_hook.kvm_unregister(zdev); } From 814816d71e29934d0a76ee259b54c0b80c3b0e4a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 23 Aug 2022 18:36:35 +0200 Subject: [PATCH 14/22] powerpc: Fix hard_irq_disable() with sanitizer As reported by Zhouyi Zhou, WRITE_ONCE() is not atomic as expected when KASAN or KCSAN are compiled in. Fix it by re-implementing it using inline assembly. Fixes: 077fc62b2b66 ("powerpc/irq: remove inline assembly in hard_irq_disable macro") Reported-by: Zhouyi Zhou Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a8298991b3df049a54ee8e558838e34265812014.1661272586.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 26ede09c521d..3c8cb48f88ae 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -282,7 +282,8 @@ static inline bool pmi_irq_pending(void) flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) { \ - WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\ + asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \ + : "r" (current_stack_pointer)); \ trace_hardirqs_off(); \ } \ } while(0) From 0204750bd4c6ccc2fb7417618477f10373b33f56 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 30 Aug 2022 10:49:47 -0700 Subject: [PATCH 15/22] KVM: x86: Mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES KVM should not claim to virtualize unknown IA32_ARCH_CAPABILITIES bits. When kvm_get_arch_capabilities() was originally written, there were only a few bits defined in this MSR, and KVM could virtualize all of them. However, over the years, several bits have been defined that KVM cannot just blindly pass through to the guest without additional work (such as virtualizing an MSR promised by the IA32_ARCH_CAPABILITES feature bit). Define a mask of supported IA32_ARCH_CAPABILITIES bits, and mask off any other bits that are set in the hardware MSR. Cc: Paolo Bonzini Fixes: 5b76a3cff011 ("KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry") Signed-off-by: Jim Mattson Reviewed-by: Vipin Sharma Reviewed-by: Xiaoyao Li Message-Id: <20220830174947.2182144-1-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d7374d768296..78160b19b403 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1557,12 +1557,32 @@ static const u32 msr_based_features_all[] = { static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; static unsigned int num_msr_based_features; +/* + * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM + * does not yet virtualize. These include: + * 10 - MISC_PACKAGE_CTRLS + * 11 - ENERGY_FILTERING_CTL + * 12 - DOITM + * 18 - FB_CLEAR_CTRL + * 21 - XAPIC_DISABLE_STATUS + * 23 - OVERCLOCKING_STATUS + */ + +#define KVM_SUPPORTED_ARCH_CAP \ + (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \ + ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ + ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ + ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO) + static u64 kvm_get_arch_capabilities(void) { u64 data = 0; - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); + data &= KVM_SUPPORTED_ARCH_CAP; + } /* * If nx_huge_pages is enabled, KVM's shadow paging will ensure that @@ -1610,9 +1630,6 @@ static u64 kvm_get_arch_capabilities(void) */ } - /* Guests don't need to know "Fill buffer clear control" exists */ - data &= ~ARCH_CAP_FB_CLEAR_CTRL; - return data; } From 3c0ba05ce9c92f94ef5b7f7f1e0102c4a3cd2a77 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 1 Sep 2022 20:23:00 +0800 Subject: [PATCH 16/22] KVM: x86: fix memoryleak in kvm_arch_vcpu_create() When allocating memory for mci_ctl2_banks fails, KVM doesn't release mce_banks leading to memoryleak. Fix this issue by calling kfree() for it when kcalloc() fails. Fixes: 281b52780b57 ("KVM: x86: Add emulation for MSR_IA32_MCx_CTL2 MSRs.") Signed-off-by: Miaohe Lin Message-Id: <20220901122300.22298-1-linmiaohe@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78160b19b403..54ee086943c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11580,7 +11580,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64), GFP_KERNEL_ACCOUNT); if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks) - goto fail_free_pio_data; + goto fail_free_mce_banks; vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, @@ -11634,7 +11634,6 @@ free_wbinvd_dirty_mask: fail_free_mce_banks: kfree(vcpu->arch.mce_banks); kfree(vcpu->arch.mci_ctl2_banks); -fail_free_pio_data: free_page((unsigned long)vcpu->arch.pio_data); fail_free_lapic: kvm_free_lapic(vcpu); From 87693645ae89d9f9779e0bc53606bf228ec36411 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 31 Aug 2022 11:35:24 +0800 Subject: [PATCH 17/22] perf/x86/core: Completely disable guest PEBS via guest's global_ctrl When a guest PEBS counter is cross-mapped by a host counter, software will remove the corresponding bit in the arr[global_ctrl].guest and expect hardware to perform a change of state "from enable to disable" via the msr_slot[] switch during the vmx transaction. The real world is that if user adjust the counter overflow value small enough, it still opens a tiny race window for the previously PEBS-enabled counter to write cross-mapped PEBS records into the guest's PEBS buffer, when arr[global_ctrl].guest has been prioritised (switch_msr_special stuff) to switch into the enabled state, while the arr[pebs_enable].guest has not. Close this window by clearing invalid bits in the arr[global_ctrl].guest. Cc: linux-perf-users@vger.kernel.org Cc: Kan Liang Cc: Peter Zijlstra Cc: Sean Christopherson Fixes: 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") Signed-off-by: Like Xu Message-Id: <20220831033524.58561-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/events/intel/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2db93498ff71..75cdd11ab014 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4052,8 +4052,9 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) /* Disable guest PEBS if host PEBS is enabled. */ arr[pebs_enable].guest = 0; } else { - /* Disable guest PEBS for cross-mapped PEBS counters. */ + /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */ arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask; + arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask; /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */ arr[global_ctrl].guest |= arr[pebs_enable].guest; } From 22c6a0ef6b2631cda406a9c0c26774b0b3463b7b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Aug 2022 12:41:25 -0400 Subject: [PATCH 18/22] KVM: x86: check validity of argument to KVM_SET_MP_STATE An invalid argument to KVM_SET_MP_STATE has no effect other than making the vCPU fail to run at the next KVM_RUN. Since it is extremely unlikely that any userspace is relying on it, fail with -EINVAL just like for other architectures. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54ee086943c1..43a6a7efc6ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10669,7 +10669,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu) case KVM_MP_STATE_INIT_RECEIVED: break; default: - return -EINTR; + WARN_ON_ONCE(1); + break; } return 1; } @@ -11110,9 +11111,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); - if (!lapic_in_kernel(vcpu) && - mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + switch (mp_state->mp_state) { + case KVM_MP_STATE_UNINITIALIZED: + case KVM_MP_STATE_HALTED: + case KVM_MP_STATE_AP_RESET_HOLD: + case KVM_MP_STATE_INIT_RECEIVED: + case KVM_MP_STATE_SIPI_RECEIVED: + if (!lapic_in_kernel(vcpu)) + goto out; + break; + + case KVM_MP_STATE_RUNNABLE: + break; + + default: goto out; + } /* * KVM_MP_STATE_INIT_RECEIVED means the processor is in From 684c68d92e2e1b97fa2f31c35c1b0f7671a8618a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Aug 2022 23:10:52 +1000 Subject: [PATCH 19/22] Revert "powerpc/irq: Don't open code irq_soft_mask helpers" This reverts commit ef5b570d3700fbb8628a58da0487486ceeb713cd. Zhouyi reported that commit is causing crashes when running rcutorture with KASAN enabled: BUG: using smp_processor_id() in preemptible [00000000] code: rcu_torture_rea/100 caller is rcu_preempt_deferred_qs_irqrestore+0x74/0xed0 CPU: 4 PID: 100 Comm: rcu_torture_rea Tainted: G W 5.19.0-rc5-next-20220708-dirty #253 Call Trace: dump_stack_lvl+0xbc/0x108 (unreliable) check_preemption_disabled+0x154/0x160 rcu_preempt_deferred_qs_irqrestore+0x74/0xed0 __rcu_read_unlock+0x290/0x3b0 rcu_torture_read_unlock+0x30/0xb0 rcutorture_one_extend+0x198/0x810 rcu_torture_one_read+0x58c/0xc90 rcu_torture_reader+0x12c/0x360 kthread+0x1e8/0x220 ret_from_kernel_thread+0x5c/0x64 KASAN will generate instrumentation instructions around the WRITE_ONCE(local_paca->irq_soft_mask, mask): 0xc000000000295cb0 <+0>: addis r2,r12,774 0xc000000000295cb4 <+4>: addi r2,r2,16464 0xc000000000295cb8 <+8>: mflr r0 0xc000000000295cbc <+12>: bl 0xc00000000008bb4c 0xc000000000295cc0 <+16>: mflr r0 0xc000000000295cc4 <+20>: std r31,-8(r1) 0xc000000000295cc8 <+24>: addi r3,r13,2354 0xc000000000295ccc <+28>: mr r31,r13 0xc000000000295cd0 <+32>: std r0,16(r1) 0xc000000000295cd4 <+36>: stdu r1,-48(r1) 0xc000000000295cd8 <+40>: bl 0xc000000000609b98 <__asan_store1+8> 0xc000000000295cdc <+44>: nop 0xc000000000295ce0 <+48>: li r9,1 0xc000000000295ce4 <+52>: stb r9,2354(r31) 0xc000000000295ce8 <+56>: addi r1,r1,48 0xc000000000295cec <+60>: ld r0,16(r1) 0xc000000000295cf0 <+64>: ld r31,-8(r1) 0xc000000000295cf4 <+68>: mtlr r0 If there is a context switch before "stb r9,2354(r31)", r31 may not equal to r13, in such case, irq soft mask will not work. The usual solution of marking the code ineligible for instrumentation forces the code out-of-line, which we would prefer to avoid. Christophe proposed a partial revert, but Nick raised some concerns with that. So for now do a full revert. Reported-by: Zhouyi Zhou [mpe: Construct change log based on Zhouyi's original report] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220831131052.42250-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/hw_irq.h | 43 ++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3c8cb48f88ae..983551859891 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -113,7 +113,14 @@ static inline void __hard_RI_enable(void) static inline notrace unsigned long irq_soft_mask_return(void) { - return READ_ONCE(local_paca->irq_soft_mask); + unsigned long flags; + + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask))); + + return flags; } /* @@ -140,24 +147,46 @@ static inline notrace void irq_soft_mask_set(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON(mask && !(mask & IRQS_DISABLED)); - WRITE_ONCE(local_paca->irq_soft_mask, mask); - barrier(); + asm volatile( + "stb %0,%1(13)" + : + : "r" (mask), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "memory"); } static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { - unsigned long flags = irq_soft_mask_return(); + unsigned long flags; - irq_soft_mask_set(mask); +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); return flags; } static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags = irq_soft_mask_return(); + unsigned long flags, tmp; - irq_soft_mask_set(flags | mask); + asm volatile( + "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" + : "=&r" (flags), "=r" (tmp) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); +#endif return flags; } From 6cf07810e9ef8535d60160d13bf0fd05f2af38e7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Aug 2022 08:12:56 -0700 Subject: [PATCH 20/22] powerpc/papr_scm: Ensure rc is always initialized in papr_scm_pmu_register() Clang warns: arch/powerpc/platforms/pseries/papr_scm.c:492:6: warning: variable 'rc' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (!p->stat_buffer_len) ^~~~~~~~~~~~~~~~~~~ arch/powerpc/platforms/pseries/papr_scm.c:523:64: note: uninitialized use occurs here dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); ^~ include/linux/dev_printk.h:150:67: note: expanded from macro 'dev_info' dev_printk_index_wrap(_dev_info, KERN_INFO, dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~ include/linux/dev_printk.h:110:23: note: expanded from macro 'dev_printk_index_wrap' _p_func(dev, fmt, ##__VA_ARGS__); \ ^~~~~~~~~~~ arch/powerpc/platforms/pseries/papr_scm.c:492:2: note: remove the 'if' if its condition is always false if (!p->stat_buffer_len) ^~~~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/platforms/pseries/papr_scm.c:484:8: note: initialize the variable 'rc' to silence this warning int rc, nodeid; ^ = 0 1 warning generated. The call to papr_scm_pmu_check_events() was eliminated but a return code was not added to the if statement. Add the same return code from papr_scm_pmu_check_events() for this condition so there is no more warning. Fixes: 9b1ac04698a4 ("powerpc/papr_scm: Fix nvdimm event mappings") Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1701 Link: https://lore.kernel.org/r/20220830151256.1473169-1-nathan@kernel.org --- arch/powerpc/platforms/pseries/papr_scm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 54740af21557..2f8385523a13 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -489,8 +489,10 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p) goto pmu_err_print; } - if (!p->stat_buffer_len) + if (!p->stat_buffer_len) { + rc = -ENOENT; goto pmu_check_events_err; + } nd_pmu->pmu.task_ctx_nr = perf_invalid_context; nd_pmu->pmu.name = nvdimm_name(p->nvdimm); From b0839b281c427e844143dba3893e25c83cdd6c17 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 1 Sep 2022 10:59:13 -0700 Subject: [PATCH 21/22] Makefile.extrawarn: re-enable -Wformat for clang; take 2 -Wformat was recently re-enabled for builds with clang, then quickly re-disabled, due to concerns stemming from the frequency of default argument promotion related warning instances. commit 258fafcd0683 ("Makefile.extrawarn: re-enable -Wformat for clang") commit 21f9c8a13bb2 ("Revert "Makefile.extrawarn: re-enable -Wformat for clang"") ISO WG14 has ratified N2562 to address default argument promotion explicitly for printf, as part of the upcoming ISO C2X standard. The behavior of clang was changed in clang-16 to not warn for the cited cases in all language modes. Add a version check, so that users of clang-16 now get the full effect of -Wformat. For older clang versions, re-enable flags under the -Wformat group that way users still get some useful checks related to format strings, without noisy default argument promotion warnings. I intentionally omitted -Wformat-y2k and -Wformat-security from being re-enabled, which are also part of -Wformat in clang-16. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Link: https://github.com/llvm/llvm-project/issues/57102 Link: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf Suggested-by: Justin Stitt Suggested-by: Nathan Chancellor Suggested-by: Youngmin Nam Signed-off-by: Nick Desaulniers Reviewed-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 0621c39a3955..6ae482158bc4 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -47,7 +47,19 @@ else ifdef CONFIG_CC_IS_CLANG KBUILD_CFLAGS += -Wno-initializer-overrides +# Clang before clang-16 would warn on default argument promotions. +ifeq ($(shell [ $(CONFIG_CLANG_VERSION) -lt 160000 ] && echo y),y) +# Disable -Wformat KBUILD_CFLAGS += -Wno-format +# Then re-enable flags that were part of the -Wformat group that aren't +# problematic. +KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier +KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull +# Requires clang-12+. +ifeq ($(shell [ $(CONFIG_CLANG_VERSION) -ge 120000 ] && echo y),y) +KBUILD_CFLAGS += -Wformat-insufficient-args +endif +endif KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare From 7e18e42e4b280c85b76967a9106a13ca61c16179 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Sep 2022 13:10:01 -0700 Subject: [PATCH 22/22] Linux 6.0-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 952d354069a4..a4f71076cacb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*