mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Merge "Merge 6.0-rc4 into android-mainline" into android-mainline
This commit is contained in:
2
Makefile
2
Makefile
@@ -2,7 +2,7 @@
|
||||
VERSION = 6
|
||||
PATCHLEVEL = 0
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc3
|
||||
EXTRAVERSION = -rc4
|
||||
NAME = Hurr durr I'ma ninja sloth
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
||||
@@ -83,6 +83,8 @@ enum {
|
||||
FW_FEATURE_POWERNV_ALWAYS = 0,
|
||||
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
|
||||
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
|
||||
FW_FEATURE_NATIVE_POSSIBLE = 0,
|
||||
FW_FEATURE_NATIVE_ALWAYS = 0,
|
||||
FW_FEATURE_POSSIBLE =
|
||||
#ifdef CONFIG_PPC_PSERIES
|
||||
FW_FEATURE_PSERIES_POSSIBLE |
|
||||
@@ -92,6 +94,9 @@ enum {
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_PS3
|
||||
FW_FEATURE_PS3_POSSIBLE |
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_HASH_MMU_NATIVE
|
||||
FW_FEATURE_NATIVE_ALWAYS |
|
||||
#endif
|
||||
0,
|
||||
FW_FEATURE_ALWAYS =
|
||||
@@ -103,6 +108,9 @@ enum {
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_PS3
|
||||
FW_FEATURE_PS3_ALWAYS &
|
||||
#endif
|
||||
#ifdef CONFIG_PPC_HASH_MMU_NATIVE
|
||||
FW_FEATURE_NATIVE_ALWAYS &
|
||||
#endif
|
||||
FW_FEATURE_POSSIBLE,
|
||||
|
||||
|
||||
@@ -113,7 +113,14 @@ static inline void __hard_RI_enable(void)
|
||||
|
||||
static inline notrace unsigned long irq_soft_mask_return(void)
|
||||
{
|
||||
return READ_ONCE(local_paca->irq_soft_mask);
|
||||
unsigned long flags;
|
||||
|
||||
asm volatile(
|
||||
"lbz %0,%1(13)"
|
||||
: "=r" (flags)
|
||||
: "i" (offsetof(struct paca_struct, irq_soft_mask)));
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -140,24 +147,46 @@ static inline notrace void irq_soft_mask_set(unsigned long mask)
|
||||
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
|
||||
WARN_ON(mask && !(mask & IRQS_DISABLED));
|
||||
|
||||
WRITE_ONCE(local_paca->irq_soft_mask, mask);
|
||||
barrier();
|
||||
asm volatile(
|
||||
"stb %0,%1(13)"
|
||||
:
|
||||
: "r" (mask),
|
||||
"i" (offsetof(struct paca_struct, irq_soft_mask))
|
||||
: "memory");
|
||||
}
|
||||
|
||||
static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
|
||||
{
|
||||
unsigned long flags = irq_soft_mask_return();
|
||||
unsigned long flags;
|
||||
|
||||
irq_soft_mask_set(mask);
|
||||
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
|
||||
WARN_ON(mask && !(mask & IRQS_DISABLED));
|
||||
#endif
|
||||
|
||||
asm volatile(
|
||||
"lbz %0,%1(13); stb %2,%1(13)"
|
||||
: "=&r" (flags)
|
||||
: "i" (offsetof(struct paca_struct, irq_soft_mask)),
|
||||
"r" (mask)
|
||||
: "memory");
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
|
||||
{
|
||||
unsigned long flags = irq_soft_mask_return();
|
||||
unsigned long flags, tmp;
|
||||
|
||||
irq_soft_mask_set(flags | mask);
|
||||
asm volatile(
|
||||
"lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
|
||||
: "=&r" (flags), "=r" (tmp)
|
||||
: "i" (offsetof(struct paca_struct, irq_soft_mask)),
|
||||
"r" (mask)
|
||||
: "memory");
|
||||
|
||||
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
|
||||
WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
|
||||
#endif
|
||||
|
||||
return flags;
|
||||
}
|
||||
@@ -282,7 +311,8 @@ static inline bool pmi_irq_pending(void)
|
||||
flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
|
||||
local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
|
||||
if (!arch_irqs_disabled_flags(flags)) { \
|
||||
WRITE_ONCE(local_paca->saved_r1, current_stack_pointer);\
|
||||
asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \
|
||||
: "r" (current_stack_pointer)); \
|
||||
trace_hardirqs_off(); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
@@ -245,6 +245,15 @@ static int __init pcibios_init(void)
|
||||
|
||||
printk(KERN_INFO "PCI: Probing PCI hardware\n");
|
||||
|
||||
#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
|
||||
/*
|
||||
* Enable PCI domains in /proc when PCI bus numbers are not unique
|
||||
* across all PCI domains to prevent conflicts. And keep PCI domain 0
|
||||
* backward compatible in /proc for video cards.
|
||||
*/
|
||||
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
|
||||
#endif
|
||||
|
||||
if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
|
||||
pci_assign_all_buses = 1;
|
||||
|
||||
|
||||
@@ -109,8 +109,12 @@ __enter_rtas:
|
||||
* its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
|
||||
* is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
|
||||
* MSR[S] is set, it will remain when entering RTAS.
|
||||
* If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV
|
||||
* from the saved MSR value and insert into the value RTAS will use.
|
||||
*/
|
||||
extrdi r0, r6, 1, 63 - MSR_HV_LG
|
||||
LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
|
||||
insrdi r6, r0, 1, 63 - MSR_HV_LG
|
||||
|
||||
li r0,0
|
||||
mtmsrd r0,1 /* disable RI before using SRR0/1 */
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
.p2align 3
|
||||
#define __SYSCALL(nr, entry) .8byte entry
|
||||
#else
|
||||
.p2align 2
|
||||
#define __SYSCALL(nr, entry) .long entry
|
||||
#endif
|
||||
|
||||
|
||||
@@ -124,9 +124,6 @@ struct papr_scm_priv {
|
||||
|
||||
/* The bits which need to be overridden */
|
||||
u64 health_bitmap_inject_mask;
|
||||
|
||||
/* array to have event_code and stat_id mappings */
|
||||
u8 *nvdimm_events_map;
|
||||
};
|
||||
|
||||
static int papr_scm_pmem_flush(struct nd_region *nd_region,
|
||||
@@ -350,6 +347,25 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
|
||||
|
||||
static const char * const nvdimm_events_map[] = {
|
||||
[1] = "CtlResCt",
|
||||
[2] = "CtlResTm",
|
||||
[3] = "PonSecs ",
|
||||
[4] = "MemLife ",
|
||||
[5] = "CritRscU",
|
||||
[6] = "HostLCnt",
|
||||
[7] = "HostSCnt",
|
||||
[8] = "HostSDur",
|
||||
[9] = "HostLDur",
|
||||
[10] = "MedRCnt ",
|
||||
[11] = "MedWCnt ",
|
||||
[12] = "MedRDur ",
|
||||
[13] = "MedWDur ",
|
||||
[14] = "CchRHCnt",
|
||||
[15] = "CchWHCnt",
|
||||
[16] = "FastWCnt",
|
||||
};
|
||||
|
||||
static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
|
||||
{
|
||||
struct papr_scm_perf_stat *stat;
|
||||
@@ -357,11 +373,15 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
|
||||
struct papr_scm_priv *p = dev_get_drvdata(dev);
|
||||
int rc, size;
|
||||
|
||||
/* Invalid eventcode */
|
||||
if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
|
||||
return -EINVAL;
|
||||
|
||||
/* Allocate request buffer enough to hold single performance stat */
|
||||
size = sizeof(struct papr_scm_perf_stats) +
|
||||
sizeof(struct papr_scm_perf_stat);
|
||||
|
||||
if (!p || !p->nvdimm_events_map)
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
stats = kzalloc(size, GFP_KERNEL);
|
||||
@@ -370,7 +390,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev,
|
||||
|
||||
stat = &stats->scm_statistic[0];
|
||||
memcpy(&stat->stat_id,
|
||||
&p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)],
|
||||
nvdimm_events_map[event->attr.config],
|
||||
sizeof(stat->stat_id));
|
||||
stat->stat_val = 0;
|
||||
|
||||
@@ -458,56 +478,6 @@ static void papr_scm_pmu_del(struct perf_event *event, int flags)
|
||||
papr_scm_pmu_read(event);
|
||||
}
|
||||
|
||||
static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
|
||||
{
|
||||
struct papr_scm_perf_stat *stat;
|
||||
struct papr_scm_perf_stats *stats;
|
||||
u32 available_events;
|
||||
int index, rc = 0;
|
||||
|
||||
if (!p->stat_buffer_len)
|
||||
return -ENOENT;
|
||||
|
||||
available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
|
||||
/ sizeof(struct papr_scm_perf_stat);
|
||||
if (available_events == 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* Allocate the buffer for phyp where stats are written */
|
||||
stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
|
||||
if (!stats) {
|
||||
rc = -ENOMEM;
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Called to get list of events supported */
|
||||
rc = drc_pmem_query_stats(p, stats, 0);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Allocate memory and populate nvdimm_events_map.
|
||||
* Allocate an extra element for NULL entry
|
||||
*/
|
||||
p->nvdimm_events_map = kcalloc(available_events + 1,
|
||||
sizeof(stat->stat_id),
|
||||
GFP_KERNEL);
|
||||
if (!p->nvdimm_events_map) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Copy all stat_ids to event map */
|
||||
for (index = 0, stat = stats->scm_statistic;
|
||||
index < available_events; index++, ++stat) {
|
||||
memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)],
|
||||
&stat->stat_id, sizeof(stat->stat_id));
|
||||
}
|
||||
out:
|
||||
kfree(stats);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void papr_scm_pmu_register(struct papr_scm_priv *p)
|
||||
{
|
||||
struct nvdimm_pmu *nd_pmu;
|
||||
@@ -519,9 +489,10 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p)
|
||||
goto pmu_err_print;
|
||||
}
|
||||
|
||||
rc = papr_scm_pmu_check_events(p, nd_pmu);
|
||||
if (rc)
|
||||
if (!p->stat_buffer_len) {
|
||||
rc = -ENOENT;
|
||||
goto pmu_check_events_err;
|
||||
}
|
||||
|
||||
nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
|
||||
nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
|
||||
@@ -539,7 +510,7 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p)
|
||||
|
||||
rc = register_nvdimm_pmu(nd_pmu, p->pdev);
|
||||
if (rc)
|
||||
goto pmu_register_err;
|
||||
goto pmu_check_events_err;
|
||||
|
||||
/*
|
||||
* Set archdata.priv value to nvdimm_pmu structure, to handle the
|
||||
@@ -548,8 +519,6 @@ static void papr_scm_pmu_register(struct papr_scm_priv *p)
|
||||
p->pdev->archdata.priv = nd_pmu;
|
||||
return;
|
||||
|
||||
pmu_register_err:
|
||||
kfree(p->nvdimm_events_map);
|
||||
pmu_check_events_err:
|
||||
kfree(nd_pmu);
|
||||
pmu_err_print:
|
||||
@@ -1560,7 +1529,6 @@ static int papr_scm_remove(struct platform_device *pdev)
|
||||
unregister_nvdimm_pmu(pdev->archdata.priv);
|
||||
|
||||
pdev->archdata.priv = NULL;
|
||||
kfree(p->nvdimm_events_map);
|
||||
kfree(p->bus_desc.provider_name);
|
||||
kfree(p);
|
||||
|
||||
|
||||
@@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
|
||||
u32 type, u64 flags);
|
||||
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
|
||||
|
||||
#ifdef CONFIG_RISCV_SBI_V01
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
|
||||
#endif
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
|
||||
|
||||
#endif /* __RISCV_KVM_VCPU_SBI_H__ */
|
||||
|
||||
@@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err)
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RISCV_SBI_V01
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
|
||||
#else
|
||||
#ifndef CONFIG_RISCV_SBI_V01
|
||||
static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
|
||||
.extid_start = -1UL,
|
||||
.extid_end = -1UL,
|
||||
.handler = NULL,
|
||||
};
|
||||
#endif
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
|
||||
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
|
||||
|
||||
static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
|
||||
&vcpu_sbi_ext_v01,
|
||||
|
||||
@@ -299,7 +299,6 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr;
|
||||
struct kvm_vcpu_timer *t = &vcpu->arch.timer;
|
||||
|
||||
kvm_riscv_vcpu_update_timedelta(vcpu);
|
||||
@@ -307,7 +306,6 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
|
||||
if (!t->sstc_enabled)
|
||||
return;
|
||||
|
||||
csr = &vcpu->arch.guest_csr;
|
||||
#if defined(CONFIG_32BIT)
|
||||
csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
|
||||
csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
|
||||
@@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr;
|
||||
struct kvm_vcpu_timer *t = &vcpu->arch.timer;
|
||||
|
||||
if (!t->sstc_enabled)
|
||||
return;
|
||||
|
||||
csr = &vcpu->arch.guest_csr;
|
||||
t = &vcpu->arch.timer;
|
||||
#if defined(CONFIG_32BIT)
|
||||
t->next_cycles = csr_read(CSR_VSTIMECMP);
|
||||
|
||||
@@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
#define __KVM_HAVE_ARCH_VM_FREE
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
|
||||
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
|
||||
void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
|
||||
#else
|
||||
static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
|
||||
struct kvm *kvm)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
|
||||
#endif
|
||||
struct zpci_kvm_hook {
|
||||
int (*kvm_register)(void *opaque, struct kvm *kvm);
|
||||
void (*kvm_unregister)(void *opaque);
|
||||
};
|
||||
|
||||
extern struct zpci_kvm_hook zpci_kvm_hook;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
|
||||
* available, enable them and let userspace indicate whether or not they will
|
||||
* be used (specify SHM bit to disable).
|
||||
*/
|
||||
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
|
||||
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
|
||||
{
|
||||
struct zpci_dev *zdev = opaque;
|
||||
int rc;
|
||||
|
||||
if (!zdev)
|
||||
@@ -510,10 +511,10 @@ err:
|
||||
kvm_put_kvm(kvm);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
|
||||
|
||||
void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
|
||||
static void kvm_s390_pci_unregister_kvm(void *opaque)
|
||||
{
|
||||
struct zpci_dev *zdev = opaque;
|
||||
struct kvm *kvm;
|
||||
|
||||
if (!zdev)
|
||||
@@ -566,7 +567,6 @@ out:
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
|
||||
|
||||
void kvm_s390_pci_init_list(struct kvm *kvm)
|
||||
{
|
||||
@@ -678,6 +678,8 @@ int kvm_s390_pci_init(void)
|
||||
|
||||
spin_lock_init(&aift->gait_lock);
|
||||
mutex_init(&aift->aift_lock);
|
||||
zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
|
||||
zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -685,6 +687,8 @@ int kvm_s390_pci_init(void)
|
||||
void kvm_s390_pci_exit(void)
|
||||
{
|
||||
mutex_destroy(&aift->aift_lock);
|
||||
zpci_kvm_hook.kvm_register = NULL;
|
||||
zpci_kvm_hook.kvm_unregister = NULL;
|
||||
|
||||
kfree(aift);
|
||||
}
|
||||
|
||||
@@ -5,5 +5,5 @@
|
||||
|
||||
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
|
||||
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
|
||||
pci_bus.o
|
||||
pci_bus.o pci_kvm_hook.o
|
||||
obj-$(CONFIG_PCI_IOV) += pci_iov.o
|
||||
|
||||
11
arch/s390/pci/pci_kvm_hook.c
Normal file
11
arch/s390/pci/pci_kvm_hook.c
Normal file
@@ -0,0 +1,11 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* VFIO ZPCI devices support
|
||||
*
|
||||
* Copyright (C) IBM Corp. 2022. All rights reserved.
|
||||
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
|
||||
*/
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
struct zpci_kvm_hook zpci_kvm_hook;
|
||||
EXPORT_SYMBOL_GPL(zpci_kvm_hook);
|
||||
@@ -4052,8 +4052,9 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
|
||||
/* Disable guest PEBS if host PEBS is enabled. */
|
||||
arr[pebs_enable].guest = 0;
|
||||
} else {
|
||||
/* Disable guest PEBS for cross-mapped PEBS counters. */
|
||||
/* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
|
||||
arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
|
||||
arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
|
||||
/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
|
||||
arr[global_ctrl].guest |= arr[pebs_enable].guest;
|
||||
}
|
||||
|
||||
@@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
|
||||
__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
|
||||
}
|
||||
|
||||
static bool need_remote_flush(u64 old, u64 new)
|
||||
{
|
||||
if (!is_shadow_present_pte(old))
|
||||
return false;
|
||||
if (!is_shadow_present_pte(new))
|
||||
return true;
|
||||
if ((old ^ new) & SPTE_BASE_ADDR_MASK)
|
||||
return true;
|
||||
old ^= shadow_nx_mask;
|
||||
new ^= shadow_nx_mask;
|
||||
return (old & ~new & SPTE_PERM_MASK) != 0;
|
||||
}
|
||||
|
||||
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
|
||||
int *bytes)
|
||||
{
|
||||
@@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
|
||||
if (gentry && sp->role.level != PG_LEVEL_4K)
|
||||
++vcpu->kvm->stat.mmu_pde_zapped;
|
||||
if (need_remote_flush(entry, *spte))
|
||||
if (is_shadow_present_pte(entry))
|
||||
flush = true;
|
||||
++spte;
|
||||
}
|
||||
@@ -6085,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot,
|
||||
int start_level)
|
||||
{
|
||||
bool flush = false;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm)) {
|
||||
write_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
|
||||
start_level, KVM_MAX_HUGEPAGE_LEVEL,
|
||||
false);
|
||||
slot_handle_level(kvm, memslot, slot_rmap_write_protect,
|
||||
start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
read_lock(&kvm->mmu_lock);
|
||||
flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
|
||||
kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Flush TLBs if any SPTEs had to be write-protected to ensure that
|
||||
* guest writes are reflected in the dirty bitmap before the memslot
|
||||
* update completes, i.e. before enabling dirty logging is visible to
|
||||
* userspace.
|
||||
*
|
||||
* Perform the TLB flush outside the mmu_lock to reduce the amount of
|
||||
* time the lock is held. However, this does mean that another CPU can
|
||||
* now grab mmu_lock and encounter a write-protected SPTE while CPUs
|
||||
* still have a writable mapping for the associated GFN in their TLB.
|
||||
*
|
||||
* This is safe but requires KVM to be careful when making decisions
|
||||
* based on the write-protection status of an SPTE. Specifically, KVM
|
||||
* also write-protects SPTEs to monitor changes to guest page tables
|
||||
* during shadow paging, and must guarantee no CPUs can write to those
|
||||
* pages before the lock is dropped. As mentioned in the previous
|
||||
* paragraph, a write-protected SPTE is no guarantee that CPU cannot
|
||||
* perform writes. So to determine if a TLB flush is truly required, KVM
|
||||
* will clear a separate software-only bit (MMU-writable) and skip the
|
||||
* flush if-and-only-if this bit was already clear.
|
||||
*
|
||||
* See is_writable_pte() for more details.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
|
||||
@@ -6493,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
bool flush = false;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm)) {
|
||||
write_lock(&kvm->mmu_lock);
|
||||
/*
|
||||
* Clear dirty bits only on 4k SPTEs since the legacy MMU only
|
||||
* supports dirty logging at a 4k granularity.
|
||||
*/
|
||||
flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
|
||||
slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
read_lock(&kvm->mmu_lock);
|
||||
flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
|
||||
kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
|
||||
read_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller will flush the TLBs after this function returns.
|
||||
*
|
||||
* It's also safe to flush TLBs out of mmu lock here as currently this
|
||||
* function is only used for dirty logging, in which case flushing TLB
|
||||
* out of mmu lock also guarantees no dirty pages will be lost in
|
||||
* dirty_bitmap.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
|
||||
@@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
|
||||
}
|
||||
|
||||
/*
|
||||
* An shadow-present leaf SPTE may be non-writable for 3 possible reasons:
|
||||
* A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
|
||||
*
|
||||
* 1. To intercept writes for dirty logging. KVM write-protects huge pages
|
||||
* so that they can be split down into the dirty logging
|
||||
@@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
|
||||
* read-only memslot or guest memory backed by a read-only VMA. Writes to
|
||||
* such pages are disallowed entirely.
|
||||
*
|
||||
* To keep track of why a given SPTE is write-protected, KVM uses 2
|
||||
* software-only bits in the SPTE:
|
||||
* 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this
|
||||
* case, the SPTE is access-protected, not just write-protected!
|
||||
*
|
||||
* For cases #1 and #4, KVM can safely make such SPTEs writable without taking
|
||||
* mmu_lock as capturing the Accessed/Dirty state doesn't require taking it.
|
||||
* To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits
|
||||
* in the SPTE:
|
||||
*
|
||||
* shadow_mmu_writable_mask, aka MMU-writable -
|
||||
* Cleared on SPTEs that KVM is currently write-protecting for shadow paging
|
||||
@@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
|
||||
* shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging
|
||||
* (which does not clear the MMU-writable bit), does not flush TLBs before
|
||||
* dropping the lock, as it only needs to synchronize guest writes with the
|
||||
* dirty bitmap.
|
||||
* dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for
|
||||
* access-tracking via the clear_young() MMU notifier also does not flush TLBs.
|
||||
*
|
||||
* So, there is the problem: clearing the MMU-writable bit can encounter a
|
||||
* write-protected SPTE while CPUs still have writable mappings for that SPTE
|
||||
|
||||
@@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
|
||||
if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
|
||||
return true;
|
||||
|
||||
return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
|
||||
MSR_IA32_SPEC_CTRL);
|
||||
return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
|
||||
}
|
||||
|
||||
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
|
||||
|
||||
@@ -1557,12 +1557,32 @@ static const u32 msr_based_features_all[] = {
|
||||
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
|
||||
static unsigned int num_msr_based_features;
|
||||
|
||||
/*
|
||||
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
|
||||
* does not yet virtualize. These include:
|
||||
* 10 - MISC_PACKAGE_CTRLS
|
||||
* 11 - ENERGY_FILTERING_CTL
|
||||
* 12 - DOITM
|
||||
* 18 - FB_CLEAR_CTRL
|
||||
* 21 - XAPIC_DISABLE_STATUS
|
||||
* 23 - OVERCLOCKING_STATUS
|
||||
*/
|
||||
|
||||
#define KVM_SUPPORTED_ARCH_CAP \
|
||||
(ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
|
||||
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
|
||||
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
|
||||
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
|
||||
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
|
||||
|
||||
static u64 kvm_get_arch_capabilities(void)
|
||||
{
|
||||
u64 data = 0;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
|
||||
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
|
||||
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
|
||||
data &= KVM_SUPPORTED_ARCH_CAP;
|
||||
}
|
||||
|
||||
/*
|
||||
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
|
||||
@@ -1610,9 +1630,6 @@ static u64 kvm_get_arch_capabilities(void)
|
||||
*/
|
||||
}
|
||||
|
||||
/* Guests don't need to know "Fill buffer clear control" exists */
|
||||
data &= ~ARCH_CAP_FB_CLEAR_CTRL;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -10652,7 +10669,8 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
|
||||
case KVM_MP_STATE_INIT_RECEIVED:
|
||||
break;
|
||||
default:
|
||||
return -EINTR;
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@@ -11093,9 +11111,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
if (!lapic_in_kernel(vcpu) &&
|
||||
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
|
||||
switch (mp_state->mp_state) {
|
||||
case KVM_MP_STATE_UNINITIALIZED:
|
||||
case KVM_MP_STATE_HALTED:
|
||||
case KVM_MP_STATE_AP_RESET_HOLD:
|
||||
case KVM_MP_STATE_INIT_RECEIVED:
|
||||
case KVM_MP_STATE_SIPI_RECEIVED:
|
||||
if (!lapic_in_kernel(vcpu))
|
||||
goto out;
|
||||
break;
|
||||
|
||||
case KVM_MP_STATE_RUNNABLE:
|
||||
break;
|
||||
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM_MP_STATE_INIT_RECEIVED means the processor is in
|
||||
@@ -11563,7 +11594,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
|
||||
goto fail_free_pio_data;
|
||||
goto fail_free_mce_banks;
|
||||
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
|
||||
|
||||
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
|
||||
@@ -11617,7 +11648,6 @@ free_wbinvd_dirty_mask:
|
||||
fail_free_mce_banks:
|
||||
kfree(vcpu->arch.mce_banks);
|
||||
kfree(vcpu->arch.mci_ctl2_banks);
|
||||
fail_free_pio_data:
|
||||
free_page((unsigned long)vcpu->arch.pio_data);
|
||||
fail_free_lapic:
|
||||
kvm_free_lapic(vcpu);
|
||||
@@ -12473,6 +12503,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
||||
} else {
|
||||
kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unconditionally flush the TLBs after enabling dirty logging.
|
||||
* A flush is almost always going to be necessary (see below),
|
||||
* and unconditionally flushing allows the helpers to omit
|
||||
* the subtly complex checks when removing write access.
|
||||
*
|
||||
* Do the flush outside of mmu_lock to reduce the amount of
|
||||
* time mmu_lock is held. Flushing after dropping mmu_lock is
|
||||
* safe as KVM only needs to guarantee the slot is fully
|
||||
* write-protected before returning to userspace, i.e. before
|
||||
* userspace can consume the dirty status.
|
||||
*
|
||||
* Flushing outside of mmu_lock requires KVM to be careful when
|
||||
* making decisions based on writable status of an SPTE, e.g. a
|
||||
* !writable SPTE doesn't guarantee a CPU can't perform writes.
|
||||
*
|
||||
* Specifically, KVM also write-protects guest page tables to
|
||||
* monitor changes when using shadow paging, and must guarantee
|
||||
* no CPUs can write to those pages before mmu_lock is dropped.
|
||||
* Because CPUs may have stale TLB entries at this point, a
|
||||
* !writable SPTE doesn't guarantee CPUs can't perform writes.
|
||||
*
|
||||
* KVM also allows making SPTEs writable outside of mmu_lock,
|
||||
* e.g. to allow dirty logging without taking mmu_lock.
|
||||
*
|
||||
* To handle these scenarios, KVM uses a separate software-only
|
||||
* bit (MMU-writable) to track if a SPTE is !writable due to
|
||||
* a guest page table being write-protected (KVM clears the
|
||||
* MMU-writable flag when write-protecting for shadow paging).
|
||||
*
|
||||
* The use of MMU-writable is also the primary motivation for
|
||||
* the unconditional flush. Because KVM must guarantee that a
|
||||
* CPU doesn't contain stale, writable TLB entries for a
|
||||
* !MMU-writable SPTE, KVM must flush if it encounters any
|
||||
* MMU-writable SPTE regardless of whether the actual hardware
|
||||
* writable bit was set. I.e. KVM is almost guaranteed to need
|
||||
* to flush, while unconditionally flushing allows the "remove
|
||||
* write access" helpers to ignore MMU-writable entirely.
|
||||
*
|
||||
* See is_writable_pte() for more details (the case involving
|
||||
* access-tracked SPTEs is particularly relevant).
|
||||
*/
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, new);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -151,7 +151,10 @@ int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
|
||||
if (!vdev->vdev.kvm)
|
||||
return 0;
|
||||
|
||||
return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm);
|
||||
if (zpci_kvm_hook.kvm_register)
|
||||
return zpci_kvm_hook.kvm_register(zdev, vdev->vdev.kvm);
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
|
||||
@@ -161,5 +164,6 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
|
||||
if (!zdev || !vdev->vdev.kvm)
|
||||
return;
|
||||
|
||||
kvm_s390_pci_unregister_kvm(zdev);
|
||||
if (zpci_kvm_hook.kvm_unregister)
|
||||
zpci_kvm_hook.kvm_unregister(zdev);
|
||||
}
|
||||
|
||||
@@ -47,7 +47,19 @@ else
|
||||
|
||||
ifdef CONFIG_CC_IS_CLANG
|
||||
KBUILD_CFLAGS += -Wno-initializer-overrides
|
||||
# Clang before clang-16 would warn on default argument promotions.
|
||||
ifeq ($(shell [ $(CONFIG_CLANG_VERSION) -lt 160000 ] && echo y),y)
|
||||
# Disable -Wformat
|
||||
KBUILD_CFLAGS += -Wno-format
|
||||
# Then re-enable flags that were part of the -Wformat group that aren't
|
||||
# problematic.
|
||||
KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier
|
||||
KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull
|
||||
# Requires clang-12+.
|
||||
ifeq ($(shell [ $(CONFIG_CLANG_VERSION) -ge 120000 ] && echo y),y)
|
||||
KBUILD_CFLAGS += -Wformat-insufficient-args
|
||||
endif
|
||||
endif
|
||||
KBUILD_CFLAGS += -Wno-sign-compare
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
|
||||
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
|
||||
|
||||
@@ -754,7 +754,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
|
||||
void (*handler)(struct ex_regs *));
|
||||
|
||||
/* If a toddler were to say "abracadabra". */
|
||||
#define KVM_EXCEPTION_MAGIC 0xabacadabaull
|
||||
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
|
||||
|
||||
/*
|
||||
* KVM selftest exception fixup uses registers to coordinate with the exception
|
||||
@@ -786,7 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
|
||||
"lea 1f(%%rip), %%r10\n\t" \
|
||||
"lea 2f(%%rip), %%r11\n\t" \
|
||||
"1: " insn "\n\t" \
|
||||
"mov $0, %[vector]\n\t" \
|
||||
"movb $0, %[vector]\n\t" \
|
||||
"jmp 3f\n\t" \
|
||||
"2:\n\t" \
|
||||
"mov %%r9b, %[vector]\n\t" \
|
||||
|
||||
Reference in New Issue
Block a user