Merge 882ad2a2a8 ("Merge tag 'random-6.1-rc3-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random") into android-mainline

Steps on the way to 6.1-rc3

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I26543fbc9353bf7d07ecda8c06b98f6a6721f755
This commit is contained in:
Greg Kroah-Hartman
2022-10-30 09:45:15 +01:00
64 changed files with 553 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ Description: A collection of all the memory tiers allocated.
What: /sys/devices/virtual/memory_tiering/memory_tierN/
/sys/devices/virtual/memory_tiering/memory_tierN/nodes
/sys/devices/virtual/memory_tiering/memory_tierN/nodelist
Date: August 2022
Contact: Linux memory management mailing list <linux-mm@kvack.org>
Description: Directory with details of a specific memory tier
@@ -21,5 +21,5 @@ Description: Directory with details of a specific memory tier
A smaller value of N implies a higher (faster) memory tier in the
hierarchy.
nodes: NUMA nodes that are part of this memory tier.
nodelist: NUMA nodes that are part of this memory tier.

View File

@@ -14530,7 +14530,7 @@ L: linux-nilfs@vger.kernel.org
S: Supported
W: https://nilfs.sourceforge.io/
W: https://nilfs.osdn.jp/
T: git git://github.com/konis/nilfs2.git
T: git https://github.com/konis/nilfs2.git
F: Documentation/filesystems/nilfs2.rst
F: fs/nilfs2/
F: include/trace/events/nilfs2.h

View File

@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled())
return;
/*
* apply_to_page_range can call us this preempt enabled when
* operating on kernel page tables.
*/
preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
preempt_enable();
}
#define arch_flush_lazy_mmu_mode() do {} while (0)

View File

@@ -813,6 +813,13 @@ kernel_dbg_exc:
EXCEPTION_COMMON(0x260)
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
/*
* XXX: Returning from performance_monitor_exception taken as a
* soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
* and could cause bugs in return or elsewhere. That case should just
* restore registers and return. There is a workaround for one known
* problem in interrupt_exit_kernel_prepare().
*/
bl performance_monitor_exception
b interrupt_return

View File

@@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
bl performance_monitor_exception_async
b interrupt_return_srr
1:
bl performance_monitor_exception_nmi
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.

View File

@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
/*
* CT_WARN_ON comes here via program_check_exception,
* so avoid recursion.
* CT_WARN_ON comes here via program_check_exception, so avoid
* recursion.
*
* Skip the assertion on PMIs on 64e to work around a problem caused
* by NMI PMIs incorrectly taking this interrupt return path, it's
* possible for this to hit after interrupt exit to user switches
* context to user. See also the comment in the performance monitor
* handler in exceptions-64e.S
*/
if (TRAP(regs) != INTERRUPT_PROGRAM)
if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
TRAP(regs) != INTERRUPT_PROGRAM &&
TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked();

View File

@@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
*
* The MSR[EE] check catches among other things the short incoherency
* in hard_irq_disable() between clearing MSR[EE] and setting
* PACA_IRQ_HARD_DIS.
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
bne 1f // HARD_MASK is pending
// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
stb r11,PACAIRQHAPPENED(r13)
b .Lfast_kernel_interrupt_return_\srr\()
/* Must clear MSR_EE from _MSR */
1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */

View File

@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
help
Support running guest kernels in virtual machines on processors
@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select MMU_NOTIFIER
@@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV

View File

@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
preempt_enable();
preempt_enable_no_resched();
/*
* Must never explicitly call schedule (including preempt_enable())
* while in a kuap-unlocked user copy, because the AMR register will
* not be saved and restored across context switch. However preempt
* kernels need to be preempted as soon as possible if need_resched is
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
set_dec(1);
return 0;
}

View File

@@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
#ifdef CONFIG_LOCKDEP
static struct lockdep_map hpte_lock_map =
STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
static void acquire_hpte_lock(void)
{
lock_map_acquire(&hpte_lock_map);
}
static void release_hpte_lock(void)
{
lock_map_release(&hpte_lock_map);
}
#else
static void acquire_hpte_lock(void)
{
}
static void release_hpte_lock(void)
{
}
#endif
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
unsigned long flags;
int i;
local_irq_save(flags);
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
if (i == HPTES_PER_GROUP)
if (i == HPTES_PER_GROUP) {
local_irq_restore(flags);
return -1;
}
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
local_irq_restore(flags);
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
return i;
@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
unsigned long irqflags;
local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(irqflags);
return ret;
}
@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
}
/*
@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
return 0;
}
@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
else
} else
native_unlock_hpte(hptep);
}
/*
@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/*
* Invalidate the hpte. NOTE: this also unlocks it
*/
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
else
else {
release_hpte_lock();
hptep->v = 0;
}
} pte_iterate_hashed_end();
}

View File

@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
unsigned int step, nr_cpus, master_cpu;
unsigned int step, nr_cpus;
atomic_t master_cpu;
atomic_t cpu_counter;
};
@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
if (parms->master_cpu != smp_processor_id())
// First CPU goes through, all others wait.
if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
chmem_parms.master_cpu = smp_processor_id();
atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();

View File

@@ -1981,7 +1981,7 @@ repeat:
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static DEFINE_SPINLOCK(linear_map_hash_lock);
static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
@@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
@@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) {
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
return;
}
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;

View File

@@ -35,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
/*
* The hypervisor assigns VAS resources based
* on entitled capacity for shared mode.
* Reconfig VAS windows based on DLPAR CPU events.
*/
if (pseries_vas_dlpar_cpu() != 0)
retval = H_HARDWARE;
}
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);

View File

@@ -200,16 +200,41 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
while (atomic_read(&txwin->pending_faults)) {
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
}
atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
* irq_default_primary_handler() can be used only with IRQF_ONESHOT
* which disables IRQ before executing the thread handler and enables
* it after. But this disabling interrupt sets the VAS IRQ OFF
* state in the hypervisor. If the NX generates fault interrupt
* during this window, the hypervisor will not deliver this
* interrupt to the LPAR. So use VAS specific IRQ handler instead
* of calling the default primary handler.
*/
static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
/*
* The thread hanlder will process this interrupt if it is
* already running.
*/
atomic_inc(&txwin->pending_faults);
return IRQ_WAKE_THREAD;
}
/*
* Allocate window and setup IRQ mapping.
*/
@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
rc = request_threaded_irq(txwin->fault_virq, NULL,
pseries_vas_fault_thread_fn, IRQF_ONESHOT,
rc = request_threaded_irq(txwin->fault_virq,
pseries_vas_irq_handler,
pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex);
return rc;
}
int pseries_vas_dlpar_cpu(void)
{
int new_nr_creds, rc;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
}
/*
* Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on
@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
int new_nr_creds, len, rc = 0;
int len;
/*
* For shared CPU partition, the hypervisor assigns total credits
* based on entitled core capacity. So updating VAS windows will
* be called from lparcfg_write().
*/
if (is_shared_processor())
return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
return pseries_vas_dlpar_cpu();
}
static struct notifier_block pseries_vas_nb = {

View File

@@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags;
char *name;
int fault_virq;
atomic_t pending_faults; /* Number of pending faults */
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action);
int pseries_vas_dlpar_cpu(void);
#else
static inline int vas_migration_handler(int action)
{
return 0;
}
static inline int pseries_vas_dlpar_cpu(void)
{
return 0;
}
#endif
#endif /* _VAS_H */

View File

@@ -10,10 +10,13 @@
/* Even with __builtin_ the compiler may decide to use the out of line
function. */
#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#include <linux/kmsan_string.h>
#endif
#define __HAVE_ARCH_MEMCPY 1
#if defined(__SANITIZE_MEMORY__)
#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memcpy
void *__msan_memcpy(void *dst, const void *src, size_t size);
#define memcpy __msan_memcpy
#else
extern void *memcpy(void *to, const void *from, size_t len);
@@ -21,7 +24,7 @@ extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
#if defined(__SANITIZE_MEMORY__)
#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
extern void *__msan_memset(void *s, int c, size_t n);
#undef memset
#define memset __msan_memset
@@ -67,7 +70,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
}
#define __HAVE_ARCH_MEMMOVE
#if defined(__SANITIZE_MEMORY__)
#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memmove
void *__msan_memmove(void *dest, const void *src, size_t len);
#define memmove __msan_memmove

View File

@@ -254,24 +254,25 @@ extern void __put_user_nocheck_8(void);
#define __put_user_size(x, ptr, size, label) \
do { \
__typeof__(*(ptr)) __x = (x); /* eval x once */ \
__chk_user_ptr(ptr); \
__typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
__chk_user_ptr(__ptr); \
switch (size) { \
case 1: \
__put_user_goto(__x, ptr, "b", "iq", label); \
__put_user_goto(__x, __ptr, "b", "iq", label); \
break; \
case 2: \
__put_user_goto(__x, ptr, "w", "ir", label); \
__put_user_goto(__x, __ptr, "w", "ir", label); \
break; \
case 4: \
__put_user_goto(__x, ptr, "l", "ir", label); \
__put_user_goto(__x, __ptr, "l", "ir", label); \
break; \
case 8: \
__put_user_goto_u64(__x, ptr, label); \
__put_user_goto_u64(__x, __ptr, label); \
break; \
default: \
__put_user_bad(); \
} \
instrument_put_user(__x, ptr, size); \
instrument_put_user(__x, __ptr, size); \
} while (0)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

View File

@@ -26,6 +26,7 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCSAN_SANITIZE := n
KMSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# These are adjustments to the compiler flags used for objects that

View File

@@ -611,6 +611,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
.nr_tags = 1,
};
u64 alloc_time_ns = 0;
struct request *rq;
unsigned int cpu;
unsigned int tag;
int ret;
@@ -660,8 +661,12 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
tag = blk_mq_get_tag(&data);
if (tag == BLK_MQ_NO_TAG)
goto out_queue_exit;
return blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag,
rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag,
alloc_time_ns);
rq->__data_len = 0;
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
return rq;
out_queue_exit:
blk_queue_exit(q);

View File

@@ -410,9 +410,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
* Otherwise just allocate the device numbers for both the whole device
* and all partitions from the extended dev_t space.
*/
ret = -EINVAL;
if (disk->major) {
if (WARN_ON(!disk->minors))
return -EINVAL;
goto out_exit_elevator;
if (disk->minors > DISK_MAX_PARTS) {
pr_err("block: can't allocate more than %d partitions\n",
@@ -420,14 +421,14 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk->minors = DISK_MAX_PARTS;
}
if (disk->first_minor + disk->minors > MINORMASK + 1)
return -EINVAL;
goto out_exit_elevator;
} else {
if (WARN_ON(disk->minors))
return -EINVAL;
goto out_exit_elevator;
ret = blk_alloc_ext_minor();
if (ret < 0)
return ret;
goto out_exit_elevator;
disk->major = BLOCK_EXT_MAJOR;
disk->first_minor = ret;
}
@@ -540,6 +541,9 @@ out_device_del:
out_free_ext_minor:
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
out_exit_elevator:
if (disk->queue->elevator)
elevator_exit(disk->queue);
return ret;
}
EXPORT_SYMBOL(device_add_disk);

View File

@@ -7222,8 +7222,10 @@ static int __init rbd_sysfs_init(void)
int ret;
ret = device_register(&rbd_root_dev);
if (ret < 0)
if (ret < 0) {
put_device(&rbd_root_dev);
return ret;
}
ret = bus_register(&rbd_bus_type);
if (ret < 0)

View File

@@ -791,13 +791,13 @@ void __init random_init_early(const char *command_line)
#endif
for (i = 0, arch_bits = sizeof(entropy) * 8; i < ARRAY_SIZE(entropy);) {
longs = arch_get_random_seed_longs(entropy, ARRAY_SIZE(entropy) - i);
longs = arch_get_random_seed_longs_early(entropy, ARRAY_SIZE(entropy) - i);
if (longs) {
_mix_pool_bytes(entropy, sizeof(*entropy) * longs);
i += longs;
continue;
}
longs = arch_get_random_longs(entropy, ARRAY_SIZE(entropy) - i);
longs = arch_get_random_longs_early(entropy, ARRAY_SIZE(entropy) - i);
if (longs) {
_mix_pool_bytes(entropy, sizeof(*entropy) * longs);
i += longs;

View File

@@ -516,6 +516,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
/* set to a default value of 512 until the disk is validated */
blk_queue_logical_block_size(head->disk->queue, 512);
blk_set_stacking_limits(&head->disk->queue->limits);
blk_queue_dma_alignment(head->disk->queue, 3);
/* we need to propagate up the VMC settings */
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)

View File

@@ -387,7 +387,7 @@ static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
{
struct scatterlist sg;
sg_init_marker(&sg, 1);
sg_init_table(&sg, 1);
sg_set_page(&sg, page, len, off);
ahash_request_set_crypt(hash, &sg, NULL, len);
crypto_ahash_update(hash);
@@ -1141,6 +1141,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_request *req;
unsigned int noreclaim_flag;
int ret = 1;
if (!queue->request) {
@@ -1150,12 +1151,13 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
}
req = queue->request;
noreclaim_flag = memalloc_noreclaim_save();
if (req->state == NVME_TCP_SEND_CMD_PDU) {
ret = nvme_tcp_try_send_cmd_pdu(req);
if (ret <= 0)
goto done;
if (!nvme_tcp_has_inline_data(req))
return ret;
goto out;
}
if (req->state == NVME_TCP_SEND_H2C_PDU) {
@@ -1181,6 +1183,8 @@ done:
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
}
out:
memalloc_noreclaim_restore(noreclaim_flag);
return ret;
}
@@ -1296,6 +1300,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
struct page *page;
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
unsigned int noreclaim_flag;
if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
return;
@@ -1308,7 +1313,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
queue->pf_cache.va = NULL;
}
noreclaim_flag = memalloc_noreclaim_save();
sock_release(queue->sock);
memalloc_noreclaim_restore(noreclaim_flag);
kfree(queue->pdu);
mutex_destroy(&queue->send_mutex);
mutex_destroy(&queue->queue_lock);

View File

@@ -2582,7 +2582,7 @@ static int lpfcdiag_loop_self_unreg(struct lpfc_hba *phba, uint16_t rpi)
*
* This function obtains the transmit and receive ids required to send
* an unsolicited ct command with a payload. A special lpfc FsType and CmdRsp
* flags are used to the unsolicted response handler is able to process
* flags are used to the unsolicited response handler is able to process
* the ct command sent on the same port.
**/
static int lpfcdiag_loop_get_xri(struct lpfc_hba *phba, uint16_t rpi,
@@ -2874,7 +2874,7 @@ out:
* @len: Number of data bytes
*
* This function allocates and posts a data buffer of sufficient size to receive
* an unsolicted CT command.
* an unsolicited CT command.
**/
static int lpfcdiag_sli3_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri,
size_t len)

View File

@@ -90,7 +90,7 @@ lpfc_ct_ignore_hbq_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
get_job_ulpstatus(phba, piocbq));
}
lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
"0145 Ignoring unsolicted CT HBQ Size:%d "
"0145 Ignoring unsolicited CT HBQ Size:%d "
"status = x%x\n",
size, get_job_ulpstatus(phba, piocbq));
}

View File

@@ -5874,10 +5874,6 @@ fallback:
static
int megasas_get_device_list(struct megasas_instance *instance)
{
memset(instance->pd_list, 0,
(MEGASAS_MAX_PD * sizeof(struct megasas_pd_list)));
memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
if (instance->enable_fw_dev_list) {
if (megasas_host_device_list_query(instance, true))
return FAILED;
@@ -7220,7 +7216,7 @@ int megasas_alloc_ctrl_dma_buffers(struct megasas_instance *instance)
if (!fusion->ioc_init_request) {
dev_err(&pdev->dev,
"Failed to allocate PD list buffer\n");
"Failed to allocate ioc init request\n");
return -ENOMEM;
}
@@ -7439,7 +7435,6 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance)
(instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY))
instance->flag_ieee = 1;
megasas_dbg_lvl = 0;
instance->flag = 0;
instance->unload = 1;
instance->last_time = 0;
@@ -8762,33 +8757,26 @@ static
int megasas_update_device_list(struct megasas_instance *instance,
int event_type)
{
int dcmd_ret = DCMD_SUCCESS;
int dcmd_ret;
if (instance->enable_fw_dev_list) {
dcmd_ret = megasas_host_device_list_query(instance, false);
if (dcmd_ret != DCMD_SUCCESS)
goto out;
return megasas_host_device_list_query(instance, false);
} else {
if (event_type & SCAN_PD_CHANNEL) {
dcmd_ret = megasas_get_pd_list(instance);
if (dcmd_ret != DCMD_SUCCESS)
goto out;
return dcmd_ret;
}
if (event_type & SCAN_VD_CHANNEL) {
if (!instance->requestorId ||
megasas_get_ld_vf_affiliation(instance, 0)) {
dcmd_ret = megasas_ld_list_query(instance,
return megasas_ld_list_query(instance,
MR_LD_QUERY_TYPE_EXPOSED_TO_HOST);
if (dcmd_ret != DCMD_SUCCESS)
goto out;
}
}
}
out:
return dcmd_ret;
return DCMD_SUCCESS;
}
/**
@@ -8918,7 +8906,7 @@ megasas_aen_polling(struct work_struct *work)
sdev1 = scsi_device_lookup(instance->host,
MEGASAS_MAX_PD_CHANNELS +
(ld_target_id / MEGASAS_MAX_DEV_PER_CHANNEL),
(ld_target_id - MEGASAS_MAX_DEV_PER_CHANNEL),
(ld_target_id % MEGASAS_MAX_DEV_PER_CHANNEL),
0);
if (sdev1)
megasas_remove_scsi_device(sdev1);
@@ -9016,6 +9004,7 @@ static int __init megasas_init(void)
*/
pr_info("megasas: %s\n", MEGASAS_VERSION);
megasas_dbg_lvl = 0;
support_poll_for_event = 2;
support_device_change = 1;
support_nvme_encapsulation = true;

View File

@@ -4,5 +4,6 @@ config SCSI_MPI3MR
tristate "Broadcom MPI3 Storage Controller Device Driver"
depends on PCI && SCSI
select BLK_DEV_BSGLIB
select SCSI_SAS_ATTRS
help
MPI3 based Storage & RAID Controllers Driver.

View File

@@ -99,6 +99,7 @@ static void pm8001_map_queues(struct Scsi_Host *shost)
static struct scsi_host_template pm8001_sht = {
.module = THIS_MODULE,
.name = DRV_NAME,
.proc_name = DRV_NAME,
.queuecommand = sas_queuecommand,
.dma_need_drain = ata_scsi_dma_need_drain,
.target_alloc = sas_target_alloc,

View File

@@ -951,9 +951,9 @@ qla2x00_sysfs_read_dcbx_tlv(struct file *filp, struct kobject *kobj,
if (!capable(CAP_SYS_ADMIN) || off != 0 || count > DCBX_TLV_DATA_SIZE)
return 0;
mutex_lock(&vha->hw->optrom_mutex);
if (ha->dcbx_tlv)
goto do_read;
mutex_lock(&vha->hw->optrom_mutex);
if (qla2x00_chip_is_down(vha)) {
mutex_unlock(&vha->hw->optrom_mutex);
return 0;
@@ -3330,11 +3330,34 @@ struct fc_function_template qla2xxx_transport_vport_functions = {
.bsg_timeout = qla24xx_bsg_timeout,
};
static uint
qla2x00_get_host_supported_speeds(scsi_qla_host_t *vha, uint speeds)
{
uint supported_speeds = FC_PORTSPEED_UNKNOWN;
if (speeds & FDMI_PORT_SPEED_64GB)
supported_speeds |= FC_PORTSPEED_64GBIT;
if (speeds & FDMI_PORT_SPEED_32GB)
supported_speeds |= FC_PORTSPEED_32GBIT;
if (speeds & FDMI_PORT_SPEED_16GB)
supported_speeds |= FC_PORTSPEED_16GBIT;
if (speeds & FDMI_PORT_SPEED_8GB)
supported_speeds |= FC_PORTSPEED_8GBIT;
if (speeds & FDMI_PORT_SPEED_4GB)
supported_speeds |= FC_PORTSPEED_4GBIT;
if (speeds & FDMI_PORT_SPEED_2GB)
supported_speeds |= FC_PORTSPEED_2GBIT;
if (speeds & FDMI_PORT_SPEED_1GB)
supported_speeds |= FC_PORTSPEED_1GBIT;
return supported_speeds;
}
void
qla2x00_init_host_attr(scsi_qla_host_t *vha)
{
struct qla_hw_data *ha = vha->hw;
u32 speeds = FC_PORTSPEED_UNKNOWN;
u32 speeds = 0, fdmi_speed = 0;
fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count;
fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name);
@@ -3344,7 +3367,8 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha)
fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports;
fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count;
speeds = qla25xx_fdmi_port_speed_capability(ha);
fdmi_speed = qla25xx_fdmi_port_speed_capability(ha);
speeds = qla2x00_get_host_supported_speeds(vha, fdmi_speed);
fc_host_supported_speeds(vha->host) = speeds;
}

View File

@@ -284,6 +284,25 @@ void target_pr_kref_release(struct kref *kref)
complete(&deve->pr_comp);
}
/*
* Establish UA condition on SCSI device - all LUNs
*/
void target_dev_ua_allocate(struct se_device *dev, u8 asc, u8 ascq)
{
struct se_dev_entry *se_deve;
struct se_lun *lun;
spin_lock(&dev->se_port_lock);
list_for_each_entry(lun, &dev->dev_sep_list, lun_dev_link) {
spin_lock(&lun->lun_deve_lock);
list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link)
core_scsi3_ua_allocate(se_deve, asc, ascq);
spin_unlock(&lun->lun_deve_lock);
}
spin_unlock(&dev->se_port_lock);
}
static void
target_luns_data_has_changed(struct se_node_acl *nacl, struct se_dev_entry *new,
bool skip_new)

View File

@@ -230,14 +230,12 @@ static void iblock_unplug_device(struct se_dev_plug *se_plug)
clear_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags);
}
static unsigned long long iblock_emulate_read_cap_with_block_size(
struct se_device *dev,
struct block_device *bd,
struct request_queue *q)
static sector_t iblock_get_blocks(struct se_device *dev)
{
u32 block_size = bdev_logical_block_size(bd);
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
u32 block_size = bdev_logical_block_size(ib_dev->ibd_bd);
unsigned long long blocks_long =
div_u64(bdev_nr_bytes(bd), block_size) - 1;
div_u64(bdev_nr_bytes(ib_dev->ibd_bd), block_size) - 1;
if (block_size == dev->dev_attrib.block_size)
return blocks_long;
@@ -829,15 +827,6 @@ fail:
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
static sector_t iblock_get_blocks(struct se_device *dev)
{
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
struct block_device *bd = ib_dev->ibd_bd;
struct request_queue *q = bdev_get_queue(bd);
return iblock_emulate_read_cap_with_block_size(dev, bd, q);
}
static sector_t iblock_get_alignment_offset_lbas(struct se_device *dev)
{
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);

View File

@@ -89,6 +89,7 @@ int target_configure_device(struct se_device *dev);
void target_free_device(struct se_device *);
int target_for_each_device(int (*fn)(struct se_device *dev, void *data),
void *data);
void target_dev_ua_allocate(struct se_device *dev, u8 asc, u8 ascq);
/* target_core_configfs.c */
extern struct configfs_item_operations target_core_dev_item_ops;

View File

@@ -2956,13 +2956,28 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key,
__core_scsi3_complete_pro_preempt(dev, pr_reg_n,
(preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL,
type, scope, preempt_type);
if (preempt_type == PREEMPT_AND_ABORT)
core_scsi3_release_preempt_and_abort(
&preempt_and_abort_list, pr_reg_n);
}
spin_unlock(&dev->dev_reservation_lock);
/*
* SPC-4 5.12.11.2.6 Preempting and aborting
* The actions described in this subclause shall be performed
* for all I_T nexuses that are registered with the non-zero
* SERVICE ACTION RESERVATION KEY value, without regard for
* whether the preempted I_T nexuses hold the persistent
* reservation. If the SERVICE ACTION RESERVATION KEY field is
* set to zero and an all registrants persistent reservation is
* present, the device server shall abort all commands for all
* registered I_T nexuses.
*/
if (preempt_type == PREEMPT_AND_ABORT) {
core_tmr_lun_reset(dev, NULL, &preempt_and_abort_list,
cmd);
core_scsi3_release_preempt_and_abort(
&preempt_and_abort_list, pr_reg_n);
}
if (pr_tmpl->pr_aptpl_active)
core_scsi3_update_and_write_aptpl(cmd->se_dev, true);
@@ -3022,7 +3037,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key,
if (calling_it_nexus)
continue;
if (pr_reg->pr_res_key != sa_res_key)
if (sa_res_key && pr_reg->pr_res_key != sa_res_key)
continue;
pr_reg_nacl = pr_reg->pr_reg_nacl;
@@ -3425,8 +3440,6 @@ after_iport_check:
* transport protocols where port names are not required;
* d) Register the reservation key specified in the SERVICE ACTION
* RESERVATION KEY field;
* e) Retain the reservation key specified in the SERVICE ACTION
* RESERVATION KEY field and associated information;
*
* Also, It is not an error for a REGISTER AND MOVE service action to
* register an I_T nexus that is already registered with the same
@@ -3448,6 +3461,12 @@ after_iport_check:
dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl,
iport_ptr);
new_reg = 1;
} else {
/*
* e) Retain the reservation key specified in the SERVICE ACTION
* RESERVATION KEY field and associated information;
*/
dest_pr_reg->pr_res_key = sa_res_key;
}
/*
* f) Release the persistent reservation for the persistent reservation

View File

@@ -3531,8 +3531,7 @@ static void target_tmr_work(struct work_struct *work)
tmr->response = (!ret) ? TMR_FUNCTION_COMPLETE :
TMR_FUNCTION_REJECTED;
if (tmr->response == TMR_FUNCTION_COMPLETE) {
target_ua_allocate_lun(cmd->se_sess->se_node_acl,
cmd->orig_fe_lun, 0x29,
target_dev_ua_allocate(dev, 0x29,
ASCQ_29H_BUS_DEVICE_RESET_FUNCTION_OCCURRED);
}
break;

View File

@@ -779,7 +779,7 @@ static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask)
}
/**
* ufshcd_utmrl_clear - Clear a bit in UTRMLCLR register
* ufshcd_utmrl_clear - Clear a bit in UTMRLCLR register
* @hba: per adapter instance
* @pos: position of the bit to be cleared
*/
@@ -3118,7 +3118,7 @@ int ufshcd_query_flag_retry(struct ufs_hba *hba,
if (ret)
dev_err(hba->dev,
"%s: query attribute, opcode %d, idn %d, failed with error %d after %d retries\n",
"%s: query flag, opcode %d, idn %d, failed with error %d after %d retries\n",
__func__, opcode, idn, ret, retries);
return ret;
}

View File

@@ -383,7 +383,7 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
rgn = hpb->rgn_tbl + rgn_idx;
srgn = rgn->srgn_tbl + srgn_idx;
/* If command type is WRITE or DISCARD, set bitmap as drity */
/* If command type is WRITE or DISCARD, set bitmap as dirty */
if (ufshpb_is_write_or_discard(cmd)) {
ufshpb_iterate_rgn(hpb, rgn_idx, srgn_idx, srgn_offset,
transfer_len, true);
@@ -616,7 +616,7 @@ static void ufshpb_activate_subregion(struct ufshpb_lu *hpb,
static enum rq_end_io_ret ufshpb_umap_req_compl_fn(struct request *req,
blk_status_t error)
{
struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data;
struct ufshpb_req *umap_req = req->end_io_data;
ufshpb_put_req(umap_req->hpb, umap_req);
return RQ_END_IO_NONE;
@@ -625,7 +625,7 @@ static enum rq_end_io_ret ufshpb_umap_req_compl_fn(struct request *req,
static enum rq_end_io_ret ufshpb_map_req_compl_fn(struct request *req,
blk_status_t error)
{
struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data;
struct ufshpb_req *map_req = req->end_io_data;
struct ufshpb_lu *hpb = map_req->hpb;
struct ufshpb_subregion *srgn;
unsigned long flags;

View File

@@ -118,7 +118,6 @@ int ufs_qcom_ice_init(struct ufs_qcom_host *host)
host->ice_mmio = devm_ioremap_resource(dev, res);
if (IS_ERR(host->ice_mmio)) {
err = PTR_ERR(host->ice_mmio);
dev_err(dev, "Failed to map ICE registers; err=%d\n", err);
return err;
}

View File

@@ -1012,7 +1012,6 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1025,6 +1024,7 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
lru_gen_add_mm(mm);
task_unlock(tsk);
lru_gen_use_mm(mm);
if (old_mm) {

View File

@@ -1741,10 +1741,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
static const char deprecated_msg[] =
"Mount option \"%s\" will be removed by %s\n"
"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004

View File

@@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page,
squashfs_i(inode)->fragment_size);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
int error = buffer->error;
if (buffer->error)
if (error)
goto out;
expected += squashfs_i(inode)->fragment_offset;
@@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page,
out:
squashfs_cache_put(buffer);
return buffer->error;
return error;
}
static void squashfs_readahead(struct readahead_control *ractl)
@@ -557,6 +558,13 @@ static void squashfs_readahead(struct readahead_control *ractl)
int res, bsize;
u64 block = 0;
unsigned int expected;
struct page *last_page;
expected = start >> msblk->block_log == file_end ?
(i_size_read(inode) & (msblk->block_size - 1)) :
msblk->block_size;
max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = __readahead_batch(ractl, pages, max_pages);
if (!nr_pages)
@@ -566,13 +574,10 @@ static void squashfs_readahead(struct readahead_control *ractl)
goto skip_pages;
index = pages[0]->index >> shift;
if ((pages[nr_pages - 1]->index >> shift) != index)
goto skip_pages;
expected = index == file_end ?
(i_size_read(inode) & (msblk->block_size - 1)) :
msblk->block_size;
if (index == file_end && squashfs_i(inode)->fragment_block !=
SQUASHFS_INVALID_BLK) {
res = squashfs_readahead_fragment(pages, nr_pages,
@@ -593,15 +598,15 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
squashfs_page_actor_free(actor);
last_page = squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
if (pages[nr_pages - 1]->index == file_end && bytes)
memzero_page(pages[nr_pages - 1], bytes,
if (index == file_end && bytes && last_page)
memzero_page(last_page, bytes,
PAGE_SIZE - bytes);
for (i = 0; i < nr_pages; i++) {

View File

@@ -71,11 +71,13 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
(actor->next_index != actor->page[actor->next_page]->index)) {
actor->next_index++;
actor->returned_pages++;
actor->last_page = NULL;
return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
actor->returned_pages++;
actor->last_page = actor->page[actor->next_page];
return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]);
}
@@ -125,6 +127,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
actor->last_page = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;

View File

@@ -16,6 +16,7 @@ struct squashfs_page_actor {
void *(*squashfs_first_page)(struct squashfs_page_actor *);
void *(*squashfs_next_page)(struct squashfs_page_actor *);
void (*squashfs_finish_page)(struct squashfs_page_actor *);
struct page *last_page;
int pages;
int length;
int next_page;
@@ -29,10 +30,13 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor)
{
struct page *last_page = actor->last_page;
kfree(actor->tmp_buffer);
kfree(actor);
return last_page;
}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{

View File

@@ -853,7 +853,8 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *))
{
if (!iob || (req->rq_flags & RQF_ELV) || ioerror)
if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
(req->end_io && !blk_rq_is_passthrough(req)))
return false;
if (!iob->complete)

View File

@@ -43,11 +43,24 @@ extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
#else
#define __underlying_memchr __builtin_memchr
#define __underlying_memcmp __builtin_memcmp
#if defined(__SANITIZE_MEMORY__)
/*
* For KMSAN builds all memcpy/memset/memmove calls should be replaced by the
* corresponding __msan_XXX functions.
*/
#include <linux/kmsan_string.h>
#define __underlying_memcpy __msan_memcpy
#define __underlying_memmove __msan_memmove
#define __underlying_memset __msan_memset
#else
#define __underlying_memcpy __builtin_memcpy
#define __underlying_memmove __builtin_memmove
#define __underlying_memset __builtin_memset
#endif
#define __underlying_memchr __builtin_memchr
#define __underlying_memcmp __builtin_memcmp
#define __underlying_strcat __builtin_strcat
#define __underlying_strcpy __builtin_strcpy
#define __underlying_strlen __builtin_strlen

View File

@@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KMSAN string functions API used in other headers.
*
* Copyright (C) 2022 Google LLC
* Author: Alexander Potapenko <glider@google.com>
*
*/
#ifndef _LINUX_KMSAN_STRING_H
#define _LINUX_KMSAN_STRING_H
/*
* KMSAN overrides the default memcpy/memset/memmove implementations in the
* kernel, which requires having __msan_XXX function prototypes in several other
* headers. Keep them in one place instead of open-coding.
*/
void *__msan_memcpy(void *dst, const void *src, size_t size);
void *__msan_memset(void *s, int c, size_t n);
void *__msan_memmove(void *dest, const void *src, size_t len);
#endif /* _LINUX_KMSAN_STRING_H */

View File

@@ -146,9 +146,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
static inline bool vma_can_userfault(struct vm_area_struct *vma,
unsigned long vm_flags)
{
if (vm_flags & VM_UFFD_MINOR)
return is_vm_hugetlb_page(vma) || vma_is_shmem(vma);
if ((vm_flags & VM_UFFD_MINOR) &&
(!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
return false;
#ifndef CONFIG_PTE_MARKER_UFFD_WP
/*
* If user requested uffd-wp but not enabled pte markers for

View File

@@ -1173,7 +1173,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
}
}
int __io_run_local_work(struct io_ring_ctx *ctx, bool locked)
int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked)
{
struct llist_node *node;
struct llist_node fake;
@@ -1192,7 +1192,7 @@ again:
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
req->io_task_work.func(req, &locked);
req->io_task_work.func(req, locked);
ret++;
node = next;
}
@@ -1208,7 +1208,7 @@ again:
goto again;
}
if (locked)
if (*locked)
io_submit_flush_completions(ctx);
trace_io_uring_local_work_run(ctx, ret, loops);
return ret;
@@ -1225,7 +1225,7 @@ int io_run_local_work(struct io_ring_ctx *ctx)
__set_current_state(TASK_RUNNING);
locked = mutex_trylock(&ctx->uring_lock);
ret = __io_run_local_work(ctx, locked);
ret = __io_run_local_work(ctx, &locked);
if (locked)
mutex_unlock(&ctx->uring_lock);
@@ -1446,8 +1446,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
io_task_work_pending(ctx)) {
u32 tail = ctx->cached_cq_tail;
if (!llist_empty(&ctx->work_llist))
__io_run_local_work(ctx, true);
(void) io_run_local_work_locked(ctx);
if (task_work_pending(current) ||
wq_list_empty(&ctx->iopoll_list)) {

View File

@@ -27,7 +27,7 @@ enum {
struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow);
bool io_req_cqe_overflow(struct io_kiocb *req);
int io_run_task_work_sig(struct io_ring_ctx *ctx);
int __io_run_local_work(struct io_ring_ctx *ctx, bool locked);
int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_complete_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
@@ -277,9 +277,18 @@ static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
{
bool locked;
int ret;
if (llist_empty(&ctx->work_llist))
return 0;
return __io_run_local_work(ctx, true);
locked = true;
ret = __io_run_local_work(ctx, &locked);
/* shouldn't happen! */
if (WARN_ON_ONCE(!locked))
mutex_lock(&ctx->uring_lock);
return ret;
}
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)

View File

@@ -1329,11 +1329,11 @@ fail_msg_bytes:
#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
percpu_counter_destroy(&ns->percpu_msg_bytes);
percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
percpu_counter_destroy(&ns->percpu_msg_bytes);
percpu_counter_destroy(&ns->percpu_msg_hdrs);
}
#endif

View File

@@ -400,8 +400,9 @@ config FRAME_WARN
default 1536 if (!64BIT && XTENSA)
default 1024 if !64BIT
default 2048 if 64BIT
default 0 if KMSAN
help
Tell gcc to warn at build time for stack frames larger than this.
Tell the compiler to warn at build time for stack frames larger than this.
Setting this too low will cause a lot of warnings.
Setting it to 0 disables the warning.

View File

@@ -2903,8 +2903,8 @@ static inline void *mtree_range_walk(struct ma_state *mas)
unsigned long max, min;
unsigned long prev_max, prev_min;
last = next = mas->node;
prev_min = min = mas->min;
next = mas->node;
min = mas->min;
max = mas->max;
do {
offset = 0;

View File

@@ -2462,7 +2462,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
* Fix up and warn once if private is unexpectedly set.
*/
if (!folio_test_swapcache(page_folio(head))) {
VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, head);
VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
page_tail->private = 0;
}

View File

@@ -1460,6 +1460,27 @@ static void scan_gray_list(void)
WARN_ON(!list_empty(&gray_list));
}
/*
* Conditionally call resched() in a object iteration loop while making sure
* that the given object won't go away without RCU read lock by performing a
* get_object() if !pinned.
*
* Return: false if can't do a cond_resched() due to get_object() failure
* true otherwise
*/
static bool kmemleak_cond_resched(struct kmemleak_object *object, bool pinned)
{
if (!pinned && !get_object(object))
return false;
rcu_read_unlock();
cond_resched();
rcu_read_lock();
if (!pinned)
put_object(object);
return true;
}
/*
* Scan data sections and all the referenced memory blocks allocated via the
* kernel's standard allocators. This function must be called with the
@@ -1471,7 +1492,7 @@ static void kmemleak_scan(void)
struct zone *zone;
int __maybe_unused i;
int new_leaks = 0;
int loop1_cnt = 0;
int loop_cnt = 0;
jiffies_last_scan = jiffies;
@@ -1480,7 +1501,6 @@ static void kmemleak_scan(void)
list_for_each_entry_rcu(object, &object_list, object_list) {
bool obj_pinned = false;
loop1_cnt++;
raw_spin_lock_irq(&object->lock);
#ifdef DEBUG
/*
@@ -1514,24 +1534,11 @@ static void kmemleak_scan(void)
raw_spin_unlock_irq(&object->lock);
/*
* Do a cond_resched() to avoid soft lockup every 64k objects.
* Make sure a reference has been taken so that the object
* won't go away without RCU read lock.
* Do a cond_resched() every 64k objects to avoid soft lockup.
*/
if (!(loop1_cnt & 0xffff)) {
if (!obj_pinned && !get_object(object)) {
/* Try the next object instead */
loop1_cnt--;
continue;
}
rcu_read_unlock();
cond_resched();
rcu_read_lock();
if (!obj_pinned)
put_object(object);
}
if (!(++loop_cnt & 0xffff) &&
!kmemleak_cond_resched(object, obj_pinned))
loop_cnt--; /* Try again on next object */
}
rcu_read_unlock();
@@ -1598,7 +1605,15 @@ static void kmemleak_scan(void)
* scan and color them gray until the next scan.
*/
rcu_read_lock();
loop_cnt = 0;
list_for_each_entry_rcu(object, &object_list, object_list) {
/*
* Do a cond_resched() every 64k objects to avoid soft lockup.
*/
if (!(++loop_cnt & 0xffff) &&
!kmemleak_cond_resched(object, false))
loop_cnt--; /* Try again on next object */
/*
* This is racy but we can save the overhead of lock/unlock
* calls. The missed objects, if any, should be caught in
@@ -1632,7 +1647,15 @@ static void kmemleak_scan(void)
* Scanning result reporting.
*/
rcu_read_lock();
loop_cnt = 0;
list_for_each_entry_rcu(object, &object_list, object_list) {
/*
* Do a cond_resched() every 64k objects to avoid soft lockup.
*/
if (!(++loop_cnt & 0xffff) &&
!kmemleak_cond_resched(object, false))
loop_cnt--; /* Try again on next object */
/*
* This is racy but we can save the overhead of lock/unlock
* calls. The missed objects, if any, should be caught in

View File

@@ -14,6 +14,7 @@
#include "kmsan.h"
#include <linux/gfp.h>
#include <linux/kmsan_string.h>
#include <linux/mm.h>
#include <linux/uaccess.h>

View File

@@ -167,6 +167,7 @@ void kmsan_copy_page_meta(struct page *dst, struct page *src)
__memcpy(origin_ptr_for(dst), origin_ptr_for(src), PAGE_SIZE);
kmsan_leave_runtime();
}
EXPORT_SYMBOL(kmsan_copy_page_meta);
void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags)
{

View File

@@ -813,7 +813,14 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
if (start & ~huge_page_mask(hstate_vma(vma)))
return false;
*end = ALIGN(*end, huge_page_size(hstate_vma(vma)));
/*
* Madvise callers expect the length to be rounded up to PAGE_SIZE
* boundaries, and may be unaware that this VMA uses huge pages.
* Avoid unexpected data loss by rounding down the number of
* huge pages freed.
*/
*end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma)));
return true;
}
@@ -828,6 +835,9 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
return -EINVAL;
if (start == end)
return 0;
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */

View File

@@ -131,8 +131,8 @@ static void memory_tier_device_release(struct device *dev)
kfree(tier);
}
static ssize_t nodes_show(struct device *dev,
struct device_attribute *attr, char *buf)
static ssize_t nodelist_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int ret;
nodemask_t nmask;
@@ -143,10 +143,10 @@ static ssize_t nodes_show(struct device *dev,
mutex_unlock(&memory_tier_lock);
return ret;
}
static DEVICE_ATTR_RO(nodes);
static DEVICE_ATTR_RO(nodelist);
static struct attribute *memtier_dev_attrs[] = {
&dev_attr_nodes.attr,
&dev_attr_nodelist.attr,
NULL
};

View File

@@ -1582,6 +1582,13 @@ out:
*/
list_splice(&ret_pages, from);
/*
* Return 0 in case all subpages of fail-to-migrate THPs are
* migrated successfully.
*/
if (list_empty(from))
rc = 0;
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);

View File

@@ -2853,6 +2853,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (next->vm_flags != vma->vm_flags)
goto out;
if (start + size <= next->vm_end)
break;
prev = next;
}

View File

@@ -807,6 +807,7 @@ static void prep_compound_tail(struct page *head, int tail_idx)
p->mapping = TAIL_MAPPING;
set_compound_head(p, head);
set_page_private(p, 0);
}
void prep_compound_page(struct page *page, unsigned int order)

View File

@@ -330,7 +330,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
zone->zone_start_pfn);
if (skip_isolation) {
int mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
VM_BUG_ON(!is_migrate_isolate(mt));
} else {

View File

@@ -2424,9 +2424,26 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (!zeropage) { /* COPY */
page_kaddr = kmap_local_folio(folio, 0);
/*
* The read mmap_lock is held here. Despite the
* mmap_lock being read recursive a deadlock is still
* possible if a writer has taken a lock. For example:
*
* process A thread 1 takes read lock on own mmap_lock
* process A thread 2 calls mmap, blocks taking write lock
* process B thread 1 takes page fault, read lock on own mmap lock
* process B thread 2 calls mmap, blocks taking write lock
* process A thread 1 blocks taking read lock on process B
* process B thread 1 blocks taking read lock on process A
*
* Disable page faults to prevent potential deadlock
* and retry the copy outside the mmap_lock.
*/
pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *)src_addr,
PAGE_SIZE);
pagefault_enable();
kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */

View File

@@ -157,11 +157,28 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!page)
goto out;
page_kaddr = kmap_atomic(page);
page_kaddr = kmap_local_page(page);
/*
* The read mmap_lock is held here. Despite the
* mmap_lock being read recursive a deadlock is still
* possible if a writer has taken a lock. For example:
*
* process A thread 1 takes read lock on own mmap_lock
* process A thread 2 calls mmap, blocks taking write lock
* process B thread 1 takes page fault, read lock on own mmap lock
* process B thread 2 calls mmap, blocks taking write lock
* process A thread 1 blocks taking read lock on process B
* process B thread 1 blocks taking read lock on process A
*
* Disable page faults to prevent potential deadlock
* and retry the copy outside the mmap_lock.
*/
pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
kunmap_atomic(page_kaddr);
pagefault_enable();
kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
@@ -646,11 +663,11 @@ retry:
mmap_read_unlock(dst_mm);
BUG_ON(!page);
page_kaddr = kmap(page);
page_kaddr = kmap_local_page(page);
err = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
kunmap(page);
kunmap_local(page_kaddr);
if (unlikely(err)) {
err = -EFAULT;
goto out;