diff --git a/Makefile b/Makefile index c18344d3cd40..523eead51a48 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Gobble Gobble # *DOCUMENTATION* diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 6053674f9132..c2767a6a387e 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -102,12 +102,6 @@ extern void switch_fpu_return(void); */ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); -/* - * Tasks that are not using SVA have mm->pasid set to zero to note that they - * will not have the valid bit set in MSR_IA32_PASID while they are running. - */ -#define PASID_DISABLED 0 - /* Trap handling */ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu_sync_fpstate(struct fpu *fpu); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c410be738ae7..6a190c7f4d71 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char *prepare_command_line(void) +static char * __init prepare_command_line(void) { #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1a1b1189225d..6ae38776e30e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -316,7 +316,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; - unsigned int num; + int num; int qid = hctx->queue_num; bool notify = false; blk_status_t status; diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 13cbeb997cc1..58da08cc3d01 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -929,10 +929,8 @@ static int __init amd_iommu_v2_init(void) { int ret; - pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system\n"); + pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); /* * Load anyway to provide the symbols to other modules * which may use AMD IOMMUv2 optionally. @@ -947,6 +945,8 @@ static int __init amd_iommu_v2_init(void) amd_iommu_register_ppr_notifier(&ppr_nb); + pr_info("AMD IOMMUv2 loaded and initialized\n"); + return 0; out: diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c index b39d223926a4..71596fc62822 100644 --- a/drivers/iommu/intel/cap_audit.c +++ b/drivers/iommu/intel/cap_audit.c @@ -144,6 +144,7 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) { struct dmar_drhd_unit *d; struct intel_iommu *i; + int rc = 0; rcu_read_lock(); if (list_empty(&dmar_drhd_units)) @@ -169,11 +170,11 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) */ if (intel_cap_smts_sanity() && !intel_cap_flts_sanity() && !intel_cap_slts_sanity()) - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; out: rcu_read_unlock(); - return 0; + return rc; } int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0bde0c8b4126..b6a8f3282411 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1339,13 +1339,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, pte = &pte[pfn_level_offset(pfn, level)]; do { - unsigned long level_pfn; + unsigned long level_pfn = pfn & level_mask(level); if (!dma_pte_present(pte)) goto next; - level_pfn = pfn & level_mask(level); - /* If range covers entire pagetable, free it */ if (start_pfn <= level_pfn && last_pfn >= level_pfn + level_size(level) - 1) { @@ -1366,7 +1364,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, freelist); } next: - pfn += level_size(level); + pfn = level_pfn + level_size(level); } while (!first_pte_in_page(++pte) && pfn <= last_pfn); if (first_pte) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 5cb260820eda..7f23ad61c094 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -200,8 +200,8 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte) #define DTE_HI_MASK2 GENMASK(7, 4) #define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */ #define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */ -#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36) -#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32) +#define PAGE_DESC_HI_MASK1 GENMASK_ULL(35, 32) +#define PAGE_DESC_HI_MASK2 GENMASK_ULL(39, 36) static inline phys_addr_t rk_dte_pt_address_v2(u32 dte) { diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 5f484fff8dbe..41b0cd17fcba 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -591,8 +591,11 @@ static void vdpasim_free(struct vdpa_device *vdpa) vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); } - put_iova_domain(&vdpasim->iova); - iova_cache_put(); + if (vdpa_get_dma_dev(vdpa)) { + put_iova_domain(&vdpasim->iova); + iova_cache_put(); + } + kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 01c59ce7e250..29cced1cd277 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1014,12 +1014,12 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) mutex_lock(&d->mutex); filep->private_data = NULL; + vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); - vhost_vdpa_clean_irq(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 938aefbc75ec..d6ca1c7ad513 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -511,8 +511,6 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) vhost_disable_notify(&vsock->dev, vq); do { - u32 len; - if (!vhost_vsock_more_replies(vsock)) { /* Stop tx until the device processes already * pending replies. Leave tx virtqueue @@ -540,7 +538,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) continue; } - len = pkt->len; + total_len += sizeof(pkt->hdr) + pkt->len; /* Deliver to monitoring devices all received packets */ virtio_transport_deliver_tap_pkt(pkt); @@ -553,9 +551,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) else virtio_transport_free_pkt(pkt); - len += sizeof(pkt->hdr); - vhost_add_used(vq, head, len); - total_len += len; + vhost_add_used(vq, head, 0); added = true; } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); diff --git a/kernel/cpu.c b/kernel/cpu.c index dbe042574f01..0b181330e2b7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -588,6 +589,12 @@ static int bringup_cpu(unsigned int cpu) struct task_struct *idle = idle_thread_get(cpu); int ret; + /* + * Reset stale stack state from the last time this CPU was online. + */ + scs_task_reset(idle); + kasan_unpoison_task_stack(idle); + /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. diff --git a/kernel/events/core.c b/kernel/events/core.c index 97a77ca67d0b..6f508d89840a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9760,6 +9760,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, continue; if (event->attr.config != entry->type) continue; + /* Cannot deliver synchronous signal to other task. */ + if (event->attr.sigtrap) + continue; if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index c51387a43265..04a74d040a6d 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -105,9 +105,9 @@ * atomic_long_cmpxchg() will be used to obtain writer lock. * * There are three places where the lock handoff bit may be set or cleared. - * 1) rwsem_mark_wake() for readers. - * 2) rwsem_try_write_lock() for writers. - * 3) Error path of rwsem_down_write_slowpath(). + * 1) rwsem_mark_wake() for readers -- set, clear + * 2) rwsem_try_write_lock() for writers -- set, clear + * 3) rwsem_del_waiter() -- clear * * For all the above cases, wait_lock will be held. A writer must also * be the first one in the wait_list to be eligible for setting the handoff @@ -334,6 +334,9 @@ struct rwsem_waiter { struct task_struct *task; enum rwsem_waiter_type type; unsigned long timeout; + + /* Writer only, not initialized in reader */ + bool handoff_set; }; #define rwsem_first_waiter(sem) \ list_first_entry(&sem->wait_list, struct rwsem_waiter, list) @@ -344,12 +347,6 @@ enum rwsem_wake_type { RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ }; -enum writer_wait_state { - WRITER_NOT_FIRST, /* Writer is not first in wait list */ - WRITER_FIRST, /* Writer is first in wait list */ - WRITER_HANDOFF /* Writer is first & handoff needed */ -}; - /* * The typical HZ value is either 250 or 1000. So set the minimum waiting * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait @@ -365,6 +362,31 @@ enum writer_wait_state { */ #define MAX_READERS_WAKEUP 0x100 +static inline void +rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) +{ + lockdep_assert_held(&sem->wait_lock); + list_add_tail(&waiter->list, &sem->wait_list); + /* caller will set RWSEM_FLAG_WAITERS */ +} + +/* + * Remove a waiter from the wait_list and clear flags. + * + * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of + * this function. Modify with care. + */ +static inline void +rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) +{ + lockdep_assert_held(&sem->wait_lock); + list_del(&waiter->list); + if (likely(!list_empty(&sem->wait_list))) + return; + + atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count); +} + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must @@ -376,6 +398,8 @@ enum writer_wait_state { * preferably when the wait_lock is released * - woken process blocks are discarded from the list after having task zeroed * - writers are only marked woken if downgrading is false + * + * Implies rwsem_del_waiter() for all woken readers. */ static void rwsem_mark_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type, @@ -490,17 +514,24 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, adjustment = woken * RWSEM_READER_BIAS - adjustment; lockevent_cond_inc(rwsem_wake_reader, woken); - if (list_empty(&sem->wait_list)) { - /* hit end of list above */ - adjustment -= RWSEM_FLAG_WAITERS; - } - /* - * When we've woken a reader, we no longer need to force writers - * to give up the lock and we can clear HANDOFF. - */ - if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF)) - adjustment -= RWSEM_FLAG_HANDOFF; + oldcount = atomic_long_read(&sem->count); + if (list_empty(&sem->wait_list)) { + /* + * Combined with list_move_tail() above, this implies + * rwsem_del_waiter(). + */ + adjustment -= RWSEM_FLAG_WAITERS; + if (oldcount & RWSEM_FLAG_HANDOFF) + adjustment -= RWSEM_FLAG_HANDOFF; + } else if (woken) { + /* + * When we've woken a reader, we no longer need to force + * writers to give up the lock and we can clear HANDOFF. + */ + if (oldcount & RWSEM_FLAG_HANDOFF) + adjustment -= RWSEM_FLAG_HANDOFF; + } if (adjustment) atomic_long_add(adjustment, &sem->count); @@ -532,12 +563,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * race conditions between checking the rwsem wait list and setting the * sem->count accordingly. * - * If wstate is WRITER_HANDOFF, it will make sure that either the handoff - * bit is set or the lock is acquired with handoff bit cleared. + * Implies rwsem_del_waiter() on success. */ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, - enum writer_wait_state wstate) + struct rwsem_waiter *waiter) { + bool first = rwsem_first_waiter(sem) == waiter; long count, new; lockdep_assert_held(&sem->wait_lock); @@ -546,13 +577,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, do { bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF); - if (has_handoff && wstate == WRITER_NOT_FIRST) - return false; + if (has_handoff) { + if (!first) + return false; + + /* First waiter inherits a previously set handoff bit */ + waiter->handoff_set = true; + } new = count; if (count & RWSEM_LOCK_MASK) { - if (has_handoff || (wstate != WRITER_HANDOFF)) + if (has_handoff || (!rt_task(waiter->task) && + !time_after(jiffies, waiter->timeout))) return false; new |= RWSEM_FLAG_HANDOFF; @@ -569,9 +606,17 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, * We have either acquired the lock with handoff bit cleared or * set the handoff bit. */ - if (new & RWSEM_FLAG_HANDOFF) + if (new & RWSEM_FLAG_HANDOFF) { + waiter->handoff_set = true; + lockevent_inc(rwsem_wlock_handoff); return false; + } + /* + * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on + * success. + */ + list_del(&waiter->list); rwsem_set_owner(sem); return true; } @@ -956,7 +1001,7 @@ queue: } adjustment += RWSEM_FLAG_WAITERS; } - list_add_tail(&waiter.list, &sem->wait_list); + rwsem_add_waiter(sem, &waiter); /* we're now waiting on the lock, but no longer actively locking */ count = atomic_long_add_return(adjustment, &sem->count); @@ -1002,11 +1047,7 @@ queue: return sem; out_nolock: - list_del(&waiter.list); - if (list_empty(&sem->wait_list)) { - atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF, - &sem->count); - } + rwsem_del_waiter(sem, &waiter); raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock_fail); @@ -1020,9 +1061,7 @@ static struct rw_semaphore * rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { long count; - enum writer_wait_state wstate; struct rwsem_waiter waiter; - struct rw_semaphore *ret = sem; DEFINE_WAKE_Q(wake_q); /* do optimistic spinning and steal lock if possible */ @@ -1038,16 +1077,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) waiter.task = current; waiter.type = RWSEM_WAITING_FOR_WRITE; waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT; + waiter.handoff_set = false; raw_spin_lock_irq(&sem->wait_lock); - - /* account for this before adding a new element to the list */ - wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST; - - list_add_tail(&waiter.list, &sem->wait_list); + rwsem_add_waiter(sem, &waiter); /* we're now waiting on the lock */ - if (wstate == WRITER_NOT_FIRST) { + if (rwsem_first_waiter(sem) != &waiter) { count = atomic_long_read(&sem->count); /* @@ -1083,13 +1119,16 @@ wait: /* wait until we successfully acquire the lock */ set_current_state(state); for (;;) { - if (rwsem_try_write_lock(sem, wstate)) { + if (rwsem_try_write_lock(sem, &waiter)) { /* rwsem_try_write_lock() implies ACQUIRE on success */ break; } raw_spin_unlock_irq(&sem->wait_lock); + if (signal_pending_state(state, current)) + goto out_nolock; + /* * After setting the handoff bit and failing to acquire * the lock, attempt to spin on owner to accelerate lock @@ -1098,7 +1137,7 @@ wait: * In this case, we attempt to acquire the lock again * without sleeping. */ - if (wstate == WRITER_HANDOFF) { + if (waiter.handoff_set) { enum owner_state owner_state; preempt_disable(); @@ -1109,66 +1148,26 @@ wait: goto trylock_again; } - /* Block until there are no active lockers. */ - for (;;) { - if (signal_pending_state(state, current)) - goto out_nolock; - - schedule(); - lockevent_inc(rwsem_sleep_writer); - set_current_state(state); - /* - * If HANDOFF bit is set, unconditionally do - * a trylock. - */ - if (wstate == WRITER_HANDOFF) - break; - - if ((wstate == WRITER_NOT_FIRST) && - (rwsem_first_waiter(sem) == &waiter)) - wstate = WRITER_FIRST; - - count = atomic_long_read(&sem->count); - if (!(count & RWSEM_LOCK_MASK)) - break; - - /* - * The setting of the handoff bit is deferred - * until rwsem_try_write_lock() is called. - */ - if ((wstate == WRITER_FIRST) && (rt_task(current) || - time_after(jiffies, waiter.timeout))) { - wstate = WRITER_HANDOFF; - lockevent_inc(rwsem_wlock_handoff); - break; - } - } + schedule(); + lockevent_inc(rwsem_sleep_writer); + set_current_state(state); trylock_again: raw_spin_lock_irq(&sem->wait_lock); } __set_current_state(TASK_RUNNING); - list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); lockevent_inc(rwsem_wlock); - - return ret; + return sem; out_nolock: __set_current_state(TASK_RUNNING); raw_spin_lock_irq(&sem->wait_lock); - list_del(&waiter.list); - - if (unlikely(wstate == WRITER_HANDOFF)) - atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count); - - if (list_empty(&sem->wait_list)) - atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count); - else + rwsem_del_waiter(sem, &waiter); + if (!list_empty(&sem->wait_list)) rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); wake_up_q(&wake_q); lockevent_inc(rwsem_wlock_fail); - return ERR_PTR(-EINTR); } @@ -1249,17 +1248,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem); - /* - * Optimize for the case when the rwsem is not locked at all. - */ - tmp = RWSEM_UNLOCKED_VALUE; - do { + tmp = atomic_long_read(&sem->count); + while (!(tmp & RWSEM_READ_FAILED_MASK)) { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, - tmp + RWSEM_READER_BIAS)) { + tmp + RWSEM_READER_BIAS)) { rwsem_set_reader_owned(sem); return 1; } - } while (!(tmp & RWSEM_READ_FAILED_MASK)); + } return 0; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2f6755224946..cdb30a523a69 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8691,9 +8691,6 @@ void __init init_idle(struct task_struct *idle, int cpu) idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); - scs_task_reset(idle); - kasan_unpoison_task_stack(idle); - #ifdef CONFIG_SMP /* * It's possible that init_idle() gets called multiple times on a task, @@ -8849,7 +8846,6 @@ void idle_task_exit(void) finish_arch_post_lock_switch(); } - scs_task_reset(current); /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f8965fd50d3b..92be9cb1d7d4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2693,7 +2693,7 @@ trace_create_new_event(struct trace_event_call *call, lockdep_is_held(&event_mutex)); if (!trace_pid_list_first(pid_list, &first) || - !trace_pid_list_first(pid_list, &first)) + !trace_pid_list_first(no_pid_list, &first)) file->flags |= EVENT_FILE_FL_PID_FILTER; file->event_call = call;