From 91932db1d96b2952299ce30c1c693d834d10ace6 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Mon, 11 Nov 2024 20:13:29 -0800 Subject: [PATCH 01/45] pps: Fix a use-after-free commit c79a39dc8d060b9e64e8b0fa9d245d44befeefbe upstream. On a board running ntpd and gpsd, I'm seeing a consistent use-after-free in sys_exit() from gpsd when rebooting: pps pps1: removed ------------[ cut here ]------------ kobject: '(null)' (00000000db4bec24): is not initialized, yet kobject_put() is being called. WARNING: CPU: 2 PID: 440 at lib/kobject.c:734 kobject_put+0x120/0x150 CPU: 2 UID: 299 PID: 440 Comm: gpsd Not tainted 6.11.0-rc6-00308-gb31c44928842 #1 Hardware name: Raspberry Pi 4 Model B Rev 1.1 (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : kobject_put+0x120/0x150 lr : kobject_put+0x120/0x150 sp : ffffffc0803d3ae0 x29: ffffffc0803d3ae0 x28: ffffff8042dc9738 x27: 0000000000000001 x26: 0000000000000000 x25: ffffff8042dc9040 x24: ffffff8042dc9440 x23: ffffff80402a4620 x22: ffffff8042ef4bd0 x21: ffffff80405cb600 x20: 000000000008001b x19: ffffff8040b3b6e0 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 696e6920746f6e20 x14: 7369203a29343263 x13: 205d303434542020 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: kobject_put+0x120/0x150 cdev_put+0x20/0x3c __fput+0x2c4/0x2d8 ____fput+0x1c/0x38 task_work_run+0x70/0xfc do_exit+0x2a0/0x924 do_group_exit+0x34/0x90 get_signal+0x7fc/0x8c0 do_signal+0x128/0x13b4 do_notify_resume+0xdc/0x160 el0_svc+0xd4/0xf8 el0t_64_sync_handler+0x140/0x14c el0t_64_sync+0x190/0x194 ---[ end trace 0000000000000000 ]--- ...followed by more symptoms of corruption, with similar stacks: refcount_t: underflow; use-after-free. kernel BUG at lib/list_debug.c:62! Kernel panic - not syncing: Oops - BUG: Fatal exception This happens because pps_device_destruct() frees the pps_device with the embedded cdev immediately after calling cdev_del(), but, as the comment above cdev_del() notes, fops for previously opened cdevs are still callable even after cdev_del() returns. I think this bug has always been there: I can't explain why it suddenly started happening every time I reboot this particular board. In commit d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source."), George Spelvin suggested removing the embedded cdev. That seems like the simplest way to fix this, so I've implemented his suggestion, using __register_chrdev() with pps_idr becoming the source of truth for which minor corresponds to which device. But now that pps_idr defines userspace visibility instead of cdev_add(), we need to be sure the pps->dev refcount can't reach zero while userspace can still find it again. So, the idr_remove() call moves to pps_unregister_cdev(), and pps_idr now holds a reference to pps->dev. pps_core: source serial1 got cdev (251:1) <...> pps pps1: removed pps_core: unregistering pps1 pps_core: deallocating pps1 Fixes: d953e0e837e6 ("pps: Fix a use-after free bug when unregistering a source.") Cc: stable@vger.kernel.org Signed-off-by: Calvin Owens Reviewed-by: Michal Schmidt Link: https://lore.kernel.org/r/a17975fd5ae99385791929e563f72564edbcf28f.1731383727.git.calvin@wbinvd.org Signed-off-by: Calvin Owens Signed-off-by: Greg Kroah-Hartman --- drivers/pps/clients/pps-gpio.c | 4 +- drivers/pps/clients/pps-ktimer.c | 4 +- drivers/pps/clients/pps-ldisc.c | 6 +- drivers/pps/clients/pps_parport.c | 4 +- drivers/pps/kapi.c | 10 +-- drivers/pps/kc.c | 10 +-- drivers/pps/pps.c | 127 ++++++++++++++++-------------- drivers/ptp/ptp_ocp.c | 2 +- include/linux/pps_kernel.h | 3 +- 9 files changed, 87 insertions(+), 83 deletions(-) diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 2f4b11b4dfcd..bf3b6f1aa984 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -214,8 +214,8 @@ static int pps_gpio_probe(struct platform_device *pdev) return -EINVAL; } - dev_info(data->pps->dev, "Registered IRQ %d as PPS source\n", - data->irq); + dev_dbg(&data->pps->dev, "Registered IRQ %d as PPS source\n", + data->irq); return 0; } diff --git a/drivers/pps/clients/pps-ktimer.c b/drivers/pps/clients/pps-ktimer.c index d33106bd7a29..2f465549b843 100644 --- a/drivers/pps/clients/pps-ktimer.c +++ b/drivers/pps/clients/pps-ktimer.c @@ -56,7 +56,7 @@ static struct pps_source_info pps_ktimer_info = { static void __exit pps_ktimer_exit(void) { - dev_info(pps->dev, "ktimer PPS source unregistered\n"); + dev_dbg(&pps->dev, "ktimer PPS source unregistered\n"); del_timer_sync(&ktimer); pps_unregister_source(pps); @@ -74,7 +74,7 @@ static int __init pps_ktimer_init(void) timer_setup(&ktimer, pps_ktimer_event, 0); mod_timer(&ktimer, jiffies + HZ); - dev_info(pps->dev, "ktimer PPS source registered\n"); + dev_dbg(&pps->dev, "ktimer PPS source registered\n"); return 0; } diff --git a/drivers/pps/clients/pps-ldisc.c b/drivers/pps/clients/pps-ldisc.c index 443d6bae19d1..fa5660f3c4b7 100644 --- a/drivers/pps/clients/pps-ldisc.c +++ b/drivers/pps/clients/pps-ldisc.c @@ -32,7 +32,7 @@ static void pps_tty_dcd_change(struct tty_struct *tty, bool active) pps_event(pps, &ts, active ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR, NULL); - dev_dbg(pps->dev, "PPS %s at %lu\n", + dev_dbg(&pps->dev, "PPS %s at %lu\n", active ? "assert" : "clear", jiffies); } @@ -69,7 +69,7 @@ static int pps_tty_open(struct tty_struct *tty) goto err_unregister; } - dev_info(pps->dev, "source \"%s\" added\n", info.path); + dev_dbg(&pps->dev, "source \"%s\" added\n", info.path); return 0; @@ -89,7 +89,7 @@ static void pps_tty_close(struct tty_struct *tty) if (WARN_ON(!pps)) return; - dev_info(pps->dev, "removed\n"); + dev_info(&pps->dev, "removed\n"); pps_unregister_source(pps); } diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c index 53e9c304ae0a..c3f46efd64c3 100644 --- a/drivers/pps/clients/pps_parport.c +++ b/drivers/pps/clients/pps_parport.c @@ -81,7 +81,7 @@ static void parport_irq(void *handle) /* check the signal (no signal means the pulse is lost this time) */ if (!signal_is_set(port)) { local_irq_restore(flags); - dev_err(dev->pps->dev, "lost the signal\n"); + dev_err(&dev->pps->dev, "lost the signal\n"); goto out_assert; } @@ -98,7 +98,7 @@ static void parport_irq(void *handle) /* timeout */ dev->cw_err++; if (dev->cw_err >= CLEAR_WAIT_MAX_ERRORS) { - dev_err(dev->pps->dev, "disabled clear edge capture after %d" + dev_err(&dev->pps->dev, "disabled clear edge capture after %d" " timeouts\n", dev->cw_err); dev->cw = 0; dev->cw_err = 0; diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index d9d566f70ed1..92d1b62ea239 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -41,7 +41,7 @@ static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset) static void pps_echo_client_default(struct pps_device *pps, int event, void *data) { - dev_info(pps->dev, "echo %s %s\n", + dev_info(&pps->dev, "echo %s %s\n", event & PPS_CAPTUREASSERT ? "assert" : "", event & PPS_CAPTURECLEAR ? "clear" : ""); } @@ -112,7 +112,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info, goto kfree_pps; } - dev_info(pps->dev, "new PPS source %s\n", info->name); + dev_dbg(&pps->dev, "new PPS source %s\n", info->name); return pps; @@ -166,7 +166,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, /* check event type */ BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); - dev_dbg(pps->dev, "PPS event at %lld.%09ld\n", + dev_dbg(&pps->dev, "PPS event at %lld.%09ld\n", (s64)ts->ts_real.tv_sec, ts->ts_real.tv_nsec); timespec_to_pps_ktime(&ts_real, ts->ts_real); @@ -188,7 +188,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, /* Save the time stamp */ pps->assert_tu = ts_real; pps->assert_sequence++; - dev_dbg(pps->dev, "capture assert seq #%u\n", + dev_dbg(&pps->dev, "capture assert seq #%u\n", pps->assert_sequence); captured = ~0; @@ -202,7 +202,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, /* Save the time stamp */ pps->clear_tu = ts_real; pps->clear_sequence++; - dev_dbg(pps->dev, "capture clear seq #%u\n", + dev_dbg(&pps->dev, "capture clear seq #%u\n", pps->clear_sequence); captured = ~0; diff --git a/drivers/pps/kc.c b/drivers/pps/kc.c index 50dc59af45be..fbd23295afd7 100644 --- a/drivers/pps/kc.c +++ b/drivers/pps/kc.c @@ -43,11 +43,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args) pps_kc_hardpps_mode = 0; pps_kc_hardpps_dev = NULL; spin_unlock_irq(&pps_kc_hardpps_lock); - dev_info(pps->dev, "unbound kernel" + dev_info(&pps->dev, "unbound kernel" " consumer\n"); } else { spin_unlock_irq(&pps_kc_hardpps_lock); - dev_err(pps->dev, "selected kernel consumer" + dev_err(&pps->dev, "selected kernel consumer" " is not bound\n"); return -EINVAL; } @@ -57,11 +57,11 @@ int pps_kc_bind(struct pps_device *pps, struct pps_bind_args *bind_args) pps_kc_hardpps_mode = bind_args->edge; pps_kc_hardpps_dev = pps; spin_unlock_irq(&pps_kc_hardpps_lock); - dev_info(pps->dev, "bound kernel consumer: " + dev_info(&pps->dev, "bound kernel consumer: " "edge=0x%x\n", bind_args->edge); } else { spin_unlock_irq(&pps_kc_hardpps_lock); - dev_err(pps->dev, "another kernel consumer" + dev_err(&pps->dev, "another kernel consumer" " is already bound\n"); return -EINVAL; } @@ -83,7 +83,7 @@ void pps_kc_remove(struct pps_device *pps) pps_kc_hardpps_mode = 0; pps_kc_hardpps_dev = NULL; spin_unlock_irq(&pps_kc_hardpps_lock); - dev_info(pps->dev, "unbound kernel consumer" + dev_info(&pps->dev, "unbound kernel consumer" " on device removal\n"); } else spin_unlock_irq(&pps_kc_hardpps_lock); diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 22a65ad4e46e..2d008e0d116a 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -25,7 +25,7 @@ * Local variables */ -static dev_t pps_devt; +static int pps_major; static struct class *pps_class; static DEFINE_MUTEX(pps_idr_lock); @@ -62,7 +62,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata) else { unsigned long ticks; - dev_dbg(pps->dev, "timeout %lld.%09d\n", + dev_dbg(&pps->dev, "timeout %lld.%09d\n", (long long) fdata->timeout.sec, fdata->timeout.nsec); ticks = fdata->timeout.sec * HZ; @@ -80,7 +80,7 @@ static int pps_cdev_pps_fetch(struct pps_device *pps, struct pps_fdata *fdata) /* Check for pending signals */ if (err == -ERESTARTSYS) { - dev_dbg(pps->dev, "pending signal caught\n"); + dev_dbg(&pps->dev, "pending signal caught\n"); return -EINTR; } @@ -98,7 +98,7 @@ static long pps_cdev_ioctl(struct file *file, switch (cmd) { case PPS_GETPARAMS: - dev_dbg(pps->dev, "PPS_GETPARAMS\n"); + dev_dbg(&pps->dev, "PPS_GETPARAMS\n"); spin_lock_irq(&pps->lock); @@ -114,7 +114,7 @@ static long pps_cdev_ioctl(struct file *file, break; case PPS_SETPARAMS: - dev_dbg(pps->dev, "PPS_SETPARAMS\n"); + dev_dbg(&pps->dev, "PPS_SETPARAMS\n"); /* Check the capabilities */ if (!capable(CAP_SYS_TIME)) @@ -124,14 +124,14 @@ static long pps_cdev_ioctl(struct file *file, if (err) return -EFAULT; if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) { - dev_dbg(pps->dev, "capture mode unspecified (%x)\n", + dev_dbg(&pps->dev, "capture mode unspecified (%x)\n", params.mode); return -EINVAL; } /* Check for supported capabilities */ if ((params.mode & ~pps->info.mode) != 0) { - dev_dbg(pps->dev, "unsupported capabilities (%x)\n", + dev_dbg(&pps->dev, "unsupported capabilities (%x)\n", params.mode); return -EINVAL; } @@ -144,7 +144,7 @@ static long pps_cdev_ioctl(struct file *file, /* Restore the read only parameters */ if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) { /* section 3.3 of RFC 2783 interpreted */ - dev_dbg(pps->dev, "time format unspecified (%x)\n", + dev_dbg(&pps->dev, "time format unspecified (%x)\n", params.mode); pps->params.mode |= PPS_TSFMT_TSPEC; } @@ -165,7 +165,7 @@ static long pps_cdev_ioctl(struct file *file, break; case PPS_GETCAP: - dev_dbg(pps->dev, "PPS_GETCAP\n"); + dev_dbg(&pps->dev, "PPS_GETCAP\n"); err = put_user(pps->info.mode, iuarg); if (err) @@ -176,7 +176,7 @@ static long pps_cdev_ioctl(struct file *file, case PPS_FETCH: { struct pps_fdata fdata; - dev_dbg(pps->dev, "PPS_FETCH\n"); + dev_dbg(&pps->dev, "PPS_FETCH\n"); err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata)); if (err) @@ -206,7 +206,7 @@ static long pps_cdev_ioctl(struct file *file, case PPS_KC_BIND: { struct pps_bind_args bind_args; - dev_dbg(pps->dev, "PPS_KC_BIND\n"); + dev_dbg(&pps->dev, "PPS_KC_BIND\n"); /* Check the capabilities */ if (!capable(CAP_SYS_TIME)) @@ -218,7 +218,7 @@ static long pps_cdev_ioctl(struct file *file, /* Check for supported capabilities */ if ((bind_args.edge & ~pps->info.mode) != 0) { - dev_err(pps->dev, "unsupported capabilities (%x)\n", + dev_err(&pps->dev, "unsupported capabilities (%x)\n", bind_args.edge); return -EINVAL; } @@ -227,7 +227,7 @@ static long pps_cdev_ioctl(struct file *file, if (bind_args.tsformat != PPS_TSFMT_TSPEC || (bind_args.edge & ~PPS_CAPTUREBOTH) != 0 || bind_args.consumer != PPS_KC_HARDPPS) { - dev_err(pps->dev, "invalid kernel consumer bind" + dev_err(&pps->dev, "invalid kernel consumer bind" " parameters (%x)\n", bind_args.edge); return -EINVAL; } @@ -259,7 +259,7 @@ static long pps_cdev_compat_ioctl(struct file *file, struct pps_fdata fdata; int err; - dev_dbg(pps->dev, "PPS_FETCH\n"); + dev_dbg(&pps->dev, "PPS_FETCH\n"); err = copy_from_user(&compat, uarg, sizeof(struct pps_fdata_compat)); if (err) @@ -296,20 +296,36 @@ static long pps_cdev_compat_ioctl(struct file *file, #define pps_cdev_compat_ioctl NULL #endif +static struct pps_device *pps_idr_get(unsigned long id) +{ + struct pps_device *pps; + + mutex_lock(&pps_idr_lock); + pps = idr_find(&pps_idr, id); + if (pps) + get_device(&pps->dev); + + mutex_unlock(&pps_idr_lock); + return pps; +} + static int pps_cdev_open(struct inode *inode, struct file *file) { - struct pps_device *pps = container_of(inode->i_cdev, - struct pps_device, cdev); + struct pps_device *pps = pps_idr_get(iminor(inode)); + + if (!pps) + return -ENODEV; + file->private_data = pps; - kobject_get(&pps->dev->kobj); return 0; } static int pps_cdev_release(struct inode *inode, struct file *file) { - struct pps_device *pps = container_of(inode->i_cdev, - struct pps_device, cdev); - kobject_put(&pps->dev->kobj); + struct pps_device *pps = file->private_data; + + WARN_ON(pps->id != iminor(inode)); + put_device(&pps->dev); return 0; } @@ -332,22 +348,13 @@ static void pps_device_destruct(struct device *dev) { struct pps_device *pps = dev_get_drvdata(dev); - cdev_del(&pps->cdev); - - /* Now we can release the ID for re-use */ pr_debug("deallocating pps%d\n", pps->id); - mutex_lock(&pps_idr_lock); - idr_remove(&pps_idr, pps->id); - mutex_unlock(&pps_idr_lock); - - kfree(dev); kfree(pps); } int pps_register_cdev(struct pps_device *pps) { int err; - dev_t devt; mutex_lock(&pps_idr_lock); /* @@ -364,40 +371,29 @@ int pps_register_cdev(struct pps_device *pps) goto out_unlock; } pps->id = err; - mutex_unlock(&pps_idr_lock); - devt = MKDEV(MAJOR(pps_devt), pps->id); - - cdev_init(&pps->cdev, &pps_cdev_fops); - pps->cdev.owner = pps->info.owner; - - err = cdev_add(&pps->cdev, devt, 1); - if (err) { - pr_err("%s: failed to add char device %d:%d\n", - pps->info.name, MAJOR(pps_devt), pps->id); + pps->dev.class = pps_class; + pps->dev.parent = pps->info.dev; + pps->dev.devt = MKDEV(pps_major, pps->id); + dev_set_drvdata(&pps->dev, pps); + dev_set_name(&pps->dev, "pps%d", pps->id); + err = device_register(&pps->dev); + if (err) goto free_idr; - } - pps->dev = device_create(pps_class, pps->info.dev, devt, pps, - "pps%d", pps->id); - if (IS_ERR(pps->dev)) { - err = PTR_ERR(pps->dev); - goto del_cdev; - } /* Override the release function with our own */ - pps->dev->release = pps_device_destruct; + pps->dev.release = pps_device_destruct; - pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, - MAJOR(pps_devt), pps->id); + pr_debug("source %s got cdev (%d:%d)\n", pps->info.name, pps_major, + pps->id); + get_device(&pps->dev); + mutex_unlock(&pps_idr_lock); return 0; -del_cdev: - cdev_del(&pps->cdev); - free_idr: - mutex_lock(&pps_idr_lock); idr_remove(&pps_idr, pps->id); + put_device(&pps->dev); out_unlock: mutex_unlock(&pps_idr_lock); return err; @@ -407,7 +403,13 @@ void pps_unregister_cdev(struct pps_device *pps) { pr_debug("unregistering pps%d\n", pps->id); pps->lookup_cookie = NULL; - device_destroy(pps_class, pps->dev->devt); + device_destroy(pps_class, pps->dev.devt); + + /* Now we can release the ID for re-use */ + mutex_lock(&pps_idr_lock); + idr_remove(&pps_idr, pps->id); + put_device(&pps->dev); + mutex_unlock(&pps_idr_lock); } /* @@ -427,6 +429,11 @@ void pps_unregister_cdev(struct pps_device *pps) * so that it will not be used again, even if the pps device cannot * be removed from the idr due to pending references holding the minor * number in use. + * + * Since pps_idr holds a reference to the device, the returned + * pps_device is guaranteed to be valid until pps_unregister_cdev() is + * called on it. But after calling pps_unregister_cdev(), it may be + * freed at any time. */ struct pps_device *pps_lookup_dev(void const *cookie) { @@ -449,13 +456,11 @@ EXPORT_SYMBOL(pps_lookup_dev); static void __exit pps_exit(void) { class_destroy(pps_class); - unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES); + __unregister_chrdev(pps_major, 0, PPS_MAX_SOURCES, "pps"); } static int __init pps_init(void) { - int err; - pps_class = class_create(THIS_MODULE, "pps"); if (IS_ERR(pps_class)) { pr_err("failed to allocate class\n"); @@ -463,8 +468,9 @@ static int __init pps_init(void) } pps_class->dev_groups = pps_groups; - err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps"); - if (err < 0) { + pps_major = __register_chrdev(0, 0, PPS_MAX_SOURCES, "pps", + &pps_cdev_fops); + if (pps_major < 0) { pr_err("failed to allocate char device region\n"); goto remove_class; } @@ -477,8 +483,7 @@ static int __init pps_init(void) remove_class: class_destroy(pps_class); - - return err; + return pps_major; } subsys_initcall(pps_init); diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 8fee9b330b61..87b960e941ba 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3589,7 +3589,7 @@ ptp_ocp_complete(struct ptp_ocp *bp) pps = pps_lookup_dev(bp->ptp); if (pps) - ptp_ocp_symlink(bp, pps->dev, "pps"); + ptp_ocp_symlink(bp, &pps->dev, "pps"); ptp_ocp_debugfs_add_device(bp); diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 78c8ac4951b5..c7abce28ed29 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -56,8 +56,7 @@ struct pps_device { unsigned int id; /* PPS source unique ID */ void const *lookup_cookie; /* For pps_lookup_dev() only */ - struct cdev cdev; - struct device *dev; + struct device dev; struct fasync_struct *async_queue; /* fasync method */ spinlock_t lock; }; From 8c481939fb1db8fcbafef19cee330bbd898075d7 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 7 Feb 2025 01:20:54 +0900 Subject: [PATCH 02/45] Revert "btrfs: avoid monopolizing a core when activating a swap file" This reverts commit bb8e287f596b62fac18ed84cc03a9f1752f6b3b8. The backport for linux-6.1.y, commit bb8e287f596b ("btrfs: avoid monopolizing a core when activating a swap file"), inserted cond_resched() in the wrong location. Revert it now; a subsequent commit will re-backport the original patch. Fixes: bb8e287f596b ("btrfs: avoid monopolizing a core when activating a swap file") # linux-6.1.y Signed-off-by: Koichiro Den Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b2cded5bf69c..f4a754d62bf4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7387,8 +7387,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, ret = -EAGAIN; goto out; } - - cond_resched(); } if (orig_start) From 0c7ba1d65c291b6e28b3a6fce196677c7b2b4817 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 7 Feb 2025 01:20:55 +0900 Subject: [PATCH 03/45] btrfs: avoid monopolizing a core when activating a swap file commit 2c8507c63f5498d4ee4af404a8e44ceae4345056 upstream. This commit re-attempts the backport of the change to the linux-6.1.y branch. Commit bb8e287f596b ("btrfs: avoid monopolizing a core when activating a swap file") on this branch was reverted. During swap activation we iterate over the extents of a file and we can have many thousands of them, so we can end up in a busy loop monopolizing a core. Avoid this by doing a voluntary reschedule after processing each extent. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Koichiro Den Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f4a754d62bf4..a13ab3abef12 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -11368,6 +11368,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, } start += len; + + cond_resched(); } if (bsi.block_len) From 2e59b2b68782519560b3d6a41dd66a3d01a01cd3 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Sun, 26 Jan 2025 17:47:22 +0800 Subject: [PATCH 04/45] nfsd: clear acl_access/acl_default after releasing them commit 7faf14a7b0366f153284db0ad3347c457ea70136 upstream. If getting acl_default fails, acl_access and acl_default will be released simultaneously. However, acl_access will still retain a pointer pointing to the released posix_acl, which will trigger a WARNING in nfs3svc_release_getacl like this: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 26 PID: 3199 at lib/refcount.c:28 refcount_warn_saturate+0xb5/0x170 Modules linked in: CPU: 26 UID: 0 PID: 3199 Comm: nfsd Not tainted 6.12.0-rc6-00079-g04ae226af01f-dirty #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 RIP: 0010:refcount_warn_saturate+0xb5/0x170 Code: cc cc 0f b6 1d b3 20 a5 03 80 fb 01 0f 87 65 48 d8 00 83 e3 01 75 e4 48 c7 c7 c0 3b 9b 85 c6 05 97 20 a5 03 01 e8 fb 3e 30 ff <0f> 0b eb cd 0f b6 1d 8a3 RSP: 0018:ffffc90008637cd8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff83904fde RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88871ed36380 RBP: ffff888158beeb40 R08: 0000000000000001 R09: fffff520010c6f56 R10: ffffc90008637ab7 R11: 0000000000000001 R12: 0000000000000001 R13: ffff888140e77400 R14: ffff888140e77408 R15: ffffffff858b42c0 FS: 0000000000000000(0000) GS:ffff88871ed00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000562384d32158 CR3: 000000055cc6a000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? refcount_warn_saturate+0xb5/0x170 ? __warn+0xa5/0x140 ? refcount_warn_saturate+0xb5/0x170 ? report_bug+0x1b1/0x1e0 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x17/0x40 ? asm_exc_invalid_op+0x1a/0x20 ? tick_nohz_tick_stopped+0x1e/0x40 ? refcount_warn_saturate+0xb5/0x170 ? refcount_warn_saturate+0xb5/0x170 nfs3svc_release_getacl+0xc9/0xe0 svc_process_common+0x5db/0xb60 ? __pfx_svc_process_common+0x10/0x10 ? __rcu_read_unlock+0x69/0xa0 ? __pfx_nfsd_dispatch+0x10/0x10 ? svc_xprt_received+0xa1/0x120 ? xdr_init_decode+0x11d/0x190 svc_process+0x2a7/0x330 svc_handle_xprt+0x69d/0x940 svc_recv+0x180/0x2d0 nfsd+0x168/0x200 ? __pfx_nfsd+0x10/0x10 kthread+0x1a2/0x1e0 ? kthread+0xf4/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Kernel panic - not syncing: kernel: panic_on_warn set ... Clear acl_access/acl_default after posix_acl_release is called to prevent UAF from being triggered. Fixes: a257cdd0e217 ("[PATCH] NFSD: Add server support for NFSv3 ACLs.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241107014705.2509463-1-lilingfeng@huaweicloud.com/ Signed-off-by: Li Lingfeng Reviewed-by: Rick Macklem Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs2acl.c | 2 ++ fs/nfsd/nfs3acl.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 65d4511b7af0..6c4fe9409611 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -84,6 +84,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index a34a22e272ad..e6bb621f1ffd 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -76,6 +76,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } From 38d345f612503b850c2973e5a879f88e441b34d7 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Thu, 30 Jan 2025 11:01:27 -0800 Subject: [PATCH 05/45] NFSD: fix hang in nfsd4_shutdown_callback commit 036ac2778f7b28885814c6fbc07e156ad1624d03 upstream. If nfs4_client is in courtesy state then there is no point to send the callback. This causes nfsd4_shutdown_callback to hang since cl_cb_inflight is not 0. This hang lasts about 15 minutes until TCP notifies NFSD that the connection was dropped. This patch modifies nfsd4_run_cb_work to skip the RPC call if nfs4_client is in courtesy state. Signed-off-by: Dai Ngo Fixes: 66af25799940 ("NFSD: add courteous server support for thread with only delegation") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4callback.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1cdfff9de6e2..272d3facfff9 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1410,8 +1410,11 @@ nfsd4_run_cb_work(struct work_struct *work) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; - if (!clnt) { - /* Callback channel broken, or client killed; give up: */ + if (!clnt || clp->cl_state == NFSD4_COURTESY) { + /* + * Callback channel broken, client killed or + * nfs4_client in courtesy state; give up. + */ nfsd41_destroy_cb(cb); return; } From 457bdeffcab448430ca107fe1c3eccf9a7141e84 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Jan 2025 16:21:45 +0200 Subject: [PATCH 06/45] pinctrl: cy8c95x0: Respect IRQ trigger settings from firmware [ Upstream commit 1ddee69108d305bbc059cbf31c0b47626796be77 ] Some of the platforms may connect the INT pin via inversion logic effectively make the triggering to be active-low. Remove explicit trigger flag to respect the settings from firmware. Without this change even idling chip produces spurious interrupts and kernel disables the line in the result: irq 33: nobody cared (try booting with the "irqpoll" option) CPU: 0 UID: 0 PID: 125 Comm: irq/33-i2c-INT3 Not tainted 6.12.0-00236-g8b874ed11dae #64 Hardware name: Intel Corp. QUARK/Galileo, BIOS 0x01000900 01/01/2014 ... handlers: [<86e86bea>] irq_default_primary_handler threaded [] cy8c95x0_irq_handler [pinctrl_cy8c95x0] Disabling IRQ #33 Fixes: e6cbbe42944d ("pinctrl: Add Cypress cy8c95x0 support") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/20250117142304.596106-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-cy8c95x0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 5abab6bc763a..f7c8ae980813 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -1234,7 +1234,7 @@ static int cy8c95x0_irq_setup(struct cy8c95x0_pinctrl *chip, int irq) ret = devm_request_threaded_irq(chip->dev, irq, NULL, cy8c95x0_irq_handler, - IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_HIGH, + IRQF_ONESHOT | IRQF_SHARED, dev_name(chip->dev), chip); if (ret) { dev_err(chip->dev, "failed to request irq %d\n", irq); From 4e7113f591163d99adc7cbcd7295030c8c5d3fc7 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 15 Nov 2024 14:26:21 +0800 Subject: [PATCH 07/45] HID: multitouch: Add NULL check in mt_input_configured [ Upstream commit 9b8e2220d3a052a690b1d1b23019673e612494c5 ] devm_kasprintf() can return a NULL pointer on failure,but this returned value in mt_input_configured() is not checked. Add NULL check in mt_input_configured(), to handle kernel NULL pointer dereference error. Fixes: 479439463529 ("HID: multitouch: Correct devm device reference for hidinput input_dev name") Signed-off-by: Charles Han Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5ad871a7d1a4..6386043aab0b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1668,9 +1668,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) break; } - if (suffix) + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } From 436f48c864186e9413d1b7c6e91767cc9e1a65b8 Mon Sep 17 00:00:00 2001 From: Tulio Fernandes Date: Wed, 5 Feb 2025 18:50:34 -0300 Subject: [PATCH 08/45] HID: hid-thrustmaster: fix stack-out-of-bounds read in usb_check_int_endpoints() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0b43d98ff29be3144e86294486b1373b5df74c0e ] Syzbot[1] has detected a stack-out-of-bounds read of the ep_addr array from hid-thrustmaster driver. This array is passed to usb_check_int_endpoints function from usb.c core driver, which executes a for loop that iterates over the elements of the passed array. Not finding a null element at the end of the array, it tries to read the next, non-existent element, crashing the kernel. To fix this, a 0 element was added at the end of the array to break the for loop. [1] https://syzkaller.appspot.com/bug?extid=9c9179ac46169c56c1ad Reported-by: syzbot+9c9179ac46169c56c1ad@syzkaller.appspotmail.com Fixes: 50420d7c79c3 ("HID: hid-thrustmaster: Fix warning in thrustmaster_probe by adding endpoint check") Signed-off-by: Túlio Fernandes Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-thrustmaster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 6c3e758bbb09..3b81468a1df2 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -171,7 +171,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) b_ep = ep->desc.bEndpointAddress; /* Are the expected endpoints present? */ - u8 ep_addr[1] = {b_ep}; + u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { hid_err(hdev, "Unexpected non-int endpoint\n"); From 90056ece99966182dc0e367f3fd2afab46ada847 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Mon, 3 Feb 2025 12:12:03 +0300 Subject: [PATCH 09/45] ax25: Fix refcount leak caused by setting SO_BINDTODEVICE sockopt [ Upstream commit bca0902e61731a75fc4860c8720168d9f1bae3b6 ] If an AX25 device is bound to a socket by setting the SO_BINDTODEVICE socket option, a refcount leak will occur in ax25_release(). Commit 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()") added decrement of device refcounts in ax25_release(). In order for that to work correctly the refcounts must already be incremented when the device is bound to the socket. An AX25 device can be bound to a socket by either calling ax25_bind() or setting SO_BINDTODEVICE socket option. In both cases the refcounts should be incremented, but in fact it is done only in ax25_bind(). This bug leads to the following issue reported by Syzkaller: ================================================================ refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 1 PID: 5932 at lib/refcount.c:31 refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31 Modules linked in: CPU: 1 UID: 0 PID: 5932 Comm: syz-executor424 Not tainted 6.13.0-rc4-syzkaller-00110-g4099a71718b0 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x1ed/0x210 lib/refcount.c:31 Call Trace: __refcount_dec include/linux/refcount.h:336 [inline] refcount_dec include/linux/refcount.h:351 [inline] ref_tracker_free+0x710/0x820 lib/ref_tracker.c:236 netdev_tracker_free include/linux/netdevice.h:4156 [inline] netdev_put include/linux/netdevice.h:4173 [inline] netdev_put include/linux/netdevice.h:4169 [inline] ax25_release+0x33f/0xa10 net/ax25/af_ax25.c:1069 __sock_release+0xb0/0x270 net/socket.c:640 sock_close+0x1c/0x30 net/socket.c:1408 ... do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... ================================================================ Fix the implementation of ax25_setsockopt() by adding increment of refcounts for the new device bound, and decrement of refcounts for the old unbound device. Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()") Reported-by: syzbot+33841dc6aa3e1d86b78a@syzkaller.appspotmail.com Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250203091203.1744-1-m.masimov@mt-integration.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ax25/af_ax25.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4b96dedcc3c9..862e03493b7e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -685,6 +685,15 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } + if (ax25->ax25_dev) { + if (dev == ax25->ax25_dev->dev) { + rcu_read_unlock(); + break; + } + netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker); + ax25_dev_put(ax25->ax25_dev); + } + ax25->ax25_dev = ax25_dev_ax25dev(dev); if (!ax25->ax25_dev) { rcu_read_unlock(); @@ -692,6 +701,8 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } ax25_fillin_cb(ax25, ax25->ax25_dev); + netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25->ax25_dev); rcu_read_unlock(); break; From d77570ae43ec7a23f7d555f40c1a457a23a0971a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:33 +0000 Subject: [PATCH 10/45] ndisc: ndisc_send_redirect() must use dev_get_by_index_rcu() [ Upstream commit 48145a57d4bbe3496e8e4880b23ea6b511e6e519 ] ndisc_send_redirect() is called under RCU protection, not RTNL. It must use dev_get_by_index_rcu() instead of __dev_get_by_index() Fixes: 2f17becfbea5 ("vrf: check the original netdevice for generating redirect") Signed-off-by: Eric Dumazet Cc: Stephen Suryaputra Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 44d3e6ab0c7d..6df3f4aadf64 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1684,7 +1684,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) bool ret; if (netif_is_l3_master(skb->dev)) { - dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); + dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } From c7574740be8ce68a57d0aece24987b9be2114c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:38 +0000 Subject: [PATCH 11/45] vrf: use RCU protection in l3mdev_l3_out() [ Upstream commit 6d0ce46a93135d96b7fa075a94a88fe0da8e8773 ] l3mdev_l3_out() can be called without RCU being held: raw_sendmsg() ip_push_pending_frames() ip_send_skb() ip_local_out() __ip_local_out() l3mdev_ip_out() Add rcu_read_lock() / rcu_read_unlock() pair to avoid a potential UAF. Fixes: a8e3e1a9f020 ("net: l3mdev: Add hook to output path") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-7-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/l3mdev.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 031c661aa14d..bdfa9d414360 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(dev)) { struct net_device *master; + rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); + rcu_read_unlock(); } return skb; From 79aea5e55156c87dc570e43fcd8bba01b9d6ab3f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2025 10:52:42 +0000 Subject: [PATCH 12/45] vxlan: check vxlan_vnigroup_init() return value [ Upstream commit 5805402dcc56241987bca674a1b4da79a249bab7 ] vxlan_init() must check vxlan_vnigroup_init() success otherwise a crash happens later, spotted by syzbot. Oops: general protection fault, probably for non-canonical address 0xdffffc000000002c: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000160-0x0000000000000167] CPU: 0 UID: 0 PID: 7313 Comm: syz-executor147 Not tainted 6.14.0-rc1-syzkaller-00276-g69b54314c975 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:vxlan_vnigroup_uninit+0x89/0x500 drivers/net/vxlan/vxlan_vnifilter.c:912 Code: 00 48 8b 44 24 08 4c 8b b0 98 41 00 00 49 8d 86 60 01 00 00 48 89 c2 48 89 44 24 10 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 4d 04 00 00 49 8b 86 60 01 00 00 48 ba 00 00 00 RSP: 0018:ffffc9000cc1eea8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000001 RCX: ffffffff8672effb RDX: 000000000000002c RSI: ffffffff8672ecb9 RDI: ffff8880461b4f18 RBP: ffff8880461b4ef4 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000020000 R13: ffff8880461b0d80 R14: 0000000000000000 R15: dffffc0000000000 FS: 00007fecfa95d6c0(0000) GS:ffff88806a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fecfa95cfb8 CR3: 000000004472c000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: vxlan_uninit+0x1ab/0x200 drivers/net/vxlan/vxlan_core.c:2942 unregister_netdevice_many_notify+0x12d6/0x1f30 net/core/dev.c:11824 unregister_netdevice_many net/core/dev.c:11866 [inline] unregister_netdevice_queue+0x307/0x3f0 net/core/dev.c:11736 register_netdevice+0x1829/0x1eb0 net/core/dev.c:10901 __vxlan_dev_create+0x7c6/0xa30 drivers/net/vxlan/vxlan_core.c:3981 vxlan_newlink+0xd1/0x130 drivers/net/vxlan/vxlan_core.c:4407 rtnl_newlink_create net/core/rtnetlink.c:3795 [inline] __rtnl_newlink net/core/rtnetlink.c:3906 [inline] Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Reported-by: syzbot+6a9624592218c2c5e7aa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67a9d9b4.050a0220.110943.002d.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250210105242.883482-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 155d335c80a7..50be5a3c4779 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2982,8 +2982,11 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; - if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) - vxlan_vnigroup_init(vxlan); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + err = vxlan_vnigroup_init(vxlan); + if (err) + return err; + } dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) { From 4512482e4805dd30bc77dec511f2a2edba5cb868 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Feb 2025 13:49:28 +0000 Subject: [PATCH 13/45] team: better TEAM_OPTION_TYPE_STRING validation [ Upstream commit 5bef3ac184b5626ea62385d6b82a1992b89d7940 ] syzbot reported following splat [1] Make sure user-provided data contains one nul byte. [1] BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:633 [inline] BUG: KMSAN: uninit-value in string+0x3ec/0x5f0 lib/vsprintf.c:714 string_nocheck lib/vsprintf.c:633 [inline] string+0x3ec/0x5f0 lib/vsprintf.c:714 vsnprintf+0xa5d/0x1960 lib/vsprintf.c:2843 __request_module+0x252/0x9f0 kernel/module/kmod.c:149 team_mode_get drivers/net/team/team_core.c:480 [inline] team_change_mode drivers/net/team/team_core.c:607 [inline] team_mode_option_set+0x437/0x970 drivers/net/team/team_core.c:1401 team_option_set drivers/net/team/team_core.c:375 [inline] team_nl_options_set_doit+0x1339/0x1f90 drivers/net/team/team_core.c:2662 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x1214/0x12c0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x375/0x650 net/netlink/af_netlink.c:2543 genl_rcv+0x40/0x60 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline] netlink_unicast+0xf52/0x1260 net/netlink/af_netlink.c:1348 netlink_sendmsg+0x10da/0x11e0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:733 ____sys_sendmsg+0x877/0xb60 net/socket.c:2573 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2627 __sys_sendmsg net/socket.c:2659 [inline] __do_sys_sendmsg net/socket.c:2664 [inline] __se_sys_sendmsg net/socket.c:2662 [inline] __x64_sys_sendmsg+0x212/0x3c0 net/socket.c:2662 x64_sys_call+0x2ed6/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Reported-by: syzbot+1fcd957a82e3a1baa94d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1fcd957a82e3a1baa94d Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250212134928.1541609-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b23aa3c8bdf8..c2327fa10747 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2670,7 +2670,9 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) ctx.data.u32_val = nla_get_u32(attr_data); break; case TEAM_OPTION_TYPE_STRING: - if (nla_len(attr_data) > TEAM_STRING_MAX_LEN) { + if (nla_len(attr_data) > TEAM_STRING_MAX_LEN || + !memchr(nla_data(attr_data), '\0', + nla_len(attr_data))) { err = -EINVAL; goto team_put; } From 4ff25f0b18d1d0174c105e4620428bcdc1213860 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Thu, 6 Feb 2025 12:44:20 -0500 Subject: [PATCH 14/45] arm64: cacheinfo: Avoid out-of-bounds write to cacheinfo array [ Upstream commit 875d742cf5327c93cba1f11e12b08d3cce7a88d2 ] The loop that detects/populates cache information already has a bounds check on the array size but does not account for cache levels with separate data/instructions cache. Fix this by incrementing the index for any populated leaf (instead of any populated level). Fixes: 5d425c186537 ("arm64: kernel: add support for cpu cache information") Signed-off-by: Radu Rendec Link: https://lore.kernel.org/r/20250206174420.2178724-1-rrendec@redhat.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/cacheinfo.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 97c42be71338..1510f457b615 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -87,16 +87,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; From 99354bc51c461ed9735a6eec92c067e85fa10014 Mon Sep 17 00:00:00 2001 From: Muhammad Adeel Date: Fri, 7 Feb 2025 14:24:32 +0000 Subject: [PATCH 15/45] cgroup: Remove steal time from usage_usec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit db5fd3cf8bf41b84b577b8ad5234ea95f327c9be ] The CPU usage time is the time when user, system or both are using the CPU. Steal time is the time when CPU is waiting to be run by the Hypervisor. It should not be added to the CPU usage time, hence removing it from the usage_usec entry. Fixes: 936f2a70f2077 ("cgroup: add cpu.stat file to root cgroup") Acked-by: Axel Busch Acked-by: Michal Koutný Signed-off-by: Muhammad Adeel Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/rstat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 7006fc8dd677..0ae90c15cad8 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -477,7 +477,6 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; - cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; From fae3c2825d45a0fddb3458dbd2305163f74f0278 Mon Sep 17 00:00:00 2001 From: Krzysztof Karas Date: Thu, 30 Jan 2025 09:19:31 +0000 Subject: [PATCH 16/45] drm/i915/selftests: avoid using uninitialized context [ Upstream commit 53139b3f9998ea07289e7b70b909fea2264a0de9 ] There is an error path in igt_ppgtt_alloc(), which leads to ww object being passed down to i915_gem_ww_ctx_fini() without initialization. Correct that by only putting ppgtt->vm and returning early. Fixes: 480ae79537b2 ("drm/i915/selftests: Prepare gtt tests for obj->mm.lock removal") Signed-off-by: Krzysztof Karas Reviewed-by: Mikolaj Wasiak Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/iuaonpjc3rywmvhna6umjlvzilocn2uqsrxfxfob24e2taocbi@lkaivvfp4777 (cherry picked from commit 8d8334632ea62424233ac6529712868241d0f8df) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e050a2de5fd1..e25f76b46b0a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -164,7 +164,7 @@ static int igt_ppgtt_alloc(void *arg) return PTR_ERR(ppgtt); if (!ppgtt->vm.allocate_va_range) - goto err_ppgtt_cleanup; + goto ppgtt_vm_put; /* * While we only allocate the page tables here and so we could @@ -232,7 +232,7 @@ err_ppgtt_cleanup: goto retry; } i915_gem_ww_ctx_fini(&ww); - +ppgtt_vm_put: i915_vm_put(&ppgtt->vm); return err; } From a6d8655b9ca24612e096e7e08f4c9e3308ff1988 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:00 +0100 Subject: [PATCH 17/45] gpio: bcm-kona: Fix GPIO lock/unlock for banks above bank 0 [ Upstream commit de1d0d160f64ee76df1d364d521b2faf465a091c ] The GPIO lock/unlock functions clear/write a bit to the relevant register for each bank. However, due to an oversight the bit that was being written was based on the total GPIO number, not the index of the GPIO within the relevant bank, causing it to fail for any GPIO above 32 (thus any GPIO for banks above bank 0). Fix lock/unlock for these banks by using the correct bit. Fixes: bdb93c03c550 ("gpio: bcm281xx: Centralize register locking") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-1-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-bcm-kona.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 70770429ba48..7e8adc5cc15a 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -85,11 +85,12 @@ static void bcm_kona_gpio_lock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val |= BIT(gpio); + val |= BIT(bit); bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); @@ -101,11 +102,12 @@ static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val &= ~BIT(gpio); + val &= ~BIT(bit); bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); From 1c1b7260c90713f3d31f3133ce694067eefac213 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:01 +0100 Subject: [PATCH 18/45] gpio: bcm-kona: Make sure GPIO bits are unlocked when requesting IRQ [ Upstream commit 57f5db77a915cc29461a679a6bcae7097967be1a ] The settings for all GPIOs are locked by default in bcm_kona_gpio_reset. The settings for a GPIO are unlocked when requesting it as a GPIO, but not when requesting it as an interrupt, causing the IRQ settings to not get applied. Fix this by making sure to unlock the right bits when an IRQ is requested. To avoid a situation where an IRQ being released causes a lock despite the same GPIO being used by a GPIO request or vice versa, add an unlock counter and only lock if it reaches 0. Fixes: 757651e3d60e ("gpio: bcm281xx: Add GPIO driver") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-2-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-bcm-kona.c | 67 +++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 7e8adc5cc15a..602021e3e683 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -68,6 +68,22 @@ struct bcm_kona_gpio { struct bcm_kona_gpio_bank { int id; int irq; + /* + * Used to keep track of lock/unlock operations for each GPIO in the + * bank. + * + * All GPIOs are locked by default (see bcm_kona_gpio_reset), and the + * unlock count for all GPIOs is 0 by default. Each unlock increments + * the counter, and each lock decrements the counter. + * + * The lock function only locks the GPIO once its unlock counter is + * down to 0. This is necessary because the GPIO is unlocked in two + * places in this driver: once for requested GPIOs, and once for + * requested IRQs. Since it is possible for a GPIO to be requested + * as both a GPIO and an IRQ, we need to ensure that we don't lock it + * too early. + */ + u8 gpio_unlock_count[GPIO_PER_BANK]; /* Used in the interrupt handler */ struct bcm_kona_gpio *kona_gpio; }; @@ -86,14 +102,23 @@ static void bcm_kona_gpio_lock_gpio(struct bcm_kona_gpio *kona_gpio, unsigned long flags; int bank_id = GPIO_BANK(gpio); int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + dev_err(kona_gpio->gpio_chip.parent, + "Unbalanced locks for GPIO %u\n", gpio); + return; + } - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val |= BIT(bit); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + if (--bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val |= BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } } static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, @@ -103,14 +128,19 @@ static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, unsigned long flags; int bank_id = GPIO_BANK(gpio); int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val &= ~BIT(bit); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val &= ~BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } + + ++bank->gpio_unlock_count[bit]; } static int bcm_kona_gpio_get_dir(struct gpio_chip *chip, unsigned gpio) @@ -361,6 +391,7 @@ static void bcm_kona_gpio_irq_mask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MASK(bank_id)); @@ -383,6 +414,7 @@ static void bcm_kona_gpio_irq_unmask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MSKCLR(bank_id)); @@ -478,15 +510,26 @@ static void bcm_kona_gpio_irq_handler(struct irq_desc *desc) static int bcm_kona_gpio_irq_reqres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; - return gpiochip_reqres_irq(&kona_gpio->gpio_chip, d->hwirq); + /* + * We need to unlock the GPIO before any other operations are performed + * on the relevant GPIO configuration registers + */ + bcm_kona_gpio_unlock_gpio(kona_gpio, gpio); + + return gpiochip_reqres_irq(&kona_gpio->gpio_chip, gpio); } static void bcm_kona_gpio_irq_relres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; - gpiochip_relres_irq(&kona_gpio->gpio_chip, d->hwirq); + /* Once we no longer use it, lock the GPIO again */ + bcm_kona_gpio_lock_gpio(kona_gpio, gpio); + + gpiochip_relres_irq(&kona_gpio->gpio_chip, gpio); } static struct irq_chip bcm_gpio_irq_chip = { From 6edce95922534dce19785ce5fec5504f6abbe2e7 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Thu, 6 Feb 2025 18:46:02 +0100 Subject: [PATCH 19/45] gpio: bcm-kona: Add missing newline to dev_err format string [ Upstream commit 615279db222c3ac56d5c93716efd72b843295c1f ] Add a missing newline to the format string of the "Couldn't get IRQ for bank..." error message. Fixes: 757651e3d60e ("gpio: bcm281xx: Add GPIO driver") Reviewed-by: Florian Fainelli Reviewed-by: Markus Mayer Signed-off-by: Artur Weber Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250206-kona-gpio-fixes-v2-3-409135eab780@gmail.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-bcm-kona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 602021e3e683..3373e46e3ba0 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -667,7 +667,7 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev) bank->irq = platform_get_irq(pdev, i); bank->kona_gpio = kona_gpio; if (bank->irq < 0) { - dev_err(dev, "Couldn't get IRQ for bank %d", i); + dev_err(dev, "Couldn't get IRQ for bank %d\n", i); ret = -ENOENT; goto err_irq_domain; } From 099606a7b2d53e99ce31af42894c1fc9d77c6db9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 10 Feb 2025 08:43:39 +0100 Subject: [PATCH 20/45] xen/swiotlb: relax alignment requirements [ Upstream commit 85fcb57c983f423180ba6ec5d0034242da05cc54 ] When mapping a buffer for DMA via .map_page or .map_sg DMA operations, there is no need to check the machine frames to be aligned according to the mapped areas size. All what is needed in these cases is that the buffer is contiguous at machine level. So carve out the alignment check from range_straddles_page_boundary() and move it to a helper called by xen_swiotlb_alloc_coherent() and xen_swiotlb_free_coherent() directly. Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers") Reported-by: Jan Vejvalka Tested-by: Jan Vejvalka Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/swiotlb-xen.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0451e6ebc21a..0893c1012de6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -74,19 +74,21 @@ static inline phys_addr_t xen_dma_to_phys(struct device *dev, return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr)); } +static inline bool range_requires_alignment(phys_addr_t p, size_t size) +{ + phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); + phys_addr_t bus_addr = pfn_to_bfn(XEN_PFN_DOWN(p)) << XEN_PAGE_SHIFT; + + return IS_ALIGNED(p, algn) && !IS_ALIGNED(bus_addr, algn); +} + static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); - phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); next_bfn = pfn_to_bfn(xen_pfn); - /* If buffer is physically aligned, ensure DMA alignment. */ - if (IS_ALIGNED(p, algn) && - !IS_ALIGNED((phys_addr_t)next_bfn << XEN_PAGE_SHIFT, algn)) - return 1; - for (i = 1; i < nr_pages; i++) if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 1; @@ -155,7 +157,8 @@ xen_swiotlb_alloc_coherent(struct device *dev, size_t size, *dma_handle = xen_phys_to_dma(dev, phys); if (*dma_handle + size - 1 > dma_mask || - range_straddles_page_boundary(phys, size)) { + range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size)) { if (xen_create_contiguous_region(phys, order, fls64(dma_mask), dma_handle) != 0) goto out_free_pages; @@ -181,7 +184,8 @@ xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, size = ALIGN(size, XEN_PAGE_SIZE); if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || - WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) + WARN_ON_ONCE(range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size))) return; if (TestClearPageXenRemapped(virt_to_page(vaddr))) From 7a32e55815e89dbc42e151ba8908a5ebd1e2e42a Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Wed, 2 Aug 2023 18:31:51 +0200 Subject: [PATCH 21/45] xen: remove a confusing comment on auto-translated guest I/O [ Upstream commit d826c9e61c99120f8996f8fed6417167e32eb922 ] After removing the conditional return from xen_create_contiguous_region(), the accompanying comment was left in place, but it now precedes an unrelated conditional and confuses readers. Fixes: 989513a735f5 ("xen: cleanup pvh leftovers from pv-only sources") Signed-off-by: Petr Tesarik Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20230802163151.1486-1-petrtesarik@huaweicloud.com Signed-off-by: Juergen Gross Stable-dep-of: e93ec87286bd ("x86/xen: allow larger contiguous memory regions in PV guests") Signed-off-by: Sasha Levin --- arch/x86/xen/mmu_pv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ee29fb558f2e..22b619f89a1d 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2302,12 +2302,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, int success; unsigned long vstart = (unsigned long)phys_to_virt(pstart); - /* - * Currently an auto-translated guest will not perform I/O, nor will - * it require PAE page directories below 4GB. Therefore any calls to - * this function are redundant and can be ignored. - */ - if (unlikely(order > MAX_CONTIG_ORDER)) return -ENOMEM; From ca2701997c687a07f4a4b6a26073779a6154350e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 11 Feb 2025 11:16:28 +0100 Subject: [PATCH 22/45] x86/xen: allow larger contiguous memory regions in PV guests [ Upstream commit e93ec87286bd1fd30b7389e7a387cfb259f297e3 ] Today a PV guest (including dom0) can create 2MB contiguous memory regions for DMA buffers at max. This has led to problems at least with the megaraid_sas driver, which wants to allocate a 2.3MB DMA buffer. The limiting factor is the frame array used to do the hypercall for making the memory contiguous, which has 512 entries and is just a static array in mmu_pv.c. In order to not waste memory for non-PV guests, put the initial frame array into .init.data section and dynamically allocate an array from the .init_after_bootmem hook of PV guests. In case a contiguous memory area larger than the initially supported 2MB is requested, allocate a larger buffer for the frame list. Note that such an allocation is tried only after memory management has been initialized properly, which is tested via a flag being set in the .init_after_bootmem hook. Fixes: 9f40ec84a797 ("xen/swiotlb: add alignment check for dma buffers") Signed-off-by: Juergen Gross Tested-by: Alan Robinson Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/mmu_pv.c | 71 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 22b619f89a1d..6abce0816ca3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -97,6 +97,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; */ static DEFINE_SPINLOCK(xen_reservation_lock); +/* Protected by xen_reservation_lock. */ +#define MIN_CONTIG_ORDER 9 /* 2MB */ +static unsigned int discontig_frames_order = MIN_CONTIG_ORDER; +static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata; +static unsigned long *discontig_frames __refdata = discontig_frames_early; +static bool discontig_frames_dyn; + +static int alloc_discontig_frames(unsigned int order) +{ + unsigned long *new_array, *old_array; + unsigned int old_order; + unsigned long flags; + + BUG_ON(order < MIN_CONTIG_ORDER); + BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE); + + new_array = (unsigned long *)__get_free_pages(GFP_KERNEL, + order - MIN_CONTIG_ORDER); + if (!new_array) + return -ENOMEM; + + spin_lock_irqsave(&xen_reservation_lock, flags); + + old_order = discontig_frames_order; + + if (order > discontig_frames_order || !discontig_frames_dyn) { + if (!discontig_frames_dyn) + old_array = NULL; + else + old_array = discontig_frames; + + discontig_frames = new_array; + discontig_frames_order = order; + discontig_frames_dyn = true; + } else { + old_array = new_array; + } + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER); + + return 0; +} + /* * Note about cr3 (pagetable base) values: * @@ -797,6 +842,9 @@ static void __init xen_after_bootmem(void) SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); + + if (alloc_discontig_frames(MIN_CONTIG_ORDER)) + BUG(); } static void xen_unpin_page(struct mm_struct *mm, struct page *page, @@ -2177,10 +2225,6 @@ void __init xen_init_mmu_ops(void) memset(dummy_mapping, 0xff, PAGE_SIZE); } -/* Protected by xen_reservation_lock. */ -#define MAX_CONTIG_ORDER 9 /* 2MB */ -static unsigned long discontig_frames[1< MAX_CONTIG_ORDER)) - return -ENOMEM; + if (unlikely(order > discontig_frames_order)) { + if (!discontig_frames_dyn) + return -ENOMEM; + + if (alloc_discontig_frames(order)) + return -ENOMEM; + } memset((void *) vstart, 0, PAGE_SIZE << order); spin_lock_irqsave(&xen_reservation_lock, flags); + in_frames = discontig_frames; + /* 1. Zap current PTEs, remembering MFNs. */ xen_zap_pfn_range(vstart, order, in_frames, NULL); @@ -2332,12 +2383,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { - unsigned long *out_frames = discontig_frames, in_frame; + unsigned long *out_frames, in_frame; unsigned long flags; int success; unsigned long vstart; - if (unlikely(order > MAX_CONTIG_ORDER)) + if (unlikely(order > discontig_frames_order)) return; vstart = (unsigned long)phys_to_virt(pstart); @@ -2345,6 +2396,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_lock_irqsave(&xen_reservation_lock, flags); + out_frames = discontig_frames; + /* 1. Find start MFN of contiguous extent. */ in_frame = virt_to_mfn(vstart); From b9c5fb07398bf56a8a2f3c6650a90257221ca5fe Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Wed, 25 Dec 2024 13:15:48 +0000 Subject: [PATCH 23/45] RDMA/efa: Reset device on probe failure [ Upstream commit 123c13f10ed3627ba112172d8bd122a72cae226d ] Make sure the device is being reset on driver exit whatever the reason is, to keep the device aligned and allow it to close shared resources (e.g. admin queue). Reviewed-by: Firas Jahjah Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Link: https://patch.msgid.link/20241225131548.15155-1-mrgolin@amazon.com Reviewed-by: Gal Pressman Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/efa/efa_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 15ee92081118..924940ca9de0 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -452,7 +452,6 @@ static void efa_ib_device_remove(struct efa_dev *dev) ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); efa_destroy_eqs(dev); - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -623,12 +622,14 @@ err_disable_device: return ERR_PTR(err); } -static void efa_remove_device(struct pci_dev *pdev) +static void efa_remove_device(struct pci_dev *pdev, + enum efa_regs_reset_reason_types reset_reason) { struct efa_dev *dev = pci_get_drvdata(pdev); struct efa_com_dev *edev; edev = &dev->edev; + efa_com_dev_reset(edev, reset_reason); efa_com_admin_destroy(edev); efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); @@ -656,7 +657,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_remove_device: - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR); return err; } @@ -665,7 +666,7 @@ static void efa_remove(struct pci_dev *pdev) struct efa_dev *dev = pci_get_drvdata(pdev); efa_ib_device_remove(dev); - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_NORMAL); } static struct pci_driver efa_pci_driver = { From 7bbbd311dd503653a2cc86d9226740883051dc92 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 2 Jan 2025 20:19:51 +0200 Subject: [PATCH 24/45] fbdev: omap: use threaded IRQ for LCD DMA [ Upstream commit e4b6b665df815b4841e71b72f06446884e8aad40 ] When using touchscreen and framebuffer, Nokia 770 crashes easily with: BUG: scheduling while atomic: irq/144-ads7846/82/0x00010000 Modules linked in: usb_f_ecm g_ether usb_f_rndis u_ether libcomposite configfs omap_udc ohci_omap ohci_hcd CPU: 0 UID: 0 PID: 82 Comm: irq/144-ads7846 Not tainted 6.12.7-770 #2 Hardware name: Nokia 770 Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x54/0x5c dump_stack_lvl from __schedule_bug+0x50/0x70 __schedule_bug from __schedule+0x4d4/0x5bc __schedule from schedule+0x34/0xa0 schedule from schedule_preempt_disabled+0xc/0x10 schedule_preempt_disabled from __mutex_lock.constprop.0+0x218/0x3b4 __mutex_lock.constprop.0 from clk_prepare_lock+0x38/0xe4 clk_prepare_lock from clk_set_rate+0x18/0x154 clk_set_rate from sossi_read_data+0x4c/0x168 sossi_read_data from hwa742_read_reg+0x5c/0x8c hwa742_read_reg from send_frame_handler+0xfc/0x300 send_frame_handler from process_pending_requests+0x74/0xd0 process_pending_requests from lcd_dma_irq_handler+0x50/0x74 lcd_dma_irq_handler from __handle_irq_event_percpu+0x44/0x130 __handle_irq_event_percpu from handle_irq_event+0x28/0x68 handle_irq_event from handle_level_irq+0x9c/0x170 handle_level_irq from generic_handle_domain_irq+0x2c/0x3c generic_handle_domain_irq from omap1_handle_irq+0x40/0x8c omap1_handle_irq from generic_handle_arch_irq+0x28/0x3c generic_handle_arch_irq from call_with_stack+0x1c/0x24 call_with_stack from __irq_svc+0x94/0xa8 Exception stack(0xc5255da0 to 0xc5255de8) 5da0: 00000001 c22fc620 00000000 00000000 c08384a8 c106fc00 00000000 c240c248 5dc0: c113a600 c3f6ec30 00000001 00000000 c22fc620 c5255df0 c22fc620 c0279a94 5de0: 60000013 ffffffff __irq_svc from clk_prepare_lock+0x4c/0xe4 clk_prepare_lock from clk_get_rate+0x10/0x74 clk_get_rate from uwire_setup_transfer+0x40/0x180 uwire_setup_transfer from spi_bitbang_transfer_one+0x2c/0x9c spi_bitbang_transfer_one from spi_transfer_one_message+0x2d0/0x664 spi_transfer_one_message from __spi_pump_transfer_message+0x29c/0x498 __spi_pump_transfer_message from __spi_sync+0x1f8/0x2e8 __spi_sync from spi_sync+0x24/0x40 spi_sync from ads7846_halfd_read_state+0x5c/0x1c0 ads7846_halfd_read_state from ads7846_irq+0x58/0x348 ads7846_irq from irq_thread_fn+0x1c/0x78 irq_thread_fn from irq_thread+0x120/0x228 irq_thread from kthread+0xc8/0xe8 kthread from ret_from_fork+0x14/0x28 As a quick fix, switch to a threaded IRQ which provides a stable system. Signed-off-by: Aaro Koskinen Reviewed-by: Linus Walleij Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/omap/lcd_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/lcd_dma.c b/drivers/video/fbdev/omap/lcd_dma.c index f85817635a8c..0da23c57e475 100644 --- a/drivers/video/fbdev/omap/lcd_dma.c +++ b/drivers/video/fbdev/omap/lcd_dma.c @@ -432,8 +432,8 @@ static int __init omap_init_lcd_dma(void) spin_lock_init(&lcd_dma.lock); - r = request_irq(INT_DMA_LCD, lcd_dma_irq_handler, 0, - "LCD DMA", NULL); + r = request_threaded_irq(INT_DMA_LCD, NULL, lcd_dma_irq_handler, + IRQF_ONESHOT, "LCD DMA", NULL); if (r != 0) pr_err("unable to request IRQ for LCD DMA (error %d)\n", r); From a4dd6fc7ef55858218bce8b5c520fc46b6c59f94 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Nov 2024 11:41:42 +0100 Subject: [PATCH 25/45] media: cxd2841er: fix 64-bit division on gcc-9 [ Upstream commit 8d46603eeeb4c6abff1d2e49f2a6ae289dac765e ] It appears that do_div() once more gets confused by a complex expression that ends up not quite being constant despite __builtin_constant_p() thinking it is: ERROR: modpost: "__aeabi_uldivmod" [drivers/media/dvb-frontends/cxd2841er.ko] undefined! Use div_u64() instead, forcing the expression to be evaluated first, and making it a bit more readable. Cc: Dan Carpenter Reported-by: Naresh Kamboju Closes: https://lore.kernel.org/linux-media/CA+G9fYvvNm-aYodLaAwwTjEGtX0YxR-1R14FOA5aHKt0sSVsYg@mail.gmail.com/ Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/linux-media/CA+G9fYvvNm-aYodLaAwwTjEGtX0YxR-1R14FOA5aHKt0sSVsYg@mail.gmail.com/ Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil [hverkuil: added Closes tags] Signed-off-by: Sasha Levin --- drivers/media/dvb-frontends/cxd2841er.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c index e9d1eef40c62..798da5042136 100644 --- a/drivers/media/dvb-frontends/cxd2841er.c +++ b/drivers/media/dvb-frontends/cxd2841er.c @@ -311,12 +311,8 @@ static int cxd2841er_set_reg_bits(struct cxd2841er_priv *priv, static u32 cxd2841er_calc_iffreq_xtal(enum cxd2841er_xtal xtal, u32 ifhz) { - u64 tmp; - - tmp = (u64) ifhz * 16777216; - do_div(tmp, ((xtal == SONY_XTAL_24000) ? 48000000 : 41000000)); - - return (u32) tmp; + return div_u64(ifhz * 16777216ull, + (xtal == SONY_XTAL_24000) ? 48000000 : 41000000); } static u32 cxd2841er_calc_iffreq(u32 ifhz) From 86307e443c5844f38e1b98e2c51a4195c55576cd Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sun, 29 Dec 2024 18:50:39 +0800 Subject: [PATCH 26/45] media: vidtv: Fix a null-ptr-deref in vidtv_mux_stop_thread [ Upstream commit 1221989555db711578a327a9367f1be46500cb48 ] syzbot report a null-ptr-deref in vidtv_mux_stop_thread. [1] If dvb->mux is not initialized successfully by vidtv_mux_init() in the vidtv_start_streaming(), it will trigger null pointer dereference about mux in vidtv_mux_stop_thread(). Adjust the timing of streaming initialization and check it before stopping it. [1] KASAN: null-ptr-deref in range [0x0000000000000128-0x000000000000012f] CPU: 0 UID: 0 PID: 5842 Comm: syz-executor248 Not tainted 6.13.0-rc4-syzkaller-00012-g9b2ffa6148b1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:vidtv_mux_stop_thread+0x26/0x80 drivers/media/test-drivers/vidtv/vidtv_mux.c:471 Code: 90 90 90 90 66 0f 1f 00 55 53 48 89 fb e8 82 2e c8 f9 48 8d bb 28 01 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 02 7e 3b 0f b6 ab 28 01 00 00 31 ff 89 ee e8 RSP: 0018:ffffc90003f2faa8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff87cfb125 RDX: 0000000000000025 RSI: ffffffff87d120ce RDI: 0000000000000128 RBP: ffff888029b8d220 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000003 R12: ffff888029b8d188 R13: ffffffff8f590aa0 R14: ffffc9000581c5c8 R15: ffff888029a17710 FS: 00007f7eef5156c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7eef5e635c CR3: 0000000076ca6000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: vidtv_stop_streaming drivers/media/test-drivers/vidtv/vidtv_bridge.c:209 [inline] vidtv_stop_feed+0x151/0x250 drivers/media/test-drivers/vidtv/vidtv_bridge.c:252 dmx_section_feed_stop_filtering+0x90/0x160 drivers/media/dvb-core/dvb_demux.c:1000 dvb_dmxdev_feed_stop.isra.0+0x1ee/0x270 drivers/media/dvb-core/dmxdev.c:486 dvb_dmxdev_filter_stop+0x22a/0x3a0 drivers/media/dvb-core/dmxdev.c:559 dvb_dmxdev_filter_free drivers/media/dvb-core/dmxdev.c:840 [inline] dvb_demux_release+0x92/0x550 drivers/media/dvb-core/dmxdev.c:1246 __fput+0x3f8/0xb60 fs/file_table.c:450 task_work_run+0x14e/0x250 kernel/task_work.c:239 get_signal+0x1d3/0x2610 kernel/signal.c:2790 arch_do_signal_or_restart+0x90/0x7e0 arch/x86/kernel/signal.c:337 exit_to_user_mode_loop kernel/entry/common.c:111 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x150/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f Reported-by: syzbot+5e248227c80a3be8e96a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5e248227c80a3be8e96a Signed-off-by: Edward Adam Davis Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vidtv/vidtv_bridge.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vidtv/vidtv_bridge.c b/drivers/media/test-drivers/vidtv/vidtv_bridge.c index dff7265a42ca..c1621680ec57 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_bridge.c +++ b/drivers/media/test-drivers/vidtv/vidtv_bridge.c @@ -191,10 +191,11 @@ static int vidtv_start_streaming(struct vidtv_dvb *dvb) mux_args.mux_buf_sz = mux_buf_sz; - dvb->streaming = true; dvb->mux = vidtv_mux_init(dvb->fe[0], dev, &mux_args); if (!dvb->mux) return -ENOMEM; + + dvb->streaming = true; vidtv_mux_start_thread(dvb->mux); dev_dbg_ratelimited(dev, "Started streaming\n"); @@ -205,6 +206,11 @@ static int vidtv_stop_streaming(struct vidtv_dvb *dvb) { struct device *dev = &dvb->pdev->dev; + if (!dvb->streaming) { + dev_warn_ratelimited(dev, "No streaming. Skipping.\n"); + return 0; + } + dvb->streaming = false; vidtv_mux_stop_thread(dvb->mux); vidtv_mux_destroy(dvb->mux); From d2eb7e87329bd521ab43e3fb63ec2104b29c3e56 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 2 Jan 2025 17:43:13 +0100 Subject: [PATCH 27/45] PCI/DPC: Quirk PIO log size for Intel Raptor Lake-P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b198499c7d2508a76243b98e7cca992f6fd2b7f7 ] Apparently the Raptor Lake-P reference firmware configures the PIO log size correctly, but some vendor BIOSes, including at least ASUSTeK COMPUTER INC. Zenbook UX3402VA_UX3402VA, do not. Apply the quirk for Raptor Lake-P. This prevents kernel complaints like: DPC: RP PIO log size 0 is invalid and also enables the DPC driver to dump the RP PIO Log registers when DPC is triggered. Note that the bug report also mentions 8086:a76e, which has been already added by 627c6db20703 ("PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports"). Link: https://lore.kernel.org/r/20250102164315.7562-1-tiwai@suse.de Link: https://bugzilla.suse.com/show_bug.cgi?id=1234623 Signed-off-by: Takashi Iwai [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Signed-off-by: Krzysztof Wilczyński Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 2b3df65005ca..c16c8507d048 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6139,6 +6139,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa72f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); #endif From e86cc66b106d8b0b775b0e740100fb9baf4c4865 Mon Sep 17 00:00:00 2001 From: Rakesh Babu Saladi Date: Mon, 20 Jan 2025 15:25:24 +0530 Subject: [PATCH 28/45] PCI: switchtec: Add Microchip PCI100X device IDs [ Upstream commit a3282f84b2151d254dc4abf24d1255c6382be774 ] Add Microchip parts to the Device ID table so the driver supports PCI100x devices. Add a new macro to quirk the Microchip Switchtec PCI100x parts to allow DMA access via NTB to work when the IOMMU is turned on. PCI100x family has 6 variants; each variant is designed for different application usages, different port counts and lane counts: PCI1001 has 1 x4 upstream port and 3 x4 downstream ports PCI1002 has 1 x4 upstream port and 4 x2 downstream ports PCI1003 has 2 x4 upstream ports, 2 x2 upstream ports, and 2 x2 downstream ports PCI1004 has 4 x4 upstream ports PCI1005 has 1 x4 upstream port and 6 x2 downstream ports PCI1006 has 6 x2 upstream ports and 2 x2 downstream ports [Historical note: these parts use PCI_VENDOR_ID_EFAR (0x1055), from EFAR Microsystems, which was acquired in 1996 by Standard Microsystems Corp, which was acquired by Microchip Technology in 2012. The PCI-SIG confirms that Vendor ID 0x1055 is assigned to Microchip even though it's not visible via https://pcisig.com/membership/member-companies] Link: https://lore.kernel.org/r/20250120095524.243103-1-Saladi.Rakeshbabu@microchip.com Signed-off-by: Rakesh Babu Saladi [bhelgaas: Vendor ID history] Signed-off-by: Bjorn Helgaas Acked-By: Logan Gunthorpe Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 11 +++++++++++ drivers/pci/switch/switchtec.c | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c16c8507d048..fb115b8ba342 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5870,6 +5870,17 @@ SWITCHTEC_QUIRK(0x5552); /* PAXA 52XG5 */ SWITCHTEC_QUIRK(0x5536); /* PAXA 36XG5 */ SWITCHTEC_QUIRK(0x5528); /* PAXA 28XG5 */ +#define SWITCHTEC_PCI100X_QUIRK(vid) \ + DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_EFAR, vid, \ + PCI_CLASS_BRIDGE_OTHER, 8, quirk_switchtec_ntb_dma_alias) +SWITCHTEC_PCI100X_QUIRK(0x1001); /* PCI1001XG4 */ +SWITCHTEC_PCI100X_QUIRK(0x1002); /* PCI1002XG4 */ +SWITCHTEC_PCI100X_QUIRK(0x1003); /* PCI1003XG4 */ +SWITCHTEC_PCI100X_QUIRK(0x1004); /* PCI1004XG4 */ +SWITCHTEC_PCI100X_QUIRK(0x1005); /* PCI1005XG4 */ +SWITCHTEC_PCI100X_QUIRK(0x1006); /* PCI1006XG4 */ + + /* * The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints. * These IDs are used to forward responses to the originator on the other diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 332af6938d7f..9011518b1d13 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1739,6 +1739,26 @@ static void switchtec_pci_remove(struct pci_dev *pdev) .driver_data = gen, \ } +#define SWITCHTEC_PCI100X_DEVICE(device_id, gen) \ + { \ + .vendor = PCI_VENDOR_ID_EFAR, \ + .device = device_id, \ + .subvendor = PCI_ANY_ID, \ + .subdevice = PCI_ANY_ID, \ + .class = (PCI_CLASS_MEMORY_OTHER << 8), \ + .class_mask = 0xFFFFFFFF, \ + .driver_data = gen, \ + }, \ + { \ + .vendor = PCI_VENDOR_ID_EFAR, \ + .device = device_id, \ + .subvendor = PCI_ANY_ID, \ + .subdevice = PCI_ANY_ID, \ + .class = (PCI_CLASS_BRIDGE_OTHER << 8), \ + .class_mask = 0xFFFFFFFF, \ + .driver_data = gen, \ + } + static const struct pci_device_id switchtec_pci_tbl[] = { SWITCHTEC_PCI_DEVICE(0x8531, SWITCHTEC_GEN3), /* PFX 24xG3 */ SWITCHTEC_PCI_DEVICE(0x8532, SWITCHTEC_GEN3), /* PFX 32xG3 */ @@ -1833,6 +1853,12 @@ static const struct pci_device_id switchtec_pci_tbl[] = { SWITCHTEC_PCI_DEVICE(0x5552, SWITCHTEC_GEN5), /* PAXA 52XG5 */ SWITCHTEC_PCI_DEVICE(0x5536, SWITCHTEC_GEN5), /* PAXA 36XG5 */ SWITCHTEC_PCI_DEVICE(0x5528, SWITCHTEC_GEN5), /* PAXA 28XG5 */ + SWITCHTEC_PCI100X_DEVICE(0x1001, SWITCHTEC_GEN4), /* PCI1001 16XG4 */ + SWITCHTEC_PCI100X_DEVICE(0x1002, SWITCHTEC_GEN4), /* PCI1002 12XG4 */ + SWITCHTEC_PCI100X_DEVICE(0x1003, SWITCHTEC_GEN4), /* PCI1003 16XG4 */ + SWITCHTEC_PCI100X_DEVICE(0x1004, SWITCHTEC_GEN4), /* PCI1004 16XG4 */ + SWITCHTEC_PCI100X_DEVICE(0x1005, SWITCHTEC_GEN4), /* PCI1005 16XG4 */ + SWITCHTEC_PCI100X_DEVICE(0x1006, SWITCHTEC_GEN4), /* PCI1006 16XG4 */ {0} }; MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); From 5e7b6e44468c3242c21c2a8656d009fb3eb50a73 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Wed, 18 Dec 2024 09:42:14 +0800 Subject: [PATCH 29/45] scsi: ufs: bsg: Set bsg_queue to NULL after removal [ Upstream commit 1e95c798d8a7f70965f0f88d4657b682ff0ec75f ] Currently, this does not cause any issues, but I believe it is necessary to set bsg_queue to NULL after removing it to prevent potential use-after-free (UAF) access. Signed-off-by: Guixin Liu Link: https://lore.kernel.org/r/20241218014214.64533-3-kanie@linux.alibaba.com Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufs_bsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/core/ufs_bsg.c b/drivers/ufs/core/ufs_bsg.c index 87d89136cab9..ead55e063d2b 100644 --- a/drivers/ufs/core/ufs_bsg.c +++ b/drivers/ufs/core/ufs_bsg.c @@ -181,6 +181,7 @@ void ufs_bsg_remove(struct ufs_hba *hba) return; bsg_remove_queue(hba->bsg_queue); + hba->bsg_queue = NULL; device_del(bsg_dev); put_device(bsg_dev); From bb38fcfa38a66b90a563487c8ab0032b4994cd57 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:30 +0100 Subject: [PATCH 30/45] rtla/timerlat_hist: Abort event processing on second signal [ Upstream commit d6899e560366e10141189697502bc5521940c588 ] If either SIGINT is received twice, or after a SIGALRM (that is, after timerlat was supposed to stop), abort processing events currently left in the tracefs buffer and exit immediately. This allows the user to exit rtla without waiting for processing all events, should that take longer than wanted, at the cost of not processing all samples. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-5-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- tools/tracing/rtla/src/timerlat_hist.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 83cc7ef3d36b..b7a7dcd68b57 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -786,6 +786,14 @@ static int stop_tracing; static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(hist_inst->inst); + return; + } stop_tracing = 1; if (hist_inst) trace_instance_stop(hist_inst); From 8b1d868376e8818ca9da7427be5219b17b68c03f Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:31 +0100 Subject: [PATCH 31/45] rtla/timerlat_top: Abort event processing on second signal [ Upstream commit 80967b354a76b360943af384c10d807d98bea5c4 ] If either SIGINT is received twice, or after a SIGALRM (that is, after timerlat was supposed to stop), abort processing events currently left in the tracefs buffer and exit immediately. This allows the user to exit rtla without waiting for processing all events, should that take longer than wanted, at the cost of not processing all samples. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-6-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- tools/tracing/rtla/src/timerlat_top.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 08e940ecdc96..46c3405a356f 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -578,6 +578,14 @@ static int stop_tracing; static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(top_inst->inst); + return; + } stop_tracing = 1; if (top_inst) trace_instance_stop(top_inst); From f601bb7d537ebc6fa995c4279a5f2fd31f51df0f Mon Sep 17 00:00:00 2001 From: Ramesh Thomas Date: Tue, 10 Dec 2024 05:19:37 -0800 Subject: [PATCH 32/45] vfio/pci: Enable iowrite64 and ioread64 for vfio pci [ Upstream commit 2b938e3db335e3670475e31a722c2bee34748c5a ] Definitions of ioread64 and iowrite64 macros in asm/io.h called by vfio pci implementations are enclosed inside check for CONFIG_GENERIC_IOMAP. They don't get defined if CONFIG_GENERIC_IOMAP is defined. Include linux/io-64-nonatomic-lo-hi.h to define iowrite64 and ioread64 macros when they are not defined. io-64-nonatomic-lo-hi.h maps the macros to generic implementation in lib/iomap.c. The generic implementation does 64 bit rw if readq/writeq is defined for the architecture, otherwise it would do 32 bit back to back rw. Note that there are two versions of the generic implementation that differs in the order the 32 bit words are written if 64 bit support is not present. This is not the little/big endian ordering, which is handled separately. This patch uses the lo followed by hi word ordering which is consistent with current back to back implementation in the vfio/pci code. Signed-off-by: Ramesh Thomas Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20241210131938.303500-2-ramesh.thomas@intel.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/vfio_pci_rdwr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e27de61ac9fe..8191c8fcfb25 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "vfio_pci_priv.h" From 0db813696df1e97bdfe6763c040127e11be0af60 Mon Sep 17 00:00:00 2001 From: Maksym Planeta Date: Wed, 4 Dec 2024 11:35:15 +0100 Subject: [PATCH 33/45] Grab mm lock before grabbing pt lock [ Upstream commit 6d002348789bc16e9203e9818b7a3688787e3b29 ] Function xen_pin_page calls xen_pte_lock, which in turn grab page table lock (ptlock). When locking, xen_pte_lock expect mm->page_table_lock to be held before grabbing ptlock, but this does not happen when pinning is caused by xen_mm_pin_all. This commit addresses lockdep warning below, which shows up when suspending a Xen VM. [ 3680.658422] Freezing user space processes [ 3680.660156] Freezing user space processes completed (elapsed 0.001 seconds) [ 3680.660182] OOM killer disabled. [ 3680.660192] Freezing remaining freezable tasks [ 3680.661485] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 3680.685254] [ 3680.685265] ================================== [ 3680.685269] WARNING: Nested lock was not taken [ 3680.685274] 6.12.0+ #16 Tainted: G W [ 3680.685279] ---------------------------------- [ 3680.685283] migration/0/19 is trying to lock: [ 3680.685288] ffff88800bac33c0 (ptlock_ptr(ptdesc)#2){+.+.}-{3:3}, at: xen_pin_page+0x175/0x1d0 [ 3680.685303] [ 3680.685303] but this task is not holding: [ 3680.685308] init_mm.page_table_lock [ 3680.685311] [ 3680.685311] stack backtrace: [ 3680.685316] CPU: 0 UID: 0 PID: 19 Comm: migration/0 Tainted: G W 6.12.0+ #16 [ 3680.685324] Tainted: [W]=WARN [ 3680.685328] Stopper: multi_cpu_stop+0x0/0x120 <- __stop_cpus.constprop.0+0x8c/0xd0 [ 3680.685339] Call Trace: [ 3680.685344] [ 3680.685347] dump_stack_lvl+0x77/0xb0 [ 3680.685356] __lock_acquire+0x917/0x2310 [ 3680.685364] lock_acquire+0xce/0x2c0 [ 3680.685369] ? xen_pin_page+0x175/0x1d0 [ 3680.685373] _raw_spin_lock_nest_lock+0x2f/0x70 [ 3680.685381] ? xen_pin_page+0x175/0x1d0 [ 3680.685386] xen_pin_page+0x175/0x1d0 [ 3680.685390] ? __pfx_xen_pin_page+0x10/0x10 [ 3680.685394] __xen_pgd_walk+0x233/0x2c0 [ 3680.685401] ? stop_one_cpu+0x91/0x100 [ 3680.685405] __xen_pgd_pin+0x5d/0x250 [ 3680.685410] xen_mm_pin_all+0x70/0xa0 [ 3680.685415] xen_pv_pre_suspend+0xf/0x280 [ 3680.685420] xen_suspend+0x57/0x1a0 [ 3680.685428] multi_cpu_stop+0x6b/0x120 [ 3680.685432] ? update_cpumasks_hier+0x7c/0xa60 [ 3680.685439] ? __pfx_multi_cpu_stop+0x10/0x10 [ 3680.685443] cpu_stopper_thread+0x8c/0x140 [ 3680.685448] ? smpboot_thread_fn+0x20/0x1f0 [ 3680.685454] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 3680.685458] smpboot_thread_fn+0xed/0x1f0 [ 3680.685462] kthread+0xde/0x110 [ 3680.685467] ? __pfx_kthread+0x10/0x10 [ 3680.685471] ret_from_fork+0x2f/0x50 [ 3680.685478] ? __pfx_kthread+0x10/0x10 [ 3680.685482] ret_from_fork_asm+0x1a/0x30 [ 3680.685489] [ 3680.685491] [ 3680.685491] other info that might help us debug this: [ 3680.685497] 1 lock held by migration/0/19: [ 3680.685500] #0: ffffffff8284df38 (pgd_lock){+.+.}-{3:3}, at: xen_mm_pin_all+0x14/0xa0 [ 3680.685512] [ 3680.685512] stack backtrace: [ 3680.685518] CPU: 0 UID: 0 PID: 19 Comm: migration/0 Tainted: G W 6.12.0+ #16 [ 3680.685528] Tainted: [W]=WARN [ 3680.685531] Stopper: multi_cpu_stop+0x0/0x120 <- __stop_cpus.constprop.0+0x8c/0xd0 [ 3680.685538] Call Trace: [ 3680.685541] [ 3680.685544] dump_stack_lvl+0x77/0xb0 [ 3680.685549] __lock_acquire+0x93c/0x2310 [ 3680.685554] lock_acquire+0xce/0x2c0 [ 3680.685558] ? xen_pin_page+0x175/0x1d0 [ 3680.685562] _raw_spin_lock_nest_lock+0x2f/0x70 [ 3680.685568] ? xen_pin_page+0x175/0x1d0 [ 3680.685572] xen_pin_page+0x175/0x1d0 [ 3680.685578] ? __pfx_xen_pin_page+0x10/0x10 [ 3680.685582] __xen_pgd_walk+0x233/0x2c0 [ 3680.685588] ? stop_one_cpu+0x91/0x100 [ 3680.685592] __xen_pgd_pin+0x5d/0x250 [ 3680.685596] xen_mm_pin_all+0x70/0xa0 [ 3680.685600] xen_pv_pre_suspend+0xf/0x280 [ 3680.685607] xen_suspend+0x57/0x1a0 [ 3680.685611] multi_cpu_stop+0x6b/0x120 [ 3680.685615] ? update_cpumasks_hier+0x7c/0xa60 [ 3680.685620] ? __pfx_multi_cpu_stop+0x10/0x10 [ 3680.685625] cpu_stopper_thread+0x8c/0x140 [ 3680.685629] ? smpboot_thread_fn+0x20/0x1f0 [ 3680.685634] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 3680.685638] smpboot_thread_fn+0xed/0x1f0 [ 3680.685642] kthread+0xde/0x110 [ 3680.685645] ? __pfx_kthread+0x10/0x10 [ 3680.685649] ret_from_fork+0x2f/0x50 [ 3680.685654] ? __pfx_kthread+0x10/0x10 [ 3680.685657] ret_from_fork_asm+0x1a/0x30 [ 3680.685662] [ 3680.685267] xen:grant_table: Grant tables using version 1 layout [ 3680.685921] OOM killer enabled. [ 3680.685934] Restarting tasks ... done. Signed-off-by: Maksym Planeta Reviewed-by: Juergen Gross Message-ID: <20241204103516.3309112-1-maksym@exostellar.io> Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/mmu_pv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 6abce0816ca3..7dabdb999565 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -811,6 +811,7 @@ void xen_mm_pin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -821,6 +822,7 @@ void xen_mm_pin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, @@ -920,6 +922,7 @@ void xen_mm_unpin_all(void) { struct page *page; + spin_lock(&init_mm.page_table_lock); spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { @@ -931,6 +934,7 @@ void xen_mm_unpin_all(void) } spin_unlock(&pgd_lock); + spin_unlock(&init_mm.page_table_lock); } static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) From 04bac40ad5d6c339650fc72ea9b074a899ffd303 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Wed, 22 Jan 2025 13:33:09 +0900 Subject: [PATCH 34/45] selftests: gpio: gpio-sim: Fix missing chip disablements [ Upstream commit f8524ac33cd452aef5384504b3264db6039a455e ] Since upstream commit 8bd76b3d3f3a ("gpio: sim: lock up configfs that an instantiated device depends on"), rmdir for an active virtual devices been prohibited. Update gpio-sim selftest to align with the change. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202501221006.a1ca5dfa-lkp@intel.com Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250122043309.304621-1-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- tools/testing/selftests/gpio/gpio-sim.sh | 31 +++++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index bf67b23ed29a..46101a800beb 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -46,12 +46,6 @@ remove_chip() { rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" } -configfs_cleanup() { - for CHIP in `ls $CONFIGFS_DIR/`; do - remove_chip $CHIP - done -} - create_chip() { local CHIP=$1 @@ -105,6 +99,13 @@ disable_chip() { echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" } +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + disable_chip $CHIP + remove_chip $CHIP + done +} + configfs_chip_name() { local CHIP=$1 local BANK=$2 @@ -181,6 +182,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +disable_chip chip remove_chip chip echo "1.2. chip_name returns 'none' if the chip is still pending" @@ -195,6 +197,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +disable_chip chip remove_chip chip echo "2. Creating and configuring simulated chips" @@ -204,6 +207,7 @@ create_chip chip create_bank chip bank enable_chip chip test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +disable_chip chip remove_chip chip echo "2.2. Number of lines can be specified" @@ -212,6 +216,7 @@ create_bank chip bank set_num_lines chip bank 16 enable_chip chip test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +disable_chip chip remove_chip chip echo "2.3. Label can be set" @@ -220,6 +225,7 @@ create_bank chip bank set_label chip bank foobar enable_chip chip test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +disable_chip chip remove_chip chip echo "2.4. Label can be left empty" @@ -227,6 +233,7 @@ create_chip chip create_bank chip bank enable_chip chip test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +disable_chip chip remove_chip chip echo "2.5. Line names can be configured" @@ -238,6 +245,7 @@ set_line_name chip bank 2 bar enable_chip chip test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.6. Line config can remain unused if offset is greater than number of lines" @@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar enable_chip chip test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.7. Line configfs directory names are sanitized" @@ -267,6 +276,7 @@ for CHIP in $CHIPS; do enable_chip $CHIP done for CHIP in $CHIPS; do + disable_chip $CHIP remove_chip $CHIP done @@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ fail "Setting label of a live chip should fail" echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ fail "Setting number of lines of a live chip should fail" +disable_chip chip remove_chip chip echo "2.10. Can't create line items when chip is live" @@ -285,6 +296,7 @@ create_chip chip create_bank chip bank enable_chip chip mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +disable_chip chip remove_chip chip echo "2.11. Probe errors are propagated to user-space" @@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog enable_chip chip $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ fail "Setting the value of a hogged line shouldn't succeed" +disable_chip chip remove_chip chip echo "3. Controlling simulated chips" @@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly" sysfs_set_pull chip bank 0 pull-down $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 test "$?" = "0" || fail "pull set incorrectly" +disable_chip chip remove_chip chip echo "3.2. Pull can be read from sysfs" @@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" sysfs_set_pull chip bank 0 pull-up test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +disable_chip chip remove_chip chip echo "3.3. Incorrect input in sysfs is rejected" @@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +disable_chip chip remove_chip chip echo "3.4. Can't write to value" @@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +disable_chip chip remove_chip chip echo "4. Simulated GPIO chips are functional" @@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & sleep 0.1 # FIXME Any better way? test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" kill $! +disable_chip chip remove_chip chip echo "4.2. Bias settings work correctly" @@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +disable_chip chip remove_chip chip echo "GPIO $MODULE test PASS" From b47002ed65ade940839b7f439ff4a194e7d5ec28 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 4 Dec 2024 21:03:16 -0500 Subject: [PATCH 35/45] x86/mm/tlb: Only trim the mm_cpumask once a second [ Upstream commit 6db2526c1d694c91c6e05e2f186c085e9460f202 ] Setting and clearing CPU bits in the mm_cpumask is only ever done by the CPU itself, from the context switch code or the TLB flush code. Synchronization is handled by switch_mm_irqs_off() blocking interrupts. Sending TLB flush IPIs to CPUs that are in the mm_cpumask, but no longer running the program causes a regression in the will-it-scale tlbflush2 test. This test is contrived, but a large regression here might cause a small regression in some real world workload. Instead of always sending IPIs to CPUs that are in the mm_cpumask, but no longer running the program, send these IPIs only once a second. The rest of the time we can skip over CPUs where the loaded_mm is different from the target mm. Reported-by: kernel test roboto Signed-off-by: Rik van Riel Signed-off-by: Ingo Molnar Cc: Dave Hansen Cc: Andy Lutomirski Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Linus Torvalds Link: https://lore.kernel.org/r/20241204210316.612ee573@fangorn Closes: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/ Signed-off-by: Sasha Levin --- arch/x86/include/asm/mmu.h | 2 ++ arch/x86/include/asm/mmu_context.h | 1 + arch/x86/include/asm/tlbflush.h | 1 + arch/x86/mm/tlb.c | 35 +++++++++++++++++++++++++++--- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5d7494631ea9..c07c018a1c13 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -33,6 +33,8 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index b8d40ddeab00..6f5c7584fe1e 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -106,6 +106,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index cda3118f3b27..d1eb6bbfd39e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -208,6 +208,7 @@ struct flush_tlb_info { unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; + u8 trim_cpumask; }; void flush_tlb_local(void); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c1e31e9a85d7..b07e2167fceb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -878,9 +878,36 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + struct flush_tlb_info *info = data; + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) +{ + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -914,7 +941,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -965,6 +992,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -1007,6 +1035,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. */ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); From 1da2697307dad281dd690a19441b5ca4af92d786 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 8 Jan 2025 14:21:08 -0500 Subject: [PATCH 36/45] orangefs: fix a oob in orangefs_debug_write [ Upstream commit f7c848431632598ff9bce57a659db6af60d75b39 ] I got a syzbot report: slab-out-of-bounds Read in orangefs_debug_write... several people suggested fixes, I tested Al Viro's suggestion and made this patch. Signed-off-by: Mike Marshall Reported-by: syzbot+fc519d7875f2d9186c1f@syzkaller.appspotmail.com Signed-off-by: Sasha Levin --- fs/orangefs/orangefs-debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1b508f543384..fa41db088488 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -393,9 +393,9 @@ static ssize_t orangefs_debug_write(struct file *file, * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... */ - if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) { + if (count > ORANGEFS_MAX_DEBUG_STRING_LEN) { silly = count; - count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; + count = ORANGEFS_MAX_DEBUG_STRING_LEN; } buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); From 40336ad6ec342aa6637a9fb301e2dd644dbc14bc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 23 Jan 2025 14:25:07 +0100 Subject: [PATCH 37/45] ASoC: Intel: bytcr_rt5640: Add DMI quirk for Vexia Edu Atla 10 tablet 5V [ Upstream commit 6917192378c1ce17ba31df51c4e0d8b1c97a453b ] The Vexia EDU ATLA 10 tablet comes in 2 different versions with significantly different mainboards. The only outward difference is that the charging barrel on one is marked 5V and the other is marked 9V. The 5V version mostly works with the BYTCR defaults, except that it is missing a CHAN package in its ACPI tables and the default of using SSP0-AIF2 is wrong, instead SSP0-AIF1 must be used. That and its jack detect signal is not inverted as it usually is. Add a DMI quirk for the 5V version to fix sound not working. Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20250123132507.18434-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_rt5640.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index e7d20011e288..67b343632a10 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1122,7 +1122,22 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, - { /* Vexia Edu Atla 10 tablet */ + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), From 7eb5dd201695645af071592a50026eb780081a72 Mon Sep 17 00:00:00 2001 From: Andy Strohman Date: Thu, 9 Jan 2025 02:27:56 +0000 Subject: [PATCH 38/45] batman-adv: fix panic during interface removal commit ccb7276a6d26d6f8416e315b43b45e15ee7f29e2 upstream. Reference counting is used to ensure that batadv_hardif_neigh_node and batadv_hard_iface are not freed before/during batadv_v_elp_throughput_metric_update work is finished. But there isn't a guarantee that the hard if will remain associated with a soft interface up until the work is finished. This fixes a crash triggered by reboot that looks like this: Call trace: batadv_v_mesh_free+0xd0/0x4dc [batman_adv] batadv_v_elp_throughput_metric_update+0x1c/0xa4 process_one_work+0x178/0x398 worker_thread+0x2e8/0x4d0 kthread+0xd8/0xdc ret_from_fork+0x10/0x20 (the batadv_v_mesh_free call is misleading, and does not actually happen) I was able to make the issue happen more reliably by changing hardif_neigh->bat_v.metric_work work to be delayed work. This allowed me to track down and confirm the fix. Cc: stable@vger.kernel.org Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput") Signed-off-by: Andy Strohman [sven@narfation.org: prevent entering batadv_v_elp_get_throughput without soft_iface] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_elp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 98a624f32b94..05092f6e7d2f 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -67,12 +67,19 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; + struct net_device *soft_iface = hard_iface->soft_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; u32 throughput; int ret; + /* don't query throughput when no longer associated with any + * batman-adv interface + */ + if (!soft_iface) + return BATADV_THROUGHPUT_DEFAULT_VALUE; + /* if the user specified a customised value for this interface, then * return it directly */ @@ -142,7 +149,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(hard_iface->soft_iface, + batadv_info(soft_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, From 792a14c90dab338a10a3cf57c367644b485df15d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 20 Jan 2025 20:35:28 +0100 Subject: [PATCH 39/45] batman-adv: Ignore neighbor throughput metrics in error case commit e7e34ffc976aaae4f465b7898303241b81ceefc3 upstream. If a temporary error happened in the evaluation of the neighbor throughput information, then the invalid throughput result should not be stored in the throughtput EWMA. Cc: stable@vger.kernel.org Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v_elp.c | 50 ++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 05092f6e7d2f..4acc4d295506 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -60,11 +60,13 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_elp_get_throughput() - get the throughput towards a neighbour * @neigh: the neighbour for which the throughput has to be obtained + * @pthroughput: calculated throughput towards the given neighbour in multiples + * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). * - * Return: The throughput towards the given neighbour in multiples of 100kpbs - * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). + * Return: true when value behind @pthroughput was set */ -static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) +static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, + u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; struct net_device *soft_iface = hard_iface->soft_iface; @@ -78,14 +80,16 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * batman-adv interface */ if (!soft_iface) - return BATADV_THROUGHPUT_DEFAULT_VALUE; + return false; /* if the user specified a customised value for this interface, then * return it directly */ throughput = atomic_read(&hard_iface->bat_v.throughput_override); - if (throughput != 0) - return throughput; + if (throughput != 0) { + *pthroughput = throughput; + return true; + } /* if this is a wireless device, then ask its throughput through * cfg80211 API @@ -112,19 +116,24 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * possible to delete this neighbor. For now set * the throughput metric to 0. */ - return 0; + *pthroughput = 0; + return true; } if (ret) goto default_throughput; - if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) - return sinfo.expected_throughput / 100; + if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) { + *pthroughput = sinfo.expected_throughput / 100; + return true; + } /* try to estimate the expected throughput based on reported tx * rates */ - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) - return cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { + *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + return true; + } goto default_throughput; } @@ -143,8 +152,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && throughput != SPEED_UNKNOWN) - return throughput * 10; + if (throughput && throughput != SPEED_UNKNOWN) { + *pthroughput = throughput * 10; + return true; + } } default_throughput: @@ -158,7 +169,8 @@ default_throughput: } /* if none of the above cases apply, return the base_throughput */ - return BATADV_THROUGHPUT_DEFAULT_VALUE; + *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE; + return true; } /** @@ -170,15 +182,21 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work) { struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; struct batadv_hardif_neigh_node *neigh; + u32 throughput; + bool valid; neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, metric_work); neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, bat_v); - ewma_throughput_add(&neigh->bat_v.throughput, - batadv_v_elp_get_throughput(neigh)); + valid = batadv_v_elp_get_throughput(neigh, &throughput); + if (!valid) + goto put_neigh; + ewma_throughput_add(&neigh->bat_v.throughput, throughput); + +put_neigh: /* decrement refcounter to balance increment performed before scheduling * this task */ From 781a06fd265a8151f7601122d9c2e985663828ff Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 20 Jan 2025 00:06:11 +0100 Subject: [PATCH 40/45] batman-adv: Drop unmanaged ELP metric worker commit 8c8ecc98f5c65947b0070a24bac11e12e47cc65d upstream. The ELP worker needs to calculate new metric values for all neighbors "reachable" over an interface. Some of the used metric sources require locks which might need to sleep. This sleep is incompatible with the RCU list iterator used for the recorded neighbors. The initial approach to work around of this problem was to queue another work item per neighbor and then run this in a new context. Even when this solved the RCU vs might_sleep() conflict, it has a major problems: Nothing was stopping the work item in case it is not needed anymore - for example because one of the related interfaces was removed or the batman-adv module was unloaded - resulting in potential invalid memory accesses. Directly canceling the metric worker also has various problems: * cancel_work_sync for a to-be-deactivated interface is called with rtnl_lock held. But the code in the ELP metric worker also tries to use rtnl_lock() - which will never return in this case. This also means that cancel_work_sync would never return because it is waiting for the worker to finish. * iterating over the neighbor list for the to-be-deactivated interface is currently done using the RCU specific methods. Which means that it is possible to miss items when iterating over it without the associated spinlock - a behaviour which is acceptable for a periodic metric check but not for a cleanup routine (which must "stop" all still running workers) The better approch is to get rid of the per interface neighbor metric worker and handle everything in the interface worker. The original problems are solved by: * creating a list of neighbors which require new metric information inside the RCU protected context, gathering the metric according to the new list outside the RCU protected context * only use rcu_trylock inside metric gathering code to avoid a deadlock when the cancel_delayed_work_sync is called in the interface removal code (which is called with the rtnl_lock held) Cc: stable@vger.kernel.org Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/bat_v.c | 2 -- net/batman-adv/bat_v_elp.c | 71 ++++++++++++++++++++++++++------------ net/batman-adv/bat_v_elp.h | 2 -- net/batman-adv/types.h | 3 -- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 54e41fc709c3..651e01b86141 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -113,8 +113,6 @@ static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); - INIT_WORK(&hardif_neigh->bat_v.metric_work, - batadv_v_elp_throughput_metric_update); } /** diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 4acc4d295506..6dbb4266b558 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +44,18 @@ #include "routing.h" #include "send.h" +/** + * struct batadv_v_metric_queue_entry - list of hardif neighbors which require + * and metric update + */ +struct batadv_v_metric_queue_entry { + /** @hardif_neigh: hardif neighbor scheduled for metric update */ + struct batadv_hardif_neigh_node *hardif_neigh; + + /** @list: list node for metric_queue */ + struct list_head list; +}; + /** * batadv_v_elp_start_timer() - restart timer for ELP periodic work * @hard_iface: the interface for which the timer has to be reset @@ -138,10 +152,17 @@ static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, goto default_throughput; } + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + /* if not a wifi interface, check if this device provides data via * ethtool (e.g. an Ethernet adapter) */ - rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); if (ret == 0) { @@ -176,31 +197,19 @@ default_throughput: /** * batadv_v_elp_throughput_metric_update() - worker updating the throughput * metric of a single hop neighbour - * @work: the work queue item + * @neigh: the neighbour to probe */ -void batadv_v_elp_throughput_metric_update(struct work_struct *work) +static void +batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh) { - struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; - struct batadv_hardif_neigh_node *neigh; u32 throughput; bool valid; - neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, - metric_work); - neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, - bat_v); - valid = batadv_v_elp_get_throughput(neigh, &throughput); if (!valid) - goto put_neigh; + return; ewma_throughput_add(&neigh->bat_v.throughput, throughput); - -put_neigh: - /* decrement refcounter to balance increment performed before scheduling - * this task - */ - batadv_hardif_neigh_put(neigh); } /** @@ -274,14 +283,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) */ static void batadv_v_elp_periodic_work(struct work_struct *work) { + struct batadv_v_metric_queue_entry *metric_entry; + struct batadv_v_metric_queue_entry *metric_safe; struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_hard_iface *hard_iface; struct batadv_hard_iface_bat_v *bat_v; struct batadv_elp_packet *elp_packet; + struct list_head metric_queue; struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; - bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -317,6 +328,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) atomic_inc(&hard_iface->bat_v.elp_seqno); + INIT_LIST_HEAD(&metric_queue); + /* The throughput metric is updated on each sent packet. This way, if a * node is dead and no longer sends packets, batman-adv is still able to * react timely to its death. @@ -341,16 +354,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) /* Reading the estimated throughput from cfg80211 is a task that * may sleep and that is not allowed in an rcu protected - * context. Therefore schedule a task for that. + * context. Therefore add it to metric_queue and process it + * outside rcu protected context. */ - ret = queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); - - if (!ret) + metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC); + if (!metric_entry) { batadv_hardif_neigh_put(hardif_neigh); + continue; + } + + metric_entry->hardif_neigh = hardif_neigh; + list_add(&metric_entry->list, &metric_queue); } rcu_read_unlock(); + list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) { + batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh); + + batadv_hardif_neigh_put(metric_entry->hardif_neigh); + list_del(&metric_entry->list); + kfree(metric_entry); + } + restart_timer: batadv_v_elp_start_timer(hard_iface); out: diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 9e2740195fa2..c9cb0a307100 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -10,7 +10,6 @@ #include "main.h" #include -#include int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); @@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); -void batadv_v_elp_throughput_metric_update(struct work_struct *work); #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 76791815b26b..6f30afaabfdf 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v { * neighbor */ unsigned long last_unicast_tx; - - /** @metric_work: work queue callback item for metric update */ - struct work_struct metric_work; }; /** From 3484ea33157bc7334f57e64826ec5a4bf992151a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Feb 2025 14:44:14 +0800 Subject: [PATCH 41/45] drm/amdgpu: avoid buffer overflow attach in smu_sys_set_pp_table() commit 1abb2648698bf10783d2236a6b4a7ca5e8021699 upstream. It malicious user provides a small pptable through sysfs and then a bigger pptable, it may cause buffer overflow attack in function smu_sys_set_pp_table(). Reviewed-by: Lijo Lazar Signed-off-by: Jiang Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1a02930d93df..537285f255ab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -516,7 +516,8 @@ static int smu_sys_set_pp_table(void *handle, return -EIO; } - if (!smu_table->hardcode_pptable) { + if (!smu_table->hardcode_pptable || smu_table->power_play_table_size < size) { + kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = kzalloc(size, GFP_KERNEL); if (!smu_table->hardcode_pptable) return -ENOMEM; From 5393cf22312418262679eaadb130d608c75fe690 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 17 Jan 2025 16:34:51 -0800 Subject: [PATCH 42/45] KVM: x86: Reject Hyper-V's SEND_IPI hypercalls if local APIC isn't in-kernel commit a8de7f100bb5989d9c3627d3a223ee1c863f3b69 upstream. Advertise support for Hyper-V's SEND_IPI and SEND_IPI_EX hypercalls if and only if the local API is emulated/virtualized by KVM, and explicitly reject said hypercalls if the local APIC is emulated in userspace, i.e. don't rely on userspace to opt-in to KVM_CAP_HYPERV_ENFORCE_CPUID. Rejecting SEND_IPI and SEND_IPI_EX fixes a NULL-pointer dereference if Hyper-V enlightenments are exposed to the guest without an in-kernel local APIC: dump_stack+0xbe/0xfd __kasan_report.cold+0x34/0x84 kasan_report+0x3a/0x50 __apic_accept_irq+0x3a/0x5c0 kvm_hv_send_ipi.isra.0+0x34e/0x820 kvm_hv_hypercall+0x8d9/0x9d0 kvm_emulate_hypercall+0x506/0x7e0 __vmx_handle_exit+0x283/0xb60 vmx_handle_exit+0x1d/0xd0 vcpu_enter_guest+0x16b0/0x24c0 vcpu_run+0xc0/0x550 kvm_arch_vcpu_ioctl_run+0x170/0x6d0 kvm_vcpu_ioctl+0x413/0xb20 __se_sys_ioctl+0x111/0x160 do_syscal1_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x67/0xd1 Note, checking the sending vCPU is sufficient, as the per-VM irqchip_mode can't be modified after vCPUs are created, i.e. if one vCPU has an in-kernel local APIC, then all vCPUs have an in-kernel local APIC. Reported-by: Dongjie Zou Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls") Fixes: 2bc39970e932 ("x86/kvm/hyper-v: Introduce KVM_GET_SUPPORTED_HV_CPUID") Cc: stable@vger.kernel.org Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20250118003454.2619573-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 04cca46fed1e..28555bbd52e8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1915,6 +1915,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) u32 vector; bool all_cpus; + if (!lapic_in_kernel(vcpu)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, @@ -2518,7 +2521,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; - ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + if (!vcpu || lapic_in_kernel(vcpu)) + ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; From 0105b41790675457c79ea4f5721905526709daf8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Jan 2025 17:08:25 -0800 Subject: [PATCH 43/45] KVM: nSVM: Enter guest mode before initializing nested NPT MMU commit 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f upstream. When preparing vmcb02 for nested VMRUN (or state restore), "enter" guest mode prior to initializing the MMU for nested NPT so that guest_mode is set in the MMU's role. KVM's model is that all L2 MMUs are tagged with guest_mode, as the behavior of hypervisor MMUs tends to be significantly different than kernel MMUs. Practically speaking, the bug is relatively benign, as KVM only directly queries role.guest_mode in kvm_mmu_free_guest_mode_roots() and kvm_mmu_page_ad_need_write_protect(), which SVM doesn't use, and in paths that are optimizations (mmu_page_zap_pte() and shadow_mmu_try_split_huge_pages()). And while the role is incorprated into shadow page usage, because nested NPT requires KVM to be using NPT for L1, reusing shadow pages across L1 and L2 is impossible as L1 MMUs will always have direct=1, while L2 MMUs will have direct=0. Hoist the TLB processing and setting of HF_GUEST_MASK to the beginning of the flow instead of forcing guest_mode in the MMU, as nothing in nested_vmcb02_prepare_control() between the old and new locations touches TLB flush requests or HF_GUEST_MASK, i.e. there's no reason to present inconsistent vCPU state to the MMU. Fixes: 69cb877487de ("KVM: nSVM: move MMU setup to nested_prepare_vmcb_control") Cc: stable@vger.kernel.org Reported-by: Yosry Ahmed Reviewed-by: Yosry Ahmed Link: https://lore.kernel.org/r/20250130010825.220346-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/svm/nested.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d392022dcb89..2fa130c4c173 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5150,7 +5150,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, union kvm_mmu_page_role root_role; /* NPT requires CR0.PG=1. */ - WARN_ON_ONCE(cpu_role.base.direct); + WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode); root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 974f30d38c79..b889dc6e20a7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -619,6 +619,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, u32 pause_count12; u32 pause_thresh12; + nested_svm_transition_tlb_flush(vcpu); + + /* Enter Guest-Mode */ + enter_guest_mode(vcpu); + /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. @@ -717,11 +722,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, } } - nested_svm_transition_tlb_flush(vcpu); - - /* Enter Guest-Mode */ - enter_guest_mode(vcpu); - /* * Merge guest and host intercepts - must be called with vcpu in * guest-mode to take effect. From d00438789b496e938aabbaa9efea927b32a9940e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 30 Jan 2025 17:07:21 -0800 Subject: [PATCH 44/45] perf/x86/intel: Ensure LBRs are disabled when a CPU is starting commit c631a2de7ae48d50434bdc205d901423f8577c65 upstream. Explicitly clear DEBUGCTL.LBR when a CPU is starting, prior to purging the LBR MSRs themselves, as at least one system has been found to transfer control to the kernel with LBRs enabled (it's unclear whether it's a BIOS flaw or a CPU goof). Because the kernel preserves the original DEBUGCTL, even when toggling LBRs, leaving DEBUGCTL.LBR as is results in running with LBRs enabled at all times. Closes: https://lore.kernel.org/all/c9d8269bff69f6359731d758e3b1135dedd7cc61.camel@redhat.com Reported-by: Maxim Levitsky Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Maxim Levitsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250131010721.470503-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 5 ++++- arch/x86/include/asm/msr-index.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3bc31cd20c81..117f86283183 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4582,8 +4582,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. */ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 681e8401b8a3..5ffcfcc41282 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -357,7 +357,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) From a298df1bbe3e5fb354576c84f0f2cb8aeb2429f3 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Sat, 1 Feb 2025 22:09:02 +0530 Subject: [PATCH 45/45] usb: dwc3: Fix timeout issue during controller enter/exit from halt state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 upstream. There is a frequent timeout during controller enter/exit from halt state after toggling the run_stop bit by SW. This timeout occurs when performing frequent role switches between host and device, causing device enumeration issues due to the timeout. This issue was not present when USB2 suspend PHY was disabled by passing the SNPS quirks (snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS. However, there is a requirement to enable USB2 suspend PHY by setting of GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits when controller starts in gadget or host mode results in the timeout issue. This commit addresses this timeout issue by ensuring that the bits GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting the dwc3_gadget_run_stop sequence and restoring them after the dwc3_gadget_run_stop sequence is completed. Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 489493a0adee..5d9f25715a60 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2485,10 +2485,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) { u32 reg; u32 timeout = 2000; + u32 saved_config = 0; if (pm_runtime_suspended(dwc->dev)) return 0; + /* + * When operating in USB 2.0 speeds (HS/FS), ensure that + * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting + * or stopping the controller. This resolves timeout issues that occur + * during frequent role switches between host and device modes. + * + * Save and clear these settings, then restore them after completing the + * controller start or stop sequence. + * + * This solution was discovered through experimentation as it is not + * mentioned in the dwc3 programming guide. It has been tested on an + * Exynos platforms. + */ + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + } + + if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) { + saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM; + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + } + + if (saved_config) + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + reg = dwc3_readl(dwc->regs, DWC3_DCTL); if (is_on) { if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) { @@ -2516,6 +2544,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) reg &= DWC3_DSTS_DEVCTRLHLT; } while (--timeout && !(!is_on ^ !reg)); + if (saved_config) { + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + reg |= saved_config; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + } + if (!timeout) return -ETIMEDOUT;