From 9dc1a3404cd0229bdd161b9f2cdac6fbc43b4eab Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 16 Jan 2025 18:29:00 +0300 Subject: [PATCH 01/22] net/ncsi: wait for the last response to Deselect Package before configuring channel commit 6bb194d036c6e1b329dcdff459338cdd9a54802a upstream. The NCSI state machine as it's currently implemented assumes that transition to the next logical state is performed either explicitly by calling `schedule_work(&ndp->work)` to re-queue itself or implicitly after processing the predefined (ndp->pending_req_num) number of replies. Thus to avoid the configuration FSM from advancing prematurely and getting out of sync with the process it's essential to not skip waiting for a reply. This patch makes the code wait for reception of the Deselect Package response for the last package probed before proceeding to channel configuration. Thanks go to Potin Lai and Cosmo Chou for the initial investigation and testing. Fixes: 8e13f70be05e ("net/ncsi: Probe single packages to avoid conflict") Cc: stable@vger.kernel.org Signed-off-by: Paul Fertser Link: https://patch.msgid.link/20250116152900.8656-1-fercerpav@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-manage.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 2281eb1a0a03..6a58ae8b5d03 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1385,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: + if (ndp->package_probe_id >= 8) { + /* Last package probed, finishing */ + ndp->flags |= NCSI_DEV_PROBED; + break; + } + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; @@ -1501,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (ret) goto error; - /* Probe next package */ + /* Probe next package after receiving response */ ndp->package_probe_id++; - if (ndp->package_probe_id >= 8) { - /* Probe finished */ - ndp->flags |= NCSI_DEV_PROBED; - break; - } nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; From 220b4a9012764c1d83c1df99861efa724c8f7d01 Mon Sep 17 00:00:00 2001 From: Milos Reljin Date: Fri, 24 Jan 2025 10:41:02 +0000 Subject: [PATCH 02/22] net: phy: c45-tjaxx: add delay between MDIO write and read in soft_reset commit bd1bbab717608757cccbbe08b0d46e6c3ed0ced5 upstream. In application note (AN13663) for TJA1120, on page 30, there's a figure with average PHY startup timing values following software reset. The time it takes for SMI to become operational after software reset ranges roughly from 500 us to 1500 us. This commit adds 2000 us delay after MDIO write which triggers software reset. Without this delay, soft_reset function returns an error and prevents successful PHY init. Cc: stable@vger.kernel.org Fixes: b050f2f15e04 ("phy: nxp-c45: add driver for tja1103") Signed-off-by: Milos Reljin Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/AM8P250MB0124D258E5A71041AF2CC322E1E32@AM8P250MB0124.EURP250.PROD.OUTLOOK.COM Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/nxp-c45-tja11xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 029875a59ff8..718bba014b57 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -937,6 +937,8 @@ static int nxp_c45_soft_reset(struct phy_device *phydev) if (ret) return ret; + usleep_range(2000, 2050); + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_CONTROL, ret, !(ret & DEVICE_CONTROL_RESET), 20000, From 463c1d66c00c67fbc6f1d3c1b83a8d5092e45ecb Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Tue, 7 Jan 2025 15:48:21 +0100 Subject: [PATCH 03/22] rtla/osnoise: Distinguish missing workload option commit 80d3ba1cf51bfbbb3b098434f2b2c95cd7c0ae5c upstream. osnoise_set_workload returns -1 for both missing OSNOISE_WORKLOAD option and failure in setting the option. Return -1 for missing and -2 for failure to distinguish them. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250107144823.239782-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/osnoise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index b8ec6c15bccb..333195dc63d9 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -693,7 +693,7 @@ int osnoise_set_tracing_thresh(struct osnoise_context *context, long long tracin retval = osnoise_write_ll_config("tracing_thresh", tracing_thresh); if (retval < 0) - return -1; + return -2; context->tracing_thresh = tracing_thresh; From 6074e23ded57543a4ca7dcac87ea0c8f9598a92f Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:27 +0100 Subject: [PATCH 04/22] rtla: Add trace_instance_stop commit e879b5dcf8d044f3865a32d95cc5b213f314c54f upstream. Support not only turning trace on for the timerlat tracer, but also turning it off. This will be used in subsequent patches to stop the timerlat tracer without also wiping the trace buffer. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/trace.c | 8 ++++++++ tools/tracing/rtla/src/trace.h | 1 + 2 files changed, 9 insertions(+) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index e1ba6d9f4265..93e4032b2397 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -196,6 +196,14 @@ int trace_instance_start(struct trace_instance *trace) return tracefs_trace_on(trace->inst); } +/* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + /* * trace_events_free - free a list of trace events */ diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index 2e9a89a25615..551a7cb81f63 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -21,6 +21,7 @@ struct trace_instance { int trace_instance_init(struct trace_instance *trace, char *tool_name); int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); void trace_instance_destroy(struct trace_instance *trace); struct trace_seq *get_trace_seq(void); From b854ac0024db08361c361249bdd1cc5f82719c27 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:28 +0100 Subject: [PATCH 05/22] rtla/timerlat_hist: Stop timerlat tracer on signal commit c73cab9dbed04d8f65ca69177b4b21ed3e09dfa7 upstream. Currently, when either SIGINT from the user or SIGALRM from the duration timer is caught by rtla-timerlat, stop_tracing is set to break out of the main loop. This is not sufficient for cases where the timerlat tracer is producing more data than rtla can consume, since in that case, rtla is looping indefinitely inside tracefs_iterate_raw_events, never reaches the check of stop_tracing and hangs. In addition to setting stop_tracing, also stop the timerlat tracer on received signal (SIGINT or SIGALRM). This will stop new samples so that the existing samples may be processed and tracefs_iterate_raw_events eventually exits. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-3-tglozar@redhat.com Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/timerlat_hist.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index ed08295bfa12..83cc7ef3d36b 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -783,9 +783,12 @@ out_err: } static int stop_tracing; +static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { stop_tracing = 1; + if (hist_inst) + trace_instance_stop(hist_inst); } /* @@ -828,6 +831,12 @@ int timerlat_hist_main(int argc, char *argv[]) } trace = &tool->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + hist_inst = trace; retval = enable_timerlat(trace); if (retval) { @@ -894,7 +903,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (trace_is_off(&tool->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); From 3b88ea45a64bc3b13dfcaaf52b2de97b8b19287d Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:29 +0100 Subject: [PATCH 06/22] rtla/timerlat_top: Stop timerlat tracer on signal commit a4dfce7559d75430c464294ddee554be2a413c4a upstream. Currently, when either SIGINT from the user or SIGALRM from the duration timer is caught by rtla-timerlat, stop_tracing is set to break out of the main loop. This is not sufficient for cases where the timerlat tracer is producing more data than rtla can consume, since in that case, rtla is looping indefinitely inside tracefs_iterate_raw_events, never reaches the check of stop_tracing and hangs. In addition to setting stop_tracing, also stop the timerlat tracer on received signal (SIGINT or SIGALRM). This will stop new samples so that the existing samples may be processed and tracefs_iterate_raw_events eventually exits. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-4-tglozar@redhat.com Fixes: a828cd18bc4a ("rtla: Add timerlat tool and timelart top mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- tools/tracing/rtla/src/timerlat_top.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 8fc0f6aa19da..08e940ecdc96 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -575,9 +575,12 @@ out_err: } static int stop_tracing; +static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { stop_tracing = 1; + if (top_inst) + trace_instance_stop(top_inst); } /* @@ -620,6 +623,13 @@ int timerlat_top_main(int argc, char *argv[]) } trace = &top->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + top_inst = trace; + retval = enable_timerlat(trace); if (retval) { @@ -690,7 +700,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&top->trace, &record->trace)) { + if (trace_is_off(&top->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); From cd78038467da89be1b15a649d900344730be39a8 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 6 Nov 2024 23:04:39 +0100 Subject: [PATCH 07/22] pinctrl: samsung: fix fwnode refcount cleanup if platform_get_irq_optional() fails commit 459915f55509f4bfd6076daa1428e28490ddee3b upstream. Commit 50ebd19e3585 ("pinctrl: samsung: drop pin banks references on error paths") fixed the pin bank references on the error paths of the probe function, but there is still an error path where this is not done. If samsung_pinctrl_get_soc_data() does not fail, the child references will have acquired, and they will need to be released in the error path of platform_get_irq_optional(), as it is done in the following error paths within the probe function. Replace the direct return in the error path with a goto instruction to the cleanup function. Cc: stable@vger.kernel.org Fixes: a382d568f144 ("pinctrl: samsung: Use platform_get_irq_optional() to get the interrupt") Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20241106-samsung-pinctrl-put-v1-1-de854e26dd03@gmail.com [krzysztof: change Fixes SHA to point to commit introducing the return leading to OF node leak] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/samsung/pinctrl-samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index bd13b5ef246d..0022ee0851fa 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1149,7 +1149,7 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) ret = platform_get_irq_optional(pdev, 0); if (ret < 0 && ret != -ENXIO) - return ret; + goto err_put_banks; if (ret > 0) drvdata->irq = ret; From 5d1041c76de656f9f8d5a192218039a9acf9bd00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 23 Jan 2025 08:22:40 +0100 Subject: [PATCH 08/22] ptp: Ensure info->enable callback is always set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fd53aa40e65f518453115b6f56183b0c201db26b upstream. The ioctl and sysfs handlers unconditionally call the ->enable callback. Not all drivers implement that callback, leading to NULL dereferences. Example of affected drivers: ptp_s390.c, ptp_vclock.c and ptp_mock.c. Instead use a dummy callback if no better was specified by the driver. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Acked-by: Richard Cochran Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250123-ptp-enable-v1-1-b015834d3a47@weissschuh.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_clock.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 389599bd13bb..af66ed48dc00 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -188,6 +188,11 @@ static int ptp_getcycles64(struct ptp_clock_info *info, struct timespec64 *ts) return info->gettime64(info, ts); } +static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + static void ptp_aux_kworker(struct kthread_work *work) { struct ptp_clock *ptp = container_of(work, struct ptp_clock, @@ -250,6 +255,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->info->getcrosscycles = ptp->info->getcrosststamp; } + if (!ptp->info->enable) + ptp->info->enable = ptp_enable; + if (ptp->info->do_aux_work) { kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index); From 30ab0f6c90f89bfb5429c534cc70cef5478b0041 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 27 Nov 2024 17:01:22 +0100 Subject: [PATCH 09/22] rtc: zynqmp: Fix optional clock name property commit 2a388ff22d2cbfc5cbd628ef085bdcd3b7dc64f5 upstream. Clock description in DT binding introduced by commit f69060c14431 ("dt-bindings: rtc: zynqmp: Add clock information") is talking about "rtc" clock name but driver is checking "rtc_clk" name instead. Because clock is optional property likely in was never handled properly by the driver. Fixes: 07dcc6f9c762 ("rtc: zynqmp: Add calibration set and get support") Signed-off-by: Michal Simek Cc: stable@kernel.org Reviewed-by: Peter Korsgaard Link: https://lore.kernel.org/r/cd5f0c9d01ec1f5a240e37a7e0d85b8dacb3a869.1732723280.git.michal.simek@amd.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-zynqmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index c9b85c838ebe..e8a2cad05e50 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -318,8 +318,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev) return ret; } - /* Getting the rtc_clk info */ - xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc_clk"); + /* Getting the rtc info */ + xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc"); if (IS_ERR(xrtcdev->rtc_clk)) { if (PTR_ERR(xrtcdev->rtc_clk) != -EPROBE_DEFER) dev_warn(&pdev->dev, "Device clock not found.\n"); From 233b210a678bddf8b49b02a070074a52b87e6d43 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 10 Feb 2025 17:27:54 +0000 Subject: [PATCH 10/22] io_uring: fix multishots with selected buffers [ upstream commit d63b0e8a628e62ca85a0f7915230186bb92f8bb4 ] We do io_kbuf_recycle() when arming a poll but every iteration of a multishot can grab more buffers, which is why we need to flush the kbuf ring state before continuing with waiting. Cc: stable@vger.kernel.org Fixes: b3fdea6ecb55c ("io_uring: multishot recv") Reported-by: Muhammad Ramdhan Reported-by: Bing-Jhong Billy Jheng Reported-by: Jacob Soo Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1bfc9990fe435f1fc6152ca9efeba5eb3e68339c.1738025570.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index 428f84ed1021..bbdc9a7624a1 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -307,6 +307,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) } } else { int ret = io_poll_issue(req, locked); + io_kbuf_recycle(req, 0); + if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; if (ret < 0) From a1b17713b32c75a90132ea2f92b1257f3bbc20f3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 10 Feb 2025 17:27:55 +0000 Subject: [PATCH 11/22] io_uring: fix io_req_prep_async with provided buffers io_req_prep_async() can import provided buffers, commit the ring state by giving up on that before, it'll be reimported later if needed. Reported-by: Muhammad Ramdhan Reported-by: Bing-Jhong Billy Jheng Reported-by: Jacob Soo Fixes: c7fb19428d67d ("io_uring: add support for ring mapped supplied buffers") Signed-off-by: Pavel Begunkov Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 64502323be5e..33597284e1cb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1624,6 +1624,7 @@ bool io_alloc_async_data(struct io_kiocb *req) int io_req_prep_async(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; + int ret; /* assign early for deferred execution for non-fixed file */ if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE) && !req->file) @@ -1636,7 +1637,9 @@ int io_req_prep_async(struct io_kiocb *req) if (io_alloc_async_data(req)) return -EAGAIN; } - return def->prep_async(req); + ret = def->prep_async(req); + io_kbuf_recycle(req, 0); + return ret; } static u32 io_get_sequence(struct io_kiocb *req) From 58d38d324589ea99bcdba1b5f085ef3a3310ccb8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 10 Feb 2025 17:27:56 +0000 Subject: [PATCH 12/22] io_uring/rw: commit provided buffer state on async When we get -EIOCBQUEUED, we need to ensure that the buffer is consumed from the provided buffer ring, which can be done with io_kbuf_recycle() + REQ_F_PARTIAL_IO. Reported-by: Muhammad Ramdhan Reported-by: Bing-Jhong Billy Jheng Reported-by: Jacob Soo Fixes: c7fb19428d67d ("io_uring: add support for ring mapped supplied buffers") Signed-off-by: Pavel Begunkov Signed-off-by: Greg Kroah-Hartman --- io_uring/rw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/io_uring/rw.c b/io_uring/rw.c index 692663bd864f..b75f62dccce6 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -772,6 +772,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) goto done; ret = 0; } else if (ret == -EIOCBQUEUED) { + req->flags |= REQ_F_PARTIAL_IO; + io_kbuf_recycle(req, issue_flags); if (iovec) kfree(iovec); return IOU_ISSUE_SKIP_COMPLETE; @@ -795,6 +797,9 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) goto done; } + req->flags |= REQ_F_PARTIAL_IO; + io_kbuf_recycle(req, issue_flags); + io = req->async_data; s = &io->s; /* @@ -935,6 +940,11 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) else ret2 = -EINVAL; + if (ret2 == -EIOCBQUEUED) { + req->flags |= REQ_F_PARTIAL_IO; + io_kbuf_recycle(req, issue_flags); + } + if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; ret2 = -EAGAIN; From ba266064713c2f8d53e1438a99b37303ddfee008 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Sat, 4 Jan 2025 22:47:08 +0800 Subject: [PATCH 13/22] MIPS: ftrace: Declare ftrace_get_parent_ra_addr() as static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ddd068d81445b17ac0bed084dfeb9e58b4df3ddd upstream. Declare ftrace_get_parent_ra_addr() as static to suppress clang compiler warning that 'no previous prototype'. This function is not intended to be called from other parts. Fix follow error with clang-19: arch/mips/kernel/ftrace.c:251:15: error: no previous prototype for function 'ftrace_get_parent_ra_addr' [-Werror,-Wmissing-prototypes] 251 | unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long | ^ arch/mips/kernel/ftrace.c:251:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 251 | unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long | ^ | static 1 error generated. Signed-off-by: WangYuli Acked-by: Masami Hiramatsu (Google) Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 8c401e42301c..f39e85fd58fa 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -248,7 +248,7 @@ int ftrace_disable_ftrace_graph_caller(void) #define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ #define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ -unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long +static unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long old_parent_ra, unsigned long parent_ra_addr, unsigned long fp) { unsigned long sp, ip, tmp; From 846d74f329d6df30eef8f5e4ceb57b19c315aeff Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Mon, 20 Jan 2025 16:35:36 +0300 Subject: [PATCH 14/22] net/ncsi: use dev_set_mac_address() for Get MC MAC Address handling commit 05d91cdb1f9108426b14975ef4eeddf15875ca05 upstream. Copy of the rationale from 790071347a0a1a89e618eedcd51c687ea783aeb3: Change ndo_set_mac_address to dev_set_mac_address because dev_set_mac_address provides a way to notify network layer about MAC change. In other case, services may not aware about MAC change and keep using old one which set from network adapter driver. As example, DHCP client from systemd do not update MAC address without notification from net subsystem which leads to the problem with acquiring the right address from DHCP server. Since dev_set_mac_address requires RTNL lock the operation can not be performed directly in the response handler, see 9e2bbab94b88295dcc57c7580393c9ee08d7314d. The way of selecting the first suitable MAC address from the list is changed, instead of having the driver check it this patch just assumes any valid MAC should be good. Fixes: b8291cf3d118 ("net/ncsi: Add NC-SI 1.2 Get MC MAC Address command") Signed-off-by: Paul Fertser Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-rsp.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 14bd66909ca4..4a8ce2949fae 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1089,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = &ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_gmcma_pkt *rsp; - struct sockaddr saddr; - int ret = -1; int i; rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", @@ -1108,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) rsp->addresses[i][4], rsp->addresses[i][5]); } + saddr->sa_family = ndev->type; for (i = 0; i < rsp->address_count; i++) { - memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); - ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) { + if (!is_valid_ether_addr(rsp->addresses[i])) { netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", - saddr.sa_data); + rsp->addresses[i]); continue; } - netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN); + netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data); break; } - ndp->gma_flag = ret == 0; - return ret; + ndp->gma_flag = 1; + return 0; } static struct ncsi_rsp_handler { From b880f21f618ad09fc3ae0b955caa65cf356bfef6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 15 Dec 2023 10:09:43 +0100 Subject: [PATCH 15/22] gpio: xilinx: remove excess kernel doc commit 4c7fcbf5077532b80bc233c83d56e09a6bfa16b0 upstream. The irqchip field has been removed from struct xgpio_instance so remove the doc as well. Fixes: b4510f8fd5d0 ("gpio: xilinx: Convert to immutable irq_chip") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312150239.IyuTVvrL-lkp@intel.com/ Signed-off-by: Bartosz Golaszewski Reviewed-by: Michal Simek Reviewed-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-xilinx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index 06ab5b71dcee..8f094c898002 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -52,7 +52,6 @@ * @dir: GPIO direction shadow register * @gpio_lock: Lock used for synchronization * @irq: IRQ used by GPIO device - * @irqchip: IRQ chip * @enable: GPIO IRQ enable/disable bitfield * @rising_edge: GPIO IRQ rising edge enable/disable bitfield * @falling_edge: GPIO IRQ falling edge enable/disable bitfield From 918cf2616af595d028b4ac0dfa3d55fb7c107436 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Mon, 6 Jan 2025 22:06:40 +0800 Subject: [PATCH 16/22] ocfs2: check dir i_size in ocfs2_find_entry commit b0fce54b8c0d8e5f2b4c243c803c5996e73baee8 upstream. syz reports an out of bounds read: ================================================================== BUG: KASAN: slab-out-of-bounds in ocfs2_match fs/ocfs2/dir.c:334 [inline] BUG: KASAN: slab-out-of-bounds in ocfs2_search_dirblock+0x283/0x6e0 fs/ocfs2/dir.c:367 Read of size 1 at addr ffff88804d8b9982 by task syz-executor.2/14802 CPU: 0 UID: 0 PID: 14802 Comm: syz-executor.2 Not tainted 6.13.0-rc4 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Sched_ext: serialise (enabled+all), task: runnable_at=-10ms Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x229/0x350 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x164/0x530 mm/kasan/report.c:489 kasan_report+0x147/0x180 mm/kasan/report.c:602 ocfs2_match fs/ocfs2/dir.c:334 [inline] ocfs2_search_dirblock+0x283/0x6e0 fs/ocfs2/dir.c:367 ocfs2_find_entry_id fs/ocfs2/dir.c:414 [inline] ocfs2_find_entry+0x1143/0x2db0 fs/ocfs2/dir.c:1078 ocfs2_find_files_on_disk+0x18e/0x530 fs/ocfs2/dir.c:1981 ocfs2_lookup_ino_from_name+0xb6/0x110 fs/ocfs2/dir.c:2003 ocfs2_lookup+0x30a/0xd40 fs/ocfs2/namei.c:122 lookup_open fs/namei.c:3627 [inline] open_last_lookups fs/namei.c:3748 [inline] path_openat+0x145a/0x3870 fs/namei.c:3984 do_filp_open+0xe9/0x1c0 fs/namei.c:4014 do_sys_openat2+0x135/0x1d0 fs/open.c:1402 do_sys_open fs/open.c:1417 [inline] __do_sys_openat fs/open.c:1433 [inline] __se_sys_openat fs/open.c:1428 [inline] __x64_sys_openat+0x15d/0x1c0 fs/open.c:1428 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf6/0x210 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f01076903ad Code: c3 e8 a7 2b 00 00 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f01084acfc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 00007f01077cbf80 RCX: 00007f01076903ad RDX: 0000000000105042 RSI: 0000000020000080 RDI: ffffffffffffff9c RBP: 00007f01077cbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000001ff R11: 0000000000000246 R12: 0000000000000000 R13: 00007f01077cbf80 R14: 00007f010764fc90 R15: 00007f010848d000 ================================================================== And a general protection fault in ocfs2_prepare_dir_for_insert: ================================================================== loop0: detected capacity change from 0 to 32768 JBD2: Ignoring recovery information on journal ocfs2: Mounting device (7,0) on (node local, slot 0) with ordered data mode. Oops: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 UID: 0 PID: 5096 Comm: syz-executor792 Not tainted 6.11.0-rc4-syzkaller-00002-gb0da640826ba #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:ocfs2_find_dir_space_id fs/ocfs2/dir.c:3406 [inline] RIP: 0010:ocfs2_prepare_dir_for_insert+0x3309/0x5c70 fs/ocfs2/dir.c:4280 Code: 00 00 e8 2a 25 13 fe e9 ba 06 00 00 e8 20 25 13 fe e9 4f 01 00 00 e8 16 25 13 fe 49 8d 7f 08 49 8d 5f 09 48 89 f8 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 bd 23 00 00 48 89 d8 48 c1 e8 03 42 0f RSP: 0018:ffffc9000af9f020 EFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000009 RCX: ffff88801e27a440 RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000008 RBP: ffffc9000af9f830 R08: ffffffff8380395b R09: ffffffff838090a7 R10: 0000000000000002 R11: ffff88801e27a440 R12: dffffc0000000000 R13: ffff88803c660878 R14: f700000000000088 R15: 0000000000000000 FS: 000055555a677380(0000) GS:ffff888020800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000560bce569178 CR3: 000000001de5a000 CR4: 0000000000350ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ocfs2_mknod+0xcaf/0x2b40 fs/ocfs2/namei.c:292 vfs_mknod+0x36d/0x3b0 fs/namei.c:4088 do_mknodat+0x3ec/0x5b0 __do_sys_mknodat fs/namei.c:4166 [inline] __se_sys_mknodat fs/namei.c:4163 [inline] __x64_sys_mknodat+0xa7/0xc0 fs/namei.c:4163 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f2dafda3a99 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe336a6658 EFLAGS: 00000246 ORIG_RAX: 0000000000000103 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f2dafda3a99 RDX: 00000000000021c0 RSI: 0000000020000040 RDI: 00000000ffffff9c RBP: 00007f2dafe1b5f0 R08: 0000000000004480 R09: 000055555a6784c0 R10: 0000000000000103 R11: 0000000000000246 R12: 00007ffe336a6680 R13: 00007ffe336a68a8 R14: 431bde82d7b634db R15: 00007f2dafdec03b ================================================================== The two reports are all caused invalid negative i_size of dir inode. For ocfs2, dir_inode can't be negative or zero. Here add a check in which is called by ocfs2_check_dir_for_entry(). It fixes the second report as ocfs2_check_dir_for_entry() must be called before ocfs2_prepare_dir_for_insert(). Also set a up limit for dir with OCFS2_INLINE_DATA_FL. The i_size can't be great than blocksize. Link: https://lkml.kernel.org/r/20250106140640.92260-1-glass.su@suse.com Reported-by: Jiacheng Xu Link: https://lore.kernel.org/ocfs2-devel/17a04f01.1ae74.19436d003fc.Coremail.stitch@zju.edu.cn/T/#u Reported-by: syzbot+5a64828fcc4c2ad9b04f@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/0000000000005894f3062018caf1@google.com/T/ Signed-off-by: Su Yue Reviewed-by: Heming Zhao Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dir.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d27e15b54be4..de6fd4a09ffd 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1065,26 +1065,39 @@ int ocfs2_find_entry(const char *name, int namelen, { struct buffer_head *bh; struct ocfs2_dir_entry *res_dir = NULL; + int ret = 0; if (ocfs2_dir_indexed(dir)) return ocfs2_find_entry_dx(name, namelen, dir, lookup); + if (unlikely(i_size_read(dir) <= 0)) { + ret = -EFSCORRUPTED; + mlog_errno(ret); + goto out; + } /* * The unindexed dir code only uses part of the lookup * structure, so there's no reason to push it down further * than this. */ - if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) { + ret = -EFSCORRUPTED; + mlog_errno(ret); + goto out; + } bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); - else + } else { bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir); + } if (bh == NULL) return -ENOENT; lookup->dl_leaf_bh = bh; lookup->dl_entry = res_dir; - return 0; +out: + return ret; } /* @@ -2011,6 +2024,7 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, * * Return 0 if the name does not exist * Return -EEXIST if the directory contains the name + * Return -EFSCORRUPTED if found corruption * * Callers should have i_rwsem + a cluster lock on dir */ @@ -2024,9 +2038,12 @@ int ocfs2_check_dir_for_entry(struct inode *dir, trace_ocfs2_check_dir_for_entry( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); - if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { + ret = ocfs2_find_entry(name, namelen, dir, &lookup); + if (ret == 0) { ret = -EEXIST; mlog_errno(ret); + } else if (ret == -ENOENT) { + ret = 0; } ocfs2_free_dir_lookup_result(&lookup); From d6bba3ece960129a553d4b16f1b00c884dc0993a Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Thu, 7 Nov 2024 19:06:48 +0800 Subject: [PATCH 17/22] cachefiles: Fix NULL pointer dereference in object->file commit 31ad74b20227ce6b40910ff78b1c604e42975cf1 upstream. At present, the object->file has the NULL pointer dereference problem in ondemand-mode. The root cause is that the allocated fd and object->file lifetime are inconsistent, and the user-space invocation to anon_fd uses object->file. Following is the process that triggers the issue: [write fd] [umount] cachefiles_ondemand_fd_write_iter fscache_cookie_state_machine cachefiles_withdraw_cookie if (!file) return -ENOBUFS cachefiles_clean_up_object cachefiles_unmark_inode_in_use fput(object->file) object->file = NULL // file NULL pointer dereference! __cachefiles_write(..., file, ...) Fix this issue by add an additional reference count to the object->file before write/llseek, and decrement after it finished. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Zizhi Wo Link: https://lore.kernel.org/r/20241107110649.3980193-5-wozizhi@huawei.com Reviewed-by: David Howells Signed-off-by: Christian Brauner Signed-off-by: Bin Lan Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/interface.c | 14 ++++++++++---- fs/cachefiles/ondemand.c | 30 ++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index bde23e156a63..fd71775c703a 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -327,6 +327,8 @@ static void cachefiles_commit_object(struct cachefiles_object *object, static void cachefiles_clean_up_object(struct cachefiles_object *object, struct cachefiles_cache *cache) { + struct file *file; + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) { if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { cachefiles_see_object(object, cachefiles_obj_see_clean_delete); @@ -342,10 +344,14 @@ static void cachefiles_clean_up_object(struct cachefiles_object *object, } cachefiles_unmark_inode_in_use(object, object->file); - if (object->file) { - fput(object->file); - object->file = NULL; - } + + spin_lock(&object->lock); + file = object->file; + object->file = NULL; + spin_unlock(&object->lock); + + if (file) + fput(file); } /* diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index d1a0264b08a6..3389a373faf6 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -61,20 +61,26 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, { struct cachefiles_object *object = kiocb->ki_filp->private_data; struct cachefiles_cache *cache = object->volume->cache; - struct file *file = object->file; + struct file *file; size_t len = iter->count; loff_t pos = kiocb->ki_pos; const struct cred *saved_cred; int ret; - if (!file) + spin_lock(&object->lock); + file = object->file; + if (!file) { + spin_unlock(&object->lock); return -ENOBUFS; + } + get_file(file); + spin_unlock(&object->lock); cachefiles_begin_secure(cache, &saved_cred); ret = __cachefiles_prepare_write(object, file, &pos, &len, true); cachefiles_end_secure(cache, saved_cred); if (ret < 0) - return ret; + goto out; trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len); ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); @@ -83,6 +89,8 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, kiocb->ki_pos += ret; } +out: + fput(file); return ret; } @@ -90,12 +98,22 @@ static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, int whence) { struct cachefiles_object *object = filp->private_data; - struct file *file = object->file; + struct file *file; + loff_t ret; - if (!file) + spin_lock(&object->lock); + file = object->file; + if (!file) { + spin_unlock(&object->lock); return -ENOBUFS; + } + get_file(file); + spin_unlock(&object->lock); - return vfs_llseek(file, pos, whence); + ret = vfs_llseek(file, pos, whence); + fput(file); + + return ret; } static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, From 22b0734c9401a74ed4ebd9e8ef0da33e493852eb Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Sun, 9 Feb 2025 18:48:30 +0100 Subject: [PATCH 18/22] mptcp: pm: only set fullmesh for subflow endp commit 1bb0d1348546ad059f55c93def34e67cb2a034a6 upstream. With the in-kernel path-manager, it is possible to change the 'fullmesh' flag. The code in mptcp_pm_nl_fullmesh() expects to change it only on 'subflow' endpoints, to recreate more or less subflows using the linked address. Unfortunately, the set_flags() hook was a bit more permissive, and allowed 'implicit' endpoints to get the 'fullmesh' flag while it is not allowed before. That's what syzbot found, triggering the following warning: WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 __mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline] WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline] WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline] WARNING: CPU: 0 PID: 6499 at net/mptcp/pm_netlink.c:1496 mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064 Modules linked in: CPU: 0 UID: 0 PID: 6499 Comm: syz.1.413 Not tainted 6.13.0-rc5-syzkaller-00172-gd1bf27c4e176 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_netlink.c:1496 [inline] RIP: 0010:mptcp_pm_nl_fullmesh net/mptcp/pm_netlink.c:1980 [inline] RIP: 0010:mptcp_nl_set_flags net/mptcp/pm_netlink.c:2003 [inline] RIP: 0010:mptcp_pm_nl_set_flags+0x974/0xdc0 net/mptcp/pm_netlink.c:2064 Code: 01 00 00 49 89 c5 e8 fb 45 e8 f5 e9 b8 fc ff ff e8 f1 45 e8 f5 4c 89 f7 be 03 00 00 00 e8 44 1d 0b f9 eb a0 e8 dd 45 e8 f5 90 <0f> 0b 90 e9 17 ff ff ff 89 d9 80 e1 07 38 c1 0f 8c c9 fc ff ff 48 RSP: 0018:ffffc9000d307240 EFLAGS: 00010293 RAX: ffffffff8bb72e03 RBX: 0000000000000000 RCX: ffff88807da88000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc9000d307430 R08: ffffffff8bb72cf0 R09: 1ffff1100b842a5e R10: dffffc0000000000 R11: ffffed100b842a5f R12: ffff88801e2e5ac0 R13: ffff88805c214800 R14: ffff88805c2152e8 R15: 1ffff1100b842a5d FS: 00005555619f6500(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002840 CR3: 00000000247e6000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2542 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1321 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1347 netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1891 sock_sendmsg_nosec net/socket.c:711 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:726 ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2583 ___sys_sendmsg net/socket.c:2637 [inline] __sys_sendmsg+0x269/0x350 net/socket.c:2669 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5fe8785d29 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff571f5558 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f5fe8975fa0 RCX: 00007f5fe8785d29 RDX: 0000000000000000 RSI: 0000000020000480 RDI: 0000000000000007 RBP: 00007f5fe8801b08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f5fe8975fa0 R14: 00007f5fe8975fa0 R15: 00000000000011f4 Here, syzbot managed to set the 'fullmesh' flag on an 'implicit' and used -- according to 'id_avail_bitmap' -- endpoint, causing the PM to try decrement the local_addr_used counter which is only incremented for the 'subflow' endpoint. Note that 'no type' endpoints -- not 'subflow', 'signal', 'implicit' -- are fine, because their ID will not be marked as used in the 'id_avail' bitmap, and setting 'fullmesh' can help forcing the creation of subflow when receiving an ADD_ADDR. Fixes: 73c762c1f07d ("mptcp: set fullmesh flag in pm_netlink") Cc: stable@vger.kernel.org Reported-by: syzbot+cd16e79c1e45f3fe0377@syzkaller.appspotmail.com Closes: https://lore.kernel.org/6786ac51.050a0220.216c54.00a6.GAE@google.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/540 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250123-net-mptcp-syzbot-issues-v1-2-af73258a726f@kernel.org Signed-off-by: Jakub Kicinski [ Conflicts in pm_netlink.c, because the code has been moved around in commit 6a42477fe449 ("mptcp: update set_flags interfaces"), but the same fix can still be applied at the original place. ] Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3fd7de56a30f..ddbc352f8420 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2086,7 +2086,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); return -EINVAL; } From b740d3c3cb405253e4607cc28bae3117196e03a2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 9 Feb 2025 18:48:31 +0100 Subject: [PATCH 19/22] mptcp: prevent excessive coalescing on receive commit 56b824eb49d6258aa0bad09a406ceac3f643cdae upstream. Currently the skb size after coalescing is only limited by the skb layout (the skb must not carry frag_list). A single coalesced skb covering several MSS can potentially fill completely the receive buffer. In such a case, the snd win will zero until the receive buffer will be empty again, affecting tput badly. Fixes: 8268ed4c9d19 ("mptcp: introduce and use mptcp_try_coalesce()") Cc: stable@vger.kernel.org # please delay 2 weeks after 6.13-final release Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-3-8608af434ceb@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1cec2be7db55..e975693b8fa9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -149,6 +149,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, int delta; if (MPTCP_SKB_CB(from)->offset || + ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; From de5bd24197bd9ee37ec1e379a3d882bbd15c5065 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 10 Jan 2025 16:38:22 -0500 Subject: [PATCH 20/22] tty: xilinx_uartps: split sysrq handling commit b06f388994500297bb91be60ffaf6825ecfd2afe upstream. lockdep detects the following circular locking dependency: CPU 0 CPU 1 ========================== ============================ cdns_uart_isr() printk() uart_port_lock(port) console_lock() cdns_uart_console_write() if (!port->sysrq) uart_port_lock(port) uart_handle_break() port->sysrq = ... uart_handle_sysrq_char() printk() console_lock() The fixed commit attempts to avoid this situation by only taking the port lock in cdns_uart_console_write if port->sysrq unset. However, if (as shown above) cdns_uart_console_write runs before port->sysrq is set, then it will try to take the port lock anyway. This may result in a deadlock. Fix this by splitting sysrq handling into two parts. We use the prepare helper under the port lock and defer handling until we release the lock. Fixes: 74ea66d4ca06 ("tty: xuartps: Improve sysrq handling") Signed-off-by: Sean Anderson Cc: stable@vger.kernel.org # c980248179d: serial: xilinx_uartps: Use port lock wrappers Acked-by: John Ogness Link: https://lore.kernel.org/r/20250110213822.2107462-1-sean.anderson@linux.dev Signed-off-by: Sean Anderson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 2eff7cff57c4..29afcc6d9bb7 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -268,7 +268,7 @@ static void cdns_uart_handle_rx(void *dev_id, unsigned int isrstatus) continue; } - if (uart_handle_sysrq_char(port, data)) + if (uart_prepare_sysrq_char(port, data)) continue; if (is_rxbs_support) { @@ -371,7 +371,7 @@ static irqreturn_t cdns_uart_isr(int irq, void *dev_id) !(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_RX_DIS)) cdns_uart_handle_rx(dev_id, isrstatus); - spin_unlock(&port->lock); + uart_unlock_and_check_sysrq(port); return IRQ_HANDLED; } @@ -1231,10 +1231,8 @@ static void cdns_uart_console_write(struct console *co, const char *s, unsigned int imr, ctrl; int locked = 1; - if (port->sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock_irqsave(&port->lock, flags); + if (oops_in_progress) + locked = uart_port_trylock_irqsave(port, &flags); else spin_lock_irqsave(&port->lock, flags); From c4720d0490ca395891bc950639779f10860837ae Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 18 May 2023 10:55:10 -0400 Subject: [PATCH 21/22] maple_tree: fix static analyser cppcheck issue commit 5729e06c819184b7ba40869c1ad53e1a463040b2 upstream. Patch series "Maple tree mas_{next,prev}_range() and cleanup", v4. This patchset contains a number of clean ups to the code to make it more usable (next/prev range), the addition of debug output formatting, the addition of printing the maple state information in the WARN_ON/BUG_ON code. There is also work done here to keep nodes active during iterations to reduce the necessity of re-walking the tree. Finally, there is a new interface added to move to the next or previous range in the tree, even if it is empty. The organisation of the patches is as follows: 0001-0004 - Small clean ups 0005-0018 - Additional debug options and WARN_ON/BUG_ON changes 0019 - Test module __init and __exit addition 0020-0021 - More functional clean ups 0022-0026 - Changes to keep nodes active 0027-0034 - Add new mas_{prev,next}_range() 0035 - Use new mas_{prev,next}_range() in mmap_region() This patch (of 35): Static analyser of the maple tree code noticed that the split variable is being used to dereference into an array prior to checking the variable itself. Fix this issue by changing the order of the statement to check the variable first. Link: https://lkml.kernel.org/r/20230518145544.1722059-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230518145544.1722059-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reported-by: David Binderman Reviewed-by: Peng Zhang Cc: Sergey Senozhatsky Cc: Vernon Yang Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/maple_tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 4c586008c6dd..bf40f4327a59 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1935,8 +1935,9 @@ static inline int mab_calc_split(struct ma_state *mas, * causes one node to be deficient. * NOTE: mt_min_slots is 1 based, b_end and split are zero. */ - while (((bn->pivot[split] - min) < slot_count - 1) && - (split < slot_count - 1) && (b_end - split > slot_min)) + while ((split < slot_count - 1) && + ((bn->pivot[split] - min) < slot_count - 1) && + (b_end - split > slot_min)) split++; } From 09b288fb7f4eaf91e441b9cd6d488cbac77d8de5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 13 Nov 2024 03:16:14 +0000 Subject: [PATCH 22/22] maple_tree: simplify split calculation commit 4f6a6bed0bfef4b966f076f33eb4f5547226056a upstream. Patch series "simplify split calculation", v3. This patch (of 3): The current calculation for splitting nodes tries to enforce a minimum span on the leaf nodes. This code is complex and never worked correctly to begin with, due to the min value being passed as 0 for all leaves. The calculation should just split the data as equally as possible between the new nodes. Note that b_end will be one more than the data, so the left side is still favoured in the calculation. The current code may also lead to a deficient node by not leaving enough data for the right side of the split. This issue is also addressed with the split calculation change. [Liam.Howlett@Oracle.com: rephrase the change log] Link: https://lkml.kernel.org/r/20241113031616.10530-1-richard.weiyang@gmail.com Link: https://lkml.kernel.org/r/20241113031616.10530-2-richard.weiyang@gmail.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Sidhartha Kumar Cc: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/maple_tree.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index bf40f4327a59..b5d216bdd3a5 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1890,11 +1890,11 @@ static inline int mab_no_null_split(struct maple_big_node *b_node, * Return: The first split location. The middle split is set in @mid_split. */ static inline int mab_calc_split(struct ma_state *mas, - struct maple_big_node *bn, unsigned char *mid_split, unsigned long min) + struct maple_big_node *bn, unsigned char *mid_split) { unsigned char b_end = bn->b_end; int split = b_end / 2; /* Assume equal split. */ - unsigned char slot_min, slot_count = mt_slots[bn->type]; + unsigned char slot_count = mt_slots[bn->type]; /* * To support gap tracking, all NULL entries are kept together and a node cannot @@ -1927,18 +1927,7 @@ static inline int mab_calc_split(struct ma_state *mas, split = b_end / 3; *mid_split = split * 2; } else { - slot_min = mt_min_slots[bn->type]; - *mid_split = 0; - /* - * Avoid having a range less than the slot count unless it - * causes one node to be deficient. - * NOTE: mt_min_slots is 1 based, b_end and split are zero. - */ - while ((split < slot_count - 1) && - ((bn->pivot[split] - min) < slot_count - 1) && - (b_end - split > slot_min)) - split++; } /* Avoid ending a node on a NULL entry */ @@ -2664,7 +2653,7 @@ static inline struct maple_enode static inline unsigned char mas_mab_to_node(struct ma_state *mas, struct maple_big_node *b_node, struct maple_enode **left, struct maple_enode **right, struct maple_enode **middle, - unsigned char *mid_split, unsigned long min) + unsigned char *mid_split) { unsigned char split = 0; unsigned char slot_count = mt_slots[b_node->type]; @@ -2677,7 +2666,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas, if (b_node->b_end < slot_count) { split = b_node->b_end; } else { - split = mab_calc_split(mas, b_node, mid_split, min); + split = mab_calc_split(mas, b_node, mid_split); *right = mas_new_ma_node(mas, b_node); } @@ -3076,7 +3065,7 @@ static int mas_spanning_rebalance(struct ma_state *mas, mast->bn->b_end--; mast->bn->type = mte_node_type(mast->orig_l->node); split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle, - &mid_split, mast->orig_l->min); + &mid_split); mast_set_split_parents(mast, left, middle, right, split, mid_split); mast_cp_to_nodes(mast, left, middle, right, split, mid_split); @@ -3591,7 +3580,7 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) if (mas_push_data(mas, height, &mast, false)) break; - split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); + split = mab_calc_split(mas, b_node, &mid_split); mast_split_data(&mast, mas, split); /* * Usually correct, mab_mas_cp in the above call overwrites