From 5661dd95a2958634485bb1a53f90a6ab621d4b0c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 30 Jan 2020 15:16:44 -0700 Subject: [PATCH 01/37] printk: Convert a use of sprintf to snprintf in console_unlock When CONFIG_PRINTK is disabled (e.g. when building allnoconfig), clang warns: ../kernel/printk/printk.c:2416:10: warning: 'sprintf' will always overflow; destination buffer has size 0, but format string expands to at least 33 [-Wfortify-source] len = sprintf(text, ^ 1 warning generated. It is not wrong; text has a zero size when CONFIG_PRINTK is disabled because LOG_LINE_MAX and PREFIX_MAX are both zero. Change to snprintf so that this case is explicitly handled without any risk of overflow. Link: https://github.com/ClangBuiltLinux/linux/issues/846 Link: https://github.com/llvm/llvm-project/commit/6d485ff455ea2b37fef9e06e426dae6c1241b231 Link: http://lkml.kernel.org/r/20200130221644.2273-1-natechancellor@gmail.com Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: clang-built-linux@googlegroups.com Signed-off-by: Nathan Chancellor Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fada22dc4ab6..a44094727a5c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2413,9 +2413,9 @@ again: printk_safe_enter_irqsave(flags); raw_spin_lock(&logbuf_lock); if (console_seq < log_first_seq) { - len = sprintf(text, - "** %llu printk messages dropped **\n", - log_first_seq - console_seq); + len = snprintf(text, sizeof(text), + "** %llu printk messages dropped **\n", + log_first_seq - console_seq); /* messages are gone, move to first one */ console_seq = log_first_seq; From ad8cd1db80cc33337bdbee63c453ef6d5132474b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 13 Feb 2020 10:51:31 +0100 Subject: [PATCH 02/37] printk: Move console matching logic into a separate function This moves the loop that tries to match a newly registered console with the command line or add_preferred_console list into a separate helper, in order to be able to call it multiple times in subsequent patches. Link: https://lore.kernel.org/r/20200213095133.23176-2-pmladek@suse.com Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 105 +++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 40 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fada22dc4ab6..0ebcdf53e75d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -280,6 +280,7 @@ static struct console *exclusive_console; static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int preferred_console = -1; +static bool has_preferred_console; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); @@ -2626,6 +2627,60 @@ static int __init keep_bootcon_setup(char *str) early_param("keep_bootcon", keep_bootcon_setup); +/* + * This is called by register_console() to try to match + * the newly registered console with any of the ones selected + * by either the command line or add_preferred_console() and + * setup/enable it. + * + * Care need to be taken with consoles that are statically + * enabled such as netconsole + */ +static int try_enable_new_console(struct console *newcon) +{ + struct console_cmdline *c; + int i; + + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { + if (!newcon->match || + newcon->match(newcon, c->name, c->index, c->options) != 0) { + /* default matching */ + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); + if (strcmp(c->name, newcon->name) != 0) + continue; + if (newcon->index >= 0 && + newcon->index != c->index) + continue; + if (newcon->index < 0) + newcon->index = c->index; + + if (_braille_register_console(newcon, c)) + return 0; + + if (newcon->setup && + newcon->setup(newcon, c->options) != 0) + return -EIO; + } + newcon->flags |= CON_ENABLED; + if (i == preferred_console) { + newcon->flags |= CON_CONSDEV; + has_preferred_console = true; + } + return 0; + } + + /* + * Some consoles, such as pstore and netconsole, can be enabled even + * without matching. + */ + if (newcon->flags & CON_ENABLED) + return 0; + + return -ENOENT; +} + /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to @@ -2647,11 +2702,9 @@ early_param("keep_bootcon", keep_bootcon_setup); */ void register_console(struct console *newcon) { - int i; unsigned long flags; struct console *bcon = NULL; - struct console_cmdline *c; - static bool has_preferred; + int err; if (console_drivers) for_each_console(bcon) @@ -2678,15 +2731,15 @@ void register_console(struct console *newcon) if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; - if (!has_preferred || bcon || !console_drivers) - has_preferred = preferred_console >= 0; + if (!has_preferred_console || bcon || !console_drivers) + has_preferred_console = preferred_console >= 0; /* * See if we want to use this console driver. If we * didn't select a console we take the first one * that registers here. */ - if (!has_preferred) { + if (!has_preferred_console) { if (newcon->index < 0) newcon->index = 0; if (newcon->setup == NULL || @@ -2694,47 +2747,19 @@ void register_console(struct console *newcon) newcon->flags |= CON_ENABLED; if (newcon->device) { newcon->flags |= CON_CONSDEV; - has_preferred = true; + has_preferred_console = true; } } } /* - * See if this console matches one we selected on - * the command line. + * See if this console matches one we selected on + * the command line or if it was statically enabled */ - for (i = 0, c = console_cmdline; - i < MAX_CMDLINECONSOLES && c->name[0]; - i++, c++) { - if (!newcon->match || - newcon->match(newcon, c->name, c->index, c->options) != 0) { - /* default matching */ - BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); - if (strcmp(c->name, newcon->name) != 0) - continue; - if (newcon->index >= 0 && - newcon->index != c->index) - continue; - if (newcon->index < 0) - newcon->index = c->index; + err = try_enable_new_console(newcon); - if (_braille_register_console(newcon, c)) - return; - - if (newcon->setup && - newcon->setup(newcon, c->options) != 0) - break; - } - - newcon->flags |= CON_ENABLED; - if (i == preferred_console) { - newcon->flags |= CON_CONSDEV; - has_preferred = true; - } - break; - } - - if (!(newcon->flags & CON_ENABLED)) + /* printk() messages are not printed to the Braille console. */ + if (err || newcon->flags & CON_BRL) return; /* From e369d8227fd211be36242fc44a9dc2209e246b9a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 13 Feb 2020 10:51:32 +0100 Subject: [PATCH 03/37] printk: Fix preferred console selection with multiple matches In the following circumstances, the rule of selecting the console corresponding to the last "console=" entry on the command line as the preferred console (CON_CONSDEV, ie, /dev/console) fails. This is a specific example, but it could happen with different consoles that have a similar name aliasing mechanism. - The kernel command line has both console=tty0 and console=ttyS0 in that order (the latter with speed etc... arguments). This is common with some cloud setups such as Amazon Linux. - add_preferred_console is called early to register "uart0". In our case that happens from acpi_parse_spcr() on arm64 since the "enable_console" argument is true on that architecture. This causes "uart0" to become entry 0 of the console_cmdline array. Now, because of the above, what happens is: - add_preferred_console is called by the cmdline parsing for tty0 and ttyS0 respectively, thus occupying entries 1 and 2 of the console_cmdline array (since this happens after ACPI SPCR parsing). At that point preferred_console is set to 2 as expected. - When the tty layer kicks in, it will call register_console for tty0. This will match entry 1 in console_cmdline array. It isn't our preferred console but because it's our only console at this point, it will end up "first" in the consoles list. - When 8250 probes the actual serial port later on, it calls register_console for ttyS0. At that point the loop in register_console tries to match it with the entries in the console_cmdline array. Ideally this should match ttyS0 in entry 2, which is preferred, causing it to be inserted first and to replace tty0 as CONSDEV. However, 8250 provides a "match" hook in its struct console, and that hook will match "uart" as an alias to "ttyS". So we match uart0 at entry 0 in the array which is not the preferred console and will not match entry 2 which is since we break out of the loop on the first match. As a result, we don't set CONSDEV and don't insert it first, but second in the console list. As a result, we end up with tty0 remaining first in the array, and thus /dev/console going there instead of the last user specified one which is ttyS0. This tentative fix register_console() to scan first for consoles specified on the command line, and only if none is found, to then scan for consoles specified by the architecture. Link: https://lore.kernel.org/r/20200213095133.23176-3-pmladek@suse.com Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/console_cmdline.h | 1 + kernel/printk/printk.c | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index 11f19c466af5..3ca74ad391d6 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -6,6 +6,7 @@ struct console_cmdline { char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ + bool user_specified; /* Specified by command line vs. platform */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE char *brl_options; /* Options for braille driver */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 0ebcdf53e75d..f76ef3f0efca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2116,7 +2116,7 @@ asmlinkage __visible void early_printk(const char *fmt, ...) #endif static int __add_preferred_console(char *name, int idx, char *options, - char *brl_options) + char *brl_options, bool user_specified) { struct console_cmdline *c; int i; @@ -2131,6 +2131,8 @@ static int __add_preferred_console(char *name, int idx, char *options, if (strcmp(c->name, name) == 0 && c->index == idx) { if (!brl_options) preferred_console = i; + if (user_specified) + c->user_specified = true; return 0; } } @@ -2140,6 +2142,7 @@ static int __add_preferred_console(char *name, int idx, char *options, preferred_console = i; strlcpy(c->name, name, sizeof(c->name)); c->options = options; + c->user_specified = user_specified; braille_set_options(c, brl_options); c->index = idx; @@ -2194,7 +2197,7 @@ static int __init console_setup(char *str) idx = simple_strtoul(s, NULL, 10); *s = 0; - __add_preferred_console(buf, idx, options, brl_options); + __add_preferred_console(buf, idx, options, brl_options, true); console_set_on_cmdline = 1; return 1; } @@ -2215,7 +2218,7 @@ __setup("console=", console_setup); */ int add_preferred_console(char *name, int idx, char *options) { - return __add_preferred_console(name, idx, options, NULL); + return __add_preferred_console(name, idx, options, NULL, false); } bool console_suspend_enabled = true; @@ -2636,7 +2639,7 @@ early_param("keep_bootcon", keep_bootcon_setup); * Care need to be taken with consoles that are statically * enabled such as netconsole */ -static int try_enable_new_console(struct console *newcon) +static int try_enable_new_console(struct console *newcon, bool user_specified) { struct console_cmdline *c; int i; @@ -2644,6 +2647,8 @@ static int try_enable_new_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { + if (c->user_specified != user_specified) + continue; if (!newcon->match || newcon->match(newcon, c->name, c->index, c->options) != 0) { /* default matching */ @@ -2673,9 +2678,10 @@ static int try_enable_new_console(struct console *newcon) /* * Some consoles, such as pstore and netconsole, can be enabled even - * without matching. + * without matching. Accept the pre-enabled consoles only when match() + * and setup() had a change to be called. */ - if (newcon->flags & CON_ENABLED) + if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) return 0; return -ENOENT; @@ -2752,11 +2758,12 @@ void register_console(struct console *newcon) } } - /* - * See if this console matches one we selected on - * the command line or if it was statically enabled - */ - err = try_enable_new_console(newcon); + /* See if this console matches one we selected on the command line */ + err = try_enable_new_console(newcon, true); + + /* If not, try to match against the platform default(s) */ + if (err == -ENOENT) + err = try_enable_new_console(newcon, false); /* printk() messages are not printed to the Braille console. */ if (err || newcon->flags & CON_BRL) From 33225d7b0ac9903c5701bbede7dfdaeba74ad6c3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 13 Feb 2020 10:51:33 +0100 Subject: [PATCH 04/37] printk: Correctly set CON_CONSDEV even when preferred console was not registered CON_CONSDEV flag was historically used to put/keep the preferred console first in console_drivers list. Where the preferred console is the last on the command line. The ordering is important only when opening /dev/console: + tty_kopen() + tty_lookup_driver() + console_device() The flag was originally an implementation detail. But it was later made accessible from userspace via /proc/consoles. It was used, for example, by the tool "showconsole" to show the real tty accessible via /dev/console, see https://github.com/bitstreamout/showconsole Now, the current code sets CON_CONSDEV only for the preferred console or when a fallback console is added. The flag is not set when the preferred console is defined on the command line but it is not registered from some reasons. Simple solution is to set CON_CONSDEV flag for the first registered console. It will work most of the time because: + Most real consoles have console->device defined. + Boot consoles are removed in printk_late_init(). + unregister_console() moves CON_CONSDEV flag to the next console. Clean solution would require checking con->device when the preferred console is registered and in unregister_console(). Conclusion: Use the simple solution for now. It is better than the current state and good enough. The clean solution is not worth it. It would complicate the already complicated code without too much gain. Instead the code would deserve a complete rewrite. Link: https://lore.kernel.org/r/20200213095133.23176-4-pmladek@suse.com Signed-off-by: Benjamin Herrenschmidt [pmladek@suse.com: Correct reasoning in the commit message, comment update.] Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/console.h | 2 +- kernel/printk/printk.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/console.h b/include/linux/console.h index d09951d5a94e..72141f71c014 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -135,7 +135,7 @@ static inline int con_debug_leave(void) */ #define CON_PRINTBUFFER (1) -#define CON_CONSDEV (2) /* Last on the command line */ +#define CON_CONSDEV (2) /* Preferred console, /dev/console */ #define CON_ENABLED (4) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f76ef3f0efca..cf0ceacdae2f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2788,6 +2788,8 @@ void register_console(struct console *newcon) console_drivers = newcon; if (newcon->next) newcon->next->flags &= ~CON_CONSDEV; + /* Ensure this flag is always set for the head of the list */ + newcon->flags |= CON_CONSDEV; } else { newcon->next = console_drivers->next; console_drivers->next = newcon; From e8cc2b97ca5aa12152c302b2f5d41d125e55ab1f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Feb 2020 10:57:23 +0200 Subject: [PATCH 05/37] lib/vsprintf: update comment about simple_strto() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 885e68e8b7b1 ("kernel.h: update comment about simple_strto() functions") updated a comment regard to simple_strto() functions, but missed similar change in the vsprintf.c module. Update comments in vsprintf.c as well for simple_strto() functions. Link: https://lore.kernel.org/r/20200221085723.42469-1-andriy.shevchenko@linux.intel.com Reported-by: Uwe Kleine-König Signed-off-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7c488a1ce318..d5641a217685 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -58,7 +58,7 @@ * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * - * This function is obsolete. Please use kstrtoull instead. + * This function has caveats. Please use kstrtoull instead. */ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { @@ -83,7 +83,7 @@ EXPORT_SYMBOL(simple_strtoull); * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * - * This function is obsolete. Please use kstrtoul instead. + * This function has caveats. Please use kstrtoul instead. */ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { @@ -97,7 +97,7 @@ EXPORT_SYMBOL(simple_strtoul); * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * - * This function is obsolete. Please use kstrtol instead. + * This function has caveats. Please use kstrtol instead. */ long simple_strtol(const char *cp, char **endp, unsigned int base) { @@ -114,7 +114,7 @@ EXPORT_SYMBOL(simple_strtol); * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use * - * This function is obsolete. Please use kstrtoll instead. + * This function has caveats. Please use kstrtoll instead. */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { From 493362dd7b9fe29758fb2976ee8de5efa520451a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 Apr 2020 06:15:56 +0200 Subject: [PATCH 06/37] EDAC/armada_xp: Fix some log messages Fix spelling (s/Aramda/Armada/) in a log message and in a comment. While at it, add a trailing '\n' in messages. Signed-off-by: Christophe JAILLET Signed-off-by: Borislav Petkov Reviewed-by: Jan Luebbe Link: https://lkml.kernel.org/r/20200413041556.3514-1-christophe.jaillet@wanadoo.fr --- drivers/edac/armada_xp_edac.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c index a7502ebe9bdc..e3e757513d1b 100644 --- a/drivers/edac/armada_xp_edac.c +++ b/drivers/edac/armada_xp_edac.c @@ -78,7 +78,7 @@ struct axp_mc_drvdata { char msg[128]; }; -/* derived from "DRAM Address Multiplexing" in the ARAMDA XP Functional Spec */ +/* derived from "DRAM Address Multiplexing" in the ARMADA XP Functional Spec */ static uint32_t axp_mc_calc_address(struct axp_mc_drvdata *drvdata, uint8_t cs, uint8_t bank, uint16_t row, uint16_t col) @@ -160,12 +160,12 @@ static void axp_mc_check(struct mem_ctl_info *mci) if (cnt_sbe) cnt_sbe--; else - dev_warn(mci->pdev, "inconsistent SBE count detected"); + dev_warn(mci->pdev, "inconsistent SBE count detected\n"); } else { if (cnt_dbe) cnt_dbe--; else - dev_warn(mci->pdev, "inconsistent DBE count detected"); + dev_warn(mci->pdev, "inconsistent DBE count detected\n"); } /* report earlier errors */ @@ -304,7 +304,7 @@ static int axp_mc_probe(struct platform_device *pdev) config = readl(base + SDRAM_CONFIG_REG); if (!(config & SDRAM_CONFIG_ECC_MASK)) { - dev_warn(&pdev->dev, "SDRAM ECC is not enabled"); + dev_warn(&pdev->dev, "SDRAM ECC is not enabled\n"); return -EINVAL; } @@ -532,9 +532,9 @@ static int aurora_l2_probe(struct platform_device *pdev) l2x0_aux_ctrl = readl(base + L2X0_AUX_CTRL); if (!(l2x0_aux_ctrl & AURORA_ACR_PARITY_EN)) - dev_warn(&pdev->dev, "tag parity is not enabled"); + dev_warn(&pdev->dev, "tag parity is not enabled\n"); if (!(l2x0_aux_ctrl & AURORA_ACR_ECC_EN)) - dev_warn(&pdev->dev, "data ECC is not enabled"); + dev_warn(&pdev->dev, "data ECC is not enabled\n"); dci = edac_device_alloc_ctl_info(sizeof(*drvdata), "cpu", 1, "L", 1, 2, NULL, 0, 0); @@ -618,7 +618,7 @@ static int __init armada_xp_edac_init(void) res = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (res) - pr_warn("Aramda XP EDAC drivers fail to register\n"); + pr_warn("Armada XP EDAC drivers fail to register\n"); return 0; } From 87a4eca891b6646c05fb6cf4ee026ad621680b10 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 9 Apr 2020 17:32:59 +0800 Subject: [PATCH 07/37] EDAC/xgene: Remove set but not used address local var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following gcc warning: drivers/edac/xgene_edac.c:1486:7: warning: variable ‘address’ set but not used [-Wunused-but-set-variable] u32 address; ^~~~~~~ Remove the unused macro RBERRADDR_RD while at it. Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200409093259.20069-1-yanaijie@huawei.com --- drivers/edac/xgene_edac.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index e4a1032ba0b5..1d2c27a00a4a 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1349,7 +1349,6 @@ static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3) #define WORD_ALIGNED_ERR_MASK BIT(28) #define PAGE_ACCESS_ERR_MASK BIT(27) #define WRITE_ACCESS_MASK BIT(26) -#define RBERRADDR_RD(src) ((src) & 0x03FFFFFF) static const char * const soc_mem_err_v1[] = { "10GbE0", @@ -1483,13 +1482,11 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev) return; if (reg & STICKYERR_MASK) { bool write; - u32 address; dev_err(edac_dev->dev, "IOB bus access error(s)\n"); if (regmap_read(ctx->edac->rb_map, RBEIR, ®)) return; write = reg & WRITE_ACCESS_MASK ? 1 : 0; - address = RBERRADDR_RD(reg); if (reg & AGENT_OFFLINE_ERR_MASK) dev_err(edac_dev->dev, "IOB bus %s access to offline agent error\n", From d9976bc155b11a0875a361e656255c64964d8090 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 13 Apr 2020 13:52:55 +0200 Subject: [PATCH 08/37] MAINTAINERS: Remove sifive_l2_cache.c from EDAC-SIFIVE pattern Commit 9209fb51896f ("riscv: move sifive_l2_cache.c to drivers/soc") moved arch/riscv/mm/sifive_l2_cache.c to drivers/soc/sifive/sifive_l2_cache.c and adjusted the MAINTAINERS EDAC-SIFIVE entry but slipped in a mistake. Since then, ./scripts/get_maintainer.pl --self-test complains: warning: no file matches F: drivers/soc/sifive_l2_cache.c Boris suggested that sifive_l2_cache.c is considered part of the SIFIVE DRIVERS, not part of EDAC-SIFIVE. So simply drop this entry, and by the sifive keyword pattern in SIFIVE PATTERNS, it is automatically part of the SIFIVE DRIVERS. Suggested-by: Borislav Petkov Co-developed-by: Sebastian Duda Signed-off-by: Sebastian Duda Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200413115255.7100-1-lukas.bulwahn@gmail.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..63839a920e1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6172,7 +6172,6 @@ M: Yash Shah L: linux-edac@vger.kernel.org S: Supported F: drivers/edac/sifive_edac.c -F: drivers/soc/sifive_l2_cache.c EDAC-SKYLAKE M: Tony Luck From 9adbf3c609af92a57a73000a3cb8f4c2d307dfa3 Mon Sep 17 00:00:00 2001 From: Mihai Carabas Date: Tue, 21 Apr 2020 22:28:38 +0300 Subject: [PATCH 09/37] x86/microcode: Fix return value for microcode late loading The return value from stop_machine() might not be consistent. stop_machine_cpuslocked() returns: - zero if all functions have returned 0. - a non-zero value if at least one of the functions returned a non-zero value. There is no way to know if it is negative or positive. So make __reload_late() return 0 on success or negative otherwise. [ bp: Unify ret val check and touch up. ] Signed-off-by: Mihai Carabas Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/1587497318-4438-1-git-send-email-mihai.carabas@oracle.com --- arch/x86/kernel/cpu/microcode/core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7019d4b2df0c..baec68b7e010 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout) /* * Returns: * < 0 - on error - * 0 - no update done - * 1 - microcode was updated + * 0 - success (no update done or microcode was updated) */ static int __reload_late(void *info) { @@ -573,11 +572,11 @@ static int __reload_late(void *info) else goto wait_for_siblings; - if (err > UCODE_NFOUND) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); + if (err >= UCODE_NFOUND) { + if (err == UCODE_ERROR) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; - } else if (err == UCODE_UPDATED || err == UCODE_OK) { - ret = 1; } wait_for_siblings: @@ -608,7 +607,7 @@ static int microcode_reload_late(void) atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret > 0) + if (ret == 0) microcode_check(); pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode); @@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev, put: put_online_cpus(); - if (ret >= 0) + if (ret == 0) ret = size; return ret; From 58d66175d4615097f3832e5733c64b9a3536ccec Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 14:52:24 +0800 Subject: [PATCH 10/37] EDAC/thunderx: Make symbols static Make a couple of symbols static, as reported by sparse. [ bp: Massage. ] Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/1587624744-97240-1-git-send-email-zou_wei@huawei.com --- drivers/edac/thunderx_edac.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index 34be60fe6892..4af9744cc6d0 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1278,7 +1278,7 @@ OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); -struct debugfs_entry *ocx_dfs_ents[] = { +static struct debugfs_entry *ocx_dfs_ents[] = { &debugfs_tlk0_ecc_ctl, &debugfs_tlk1_ecc_ctl, &debugfs_tlk2_ecc_ctl, @@ -1919,19 +1919,19 @@ err_free: L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); -struct debugfs_entry *l2c_tad_dfs_ents[] = { +static struct debugfs_entry *l2c_tad_dfs_ents[] = { &debugfs_tad_int, }; L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); -struct debugfs_entry *l2c_cbc_dfs_ents[] = { +static struct debugfs_entry *l2c_cbc_dfs_ents[] = { &debugfs_cbc_int, }; L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); -struct debugfs_entry *l2c_mci_dfs_ents[] = { +static struct debugfs_entry *l2c_mci_dfs_ents[] = { &debugfs_mci_int, }; From b2f9fb0d671f31e8af410a057eb429af3c17ec66 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:50:06 +0800 Subject: [PATCH 11/37] EDAC/amd8131: Remove defined but not used bridge_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following gcc warning: drivers/edac/amd8131_edac.c:47:21: warning: ‘bridge_str’ defined but not used [-Wunused-const-variable=] static char * const bridge_str[] = { ^~~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Borislav Petkov Reviewed-by: Robert Richter Link: https://lkml.kernel.org/r/20200415085006.6732-1-yanaijie@huawei.com --- drivers/edac/amd8131_edac.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c index 93c82bc17493..169353710982 100644 --- a/drivers/edac/amd8131_edac.c +++ b/drivers/edac/amd8131_edac.c @@ -44,14 +44,6 @@ static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) " PCI Access Write Error at 0x%x\n", reg); } -static char * const bridge_str[] = { - [NORTH_A] = "NORTH A", - [NORTH_B] = "NORTH B", - [SOUTH_A] = "SOUTH A", - [SOUTH_B] = "SOUTH B", - [NO_BRIDGE] = "NO BRIDGE", -}; - /* Support up to two AMD8131 chipsets on a platform */ static struct amd8131_dev_info amd8131_devices[] = { { From ee5340abab3babb91c1807cea47de4468b2dfc91 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 24 Apr 2020 20:18:14 +0800 Subject: [PATCH 12/37] EDAC, {skx,i10nm}: Make some configurations CPU model specific The device ID for configuration agent PCI device and the offset for bus number configuration register can be CPU model specific. So add a new structure res_config to make them configurable and pass res_config to {skx,i10nm}_init() and skx_get_all_bus_mappings() for use. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/r/20200427083246.GB11036@zn.tnic --- drivers/edac/i10nm_base.c | 17 +++++++++++++---- drivers/edac/skx_base.c | 13 +++++++++++-- drivers/edac/skx_common.c | 11 +++++------ drivers/edac/skx_common.h | 11 +++++++++-- 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index df08de963d10..20859dcf5239 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -122,10 +122,16 @@ static int i10nm_get_all_munits(void) return 0; } +static struct res_config i10nm_cfg = { + .type = I10NM, + .decs_did = 0x3452, + .busno_cfg_offset = 0xcc, +}; + static const struct x86_cpu_id i10nm_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, NULL), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &i10nm_cfg), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &i10nm_cfg), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &i10nm_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -234,6 +240,7 @@ static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; const struct x86_cpu_id *id; + struct res_config *cfg; const char *owner; struct skx_dev *d; int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; @@ -249,11 +256,13 @@ static int __init i10nm_init(void) if (!id) return -ENODEV; + cfg = (struct res_config *)id->driver_data; + rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); if (rc) return rc; - rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list); + rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list); if (rc < 0) goto fail; if (rc == 0) { diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 46a3a3440f5e..94c942fd06c1 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -157,8 +157,14 @@ fail: return -ENODEV; } +static struct res_config skx_cfg = { + .type = SKX, + .decs_did = 0x2016, + .busno_cfg_offset = 0xcc, +}; + static const struct x86_cpu_id skx_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_cfg), { } }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); @@ -641,6 +647,7 @@ static inline void teardown_skx_debug(void) {} static int __init skx_init(void) { const struct x86_cpu_id *id; + struct res_config *cfg; const struct munit *m; const char *owner; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; @@ -657,11 +664,13 @@ static int __init skx_init(void) if (!id) return -ENODEV; + cfg = (struct res_config *)id->driver_data; + rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm); if (rc) return rc; - rc = skx_get_all_bus_mappings(0x2016, 0xcc, SKX, &skx_edac_list); + rc = skx_get_all_bus_mappings(cfg, &skx_edac_list); if (rc < 0) goto fail; if (rc == 0) { diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 99bbaf629b8d..abc9ddd2b7d1 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -197,12 +197,11 @@ static int get_width(u32 mtr) } /* - * We use the per-socket device @did to count how many sockets are present, + * We use the per-socket device @cfg->did to count how many sockets are present, * and to detemine which PCI buses are associated with each socket. Allocate * and build the full list of all the skx_dev structures that we need here. */ -int skx_get_all_bus_mappings(unsigned int did, int off, enum type type, - struct list_head **list) +int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list) { struct pci_dev *pdev, *prev; struct skx_dev *d; @@ -211,7 +210,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type, prev = NULL; for (;;) { - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, prev); + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev); if (!pdev) break; ndev++; @@ -221,7 +220,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type, return -ENOMEM; } - if (pci_read_config_dword(pdev, off, ®)) { + if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, ®)) { kfree(d); pci_dev_put(pdev); skx_printk(KERN_ERR, "Failed to read bus idx\n"); @@ -230,7 +229,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type type, d->bus[0] = GET_BITFIELD(reg, 0, 7); d->bus[1] = GET_BITFIELD(reg, 8, 15); - if (type == SKX) { + if (cfg->type == SKX) { d->seg = pci_domain_nr(pdev->bus); d->bus[2] = GET_BITFIELD(reg, 16, 23); d->bus[3] = GET_BITFIELD(reg, 24, 31); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 60d1ea669afd..19dd8c099520 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -112,6 +112,14 @@ struct decoded_addr { int bank_group; }; +struct res_config { + enum type type; + /* Configuration agent device ID */ + unsigned int decs_did; + /* Default bus number configuration register offset */ + int busno_cfg_offset; +}; + typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci); typedef bool (*skx_decode_f)(struct decoded_addr *res); typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len); @@ -123,8 +131,7 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); -int skx_get_all_bus_mappings(unsigned int did, int off, enum type, - struct list_head **list); +int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); From ce20670828c1228ecd37befbdda87a1f87a803b9 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 24 Apr 2020 20:18:33 +0800 Subject: [PATCH 13/37] EDAC/i10nm: Update driver to support different bus number config register offsets The i10nm_edac driver failed to load on Ice Lake and Tremont/Jacobsville servers if their CPU stepping >= 4 and failed on Ice Lake-D servers from stepping 0. The root cause was that for Ice Lake and Tremont/Jacobsville servers with CPU stepping >=4, the offset for bus number configuration register was updated from 0xcc to 0xd0. For Ice Lake-D servers, all the steppings use the updated 0xd0 offset. Fix the issue by using the appropriate offset for bus number configuration register according to the CPU model number and stepping. Reported-by: Jerry Chen Reported-and-tested-by: Jin Wen Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Reviewed-by: Borislav Petkov Link: https://lore.kernel.org/linux-edac/20200427084022.GC11036@zn.tnic --- drivers/edac/i10nm_base.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 20859dcf5239..a6bc54b02de4 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -122,16 +122,22 @@ static int i10nm_get_all_munits(void) return 0; } -static struct res_config i10nm_cfg = { +static struct res_config i10nm_cfg0 = { .type = I10NM, .decs_did = 0x3452, .busno_cfg_offset = 0xcc, }; +static struct res_config i10nm_cfg1 = { + .type = I10NM, + .decs_did = 0x3452, + .busno_cfg_offset = 0xd0, +}; + static const struct x86_cpu_id i10nm_cpuids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &i10nm_cfg), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &i10nm_cfg), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &i10nm_cfg), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &i10nm_cfg0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &i10nm_cfg0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &i10nm_cfg1), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); @@ -258,6 +264,10 @@ static int __init i10nm_init(void) cfg = (struct res_config *)id->driver_data; + /* Newer steppings have different offset for ATOM_TREMONT_D/ICELAKE_X */ + if (boot_cpu_data.x86_stepping >= 4) + cfg->busno_cfg_offset = 0xd0; + rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); if (rc) return rc; From 8dd97c65185c5a63c668e5bd8a861c04f47a35ed Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:12 -0700 Subject: [PATCH 14/37] x86/resctrl: Rename asm/resctrl_sched.h to asm/resctrl.h asm/resctrl_sched.h is dedicated to the code used for configuration of the CPU resource control state when a task is scheduled. Rename resctrl_sched.h to resctrl.h in preparation of additions that will no longer make this file dedicated to work done during scheduling. No functional change. Suggested-by: Borislav Petkov Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/6914e0ef880b539a82a6d889f9423496d471ad1d.1588715690.git.reinette.chatre@intel.com --- MAINTAINERS | 2 +- arch/x86/include/asm/{resctrl_sched.h => resctrl.h} | 6 +++--- arch/x86/kernel/cpu/resctrl/core.c | 2 +- arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) rename arch/x86/include/asm/{resctrl_sched.h => resctrl.h} (96%) diff --git a/MAINTAINERS b/MAINTAINERS index 2926327e4976..ecfb07ab49b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14227,7 +14227,7 @@ M: Reinette Chatre L: linux-kernel@vger.kernel.org S: Supported F: Documentation/x86/resctrl* -F: arch/x86/include/asm/resctrl_sched.h +F: arch/x86/include/asm/resctrl.h F: arch/x86/kernel/cpu/resctrl/ F: tools/testing/selftests/resctrl/ diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h similarity index 96% rename from arch/x86/include/asm/resctrl_sched.h rename to arch/x86/include/asm/resctrl.h index f6b7fe2833cc..99b5728e441d 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_RESCTRL_SCHED_H -#define _ASM_X86_RESCTRL_SCHED_H +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H #ifdef CONFIG_X86_CPU_RESCTRL @@ -90,4 +90,4 @@ static inline void resctrl_sched_in(void) {} #endif /* CONFIG_X86_CPU_RESCTRL */ -#endif /* _ASM_X86_RESCTRL_SCHED_H */ +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index d8cc5223b7ce..6f38c88226af 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include "internal.h" /* Mutex to protect rdtgroup access. */ diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index d7623e1b927d..4bd28b388a1a 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include "../../events/perf_event.h" /* For X86_CONFIG() */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 5a359d9fcc05..6276ae015945 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -29,7 +29,7 @@ #include -#include +#include #include "internal.h" DEFINE_STATIC_KEY_FALSE(rdt_enable_key); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 954b013cc585..538d4e8d6589 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include #include "process.h" diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5ef9d8f25b0e..0c169a5687e1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include #include #ifdef CONFIG_IA32_EMULATION From 0118ad82c2a64ebcf15d7565ed35361407efadfa Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:13 -0700 Subject: [PATCH 15/37] x86/cpu: Move resctrl CPUID code to resctrl/ The function determining a platform's support and properties of cache occupancy and memory bandwidth monitoring (properties of X86_FEATURE_CQM_LLC) can be found among the common CPU code. After the feature's properties is populated in the per-CPU data the resctrl subsystem is the only consumer (via boot_cpu_data). Move the function that obtains the CPU information used by resctrl to the resctrl subsystem and rename it from init_cqm() to resctrl_cpu_detect(). The function continues to be called from the common CPU code. This move is done in preparation of the addition of some vendor specific code. No functional change. Suggested-by: Borislav Petkov Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/38433b99f9d16c8f4ee796f8cc42b871531fa203.1588715690.git.reinette.chatre@intel.com --- arch/x86/include/asm/resctrl.h | 3 +++ arch/x86/kernel/cpu/common.c | 27 ++------------------------- arch/x86/kernel/cpu/resctrl/core.c | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 99b5728e441d..07603064df8f 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -84,9 +84,12 @@ static inline void resctrl_sched_in(void) __resctrl_sched_in(); } +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + #else static inline void resctrl_sched_in(void) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} #endif /* CONFIG_X86_CPU_RESCTRL */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bed0cb83fe24..a8f0f22ee5c1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "cpu.h" @@ -854,30 +855,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } -static void init_cqm(struct cpuinfo_x86 *c) -{ - if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - return; - } - - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = cpuid_ebx(0xf); - - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || - cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || - cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { - u32 eax, ebx, ecx, edx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); - - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } -} - void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -945,7 +922,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); - init_cqm(c); + resctrl_cpu_detect(c); /* * Clear/Set all flags overridden by options, after probe. diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 6f38c88226af..861c6d1ba9ab 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -958,6 +958,30 @@ static __init void rdt_init_res_defs(void) static enum cpuhp_state rdt_online; +void resctrl_cpu_detect(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } +} + static int __init resctrl_late_init(void) { struct rdt_resource *r; From f0d339db56478e3bcd98d5e985d3d69cacf27549 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:14 -0700 Subject: [PATCH 16/37] x86/resctrl: Remove unnecessary RMID checks The cache and memory bandwidth monitoring properties are read using CPUID on every CPU. After the information is read from the system a sanity check is run to (1) ensure that the RMID data is initialized for the boot CPU in case the information was not available on the boot CPU and (2) the boot CPU's RMID is set to the minimum of RMID obtained from all CPUs. Every known platform that supports resctrl has the same maximum RMID on all CPUs. Both sanity checks found in x86_init_cache_qos() can thus safely be removed. Suggested-by: Borislav Petkov Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/c9a3b60d34091840c8b0bd1c6fab15e5ba92cb17.1588715690.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/common.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8f0f22ee5c1..556a96d05a6c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1354,20 +1354,6 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -static void x86_init_cache_qos(struct cpuinfo_x86 *c) -{ - /* - * The heavy lifting of max_rmid and cache_occ_scale are handled - * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu - * in case CQM bits really aren't there in this CPU. - */ - if (c != &boot_cpu_data) { - boot_cpu_data.x86_cache_max_rmid = - min(boot_cpu_data.x86_cache_max_rmid, - c->x86_cache_max_rmid); - } -} - /* * Validate that ACPI/mptables have the same information about the * effective APIC id and update the package map. @@ -1480,7 +1466,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif x86_init_rdrand(c); - x86_init_cache_qos(c); setup_pku(c); /* From 923f3a2b48bdccb6a1d1f0dd48de03de7ad936d9 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:15 -0700 Subject: [PATCH 17/37] x86/resctrl: Query LLC monitoring properties once during boot Cache and memory bandwidth monitoring are features that are part of x86 CPU resource control that is supported by the resctrl subsystem. The monitoring properties are obtained via CPUID from every CPU and only used within the resctrl subsystem where the properties are only read from boot_cpu_data. Obtain the monitoring properties once, placed in boot_cpu_data, via the ->c_bsp_init() helpers of the vendors that support X86_FEATURE_CQM_LLC. Suggested-by: Borislav Petkov Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/6d74a6ac3e69f4b7a8b4115835f9455faf0f468d.1588715690.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/amd.c | 3 +++ arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/cpu/intel.c | 7 +++++++ arch/x86/kernel/cpu/resctrl/core.c | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 547ad7bbf0e0..c36e89930965 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 # include @@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } + + resctrl_cpu_detect(c); } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 556a96d05a6c..d07809286b95 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -56,7 +56,6 @@ #include #include #include -#include #include "cpu.h" @@ -922,7 +921,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); - resctrl_cpu_detect(c); /* * Clear/Set all flags overridden by options, after probe. diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a19a680542ce..166d7c355896 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c) detect_ht_early(c); } +static void bsp_init_intel(struct cpuinfo_x86 *c) +{ + resctrl_cpu_detect(c); +} + #ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 @@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = { #endif .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, + .c_bsp_init = bsp_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 861c6d1ba9ab..d5979073301e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -958,6 +958,7 @@ static __init void rdt_init_res_defs(void) static enum cpuhp_state rdt_online; +/* Runs once on the BSP during boot. */ void resctrl_cpu_detect(struct cpuinfo_x86 *c) { if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { From 46637d4570e108d1f6721cfa2cca1d078882761a Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:16 -0700 Subject: [PATCH 18/37] x86/resctrl: Maintain MBM counter width per resource The original Memory Bandwidth Monitoring (MBM) architectural definition defines counters of up to 62 bits in the IA32_QM_CTR MSR, and the first-generation MBM implementation uses 24 bit counters. Software is required to poll at 1 second or faster to ensure that data is retrieved before a counter rollover occurs more than once under worst conditions. As system bandwidths scale the software requirement is maintained with the introduction of a per-resource enumerable MBM counter width. In preparation for supporting hardware with an enumerable MBM counter width the current globally static MBM counter width is moved to a per-resource MBM counter width. Currently initialized to 24 always to result in no functional change. In essence there is one function, mbm_overflow_count() that needs to know the counter width to handle rollovers. The static value used within mbm_overflow_count() will be replaced with a value discovered from the hardware. Support for learning the MBM counter width from hardware is added in the change that follows. Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/e36743b9800f16ce600f86b89127391f61261f23.1588715690.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 8 +++++--- arch/x86/kernel/cpu/resctrl/internal.h | 7 +++++-- arch/x86/kernel/cpu/resctrl/monitor.c | 21 +++++++++++++-------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 055c8613b531..934c8fb8a64a 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, return ret; } -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first) +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first) { /* * setup the parameters to send to the IPI to read the data. */ rr->rgrp = rdtgrp; rr->evtid = evtid; + rr->r = r; rr->d = d; rr->val = 0; rr->first = first; @@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) goto out; } - mon_event_read(&rr, d, rdtgrp, evtid, false); + mon_event_read(&rr, r, d, rdtgrp, evtid, false); if (rr.val & RMID_VAL_ERROR) seq_puts(m, "Error\n"); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 3dd13f3a8b23..58b002c31655 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -87,6 +87,7 @@ union mon_data_bits { struct rmid_read { struct rdtgroup *rgrp; + struct rdt_resource *r; struct rdt_domain *d; int evtid; bool first; @@ -460,6 +461,7 @@ struct rdt_resource { struct list_head evt_list; int num_rmid; unsigned int mon_scale; + unsigned int mbm_width; unsigned long fflags; }; @@ -587,8 +589,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id); void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, struct rdt_domain *d); -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first); +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first); void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 773124b0e18a..df964c03f6c6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -214,9 +214,9 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) { - u64 shift = 64 - MBM_CNTR_WIDTH, chunks; + u64 shift = 64 - width, chunks; chunks = (cur_msr << shift) - (prev_msr << shift); return chunks >>= shift; @@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval); + chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); m->chunks += chunks; m->prev_msr = tval; @@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) return; - chunks = mbm_overflow_count(m->prev_bw_msr, tval); + chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); m->chunks_bw += chunks; m->chunks = m->chunks_bw; cur_bw = (chunks * r->mon_scale) >> 20; @@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) } } -static void mbm_update(struct rdt_domain *d, int rmid) +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) { struct rmid_read rr; rr.first = false; + rr.r = r; rr.d = d; /* @@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work) struct rdtgroup *prgrp, *crgrp; int cpu = smp_processor_id(); struct list_head *head; + struct rdt_resource *r; struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); @@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work) if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; - d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); + r = &rdt_resources_all[RDT_RESOURCE_L3]; + + d = get_domain_from_cpu(cpu, r); if (!d) goto out_unlock; list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(d, prgrp->mon.rmid); + mbm_update(r, d, prgrp->mon.rmid); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(d, crgrp->mon.rmid); + mbm_update(r, d, crgrp->mon.rmid); if (is_mba_sc(NULL)) update_mba_bw(prgrp, d); @@ -619,6 +623,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) r->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + r->mbm_width = MBM_CNTR_WIDTH; /* * A reasonable upper limit on the max threshold is the number diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6276ae015945..d7cb5ab0d1f0 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2472,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, goto out_destroy; if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, d, prgrp, mevt->evtid, true); + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } kernfs_activate(kn); return 0; From f3d44f18b0662327c42128b9d3604489bdb6e36f Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:17 -0700 Subject: [PATCH 19/37] x86/resctrl: Support CPUID enumeration of MBM counter width The original Memory Bandwidth Monitoring (MBM) architectural definition defines counters of up to 62 bits in the IA32_QM_CTR MSR while the first-generation MBM implementation uses statically defined 24 bit counters. Expand the MBM CPUID enumeration properties to include the MBM counter width. The previously undefined EAX output register contains, in bits [7:0], the MBM counter width encoded as an offset from 24 bits. Enumerating this property is only specified for Intel CPUs. Suggested-by: Borislav Petkov Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/afa3af2f753f6bc301fb743bc8944e749cb24afa.1588715690.git.reinette.chatre@intel.com --- arch/x86/include/asm/processor.h | 3 ++- arch/x86/kernel/cpu/resctrl/core.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3bcf27caf6c9..c4e8fd709cf6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -113,9 +113,10 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index d5979073301e..12f967c6b603 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -964,6 +964,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { c->x86_cache_max_rmid = -1; c->x86_cache_occ_scale = -1; + c->x86_cache_mbm_width_offset = -1; return; } @@ -980,6 +981,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; + if (c->x86_vendor == X86_VENDOR_INTEL) + c->x86_cache_mbm_width_offset = eax & 0xff; + else + c->x86_cache_mbm_width_offset = -1; } } From 0c4d5ba1b998e713815b7790d3db6ced0ae49489 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Tue, 5 May 2020 15:36:18 -0700 Subject: [PATCH 20/37] x86/resctrl: Support wider MBM counters The original Memory Bandwidth Monitoring (MBM) architectural definition defines counters of up to 62 bits in the IA32_QM_CTR MSR while the first-generation MBM implementation uses statically defined 24 bit counters. The MBM CPUID enumeration properties have been expanded to include the MBM counter width, encoded as an offset from 24 bits. While eight bits are available for the counter width offset IA32_QM_CTR MSR only supports 62 bit counters. Add a sanity check, with warning printed when encountered, to ensure counters cannot exceed the 62 bit limit. Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/69d52abd5b14794d3a0f05ba7c755ed1f4c0d5ed.1588715690.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/internal.h | 8 +++++++- arch/x86/kernel/cpu/resctrl/monitor.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 58b002c31655..f20a47d120b1 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -31,7 +31,7 @@ #define CQM_LIMBOCHECK_INTERVAL 1000 -#define MBM_CNTR_WIDTH 24 +#define MBM_CNTR_WIDTH_BASE 24 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 @@ -40,6 +40,12 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) +/* + * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for + * data to be returned. The counter width is discovered from the hardware + * as an offset from MBM_CNTR_WIDTH_BASE. + */ +#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) struct rdt_fs_context { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index df964c03f6c6..837d7d012b7b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -618,12 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { + unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; r->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; - r->mbm_width = MBM_CNTR_WIDTH; + r->mbm_width = MBM_CNTR_WIDTH_BASE; + + if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) + r->mbm_width += mbm_offset; + else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) + pr_warn("Ignoring impossible MBM counter offset\n"); /* * A reasonable upper limit on the max threshold is the number From 4fdd88877e5259dcc5e504dca449acc0324d71b9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:49:36 +0900 Subject: [PATCH 21/37] kprobes: Lock kprobe_mutex while showing kprobe_blacklist Lock kprobe_mutex while showing kprobe_blacklist to prevent updating the kprobe_blacklist. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.571125195@linutronix.de --- kernel/kprobes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2625c241ac00..570d60827656 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2420,6 +2420,7 @@ static const struct file_operations debugfs_kprobes_operations = { /* kprobes/blacklist -- shows which functions can not be probed */ static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&kprobe_mutex); return seq_list_start(&kprobe_blacklist, *pos); } @@ -2446,10 +2447,15 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) return 0; } +static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v) +{ + mutex_unlock(&kprobe_mutex); +} + static const struct seq_operations kprobe_blacklist_seq_ops = { .start = kprobe_blacklist_seq_start, .next = kprobe_blacklist_seq_next, - .stop = kprobe_seq_stop, /* Reuse void function */ + .stop = kprobe_blacklist_seq_stop, .show = kprobe_blacklist_seq_show, }; From 1e6769b0aece51ea7a3dc3117c37d4a5669e4a21 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:49:48 +0900 Subject: [PATCH 22/37] kprobes: Support __kprobes blacklist in modules Support __kprobes attribute for blacklist functions in modules. The __kprobes attribute functions are stored in .kprobes.text section. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.678201813@linutronix.de --- include/linux/module.h | 4 ++++ kernel/kprobes.c | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/module.c | 4 ++++ 3 files changed, 50 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 1ad393e62bef..369c354f9207 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -489,6 +489,10 @@ struct module { unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_KPROBES + void *kprobes_text_start; + unsigned int kprobes_text_size; +#endif #ifdef CONFIG_LIVEPATCH bool klp; /* Is this a livepatch module? */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 570d60827656..b7549992b9bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2179,6 +2179,19 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end) return 0; } +/* Remove all symbols in given area from kprobe blacklist */ +static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) +{ + struct kprobe_blacklist_entry *ent, *n; + + list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) { + if (ent->start_addr < start || ent->start_addr >= end) + continue; + list_del(&ent->list); + kfree(ent); + } +} + int __init __weak arch_populate_kprobe_blacklist(void) { return 0; @@ -2215,6 +2228,28 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return ret ? : arch_populate_kprobe_blacklist(); } +static void add_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_add_area_blacklist(start, end); + } +} + +static void remove_module_kprobe_blacklist(struct module *mod) +{ + unsigned long start, end; + + start = (unsigned long)mod->kprobes_text_start; + if (start) { + end = start + mod->kprobes_text_size; + kprobe_remove_area_blacklist(start, end); + } +} + /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2225,6 +2260,11 @@ static int kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); + if (val == MODULE_STATE_COMING) { + mutex_lock(&kprobe_mutex); + add_module_kprobe_blacklist(mod); + mutex_unlock(&kprobe_mutex); + } if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2255,6 +2295,8 @@ static int kprobes_module_callback(struct notifier_block *nb, kill_kprobe(p); } } + if (val == MODULE_STATE_GOING) + remove_module_kprobe_blacklist(mod); mutex_unlock(&kprobe_mutex); return NOTIFY_DONE; } diff --git a/kernel/module.c b/kernel/module.c index 646f1e2330d2..978f3fa40850 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3193,6 +3193,10 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->ei_funcs = section_objs(info, "_error_injection_whitelist", sizeof(*mod->ei_funcs), &mod->num_ei_funcs); +#endif +#ifdef CONFIG_KPROBES + mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, + &mod->kprobes_text_size); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); From 16db6264c93d2d7df9eb8be5d9eb717ab30105fe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:50:00 +0900 Subject: [PATCH 23/37] kprobes: Support NOKPROBE_SYMBOL() in modules Support NOKPROBE_SYMBOL() in modules. NOKPROBE_SYMBOL() records only symbol address in "_kprobe_blacklist" section in the module. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.771170126@linutronix.de --- include/linux/module.h | 2 ++ kernel/kprobes.c | 17 +++++++++++++++++ kernel/module.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 369c354f9207..1192097c9a69 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -492,6 +492,8 @@ struct module { #ifdef CONFIG_KPROBES void *kprobes_text_start; unsigned int kprobes_text_size; + unsigned long *kprobe_blacklist; + unsigned int num_kprobe_blacklist; #endif #ifdef CONFIG_LIVEPATCH diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b7549992b9bd..9eb5acf0a9f3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2192,6 +2192,11 @@ static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end) } } +static void kprobe_remove_ksym_blacklist(unsigned long entry) +{ + kprobe_remove_area_blacklist(entry, entry + 1); +} + int __init __weak arch_populate_kprobe_blacklist(void) { return 0; @@ -2231,6 +2236,12 @@ static int __init populate_kprobe_blacklist(unsigned long *start, static void add_module_kprobe_blacklist(struct module *mod) { unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]); + } start = (unsigned long)mod->kprobes_text_start; if (start) { @@ -2242,6 +2253,12 @@ static void add_module_kprobe_blacklist(struct module *mod) static void remove_module_kprobe_blacklist(struct module *mod) { unsigned long start, end; + int i; + + if (mod->kprobe_blacklist) { + for (i = 0; i < mod->num_kprobe_blacklist; i++) + kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]); + } start = (unsigned long)mod->kprobes_text_start; if (start) { diff --git a/kernel/module.c b/kernel/module.c index 978f3fa40850..faf733789560 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3197,6 +3197,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) #ifdef CONFIG_KPROBES mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1, &mod->kprobes_text_size); + mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist", + sizeof(unsigned long), + &mod->num_kprobe_blacklist); #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); From d85eaa9411472a99de4b5732cb59c8bae629d5f1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Mar 2020 23:50:11 +0900 Subject: [PATCH 24/37] samples/kprobes: Add __kprobes and NOKPROBE_SYMBOL() for handlers. Add __kprobes and NOKPROBE_SYMBOL() for sample kprobe handlers. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134059.878578033@linutronix.de --- samples/kprobes/kprobe_example.c | 6 ++++-- samples/kprobes/kretprobe_example.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index d693c23a85e8..501911d1b327 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -25,7 +25,7 @@ static struct kprobe kp = { }; /* kprobe pre_handler: called just before the probed instruction is executed */ -static int handler_pre(struct kprobe *p, struct pt_regs *regs) +static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) { #ifdef CONFIG_X86 pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n", @@ -54,7 +54,7 @@ static int handler_pre(struct kprobe *p, struct pt_regs *regs) } /* kprobe post_handler: called after the probed instruction is executed */ -static void handler_post(struct kprobe *p, struct pt_regs *regs, +static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags) { #ifdef CONFIG_X86 @@ -90,6 +90,8 @@ static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) /* Return 0 because we don't handle the fault. */ return 0; } +/* NOKPROBE_SYMBOL() is also available */ +NOKPROBE_SYMBOL(handler_fault); static int __init kprobe_init(void) { diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 186315ca88b3..013e8e6ebae9 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -48,6 +48,7 @@ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) data->entry_stamp = ktime_get(); return 0; } +NOKPROBE_SYMBOL(entry_handler); /* * Return-probe handler: Log the return value and duration. Duration may turn @@ -67,6 +68,7 @@ static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) func_name, retval, (long long)delta); return 0; } +NOKPROBE_SYMBOL(ret_handler); static struct kretprobe my_kretprobe = { .handler = ret_handler, From 6377a38bd345b7f3b664c43ebb647ae55c1166e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 May 2020 12:21:17 -0700 Subject: [PATCH 25/37] fs-verity: fix all kerneldoc warnings Fix all kerneldoc warnings in fs/verity/ and include/linux/fsverity.h. Most of these were due to missing documentation for function parameters. Detected with: scripts/kernel-doc -v -none fs/verity/*.{c,h} include/linux/fsverity.h This cleanup makes it possible to check new patches for kerneldoc warnings without having to filter out all the existing ones. Link: https://lore.kernel.org/r/20200511192118.71427-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/enable.c | 2 ++ fs/verity/fsverity_private.h | 2 +- fs/verity/measure.c | 2 ++ fs/verity/open.c | 1 + fs/verity/signature.c | 3 +++ fs/verity/verify.c | 3 +++ include/linux/fsverity.h | 3 +++ 7 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index d98bea308fd7..5ab3bbec8108 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -329,6 +329,8 @@ rollback: /** * fsverity_ioctl_enable() - enable verity on a file + * @filp: file to enable verity on + * @uarg: user pointer to fsverity_enable_arg * * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of * Documentation/filesystems/fsverity.rst for the documentation. diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 74768cf539da..8788f3a5035c 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -61,7 +61,7 @@ struct merkle_tree_params { u64 level_start[FS_VERITY_MAX_LEVELS]; }; -/** +/* * fsverity_info - cached verity metadata for an inode * * When a verity file is first opened, an instance of this struct is allocated diff --git a/fs/verity/measure.c b/fs/verity/measure.c index 05049b68c745..df409a5682ed 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -11,6 +11,8 @@ /** * fsverity_ioctl_measure() - get a verity file's measurement + * @filp: file to get measurement of + * @_uarg: user pointer to fsverity_digest * * Retrieve the file measurement that the kernel is enforcing for reads from a * verity file. See the "FS_IOC_MEASURE_VERITY" section of diff --git a/fs/verity/open.c b/fs/verity/open.c index c5fe6948e262..d007db0c9304 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -330,6 +330,7 @@ EXPORT_SYMBOL_GPL(fsverity_prepare_setattr); /** * fsverity_cleanup_inode() - free the inode's verity info, if present + * @inode: an inode being evicted * * Filesystems must call this on inode eviction to free ->i_verity_info. */ diff --git a/fs/verity/signature.c b/fs/verity/signature.c index c8b255232de5..b14ed96387ec 100644 --- a/fs/verity/signature.c +++ b/fs/verity/signature.c @@ -28,6 +28,9 @@ static struct key *fsverity_keyring; /** * fsverity_verify_signature() - check a verity file's signature + * @vi: the file's fsverity_info + * @desc: the file's fsverity_descriptor + * @desc_size: size of @desc * * If the file's fs-verity descriptor includes a signature of the file * measurement, verify it against the certificates in the fs-verity keyring. diff --git a/fs/verity/verify.c b/fs/verity/verify.c index e0cb62da3864..a8b68c6f663d 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -179,6 +179,7 @@ out: /** * fsverity_verify_page() - verify a data page + * @page: the page to verity * * Verify a page that has just been read from a verity file. The page must be a * pagecache page that is still locked and not yet uptodate. @@ -206,6 +207,7 @@ EXPORT_SYMBOL_GPL(fsverity_verify_page); #ifdef CONFIG_BLOCK /** * fsverity_verify_bio() - verify a 'read' bio that has just completed + * @bio: the bio to verify * * Verify a set of pages that have just been read from a verity file. The pages * must be pagecache pages that are still locked and not yet uptodate. Pages @@ -264,6 +266,7 @@ EXPORT_SYMBOL_GPL(fsverity_verify_bio); /** * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue + * @work: the work to enqueue * * Enqueue verification work for asynchronous processing. */ diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index ecc604e61d61..5ac30198e032 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -200,6 +200,7 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) /** * fsverity_active() - do reads from the inode need to go through fs-verity? + * @inode: inode to check * * This checks whether ->i_verity_info has been set. * @@ -207,6 +208,8 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work) * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) + * + * Return: true if reads need to go through fs-verity, otherwise false */ static inline bool fsverity_active(const struct inode *inode) { From 9cd6b593cfc9eaa476c9a3fa768b08bca73213d0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 May 2020 12:21:18 -0700 Subject: [PATCH 26/37] fs-verity: remove unnecessary extern keywords Remove the unnecessary 'extern' keywords from function declarations. This makes it so that we don't have a mix of both styles, so it won't be ambiguous what to use in new fs-verity patches. This also makes the code shorter and matches the 'checkpatch --strict' expectation. Link: https://lore.kernel.org/r/20200511192118.71427-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/fsverity_private.h | 2 +- include/linux/fsverity.h | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index 8788f3a5035c..e96d99d5145e 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -134,7 +134,7 @@ void __init fsverity_check_hash_algs(void); /* init.c */ -extern void __printf(3, 4) __cold +void __printf(3, 4) __cold fsverity_msg(const struct inode *inode, const char *level, const char *fmt, ...); diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 5ac30198e032..78201a6d35f6 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -121,23 +121,23 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) /* enable.c */ -extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg); +int fsverity_ioctl_enable(struct file *filp, const void __user *arg); /* measure.c */ -extern int fsverity_ioctl_measure(struct file *filp, void __user *arg); +int fsverity_ioctl_measure(struct file *filp, void __user *arg); /* open.c */ -extern int fsverity_file_open(struct inode *inode, struct file *filp); -extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); -extern void fsverity_cleanup_inode(struct inode *inode); +int fsverity_file_open(struct inode *inode, struct file *filp); +int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr); +void fsverity_cleanup_inode(struct inode *inode); /* verify.c */ -extern bool fsverity_verify_page(struct page *page); -extern void fsverity_verify_bio(struct bio *bio); -extern void fsverity_enqueue_verify_work(struct work_struct *work); +bool fsverity_verify_page(struct page *page); +void fsverity_verify_bio(struct bio *bio); +void fsverity_enqueue_verify_work(struct work_struct *work); #else /* !CONFIG_FS_VERITY */ From 6553896666433e7efec589838b400a2a652b3ffa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Mar 2020 22:47:17 +0100 Subject: [PATCH 27/37] vmlinux.lds.h: Create section for protection against instrumentation Some code pathes, especially the low level entry code, must be protected against instrumentation for various reasons: - Low level entry code can be a fragile beast, especially on x86. - With NO_HZ_FULL RCU state needs to be established before using it. Having a dedicated section for such code allows to validate with tooling that no unsafe functions are invoked. Add the .noinstr.text section and the noinstr attribute to mark functions. noinstr implies notrace. Kprobes will gain a section check later. Provide also a set of markers: instrumentation_begin()/end() These are used to mark code inside a noinstr function which calls into regular instrumentable text section as safe. The instrumentation markers are only active when CONFIG_DEBUG_ENTRY is enabled as the end marker emits a NOP to prevent the compiler from merging the annotation points. This means the objtool verification requires a kernel compiled with this option. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Link: https://lkml.kernel.org/r/20200505134100.075416272@linutronix.de --- arch/powerpc/kernel/vmlinux.lds.S | 1 + include/asm-generic/sections.h | 3 ++ include/asm-generic/vmlinux.lds.h | 10 ++++++ include/linux/compiler.h | 53 +++++++++++++++++++++++++++++++ include/linux/compiler_types.h | 4 +++ scripts/mod/modpost.c | 2 +- 6 files changed, 72 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..a1706b63b82d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d1779d442aa5..66397ed10acb 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -53,6 +53,9 @@ extern char __ctors_start[], __ctors_end[]; /* Start and end of .opd section - used for function descriptors. */ extern char __start_opd[], __end_opd[]; +/* Start and end of instrumentation protected text section */ +extern char __noinstr_text_start[], __noinstr_text_end[]; + extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 71e387a5fe90..db600ef218d7 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -540,6 +540,15 @@ . = ALIGN((align)); \ __end_rodata = .; +/* + * Non-instrumentable text section + */ +#define NOINSTR_TEXT \ + ALIGN_FUNCTION(); \ + __noinstr_text_start = .; \ + *(.noinstr.text) \ + __noinstr_text_end = .; + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -551,6 +560,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ + NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ MEM_KEEP(init.text*) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..e9ead0505671 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -120,12 +120,65 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(.rodata..c_jump_table) +#ifdef CONFIG_DEBUG_ENTRY +/* Begin/end of an instrumentation safe region */ +#define instrumentation_begin() ({ \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) + +/* + * Because instrumentation_{begin,end}() can nest, objtool validation considers + * _begin() a +1 and _end() a -1 and computes a sum over the instructions. + * When the value is greater than 0, we consider instrumentation allowed. + * + * There is a problem with code like: + * + * noinstr void foo() + * { + * instrumentation_begin(); + * ... + * if (cond) { + * instrumentation_begin(); + * ... + * instrumentation_end(); + * } + * bar(); + * instrumentation_end(); + * } + * + * If instrumentation_end() would be an empty label, like all the other + * annotations, the inner _end(), which is at the end of a conditional block, + * would land on the instruction after the block. + * + * If we then consider the sum of the !cond path, we'll see that the call to + * bar() is with a 0-value, even though, we meant it to happen with a positive + * value. + * + * To avoid this, have _end() be a NOP instruction, this ensures it will be + * part of the condition block and does not escape. + */ +#define instrumentation_end() ({ \ + asm volatile("%c0: nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#endif /* CONFIG_DEBUG_ENTRY */ + #else #define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif +#ifndef instrumentation_begin +#define instrumentation_begin() do { } while(0) +#define instrumentation_end() do { } while(0) +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..5da257cbebf1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,6 +118,10 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5c3c50c5ec52..0053d4fea847 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -948,7 +948,7 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ - ".kprobes.text", ".cpuidle.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", ".text.*", \ ".coldtext" From 66e9b0717102507e64f638790eaece88765cc9e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Mar 2020 14:04:34 +0100 Subject: [PATCH 28/37] kprobes: Prevent probes in .noinstr.text section Instrumentation is forbidden in the .noinstr.text section. Make kprobes respect this. Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Chartre Acked-by: Peter Zijlstra Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20200505134100.179862032@linutronix.de --- include/linux/module.h | 2 ++ kernel/kprobes.c | 18 ++++++++++++++++++ kernel/module.c | 3 +++ 3 files changed, 23 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 1192097c9a69..d849d06e4d44 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -458,6 +458,8 @@ struct module { void __percpu *percpu; unsigned int percpu_size; #endif + void *noinstr_text_start; + unsigned int noinstr_text_size; #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9eb5acf0a9f3..3f310df4a693 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2229,6 +2229,12 @@ static int __init populate_kprobe_blacklist(unsigned long *start, /* Symbols in __kprobes_text are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, (unsigned long)__kprobes_text_end); + if (ret) + return ret; + + /* Symbols in noinstr section are blacklisted */ + ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, + (unsigned long)__noinstr_text_end); return ret ? : arch_populate_kprobe_blacklist(); } @@ -2248,6 +2254,12 @@ static void add_module_kprobe_blacklist(struct module *mod) end = start + mod->kprobes_text_size; kprobe_add_area_blacklist(start, end); } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_add_area_blacklist(start, end); + } } static void remove_module_kprobe_blacklist(struct module *mod) @@ -2265,6 +2277,12 @@ static void remove_module_kprobe_blacklist(struct module *mod) end = start + mod->kprobes_text_size; kprobe_remove_area_blacklist(start, end); } + + start = (unsigned long)mod->noinstr_text_start; + if (start) { + end = start + mod->noinstr_text_size; + kprobe_remove_area_blacklist(start, end); + } } /* Module notifier call back, checking kprobes on the module */ diff --git a/kernel/module.c b/kernel/module.c index faf733789560..72ed2b3a6ee2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3150,6 +3150,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) } #endif + mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1, + &mod->noinstr_text_size); + #ifdef CONFIG_TRACEPOINTS mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", sizeof(*mod->tracepoints_ptrs), From 1032095053b34d474aa20f2625d97dd306e0991b Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 15 May 2020 20:34:06 +0800 Subject: [PATCH 29/37] EDAC/skx: Use the mcmtr register to retrieve close_pg/bank_xor_enable The skx_edac driver wrongly uses the mtr register to retrieve two fields close_pg and bank_xor_enable. Fix it by using the correct mcmtr register to get the two fields. Cc: Signed-off-by: Qiuxu Zhuo Reported-by: Matthew Riley Acked-by: Aristeu Rozanski Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20200515210146.1337-1-tony.luck@intel.com --- drivers/edac/i10nm_base.c | 2 +- drivers/edac/skx_base.c | 20 ++++++++------------ drivers/edac/skx_common.c | 6 +++--- drivers/edac/skx_common.h | 2 +- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index df08de963d10..916d37f0503b 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -161,7 +161,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci) mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, dimm, + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, imc, i, j); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 46a3a3440f5e..a51954bc488c 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -163,27 +163,23 @@ static const struct x86_cpu_id skx_cpuids[] = { }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); -#define SKX_GET_MTMTR(dev, reg) \ - pci_read_config_dword((dev), 0x87c, &(reg)) - -static bool skx_check_ecc(struct pci_dev *pdev) +static bool skx_check_ecc(u32 mcmtr) { - u32 mtmtr; - - SKX_GET_MTMTR(pdev, mtmtr); - - return !!GET_BITFIELD(mtmtr, 2, 2); + return !!GET_BITFIELD(mcmtr, 2, 2); } static int skx_get_dimm_config(struct mem_ctl_info *mci) { struct skx_pvt *pvt = mci->pvt_info; + u32 mtr, mcmtr, amap, mcddrtcfg; struct skx_imc *imc = pvt->imc; - u32 mtr, amap, mcddrtcfg; struct dimm_info *dimm; int i, j; int ndimms; + /* Only the mcmtr on the first channel is effective */ + pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr); + for (i = 0; i < SKX_NUM_CHANNELS; i++) { ndimms = 0; pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); @@ -193,14 +189,14 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) pci_read_config_dword(imc->chan[i].cdev, 0x80 + 4 * j, &mtr); if (IS_DIMM_PRESENT(mtr)) { - ndimms += skx_get_dimm_info(mtr, amap, dimm, imc, i, j); + ndimms += skx_get_dimm_info(mtr, mcmtr, amap, dimm, imc, i, j); } else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) { ndimms += skx_get_nvdimm_info(dimm, imc, i, j, EDAC_MOD_STR); nvdimm_count++; } } - if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { + if (ndimms && !skx_check_ecc(mcmtr)) { skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); return -ENODEV; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 99bbaf629b8d..412c651bef26 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -304,7 +304,7 @@ static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add, #define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows") #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols") -int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, +int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno) { int banks = 16, ranks, rows, cols, npages; @@ -324,8 +324,8 @@ int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, imc->mc, chan, dimmno, size, npages, banks, 1 << ranks, rows, cols); - imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0); - imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9); + imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0); + imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9); imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); imc->chan[chan].dimms[dimmno].rowbits = rows; imc->chan[chan].dimms[dimmno].colbits = cols; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 60d1ea669afd..319f9b2f1f89 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -128,7 +128,7 @@ int skx_get_all_bus_mappings(unsigned int did, int off, enum type, int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); -int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, +int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, struct skx_imc *imc, int chan, int dimmno); int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, From 7daac5b2fdf88e3c3e84cf0d577f524beb0244ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 20:00:44 +0300 Subject: [PATCH 30/37] lib/vsprintf: Print time64_t in human readable format There are users which print time and date represented by content of time64_t type in human readable format. Instead of open coding that each time introduce %ptT[dt][r] specifier. Few test cases for %ptT specifier has been added as well. Link: https://lore.kernel.org/r/20200415170046.33374-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Alexandre Belloni Acked-by: Sergey Senozhatsky Rewieved-by: Petr Mladek Signed-off-by: Petr Mladek --- Documentation/core-api/printk-formats.rst | 22 ++++++++-------- lib/test_printf.c | 13 +++++++--- lib/vsprintf.c | 31 +++++++++++++++++++++-- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 8ebe46b1af39..a407cdd09083 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -468,21 +468,23 @@ Examples (OF):: %pfwf /ocp@68000000/i2c@48072000/camera@10/port/endpoint - Full name %pfwP endpoint - Node name -Time and date (struct rtc_time) -------------------------------- +Time and date +------------- :: - %ptR YYYY-mm-ddTHH:MM:SS - %ptRd YYYY-mm-dd - %ptRt HH:MM:SS - %ptR[dt][r] + %pt[RT] YYYY-mm-ddTHH:MM:SS + %pt[RT]d YYYY-mm-dd + %pt[RT]t HH:MM:SS + %pt[RT][dt][r] -For printing date and time as represented by struct rtc_time structure in -human readable format. +For printing date and time as represented by + R struct rtc_time structure + T time64_t type +in human readable format. -By default year will be incremented by 1900 and month by 1. Use %ptRr (raw) -to suppress this behaviour. +By default year will be incremented by 1900 and month by 1. +Use %pt[RT]r (raw) to suppress this behaviour. Passed by reference. diff --git a/lib/test_printf.c b/lib/test_printf.c index 2d9f520d2f27..6dc0a6c33b8c 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -478,7 +478,7 @@ struct_va_format(void) } static void __init -struct_rtc_time(void) +time_and_date(void) { /* 1543210543 */ const struct rtc_time tm = { @@ -489,14 +489,21 @@ struct_rtc_time(void) .tm_mon = 10, .tm_year = 118, }; + /* 2019-01-04T15:32:23 */ + time64_t t = 1546615943; - test("(%ptR?)", "%pt", &tm); + test("(%pt?)", "%pt", &tm); test("2018-11-26T05:35:43", "%ptR", &tm); test("0118-10-26T05:35:43", "%ptRr", &tm); test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm); test("05:35:43|0118-10-26", "%ptRtr|%ptRdr", &tm, &tm); test("05:35:43|2018-11-26", "%ptRttr|%ptRdtr", &tm, &tm); test("05:35:43 tr|2018-11-26 tr", "%ptRt tr|%ptRd tr", &tm, &tm); + + test("2019-01-04T15:32:23", "%ptT", &t); + test("0119-00-04T15:32:23", "%ptTr", &t); + test("15:32:23|2019-01-04", "%ptTt|%ptTd", &t, &t); + test("15:32:23|0119-00-04", "%ptTtr|%ptTdr", &t, &t); } static void __init @@ -661,7 +668,7 @@ test_pointer(void) uuid(); dentry(); struct_va_format(); - struct_rtc_time(); + time_and_date(); struct_clk(); bitmap(); netdev_features(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7c488a1ce318..adbcca9c9cba 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1819,6 +1820,29 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm, return buf; } +static noinline_for_stack +char *time64_str(char *buf, char *end, const time64_t time, + struct printf_spec spec, const char *fmt) +{ + struct rtc_time rtc_time; + struct tm tm; + + time64_to_tm(time, 0, &tm); + + rtc_time.tm_sec = tm.tm_sec; + rtc_time.tm_min = tm.tm_min; + rtc_time.tm_hour = tm.tm_hour; + rtc_time.tm_mday = tm.tm_mday; + rtc_time.tm_mon = tm.tm_mon; + rtc_time.tm_year = tm.tm_year; + rtc_time.tm_wday = tm.tm_wday; + rtc_time.tm_yday = tm.tm_yday; + + rtc_time.tm_isdst = 0; + + return rtc_str(buf, end, &rtc_time, spec, fmt); +} + static noinline_for_stack char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, const char *fmt) @@ -1826,8 +1850,10 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, switch (fmt[1]) { case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); + case 'T': + return time64_str(buf, end, *(const time64_t *)ptr, spec, fmt); default: - return error_string(buf, end, "(%ptR?)", spec); + return error_string(buf, end, "(%pt?)", spec); } } @@ -2143,8 +2169,9 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * - 'g' For block_device name (gendisk + partition number) - * - 't[R][dt][r]' For time and date as represented: + * - 't[RT][dt][r]' For time and date as represented by: * R struct rtc_time + * T time64_t * - 'C' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address From 4a60f58ee00266ebee652e991954e48d060ea950 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 20:00:45 +0300 Subject: [PATCH 31/37] ARM: bcm2835: Switch to use %ptT Use %ptT instead of open coded variant to print content of time64_t type in human readable format. Link: https://lore.kernel.org/r/20200415170046.33374-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Sergey Senozhatsky Rewieved-by: Petr Mladek Signed-off-by: Petr Mladek --- drivers/firmware/raspberrypi.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c index da26a584dca0..a3e85186f8e6 100644 --- a/drivers/firmware/raspberrypi.c +++ b/drivers/firmware/raspberrypi.c @@ -182,16 +182,10 @@ rpi_firmware_print_firmware_revision(struct rpi_firmware *fw) RPI_FIRMWARE_GET_FIRMWARE_REVISION, &packet, sizeof(packet)); - if (ret == 0) { - struct tm tm; + if (ret) + return; - time64_to_tm(packet, 0, &tm); - - dev_info(fw->cl.dev, - "Attached to firmware from %04ld-%02d-%02d %02d:%02d\n", - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min); - } + dev_info(fw->cl.dev, "Attached to firmware from %ptT\n", &packet); } static void From 5f1fcf8c0adc94a334832a7fdf925801b8c339b4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Apr 2020 20:00:46 +0300 Subject: [PATCH 32/37] usb: pulse8-cec: Switch to use %ptT Use %ptT instead of open coded variant to print content of time64_t type in human readable format. Link: https://lore.kernel.org/r/20200415170046.33374-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Hans Verkuil Acked-by: Sergey Senozhatsky Rewieved-by: Petr Mladek Signed-off-by: Petr Mladek --- drivers/media/usb/pulse8-cec/pulse8-cec.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index 0655aa9ecf28..beae6aa12638 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -661,7 +661,6 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, u8 *data = pulse8->data + 1; u8 cmd[2]; int err; - struct tm tm; time64_t date; pulse8->vers = 0; @@ -682,10 +681,7 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, if (err) return err; date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; - time64_to_tm(date, 0, &tm); - dev_info(pulse8->dev, "Firmware build date %04ld.%02d.%02d %02d:%02d:%02d\n", - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + dev_info(pulse8->dev, "Firmware build date %ptT\n", &date); dev_dbg(pulse8->dev, "Persistent config:\n"); cmd[0] = MSGCODE_GET_AUTO_ENABLED; From 325606af573152e02f44d791f152b7f9564bcb30 Mon Sep 17 00:00:00 2001 From: Ethon Paul Date: Sat, 18 Apr 2020 19:35:36 +0800 Subject: [PATCH 33/37] printk: Fix a typo in comment "interator"->"iterator" There is a typo in comment, fix it. Signed-off-by: Ethon Paul Cc: Steven Rostedt Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a9b6156270b..525038745a14 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3360,7 +3360,7 @@ out: EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** - * kmsg_dump_rewind_nolock - reset the interator (unlocked version) + * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) * @dumper: registered kmsg dumper * * Reset the dumper's iterator so that kmsg_dump_get_line() and @@ -3378,7 +3378,7 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) } /** - * kmsg_dump_rewind - reset the interator + * kmsg_dump_rewind - reset the iterator * @dumper: registered kmsg dumper * * Reset the dumper's iterator so that kmsg_dump_get_line() and From 8ece3b3eb576a78d2e67ad4c3a80a39fa6708809 Mon Sep 17 00:00:00 2001 From: Bruno Meneguele Date: Tue, 17 Mar 2020 07:33:44 -0300 Subject: [PATCH 34/37] kernel/printk: add kmsg SEEK_CUR handling Userspace libraries, e.g. glibc's dprintf(), perform a SEEK_CUR operation over any file descriptor requested to make sure the current position isn't pointing to junk due to previous manipulation of that same fd. And whenever that fd doesn't have support for such operation, the userspace code expects -ESPIPE to be returned. However, when the fd in question references the /dev/kmsg interface, the current kernel code state returns -EINVAL instead, causing an unexpected behavior in userspace: in the case of glibc, when -ESPIPE is returned it gets ignored and the call completes successfully, while returning -EINVAL forces dprintf to fail without performing any action over that fd: if (_IO_SEEKOFF (fp, (off64_t)0, _IO_seek_cur, _IOS_INPUT|_IOS_OUTPUT) == _IO_pos_BAD && errno != ESPIPE) return NULL; With this patch we make sure to return the correct value when SEEK_CUR is requested over kmsg and also add some kernel doc information to formalize this behavior. Link: https://lore.kernel.org/r/20200317103344.574277-1-bmeneg@redhat.com Cc: linux-kernel@vger.kernel.org Cc: rostedt@goodmis.org, Cc: David.Laight@ACULAB.COM Signed-off-by: Bruno Meneguele Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- Documentation/ABI/testing/dev-kmsg | 5 +++++ kernel/printk/printk.c | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg index f307506eb54c..1e6c28b1942b 100644 --- a/Documentation/ABI/testing/dev-kmsg +++ b/Documentation/ABI/testing/dev-kmsg @@ -56,6 +56,11 @@ Description: The /dev/kmsg character device node provides userspace access seek after the last record available at the time the last SYSLOG_ACTION_CLEAR was issued. + Due to the record nature of this interface with a "read all" + behavior and the specific positions each seek operation sets, + SEEK_CUR is not supported, returning -ESPIPE (invalid seek) to + errno whenever requested. + The output format consists of a prefix carrying the syslog prefix including priority and facility, the 64 bit message sequence number and the monotonic timestamp in microseconds, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 525038745a14..35cc5f548860 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -974,6 +974,16 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) user->idx = log_next_idx; user->seq = log_next_seq; break; + case SEEK_CUR: + /* + * It isn't supported due to the record nature of this + * interface: _SET _DATA and _END point to very specific + * record positions, while _CUR would be more useful in case + * of a byte-based log. Because of that, return the default + * errno value for invalid seek operation. + */ + ret = -ESPIPE; + break; default: ret = -EINVAL; } From 48021f98130880dd74286459a1ef48b5e9bc374f Mon Sep 17 00:00:00 2001 From: Shreyas Joshi Date: Fri, 22 May 2020 16:53:06 +1000 Subject: [PATCH 35/37] printk: handle blank console arguments passed in. If uboot passes a blank string to console_setup then it results in a trashed memory. Ultimately, the kernel crashes during freeing up the memory. This fix checks if there is a blank parameter being passed to console_setup from uboot. In case it detects that the console parameter is blank then it doesn't setup the serial device and it gracefully exits. Link: https://lore.kernel.org/r/20200522065306.83-1-shreyas.joshi@biamp.com Signed-off-by: Shreyas Joshi Acked-by: Sergey Senozhatsky [pmladek@suse.com: Better format the commit message and code, remove unnecessary brackets.] Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 35cc5f548860..a3990505abdf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2200,6 +2200,9 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; + if (str[0] == 0) + return 1; + if (_braille_console_setup(&str, &brl_options)) return 1; From eb012d125a2419786f5bcaaf8a901babc7b6e3d7 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 25 May 2020 00:32:43 +0900 Subject: [PATCH 36/37] printk: Remove pr_cont_once() pr_cont_once() does not make sense; at least emitting module name using pr_fmt() into middle of a line (after e.g. pr_info_once()) does not make sense. Let's remove unused pr_cont_once(). Link: https://lore.kernel.org/r/20200524153243.11690-1-penguin-kernel@I-love.SAKURA.ne.jp Cc: Joe Perches Signed-off-by: Tetsuo Handa Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index e061635e0409..07f2d08f79ff 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -384,8 +384,7 @@ extern int kptr_restrict; printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -#define pr_cont_once(fmt, ...) \ - printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) +/* no pr_cont_once, don't do that... */ #if defined(DEBUG) #define pr_devel_once(fmt, ...) \ From f00eb5ff2f6ffc4a2f0fc0e829410082bff0f78e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 29 Apr 2020 16:48:47 +0100 Subject: [PATCH 37/37] EDAC/amd64: Remove redundant assignment to variable ret in hw_info_get() The variable ret is being assigned with a value that is never read and it is being updated later with a new value. The initialization is redundant so remove it. Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200429154847.287001-1-colin.king@canonical.com --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index f91f3bc1e0b2..9cf7cc1f3f72 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3403,7 +3403,7 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { static int hw_info_get(struct amd64_pvt *pvt) { u16 pci_id1, pci_id2; - int ret = -EINVAL; + int ret; if (pvt->fam >= 0x17) { pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);