From a18670f4617d41829ce88ab3e47bbc406e4dc5e8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jul 2019 14:55:47 +0200 Subject: [PATCH 001/345] watchdog: ath79_wdt: fix a typo in the name of a function It is likely that 'ath97_wdt_shutdown()' should be 'ath79_wdt_shutdown()' Signed-off-by: Christophe JAILLET Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ath79_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 2e09981fe978..75de664ef4b0 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -302,7 +302,7 @@ static int ath79_wdt_remove(struct platform_device *pdev) return 0; } -static void ath97_wdt_shutdown(struct platform_device *pdev) +static void ath79_wdt_shutdown(struct platform_device *pdev) { ath79_wdt_disable(); } @@ -318,7 +318,7 @@ MODULE_DEVICE_TABLE(of, ath79_wdt_match); static struct platform_driver ath79_wdt_driver = { .probe = ath79_wdt_probe, .remove = ath79_wdt_remove, - .shutdown = ath97_wdt_shutdown, + .shutdown = ath79_wdt_shutdown, .driver = { .name = DRIVER_NAME, .of_match_table = of_match_ptr(ath79_wdt_match), From 630ee1a50c40210ef021435faa0e80d17a904883 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 10:10:33 -0500 Subject: [PATCH 002/345] watchdog: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/watchdog/ar7_wdt.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 237:3 drivers/watchdog/pcwd.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 653:3 drivers/watchdog/sb_wdog.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 204:3 drivers/watchdog/wdt.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 391:3 Reported-by: Geert Uytterhoeven Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190729151033.GA10143@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ar7_wdt.c | 1 + drivers/watchdog/pcwd.c | 2 +- drivers/watchdog/sb_wdog.c | 1 + drivers/watchdog/wdt.c | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index b9b2d06b3879..668a1c704f28 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -235,6 +235,7 @@ static long ar7_wdt_ioctl(struct file *file, ar7_wdt_update_margin(new_margin); ar7_wdt_kick(1); spin_unlock(&wdt_lock); + /* Fall through */ case WDIOC_GETTIMEOUT: if (put_user(margin, (int *)arg)) diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 1b2cf5b95a89..c3c93e00b320 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -651,7 +651,7 @@ static long pcwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; pcwd_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(heartbeat, argp); diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index 5a6ced7a7e8f..202fc8d8ca5f 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -202,6 +202,7 @@ static long sbwdog_ioctl(struct file *file, unsigned int cmd, timeout = time; sbwdog_set(user_dog, timeout); sbwdog_pet(user_dog); + /* Fall through */ case WDIOC_GETTIMEOUT: /* diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index 0650100fad00..7d278b37e083 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -389,7 +389,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_heartbeat(new_heartbeat)) return -EINVAL; wdt_ping(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: From ca58397c53ddba9544748dc0cbaef4ff34282466 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 15:06:02 -0500 Subject: [PATCH 003/345] watchdog: scx200_wdt: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: i386): drivers/watchdog/scx200_wdt.c: In function ‘scx200_wdt_ioctl’: drivers/watchdog/scx200_wdt.c:188:3: warning: this statement may fall through [-Wimplicit-fallthrough=] scx200_wdt_ping(); ^~~~~~~~~~~~~~~~~ drivers/watchdog/scx200_wdt.c:189:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190729200602.GA6854@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/scx200_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c index efd7996694de..46268309ee9b 100644 --- a/drivers/watchdog/scx200_wdt.c +++ b/drivers/watchdog/scx200_wdt.c @@ -186,6 +186,7 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd, margin = new_margin; scx200_wdt_update_margin(); scx200_wdt_ping(); + /* Fall through */ case WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; From 2c017640826a16eab385b756431ce560e44a7054 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 17:31:59 -0500 Subject: [PATCH 004/345] watchdog: wdt977: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: arm): drivers/watchdog/wdt977.c: In function ‘wdt977_ioctl’: LD [M] drivers/media/platform/vicodec/vicodec.o drivers/watchdog/wdt977.c:400:3: warning: this statement may fall through [-Wimplicit-fallthrough=] wdt977_keepalive(); ^~~~~~~~~~~~~~~~~~ drivers/watchdog/wdt977.c:403:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190729223159.GA20878@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/wdt977.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c index 567005d7598e..5c52c73e1839 100644 --- a/drivers/watchdog/wdt977.c +++ b/drivers/watchdog/wdt977.c @@ -398,7 +398,7 @@ static long wdt977_ioctl(struct file *file, unsigned int cmd, return -EINVAL; wdt977_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); From 4b4b8b03458ef33dc2545c038d3134ba668038a4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 20:46:50 -0500 Subject: [PATCH 005/345] watchdog: riowd: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: sparc64): drivers/watchdog/riowd.c: In function ‘riowd_ioctl’: drivers/watchdog/riowd.c:136:3: warning: this statement may fall through [-Wimplicit-fallthrough=] riowd_writereg(p, riowd_timeout, WDTO_INDEX); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/watchdog/riowd.c:139:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190730014650.GA31309@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/riowd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index 41a2a11535a6..b35f7be20c00 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -134,7 +134,7 @@ static long riowd_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EINVAL; riowd_timeout = (new_margin + 59) / 60; riowd_writereg(p, riowd_timeout, WDTO_INDEX); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(riowd_timeout * 60, (int __user *)argp); From 26ae6a8e9b09a94a38e5351502bee0e801d4d8c6 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 30 Jul 2019 11:15:48 -0700 Subject: [PATCH 006/345] watchdog: Remove dev_err() usage after platform_get_irq() We don't need dev_err() messages when platform_get_irq() fails now that platform_get_irq() prints an error message itself when something goes wrong. Let's remove these prints with a simple semantic patch. // @@ expression ret; struct platform_device *E; @@ ret = ( platform_get_irq(E, ...) | platform_get_irq_byname(E, ...) ); if ( \( ret < 0 \| ret <= 0 \) ) { ( -if (ret != -EPROBE_DEFER) -{ ... -dev_err(...); -... } | ... -dev_err(...); ) ... } // While we're here, remove braces on if statements that only have one statement (manually). Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: linux-watchdog@vger.kernel.org Cc: Greg Kroah-Hartman Signed-off-by: Stephen Boyd Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sprd_wdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c index edba4e278685..0bb17b046140 100644 --- a/drivers/watchdog/sprd_wdt.c +++ b/drivers/watchdog/sprd_wdt.c @@ -284,10 +284,8 @@ static int sprd_wdt_probe(struct platform_device *pdev) } wdt->irq = platform_get_irq(pdev, 0); - if (wdt->irq < 0) { - dev_err(dev, "failed to get IRQ resource\n"); + if (wdt->irq < 0) return wdt->irq; - } ret = devm_request_irq(dev, wdt->irq, sprd_wdt_isr, IRQF_NO_SUSPEND, "sprd-wdt", (void *)wdt); From b18f22d02ad1a4cfc0ca348766da651f9119cf44 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 6 Aug 2019 02:39:53 -0500 Subject: [PATCH 007/345] watchdog: jz4740: Fix unused variable warning in jz4740_wdt_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warning (Building: ci20_defconfig mips): drivers/watchdog/jz4740_wdt.c: In function ‘jz4740_wdt_probe’: drivers/watchdog/jz4740_wdt.c:165:6: warning: unused variable ‘ret’ [-Wunused-variable] int ret; ^~~ Fixes: 9ee644c9326c ("watchdog: jz4740_wdt: drop warning after registering device") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190806073953.GA13685@embeddedor Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/jz4740_wdt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/watchdog/jz4740_wdt.c b/drivers/watchdog/jz4740_wdt.c index d4a90916dd38..c6052ae54f32 100644 --- a/drivers/watchdog/jz4740_wdt.c +++ b/drivers/watchdog/jz4740_wdt.c @@ -162,7 +162,6 @@ static int jz4740_wdt_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct jz4740_wdt_drvdata *drvdata; struct watchdog_device *jz4740_wdt; - int ret; drvdata = devm_kzalloc(dev, sizeof(struct jz4740_wdt_drvdata), GFP_KERNEL); From fdf4f2fb8e8990c131b2b1a5a9c03681bb16e87a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 22 Jul 2019 18:03:02 -0700 Subject: [PATCH 008/345] drivers: thermal: processor_thermal_device: Export sysfs interface for TCC offset This change exports an interface to read tcc offset and allow writing if the platform is not locked. Refer to Intel SDM for details on the MSR: MSR_TEMPERATURE_TARGET. Here TCC Activation Offset (R/W) bits allow temperature offset in degrees in relation to TjMAX. This change will be useful for improving performance from user space for some platforms, if the current offset is not optimal. Signed-off-by: Srinivas Pandruvada Tested-by: Benjamin Berg Signed-off-by: Zhang Rui --- .../processor_thermal_device.c | 91 ++++++++++++++++++- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index d3446acf9bbd..97333fc4be42 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -137,6 +137,72 @@ static const struct attribute_group power_limit_attribute_group = { .name = "power_limits" }; +static ssize_t tcc_offset_degree_celsius_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + int err; + + err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val); + if (err) + return err; + + val = (val >> 24) & 0xff; + return sprintf(buf, "%d\n", (int)val); +} + +static int tcc_offset_update(int tcc) +{ + u64 val; + int err; + + if (!tcc) + return -EINVAL; + + err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val); + if (err) + return err; + + val &= ~GENMASK_ULL(31, 24); + val |= (tcc & 0xff) << 24; + + err = wrmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, val); + if (err) + return err; + + return 0; +} + +static int tcc_offset_save; + +static ssize_t tcc_offset_degree_celsius_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + u64 val; + int tcc, err; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, &val); + if (err) + return err; + + if (!(val & BIT(30))) + return -EACCES; + + if (kstrtoint(buf, 0, &tcc)) + return -EINVAL; + + err = tcc_offset_update(tcc); + if (err) + return err; + + tcc_offset_save = tcc; + + return count; +} + +static DEVICE_ATTR_RW(tcc_offset_degree_celsius); + static int stored_tjmax; /* since it is fixed, we can have local storage */ static int get_tjmax(void) @@ -332,6 +398,7 @@ static void proc_thermal_remove(struct proc_thermal_device *proc_priv) acpi_remove_notify_handler(proc_priv->adev->handle, ACPI_DEVICE_NOTIFY, proc_thermal_notify); int340x_thermal_zone_remove(proc_priv->int340x_zone); + sysfs_remove_file(&proc_priv->dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr); sysfs_remove_group(&proc_priv->dev->kobj, &power_limit_attribute_group); } @@ -355,8 +422,15 @@ static int int3401_add(struct platform_device *pdev) dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n"); - return sysfs_create_group(&pdev->dev.kobj, - &power_limit_attribute_group); + ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + if (ret) + return ret; + + ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group); + if (ret) + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + + return ret; } static int int3401_remove(struct platform_device *pdev) @@ -588,8 +662,15 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n"); - return sysfs_create_group(&pdev->dev.kobj, - &power_limit_attribute_group); + ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + if (ret) + return ret; + + ret = sysfs_create_group(&pdev->dev.kobj, &power_limit_attribute_group); + if (ret) + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_tcc_offset_degree_celsius.attr); + + return ret; } static void proc_thermal_pci_remove(struct pci_dev *pdev) @@ -615,6 +696,8 @@ static int proc_thermal_resume(struct device *dev) proc_dev = dev_get_drvdata(dev); proc_thermal_read_ppcc(proc_dev); + tcc_offset_update(tcc_offset_save); + return 0; } #else From 0ac33e4e9b5e4ccdd43c9d4e77882a1af0e41dec Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 16 Aug 2019 18:06:04 +0200 Subject: [PATCH 009/345] selftests: use "$(MAKE)" instead of "make" When doing "make kselftest TARGETS=bpf -j12", bpf progs end up being compiled sequentially and thus slowly. The reason is that parent make (tools/testing/selftests/Makefile) does not share its jobserver with child make (tools/testing/selftests/bpf/Makefile), therefore the latter runs with -j1. Change all instances of "make" to "$(MAKE)", so that the whole make hierarchy runs using a single jobserver. Signed-off-by: Ilya Leoshkevich Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 25b43a8c2b15..c3feccb99ff5 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -126,9 +126,9 @@ endif # in the default INSTALL_HDR_PATH usr/include. khdr: ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif @@ -136,35 +136,35 @@ all: khdr @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_hotplug: hotplug @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ done; clean_hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; run_pstore_crash: - make -C pstore run_crash + $(MAKE) -C pstore run_crash # Use $BUILD as the default install root. $BUILD points to the # right output location for the following cases: @@ -184,7 +184,7 @@ ifdef INSTALL_PATH install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ done; @# Ask all targets to emit their test scripts @@ -203,7 +203,7 @@ ifdef INSTALL_PATH echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ echo -n "run_many" >> $(ALL_SCRIPT); \ - make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "" >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ done; @@ -216,7 +216,7 @@ endif clean: @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; .PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean From 51904045d4aa07fbccf6ff18d56d2064a7676d35 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 30 Jul 2019 10:21:22 +0800 Subject: [PATCH 010/345] thermal: qoriq: Add clock operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms like i.MX8MQ has clock control for this module, need to add clock operations to make sure the driver is working properly. Signed-off-by: Anson Huang Reviewed-by: Guido Günther Reviewed-by: Dong Aisheng Signed-off-by: Zhang Rui --- drivers/thermal/qoriq_thermal.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 7b364933bfb1..28939471ce16 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -2,6 +2,7 @@ // // Copyright 2016 Freescale Semiconductor, Inc. +#include #include #include #include @@ -72,6 +73,7 @@ struct qoriq_sensor { struct qoriq_tmu_data { struct qoriq_tmu_regs __iomem *regs; + struct clk *clk; bool little_endian; struct qoriq_sensor *sensor[SITES_MAX]; }; @@ -209,6 +211,16 @@ static int qoriq_tmu_probe(struct platform_device *pdev) goto err_iomap; } + data->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(data->clk)) + return PTR_ERR(data->clk); + + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clock\n"); + return ret; + } + qoriq_tmu_init_device(data); /* TMU initialization */ ret = qoriq_tmu_calibration(pdev); /* TMU calibration */ @@ -225,6 +237,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) return 0; err_tmu: + clk_disable_unprepare(data->clk); iounmap(data->regs); err_iomap: @@ -241,6 +254,9 @@ static int qoriq_tmu_remove(struct platform_device *pdev) tmu_write(data, TMR_DISABLE, &data->regs->tmr); iounmap(data->regs); + + clk_disable_unprepare(data->clk); + platform_set_drvdata(pdev, NULL); return 0; @@ -257,14 +273,21 @@ static int qoriq_tmu_suspend(struct device *dev) tmr &= ~TMR_ME; tmu_write(data, tmr, &data->regs->tmr); + clk_disable_unprepare(data->clk); + return 0; } static int qoriq_tmu_resume(struct device *dev) { u32 tmr; + int ret; struct qoriq_tmu_data *data = dev_get_drvdata(dev); + ret = clk_prepare_enable(data->clk); + if (ret) + return ret; + /* Enable monitoring */ tmr = tmu_read(data, &data->regs->tmr); tmr |= TMR_ME; From 11f0cdc8bd621fcded06c951b05076e618c5c717 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 30 Jul 2019 10:21:23 +0800 Subject: [PATCH 011/345] thermal: qoriq: Fix error path of calling qoriq_tmu_register_tmu_zone fail When registering tmu zone failed, the error path should be err_tmu instead of err_iomap, as iounmap() needs to be called. Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Signed-off-by: Zhang Rui --- drivers/thermal/qoriq_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 28939471ce16..5755a1108a1f 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -231,7 +231,7 @@ static int qoriq_tmu_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "Failed to register sensors\n"); ret = -ENODEV; - goto err_iomap; + goto err_tmu; } return 0; From 4d82000af007acda6b46729ea368f6b2823c532c Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 30 Jul 2019 10:21:24 +0800 Subject: [PATCH 012/345] thermal: qoriq: Use devm_platform_ioremap_resource() instead of of_iomap() Use devm_platform_ioremap_resource() instead of of_iomap() to save the iounmap() call in error handle path; Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Signed-off-by: Zhang Rui --- drivers/thermal/qoriq_thermal.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 5755a1108a1f..8d19601d0ca6 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -204,11 +204,10 @@ static int qoriq_tmu_probe(struct platform_device *pdev) data->little_endian = of_property_read_bool(np, "little-endian"); - data->regs = of_iomap(np, 0); - if (!data->regs) { + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) { dev_err(&pdev->dev, "Failed to get memory region\n"); - ret = -ENODEV; - goto err_iomap; + return PTR_ERR(data->regs); } data->clk = devm_clk_get_optional(&pdev->dev, NULL); @@ -225,22 +224,19 @@ static int qoriq_tmu_probe(struct platform_device *pdev) ret = qoriq_tmu_calibration(pdev); /* TMU calibration */ if (ret < 0) - goto err_tmu; + goto err; ret = qoriq_tmu_register_tmu_zone(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to register sensors\n"); ret = -ENODEV; - goto err_tmu; + goto err; } return 0; -err_tmu: +err: clk_disable_unprepare(data->clk); - iounmap(data->regs); - -err_iomap: platform_set_drvdata(pdev, NULL); return ret; @@ -253,8 +249,6 @@ static int qoriq_tmu_remove(struct platform_device *pdev) /* Disable monitoring */ tmu_write(data, TMR_DISABLE, &data->regs->tmr); - iounmap(data->regs); - clk_disable_unprepare(data->clk); platform_set_drvdata(pdev, NULL); From aea591970f659bd6d792ca4c46dd0d251331a397 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 30 Jul 2019 10:21:25 +0800 Subject: [PATCH 013/345] thermal: qoriq: Use __maybe_unused instead of #if CONFIG_PM_SLEEP Use __maybe_unused for power management related functions instead of #if CONFIG_PM_SLEEP to simply the code. Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Signed-off-by: Zhang Rui --- drivers/thermal/qoriq_thermal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c index 8d19601d0ca6..39542c670301 100644 --- a/drivers/thermal/qoriq_thermal.c +++ b/drivers/thermal/qoriq_thermal.c @@ -256,8 +256,7 @@ static int qoriq_tmu_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int qoriq_tmu_suspend(struct device *dev) +static int __maybe_unused qoriq_tmu_suspend(struct device *dev) { u32 tmr; struct qoriq_tmu_data *data = dev_get_drvdata(dev); @@ -272,7 +271,7 @@ static int qoriq_tmu_suspend(struct device *dev) return 0; } -static int qoriq_tmu_resume(struct device *dev) +static int __maybe_unused qoriq_tmu_resume(struct device *dev) { u32 tmr; int ret; @@ -289,7 +288,6 @@ static int qoriq_tmu_resume(struct device *dev) return 0; } -#endif static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops, qoriq_tmu_suspend, qoriq_tmu_resume); From 11f787b0840e26648b5e01fad99bd2aa35098d5d Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 30 Jul 2019 10:21:26 +0800 Subject: [PATCH 014/345] dt-bindings: thermal: qoriq: Add optional clocks property Some platforms like i.MX8M series SoCs have clock control for TMU, add optional clocks property to the binding doc. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Reviewed-by: Dong Aisheng Signed-off-by: Zhang Rui --- Documentation/devicetree/bindings/thermal/qoriq-thermal.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt index 04cbb90a5d3e..28f2cbaf1702 100644 --- a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt @@ -23,6 +23,7 @@ Required properties: Optional property: - little-endian : If present, the TMU registers are little endian. If absent, the default is big endian. +- clocks : the clock for clocking the TMU silicon. Example: From 9aee3713135a6733f6e58ec84c1ce24514579039 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 13 Jun 2019 11:49:23 -0700 Subject: [PATCH 015/345] thermal: armada: Fix -Wshift-negative-value Clang produces the following warning drivers/thermal/armada_thermal.c:270:33: warning: shifting a negative signed value is undefined [-Wshift-negative-value] 1 warning reg &= ~CONTROL1_TSEN_AVG_MASK << CONTROL1_TSEN_AVG_SHIFT; generated . ~~~~~~~~~~~~~~~~~~~~~~~ ^ CONTROL1_TSEN_AVG_SHIFT is defined to be zero. Since shifting by zero does nothing this variable can be removed. Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/532 Signed-off-by: Nathan Huckleberry Reviewed-by: Daniel Lezcano Acked-by: Miquel Raynal Signed-off-by: Zhang Rui --- drivers/thermal/armada_thermal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 8c07a393dc2e..709a22f455e9 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -53,7 +53,6 @@ #define CONTROL0_TSEN_MODE_EXTERNAL 0x2 #define CONTROL0_TSEN_MODE_MASK 0x3 -#define CONTROL1_TSEN_AVG_SHIFT 0 #define CONTROL1_TSEN_AVG_MASK 0x7 #define CONTROL1_EXT_TSEN_SW_RESET BIT(7) #define CONTROL1_EXT_TSEN_HW_RESETn BIT(8) @@ -267,8 +266,8 @@ static void armada_cp110_init(struct platform_device *pdev, /* Average the output value over 2^1 = 2 samples */ regmap_read(priv->syscon, data->syscon_control1_off, ®); - reg &= ~CONTROL1_TSEN_AVG_MASK << CONTROL1_TSEN_AVG_SHIFT; - reg |= 1 << CONTROL1_TSEN_AVG_SHIFT; + reg &= ~CONTROL1_TSEN_AVG_MASK; + reg |= 1; regmap_write(priv->syscon, data->syscon_control1_off, reg); } From b9cd1663fb49c9432c346be742f94ef25b406c89 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Mon, 8 Jul 2019 20:34:09 +0800 Subject: [PATCH 016/345] thermal: rcar_gen3_thermal: Replace devm_add_action() followed by failure action with devm_add_action_or_reset() devm_add_action_or_reset() is introduced as a helper function which internally calls devm_add_action(). If devm_add_action() fails then it will execute the action mentioned and return the error code. This reduce source code size (avoid writing the action twice) and reduce the likelyhood of bugs. Signed-off-by: Fuqian Huang Signed-off-by: Zhang Rui --- drivers/thermal/rcar_gen3_thermal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index a56463308694..755d2b5bd2c2 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -443,9 +443,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) if (ret) goto error_unregister; - ret = devm_add_action(dev, rcar_gen3_hwmon_action, zone); + ret = devm_add_action_or_reset(dev, rcar_gen3_hwmon_action, zone); if (ret) { - rcar_gen3_hwmon_action(zone); goto error_unregister; } From 9d6b4b871dcfd6796b46a05e002884e051687e47 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jul 2019 18:33:58 +0200 Subject: [PATCH 017/345] thermal: tegra: Fix a typo s/sochterm/soctherm/ Signed-off-by: Christophe JAILLET Signed-off-by: Zhang Rui --- drivers/thermal/tegra/soctherm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c index 43941eb734eb..5acaad3a594f 100644 --- a/drivers/thermal/tegra/soctherm.c +++ b/drivers/thermal/tegra/soctherm.c @@ -202,7 +202,7 @@ /* get dividend from the depth */ #define THROT_DEPTH_DIVIDEND(depth) ((256 * (100 - (depth)) / 100) - 1) -/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-sochterm.h +/* gk20a nv_therm interface N:3 Mapping. Levels defined in tegra124-soctherm.h * level vector * NONE 3'b000 * LOW 3'b001 From 6b8249abb093551ef173d13a25ed0044d5dd33e0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 23 Aug 2019 10:38:35 +0100 Subject: [PATCH 018/345] drivers: thermal: qcom: tsens: Fix memory leak from qfprom read memory returned as part of nvmem_read via qfprom_read should be freed by the consumer once done. Existing code is not doing it so fix it. Below memory leak detected by kmemleak [] kmemleak_alloc+0x50/0x84 [] __kmalloc+0xe8/0x168 [] nvmem_cell_read+0x30/0x80 [] qfprom_read+0x4c/0x7c [] calibrate_v1+0x34/0x204 [] tsens_probe+0x164/0x258 [] platform_drv_probe+0x80/0xa0 [] really_probe+0x208/0x248 [] driver_probe_device+0x98/0xc0 [] __device_attach_driver+0x9c/0xac [] bus_for_each_drv+0x60/0x8c [] __device_attach+0x8c/0x100 [] device_initial_probe+0x20/0x28 [] bus_probe_device+0x34/0x7c [] deferred_probe_work_func+0x6c/0x98 [] process_one_work+0x160/0x2f8 Signed-off-by: Srinivas Kandagatla Acked-by: Amit Kucheria Signed-off-by: Zhang Rui --- drivers/thermal/qcom/tsens-8960.c | 2 ++ drivers/thermal/qcom/tsens-v0_1.c | 12 ++++++++++-- drivers/thermal/qcom/tsens-v1.c | 1 + drivers/thermal/qcom/tsens.h | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c index 8d9b721dadb6..e46a4e3f25c4 100644 --- a/drivers/thermal/qcom/tsens-8960.c +++ b/drivers/thermal/qcom/tsens-8960.c @@ -229,6 +229,8 @@ static int calibrate_8960(struct tsens_priv *priv) for (i = 0; i < num_read; i++, s++) s->offset = data[i]; + kfree(data); + return 0; } diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c index 6f26fadf4c27..055647bcee67 100644 --- a/drivers/thermal/qcom/tsens-v0_1.c +++ b/drivers/thermal/qcom/tsens-v0_1.c @@ -145,8 +145,10 @@ static int calibrate_8916(struct tsens_priv *priv) return PTR_ERR(qfprom_cdata); qfprom_csel = (u32 *)qfprom_read(priv->dev, "calib_sel"); - if (IS_ERR(qfprom_csel)) + if (IS_ERR(qfprom_csel)) { + kfree(qfprom_cdata); return PTR_ERR(qfprom_csel); + } mode = (qfprom_csel[0] & MSM8916_CAL_SEL_MASK) >> MSM8916_CAL_SEL_SHIFT; dev_dbg(priv->dev, "calibration mode is %d\n", mode); @@ -181,6 +183,8 @@ static int calibrate_8916(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(qfprom_cdata); + kfree(qfprom_csel); return 0; } @@ -198,8 +202,10 @@ static int calibrate_8974(struct tsens_priv *priv) return PTR_ERR(calib); bkp = (u32 *)qfprom_read(priv->dev, "calib_backup"); - if (IS_ERR(bkp)) + if (IS_ERR(bkp)) { + kfree(calib); return PTR_ERR(bkp); + } calib_redun_sel = bkp[1] & BKP_REDUN_SEL; calib_redun_sel >>= BKP_REDUN_SHIFT; @@ -313,6 +319,8 @@ static int calibrate_8974(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(calib); + kfree(bkp); return 0; } diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c index 10b595d4f619..870f502f2cb6 100644 --- a/drivers/thermal/qcom/tsens-v1.c +++ b/drivers/thermal/qcom/tsens-v1.c @@ -138,6 +138,7 @@ static int calibrate_v1(struct tsens_priv *priv) } compute_intercept_slope(priv, p1, p2, mode); + kfree(qfprom_cdata); return 0; } diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h index 2fd94997245b..b89083b61c38 100644 --- a/drivers/thermal/qcom/tsens.h +++ b/drivers/thermal/qcom/tsens.h @@ -17,6 +17,7 @@ #include #include +#include struct tsens_priv; From c87a37ebd40b889178664c2c09cc187334146292 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 20 Aug 2019 18:03:25 +0800 Subject: [PATCH 019/345] 9p: avoid attaching writeback_fid on mmap with type PRIVATE Currently on mmap cache policy, we always attach writeback_fid whether mmap type is SHARED or PRIVATE. However, in the use case of kata-container which combines 9p(Guest OS) with overlayfs(Host OS), this behavior will trigger overlayfs' copy-up when excute command inside container. Link: http://lkml.kernel.org/r/20190820100325.10313-1-cgxu519@zoho.com.cn Signed-off-by: Chengguang Xu Signed-off-by: Dominique Martinet --- fs/9p/vfs_file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 4cc966a31cb3..fe7f0bd2048e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -513,6 +513,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); if (!v9inode->writeback_fid && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) { /* * clone a fid and add it to writeback_fid @@ -614,6 +615,8 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start - 1), }; + if (!(vma->vm_flags & VM_SHARED)) + return; p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma); From 0ce772fe79b68f83df40f07f28207b292785c677 Mon Sep 17 00:00:00 2001 From: Lu Shuaibing Date: Thu, 13 Jun 2019 15:08:54 +0800 Subject: [PATCH 020/345] 9p: Transport error uninitialized The p9_tag_alloc() does not initialize the transport error t_err field. The struct p9_req_t *req is allocated and stored in a struct p9_client variable. The field t_err is never initialized before p9_conn_cancel() checks its value. KUMSAN(KernelUninitializedMemorySantizer, a new error detection tool) reports this bug. ================================================================== BUG: KUMSAN: use of uninitialized memory in p9_conn_cancel+0x2d9/0x3b0 Read of size 4 at addr ffff88805f9b600c by task kworker/1:2/1216 CPU: 1 PID: 1216 Comm: kworker/1:2 Not tainted 5.2.0-rc4+ #28 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Workqueue: events p9_write_work Call Trace: dump_stack+0x75/0xae __kumsan_report+0x17c/0x3e6 kumsan_report+0xe/0x20 p9_conn_cancel+0x2d9/0x3b0 p9_write_work+0x183/0x4a0 process_one_work+0x4d1/0x8c0 worker_thread+0x6e/0x780 kthread+0x1ca/0x1f0 ret_from_fork+0x35/0x40 Allocated by task 1979: save_stack+0x19/0x80 __kumsan_kmalloc.constprop.3+0xbc/0x120 kmem_cache_alloc+0xa7/0x170 p9_client_prepare_req.part.9+0x3b/0x380 p9_client_rpc+0x15e/0x880 p9_client_create+0x3d0/0xac0 v9fs_session_init+0x192/0xc80 v9fs_mount+0x67/0x470 legacy_get_tree+0x70/0xd0 vfs_get_tree+0x4a/0x1c0 do_mount+0xba9/0xf90 ksys_mount+0xa8/0x120 __x64_sys_mount+0x62/0x70 do_syscall_64+0x6d/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 0: (stack is not available) The buggy address belongs to the object at ffff88805f9b6008 which belongs to the cache p9_req_t of size 144 The buggy address is located 4 bytes inside of 144-byte region [ffff88805f9b6008, ffff88805f9b6098) The buggy address belongs to the page: page:ffffea00017e6d80 refcount:1 mapcount:0 mapping:ffff888068b63740 index:0xffff88805f9b7d90 compound_mapcount: 0 flags: 0x100000000010200(slab|head) raw: 0100000000010200 ffff888068b66450 ffff888068b66450 ffff888068b63740 raw: ffff88805f9b7d90 0000000000100001 00000001ffffffff 0000000000000000 page dumped because: kumsan: bad access detected ================================================================== Link: http://lkml.kernel.org/r/20190613070854.10434-1-shuaibinglu@126.com Signed-off-by: Lu Shuaibing [dominique.martinet@cea.fr: grouped the added init with the others] Signed-off-by: Dominique Martinet --- net/9p/client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/9p/client.c b/net/9p/client.c index 9622f3e469f6..1d48afc7033c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -281,6 +281,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) p9pdu_reset(&req->tc); p9pdu_reset(&req->rc); + req->t_err = 0; req->status = REQ_STATUS_ALLOC; init_waitqueue_head(&req->wq); INIT_LIST_HEAD(&req->req_list); From 962a991c5de18452d6c429d99f3039387cf5cbb0 Mon Sep 17 00:00:00 2001 From: Bharath Vedartham Date: Thu, 23 May 2019 01:15:19 +0530 Subject: [PATCH 021/345] 9p/cache.c: Fix memory leak in v9fs_cache_session_get_cookie v9fs_cache_session_get_cookie assigns a random cachetag to v9ses->cachetag, if the cachetag is not assigned previously. v9fs_random_cachetag allocates memory to v9ses->cachetag with kmalloc and uses scnprintf to fill it up with a cachetag. But if scnprintf fails, v9ses->cachetag is not freed in the current code causing a memory leak. Fix this by freeing v9ses->cachetag it v9fs_random_cachetag fails. This was reported by syzbot, the link to the report is below: https://syzkaller.appspot.com/bug?id=f012bdf297a7a4c860c38a88b44fbee43fd9bbf3 Link: http://lkml.kernel.org/r/20190522194519.GA5313@bharath12345-Inspiron-5559 Reported-by: syzbot+3a030a73b6c1e9833815@syzkaller.appspotmail.com Signed-off-by: Bharath Vedartham Signed-off-by: Dominique Martinet --- fs/9p/cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 995e332eee5c..eb2151fb6049 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -51,6 +51,8 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) if (!v9ses->cachetag) { if (v9fs_random_cachetag(v9ses) < 0) { v9ses->fscache = NULL; + kfree(v9ses->cachetag); + v9ses->cachetag = NULL; return; } } From aafee43b72863f1f70aeaf1332d049916e8df239 Mon Sep 17 00:00:00 2001 From: Bharath Vedartham Date: Thu, 23 May 2019 22:26:20 +0530 Subject: [PATCH 022/345] 9p/vfs_super.c: Remove unused parameter data in v9fs_fill_super v9fs_fill_super has a param 'void *data' which is unused in the function. This patch removes the 'void *data' param in v9fs_fill_super and changes the parameters in all function calls of v9fs_fill_super. Link: http://lkml.kernel.org/r/20190523165619.GA4209@bharath12345-Inspiron-5559 Signed-off-by: Bharath Vedartham Signed-off-by: Dominique Martinet --- fs/9p/vfs_super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 08112fbcaece..6c5ac0a5e539 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -58,7 +58,7 @@ static int v9fs_set_super(struct super_block *s, void *data) static int v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, - int flags, void *data) + int flags) { int ret; @@ -128,7 +128,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(sb); goto clunk_fid; } - retval = v9fs_fill_super(sb, v9ses, flags, data); + retval = v9fs_fill_super(sb, v9ses, flags); if (retval) goto release_sb; From 5079bde790304959bf744c548efdd5be660ea8e2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2019 14:48:19 +0200 Subject: [PATCH 023/345] perf python: Add missing python/perf.so dependency for libperf The python/perf.so compilation needs libperf ready, otherwise it fails: $ make python/perf.so JOBS=1 BUILD: Doing 'make -j1' parallel build GEN python/perf.so gcc: error: /home/jolsa/kernel/linux-perf/tools/perf/lib/libperf.a: No such file or directory Fixing this with by adding libperf dependency. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190901124822.10132-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f9807d8c005b..2ccc12f3730b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -567,7 +567,7 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) # Create python binding output directory if not already present _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python') -$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF) $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \ CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ $(PYTHON_WORD) util/setup.py \ From 9eab951f34dbd092ab520bda167f288899858306 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2019 14:48:21 +0200 Subject: [PATCH 024/345] perf tests: Add libperf automated test for 'make -C tools/perf build-test' Add a libperf build test, that is triggered when one does: $ make -C tools/perf build-test Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190901124822.10132-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 70c48475896d..6b3afed5d910 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -327,6 +327,10 @@ make_kernelsrc_tools: (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) +make_libperf: + @echo "- make -C lib"; + make -C lib clean >$@ 2>&1; make -C lib >>$@ 2>&1 && rm $@ + FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC @@ -365,5 +369,5 @@ $(foreach t,$(run),$(if $(findstring make_static,$(t)),\ $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) endif -.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools +.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools make_libperf endif # ifndef MK From 227cb129858a3eb4b874937274b09834ffcc39b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Sep 2019 14:48:22 +0200 Subject: [PATCH 025/345] libperf: Add missing event.h file to install rule So that this development header is properly installed and can be found by tools linking with libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190901124822.10132-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index a67efb8d9d39..e325c0503dc6 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -146,6 +146,7 @@ install_headers: $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ From 4256d434935e9c85a731823be562785494ca364b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 14:12:53 +0200 Subject: [PATCH 026/345] libperf: Adopt perf_cpu_map__max() function From 'perf stat', so that it can be used from multiple places. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Joe Mario Cc: Kan Liang Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190902121255.536-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 14 +------------- tools/perf/lib/cpumap.c | 12 ++++++++++++ tools/perf/lib/include/perf/cpumap.h | 1 + tools/perf/lib/libperf.map | 1 + 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e17bf9f700a..5bc0c570b7b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -822,18 +822,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, return cpu_map__get_core(map, cpu, NULL); } -static int cpu_map__get_max(struct perf_cpu_map *map) -{ - int i, max = -1; - - for (i = 0; i < map->nr; i++) { - if (map->map[i] > max) - max = map->map[i]; - } - - return max; -} - static int perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) { @@ -928,7 +916,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = cpu_map__get_max(evsel_list->core.cpus); + nr = perf_cpu_map__max(evsel_list->core.cpus); stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index 1f0e6f334237..2ca1fafa620d 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } + +int perf_cpu_map__max(struct perf_cpu_map *map) +{ + int i, max = -1; + + for (i = 0; i < map->nr; i++) { + if (map->map[i] > max) + max = map->map[i]; + } + + return max; +} diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h index 8aa995c59498..ac9aa497f84a 100644 --- a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/perf/lib/include/perf/cpumap.h @@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index dc4d66363bc4..cd0d17b996c8 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -9,6 +9,7 @@ LIBPERF_0.0.1 { perf_cpu_map__nr; perf_cpu_map__cpu; perf_cpu_map__empty; + perf_cpu_map__max; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; From c4bb667eaf520f21b3a3db0489682becc9c49bcc Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 2 Aug 2019 18:15:19 +0100 Subject: [PATCH 027/345] fuse: reserve values for mapping protocol SETUPMAPPING is a command for use with 'virtiofsd', a fuse-over-virtio implementation; it may find use in other fuse impelementations as well in which the kernel does not have access to the address space of the daemon directly. A SETUPMAPPING operation causes a section of a file to be mapped into a memory window visible to the kernel. The offsets in the file and the window are defined by the kernel performing the operation. The daemon may reject the request, for reasons including permissions and limited resources. When a request perfectly overlaps a previous mapping, the previous mapping is replaced. When a mapping partially overlaps a previous mapping, the previous mapping is split into one or two smaller mappings. REMOVEMAPPING is the complement to SETUPMAPPING; it unmaps a range of mapped files from the window visible to the kernel. The map_alignment field communicates the alignment constraint for FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING and allows the daemon to constrain the addresses and file offsets chosen by the kernel. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index df2e12fb3381..802b0377a49e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -133,6 +133,8 @@ * * 7.31 * - add FUSE_WRITE_KILL_PRIV flag + * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING + * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag */ #ifndef _LINUX_FUSE_H @@ -274,6 +276,7 @@ struct fuse_file_lock { * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request + * FUSE_MAP_ALIGNMENT: map_alignment field is valid */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -301,6 +304,7 @@ struct fuse_file_lock { #define FUSE_CACHE_SYMLINKS (1 << 23) #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) +#define FUSE_MAP_ALIGNMENT (1 << 26) /** * CUSE INIT request/reply flags @@ -422,6 +426,8 @@ enum fuse_opcode { FUSE_RENAME2 = 45, FUSE_LSEEK = 46, FUSE_COPY_FILE_RANGE = 47, + FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -656,7 +662,7 @@ struct fuse_init_out { uint32_t max_write; uint32_t time_gran; uint16_t max_pages; - uint16_t padding; + uint16_t map_alignment; uint32_t unused[8]; }; From 2472518e44eee3a45bd436a4162046790b74767a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 29 Aug 2019 11:08:35 -0700 Subject: [PATCH 028/345] Revert "drm/bridge: adv7511: Attach to DSI host at probe time" This reverts commit 83f35bc3a852f1c3892c7474998c5cec707c7ba3. There are at least two DSI controller drivers which relies on the old behaviour of adv7511 driver. To avoid platform breakage this patch should be reverted. This is a temporary solution, as it blocks adv7511 usage with other platforms. Assumption that DSI device driver (bridge/panel) should first expose drm_bridge/drm_panel object, then look for DSI bus is just incorrect - it can work with devices controlled via i2c but it cannot work with devices controlled via DSI - they will not be able to probe. To solve the issue following steps should be performed: - rework reverted patch allowing co-operation with broken DSI controller drivers - with simple/ugly workaround, - fix controller drivers and then remove workaround. Signed-off-by: Rob Clark [a.hajda: changed commit message] Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190829180836.14453-1-robdclark@gmail.com --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 98bccace8c1c..f6d2681f6927 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -874,6 +874,9 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge) &adv7511_connector_helper_funcs); drm_connector_attach_encoder(&adv->connector, bridge->encoder); + if (adv->type == ADV7533) + ret = adv7533_attach_dsi(adv); + if (adv->i2c_main->irq) regmap_write(adv->regmap, ADV7511_REG_INT_ENABLE(0), ADV7511_INT0_HPD); @@ -1219,17 +1222,8 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); - - if (adv7511->type == ADV7533) { - ret = adv7533_attach_dsi(adv7511); - if (ret) - goto err_remove_bridge; - } - return 0; -err_remove_bridge: - drm_bridge_remove(&adv7511->bridge); err_unregister_cec: i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) From c9b8af43a7cddf4f0bd8f36a4242cf70e73097fa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Aug 2019 16:40:29 +0200 Subject: [PATCH 029/345] watchdog: pnx4008_wdt: allow compile-testing The only thing that prevents building this driver on other platforms is the mach/hardware.h include, which is not actually used here at all, so remove the line and allow CONFIG_COMPILE_TEST. Acked-by: Sylvain Lemieux Reviewed-by: Guenter Roeck Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190809144043.476786-4-arnd@arndb.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 2 +- drivers/watchdog/pnx4008_wdt.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 8188963a405b..a45f9e3e442b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -551,7 +551,7 @@ config OMAP_WATCHDOG config PNX4008_WATCHDOG tristate "LPC32XX Watchdog" - depends on ARCH_LPC32XX + depends on ARCH_LPC32XX || COMPILE_TEST select WATCHDOG_CORE help Say Y here if to include support for the watchdog timer diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c index 7b446b696f2b..e0ea133c1690 100644 --- a/drivers/watchdog/pnx4008_wdt.c +++ b/drivers/watchdog/pnx4008_wdt.c @@ -30,7 +30,6 @@ #include #include #include -#include /* WatchDog Timer - Chapter 23 Page 207 */ From a65f506f4a824fc256ff3cae41a14549550f49f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Aug 2019 22:27:32 +0200 Subject: [PATCH 030/345] watchdog: remove ks8695 driver The platform is getting removed, so there are no remaining users of this driver. Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20190809202749.742267-5-arnd@arndb.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../watchdog/watchdog-parameters.rst | 9 - drivers/watchdog/Kconfig | 7 - drivers/watchdog/Makefile | 1 - drivers/watchdog/ks8695_wdt.c | 319 ------------------ 4 files changed, 336 deletions(-) delete mode 100644 drivers/watchdog/ks8695_wdt.c diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index a3985cc5aeda..226aba56f704 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -301,15 +301,6 @@ ixp4xx_wdt: ------------------------------------------------- -ks8695_wdt: - wdt_time: - Watchdog time in seconds. (default=5) - nowayout: - Watchdog cannot be stopped once started - (default=kernel config parameter) - -------------------------------------------------- - machzwd: nowayout: Watchdog cannot be stopped once started diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a45f9e3e442b..7e45a7792ed5 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -477,13 +477,6 @@ config IXP4XX_WATCHDOG Say N if you are unsure. -config KS8695_WATCHDOG - tristate "KS8695 watchdog" - depends on ARCH_KS8695 - help - Watchdog timer embedded into KS8695 processor. This will reboot your - system when the timeout is reached. - config HAVE_S3C2410_WATCHDOG bool help diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 7caa920e7e60..85f55ec76f8d 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -49,7 +49,6 @@ obj-$(CONFIG_21285_WATCHDOG) += wdt285.o obj-$(CONFIG_977_WATCHDOG) += wdt977.o obj-$(CONFIG_FTWDT010_WATCHDOG) += ftwdt010_wdt.o obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o -obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o obj-$(CONFIG_SAMA5D4_WATCHDOG) += sama5d4_wdt.o diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c deleted file mode 100644 index 1550ce3c5702..000000000000 --- a/drivers/watchdog/ks8695_wdt.c +++ /dev/null @@ -1,319 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Watchdog driver for Kendin/Micrel KS8695. - * - * (C) 2007 Andrew Victor - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define KS8695_TMR_OFFSET (0xF0000 + 0xE400) -#define KS8695_TMR_VA (KS8695_IO_VA + KS8695_TMR_OFFSET) - -/* - * Timer registers - */ -#define KS8695_TMCON (0x00) /* Timer Control Register */ -#define KS8695_T0TC (0x08) /* Timer 0 Timeout Count Register */ -#define TMCON_T0EN (1 << 0) /* Timer 0 Enable */ - -/* Timer0 Timeout Counter Register */ -#define T0TC_WATCHDOG (0xff) /* Enable watchdog mode */ - -#define WDT_DEFAULT_TIME 5 /* seconds */ -#define WDT_MAX_TIME 171 /* seconds */ - -static int wdt_time = WDT_DEFAULT_TIME; -static bool nowayout = WATCHDOG_NOWAYOUT; - -module_param(wdt_time, int, 0); -MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default=" - __MODULE_STRING(WDT_DEFAULT_TIME) ")"); - -#ifdef CONFIG_WATCHDOG_NOWAYOUT -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" - __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -#endif - - -static unsigned long ks8695wdt_busy; -static DEFINE_SPINLOCK(ks8695_lock); - -/* ......................................................................... */ - -/* - * Disable the watchdog. - */ -static inline void ks8695_wdt_stop(void) -{ - unsigned long tmcon; - - spin_lock(&ks8695_lock); - /* disable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Enable and reset the watchdog. - */ -static inline void ks8695_wdt_start(void) -{ - unsigned long tmcon; - unsigned long tval = wdt_time * KS8695_CLOCK_RATE; - - spin_lock(&ks8695_lock); - /* disable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - - /* program timer0 */ - __raw_writel(tval | T0TC_WATCHDOG, KS8695_TMR_VA + KS8695_T0TC); - - /* re-enable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Reload the watchdog timer. (ie, pat the watchdog) - */ -static inline void ks8695_wdt_reload(void) -{ - unsigned long tmcon; - - spin_lock(&ks8695_lock); - /* disable, then re-enable timer0 */ - tmcon = __raw_readl(KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon & ~TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - __raw_writel(tmcon | TMCON_T0EN, KS8695_TMR_VA + KS8695_TMCON); - spin_unlock(&ks8695_lock); -} - -/* - * Change the watchdog time interval. - */ -static int ks8695_wdt_settimeout(int new_time) -{ - /* - * All counting occurs at KS8695_CLOCK_RATE / 128 = 0.256 Hz - * - * Since WDV is a 16-bit counter, the maximum period is - * 65536 / 0.256 = 256 seconds. - */ - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) - return -EINVAL; - - /* Set new watchdog time. It will be used when - ks8695_wdt_start() is called. */ - wdt_time = new_time; - return 0; -} - -/* ......................................................................... */ - -/* - * Watchdog device is opened, and watchdog starts running. - */ -static int ks8695_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(0, &ks8695wdt_busy)) - return -EBUSY; - - ks8695_wdt_start(); - return stream_open(inode, file); -} - -/* - * Close the watchdog device. - * If CONFIG_WATCHDOG_NOWAYOUT is NOT defined then the watchdog is also - * disabled. - */ -static int ks8695_wdt_close(struct inode *inode, struct file *file) -{ - /* Disable the watchdog when file is closed */ - if (!nowayout) - ks8695_wdt_stop(); - clear_bit(0, &ks8695wdt_busy); - return 0; -} - -static const struct watchdog_info ks8695_wdt_info = { - .identity = "ks8695 watchdog", - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, -}; - -/* - * Handle commands from user-space. - */ -static long ks8695_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &ks8695_wdt_info, - sizeof(ks8695_wdt_info)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - case WDIOC_SETOPTIONS: - if (get_user(new_value, p)) - return -EFAULT; - if (new_value & WDIOS_DISABLECARD) - ks8695_wdt_stop(); - if (new_value & WDIOS_ENABLECARD) - ks8695_wdt_start(); - return 0; - case WDIOC_KEEPALIVE: - ks8695_wdt_reload(); /* pat the watchdog */ - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - if (ks8695_wdt_settimeout(new_value)) - return -EINVAL; - /* Enable new time value */ - ks8695_wdt_start(); - /* Return current value */ - return put_user(wdt_time, p); - case WDIOC_GETTIMEOUT: - return put_user(wdt_time, p); - default: - return -ENOTTY; - } -} - -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t ks8695_wdt_write(struct file *file, const char *data, - size_t len, loff_t *ppos) -{ - ks8695_wdt_reload(); /* pat the watchdog */ - return len; -} - -/* ......................................................................... */ - -static const struct file_operations ks8695wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = ks8695_wdt_ioctl, - .open = ks8695_wdt_open, - .release = ks8695_wdt_close, - .write = ks8695_wdt_write, -}; - -static struct miscdevice ks8695wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ks8695wdt_fops, -}; - -static int ks8695wdt_probe(struct platform_device *pdev) -{ - int res; - - if (ks8695wdt_miscdev.parent) - return -EBUSY; - ks8695wdt_miscdev.parent = &pdev->dev; - - res = misc_register(&ks8695wdt_miscdev); - if (res) - return res; - - pr_info("KS8695 Watchdog Timer enabled (%d seconds%s)\n", - wdt_time, nowayout ? ", nowayout" : ""); - return 0; -} - -static int ks8695wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&ks8695wdt_miscdev); - ks8695wdt_miscdev.parent = NULL; - - return 0; -} - -static void ks8695wdt_shutdown(struct platform_device *pdev) -{ - ks8695_wdt_stop(); -} - -#ifdef CONFIG_PM - -static int ks8695wdt_suspend(struct platform_device *pdev, pm_message_t message) -{ - ks8695_wdt_stop(); - return 0; -} - -static int ks8695wdt_resume(struct platform_device *pdev) -{ - if (ks8695wdt_busy) - ks8695_wdt_start(); - return 0; -} - -#else -#define ks8695wdt_suspend NULL -#define ks8695wdt_resume NULL -#endif - -static struct platform_driver ks8695wdt_driver = { - .probe = ks8695wdt_probe, - .remove = ks8695wdt_remove, - .shutdown = ks8695wdt_shutdown, - .suspend = ks8695wdt_suspend, - .resume = ks8695wdt_resume, - .driver = { - .name = "ks8695_wdt", - }, -}; - -static int __init ks8695_wdt_init(void) -{ - /* Check that the heartbeat value is within range; - if not reset to the default */ - if (ks8695_wdt_settimeout(wdt_time)) { - ks8695_wdt_settimeout(WDT_DEFAULT_TIME); - pr_info("ks8695_wdt: wdt_time value must be 1 <= wdt_time <= %i" - ", using %d\n", wdt_time, WDT_MAX_TIME); - } - return platform_driver_register(&ks8695wdt_driver); -} - -static void __exit ks8695_wdt_exit(void) -{ - platform_driver_unregister(&ks8695wdt_driver); -} - -module_init(ks8695_wdt_init); -module_exit(ks8695_wdt_exit); - -MODULE_AUTHOR("Andrew Victor"); -MODULE_DESCRIPTION("Watchdog driver for KS8695"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:ks8695_wdt"); From 58e4db99123343be174afbdd20751a18bfffc74b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Aug 2019 22:27:34 +0200 Subject: [PATCH 031/345] watchdog: remove w90x900 driver The ARM w90x900 platform is getting removed, so this driver is obsolete Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20190809202749.742267-7-arnd@arndb.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../watchdog/watchdog-parameters.rst | 10 - drivers/watchdog/Kconfig | 9 - drivers/watchdog/Makefile | 1 - drivers/watchdog/nuc900_wdt.c | 302 ------------------ 4 files changed, 322 deletions(-) delete mode 100644 drivers/watchdog/nuc900_wdt.c diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst index 226aba56f704..223c99361a30 100644 --- a/Documentation/watchdog/watchdog-parameters.rst +++ b/Documentation/watchdog/watchdog-parameters.rst @@ -366,16 +366,6 @@ nic7018_wdt: ------------------------------------------------- -nuc900_wdt: - heartbeat: - Watchdog heartbeats in seconds. - (default = 15) - nowayout: - Watchdog cannot be stopped once started - (default=kernel config parameter) - -------------------------------------------------- - omap_wdt: timer_margin: initial watchdog timeout (in seconds) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 7e45a7792ed5..a8f5c8147d4b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -655,15 +655,6 @@ config STMP3XXX_RTC_WATCHDOG To compile this driver as a module, choose M here: the module will be called stmp3xxx_rtc_wdt. -config NUC900_WATCHDOG - tristate "Nuvoton NUC900 watchdog" - depends on ARCH_W90X900 || COMPILE_TEST - help - Say Y here if to include support for the watchdog timer - for the Nuvoton NUC900 series SoCs. - To compile this driver as a module, choose M here: the - module will be called nuc900_wdt. - config TS4800_WATCHDOG tristate "TS-4800 Watchdog" depends on HAS_IOMEM && OF diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 85f55ec76f8d..b5a0aed537af 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -63,7 +63,6 @@ obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o obj-$(CONFIG_NPCM7XX_WATCHDOG) += npcm_wdt.o obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o -obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c deleted file mode 100644 index db124cebe838..000000000000 --- a/drivers/watchdog/nuc900_wdt.c +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2009 Nuvoton technology corporation. - * - * Wan ZongShun - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define REG_WTCR 0x1c -#define WTCLK (0x01 << 10) -#define WTE (0x01 << 7) /*wdt enable*/ -#define WTIS (0x03 << 4) -#define WTIF (0x01 << 3) -#define WTRF (0x01 << 2) -#define WTRE (0x01 << 1) -#define WTR (0x01 << 0) -/* - * The watchdog time interval can be calculated via following formula: - * WTIS real time interval (formula) - * 0x00 ((2^ 14 ) * ((external crystal freq) / 256))seconds - * 0x01 ((2^ 16 ) * ((external crystal freq) / 256))seconds - * 0x02 ((2^ 18 ) * ((external crystal freq) / 256))seconds - * 0x03 ((2^ 20 ) * ((external crystal freq) / 256))seconds - * - * The external crystal freq is 15Mhz in the nuc900 evaluation board. - * So 0x00 = +-0.28 seconds, 0x01 = +-1.12 seconds, 0x02 = +-4.48 seconds, - * 0x03 = +- 16.92 seconds.. - */ -#define WDT_HW_TIMEOUT 0x02 -#define WDT_TIMEOUT (HZ/2) -#define WDT_HEARTBEAT 15 - -static int heartbeat = WDT_HEARTBEAT; -module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " - "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")"); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " - "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); - -struct nuc900_wdt { - struct clk *wdt_clock; - struct platform_device *pdev; - void __iomem *wdt_base; - char expect_close; - struct timer_list timer; - spinlock_t wdt_lock; - unsigned long next_heartbeat; -}; - -static unsigned long nuc900wdt_busy; -static struct nuc900_wdt *nuc900_wdt; - -static inline void nuc900_wdt_keepalive(void) -{ - unsigned int val; - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val |= (WTR | WTIF); - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); -} - -static inline void nuc900_wdt_start(void) -{ - unsigned int val; - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val |= (WTRE | WTE | WTR | WTCLK | WTIF); - val &= ~WTIS; - val |= (WDT_HW_TIMEOUT << 0x04); - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); - - nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ; - mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT); -} - -static inline void nuc900_wdt_stop(void) -{ - unsigned int val; - - del_timer(&nuc900_wdt->timer); - - spin_lock(&nuc900_wdt->wdt_lock); - - val = __raw_readl(nuc900_wdt->wdt_base + REG_WTCR); - val &= ~WTE; - __raw_writel(val, nuc900_wdt->wdt_base + REG_WTCR); - - spin_unlock(&nuc900_wdt->wdt_lock); -} - -static inline void nuc900_wdt_ping(void) -{ - nuc900_wdt->next_heartbeat = jiffies + heartbeat * HZ; -} - -static int nuc900_wdt_open(struct inode *inode, struct file *file) -{ - - if (test_and_set_bit(0, &nuc900wdt_busy)) - return -EBUSY; - - nuc900_wdt_start(); - - return stream_open(inode, file); -} - -static int nuc900_wdt_close(struct inode *inode, struct file *file) -{ - if (nuc900_wdt->expect_close == 42) - nuc900_wdt_stop(); - else { - dev_crit(&nuc900_wdt->pdev->dev, - "Unexpected close, not stopping watchdog!\n"); - nuc900_wdt_ping(); - } - - nuc900_wdt->expect_close = 0; - clear_bit(0, &nuc900wdt_busy); - return 0; -} - -static const struct watchdog_info nuc900_wdt_info = { - .identity = "nuc900 watchdog", - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE, -}; - -static long nuc900_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &nuc900_wdt_info, - sizeof(nuc900_wdt_info)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_KEEPALIVE: - nuc900_wdt_ping(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - heartbeat = new_value; - nuc900_wdt_ping(); - - return put_user(new_value, p); - case WDIOC_GETTIMEOUT: - return put_user(heartbeat, p); - default: - return -ENOTTY; - } -} - -static ssize_t nuc900_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (!len) - return 0; - - /* Scan for magic character */ - if (!nowayout) { - size_t i; - - nuc900_wdt->expect_close = 0; - - for (i = 0; i < len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') { - nuc900_wdt->expect_close = 42; - break; - } - } - } - - nuc900_wdt_ping(); - return len; -} - -static void nuc900_wdt_timer_ping(struct timer_list *unused) -{ - if (time_before(jiffies, nuc900_wdt->next_heartbeat)) { - nuc900_wdt_keepalive(); - mod_timer(&nuc900_wdt->timer, jiffies + WDT_TIMEOUT); - } else - dev_warn(&nuc900_wdt->pdev->dev, "Will reset the machine !\n"); -} - -static const struct file_operations nuc900wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = nuc900_wdt_ioctl, - .open = nuc900_wdt_open, - .release = nuc900_wdt_close, - .write = nuc900_wdt_write, -}; - -static struct miscdevice nuc900wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &nuc900wdt_fops, -}; - -static int nuc900wdt_probe(struct platform_device *pdev) -{ - int ret = 0; - - nuc900_wdt = devm_kzalloc(&pdev->dev, sizeof(*nuc900_wdt), - GFP_KERNEL); - if (!nuc900_wdt) - return -ENOMEM; - - nuc900_wdt->pdev = pdev; - - spin_lock_init(&nuc900_wdt->wdt_lock); - - nuc900_wdt->wdt_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(nuc900_wdt->wdt_base)) - return PTR_ERR(nuc900_wdt->wdt_base); - - nuc900_wdt->wdt_clock = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(nuc900_wdt->wdt_clock)) { - dev_err(&pdev->dev, "failed to find watchdog clock source\n"); - return PTR_ERR(nuc900_wdt->wdt_clock); - } - - clk_enable(nuc900_wdt->wdt_clock); - - timer_setup(&nuc900_wdt->timer, nuc900_wdt_timer_ping, 0); - - ret = misc_register(&nuc900wdt_miscdev); - if (ret) { - dev_err(&pdev->dev, "err register miscdev on minor=%d (%d)\n", - WATCHDOG_MINOR, ret); - goto err_clk; - } - - return 0; - -err_clk: - clk_disable(nuc900_wdt->wdt_clock); - return ret; -} - -static int nuc900wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&nuc900wdt_miscdev); - - clk_disable(nuc900_wdt->wdt_clock); - - return 0; -} - -static struct platform_driver nuc900wdt_driver = { - .probe = nuc900wdt_probe, - .remove = nuc900wdt_remove, - .driver = { - .name = "nuc900-wdt", - }, -}; - -module_platform_driver(nuc900wdt_driver); - -MODULE_AUTHOR("Wan ZongShun "); -MODULE_DESCRIPTION("Watchdog driver for NUC900"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:nuc900-wdt"); From 31bfa64e9428c2ab6608377fb3d4c40e29c7f4ce Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 11 Aug 2019 14:23:46 -0700 Subject: [PATCH 032/345] watchdog: diag288_wdt: Remove leftover includes from conversion to watchdog API Commit f7a94db4e959 ("s390/watchdog: use watchdog API") converted the driver to use the watchdog API, but some includes as well as MODULE_ALIAS_MISCDEV() were missed. Cc: Philipp Hachtmann Cc: Martin Schwidefsky Signed-off-by: Guenter Roeck Reviewed-by: Wim Van Sebroeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/diag288_wdt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c index 181440b7b4d0..aafc8d98bf9f 100644 --- a/drivers/watchdog/diag288_wdt.c +++ b/drivers/watchdog/diag288_wdt.c @@ -26,13 +26,11 @@ #include #include #include -#include #include #include #include #include #include -#include #define MAX_CMDLEN 240 #define DEFAULT_CMD "SYSTEM RESTART" @@ -70,7 +68,6 @@ MODULE_PARM_DESC(conceal, "Enable the CONCEAL CP option while the watchdog is ac module_param_named(nowayout, nowayout_info, bool, 0444); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default = CONFIG_WATCHDOG_NOWAYOUT)"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); MODULE_ALIAS("vmwatchdog"); static int __diag288(unsigned int func, unsigned int timeout, From 68f28b01fb9e5fc3ec273104714bd71bac783845 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 14 Aug 2019 22:42:32 +0200 Subject: [PATCH 033/345] watchdog: cpwd: use generic compat_ptr_ioctl The cpwd_compat_ioctl() contains a bogus mutex that dates back to a leftover BKL instance. Simplify the implementation by using the new compat_ptr_ioctl() helper function that will do the right thing for all calls here. Note that WIOCSTART/WIOCSTOP don't take any arguments, so the compat_ptr() conversion is not needed here, but it also doesn't hurt. Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190814204259.120942-6-arnd@arndb.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/cpwd.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c index b973b31179df..9393be584e72 100644 --- a/drivers/watchdog/cpwd.c +++ b/drivers/watchdog/cpwd.c @@ -473,29 +473,6 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } -static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int rval = -ENOIOCTLCMD; - - switch (cmd) { - /* solaris ioctls are specific to this driver */ - case WIOCSTART: - case WIOCSTOP: - case WIOCGSTAT: - mutex_lock(&cpwd_mutex); - rval = cpwd_ioctl(file, cmd, arg); - mutex_unlock(&cpwd_mutex); - break; - - /* everything else is handled by the generic compat layer */ - default: - break; - } - - return rval; -} - static ssize_t cpwd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -520,7 +497,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer, static const struct file_operations cpwd_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cpwd_ioctl, - .compat_ioctl = cpwd_compat_ioctl, + .compat_ioctl = compat_ptr_ioctl, .open = cpwd_open, .write = cpwd_write, .read = cpwd_read, From 144783a80cd2cbc45c6ce17db649140b65f203dd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 12 Aug 2019 15:13:56 +0200 Subject: [PATCH 034/345] watchdog: imx2_wdt: fix min() calculation in imx2_wdt_set_timeout Converting from ms to s requires dividing by 1000, not multiplying. So this is currently taking the smaller of new_timeout and 1.28e8, i.e. effectively new_timeout. The driver knows what it set max_hw_heartbeat_ms to, so use that value instead of doing a division at run-time. FWIW, this can easily be tested by booting into a busybox shell and doing "watchdog -t 5 -T 130 /dev/watchdog" - without this patch, the watchdog fires after 130&127 == 2 seconds. Fixes: b07e228eee69 "watchdog: imx2_wdt: Fix set_timeout for big timeout values" Cc: stable@vger.kernel.org # 5.2 plus anything the above got backported to Signed-off-by: Rasmus Villemoes Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812131356.23039-1-linux@rasmusvillemoes.dk Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imx2_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 32af3974e6bb..8d019a961ccc 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -55,7 +55,7 @@ #define IMX2_WDT_WMCR 0x08 /* Misc Register */ -#define IMX2_WDT_MAX_TIME 128 +#define IMX2_WDT_MAX_TIME 128U #define IMX2_WDT_DEFAULT_TIME 60 /* in seconds */ #define WDOG_SEC_TO_COUNT(s) ((s * 2 - 1) << 8) @@ -180,7 +180,7 @@ static int imx2_wdt_set_timeout(struct watchdog_device *wdog, { unsigned int actual; - actual = min(new_timeout, wdog->max_hw_heartbeat_ms * 1000); + actual = min(new_timeout, IMX2_WDT_MAX_TIME); __imx2_wdt_set_timeout(wdog, actual); wdog->timeout = new_timeout; return 0; From 30520ee8e3bac25dbb1bb43da0e49177be3e19c0 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 12 Aug 2019 16:44:34 +0800 Subject: [PATCH 035/345] watchdog: imx_sc: Remove unnecessary error log An error message is already displayed by watchdog_register_device() when failed, so no need to have error log again for failure of calling devm_watchdog_register_device(). Signed-off-by: Anson Huang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812084434.13316-1-Anson.Huang@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imx_sc_wdt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 78eaaf75a263..9260475439eb 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -175,11 +175,8 @@ static int imx_sc_wdt_probe(struct platform_device *pdev) watchdog_stop_on_unregister(wdog); ret = devm_watchdog_register_device(dev, wdog); - - if (ret) { - dev_err(dev, "Failed to register watchdog device\n"); + if (ret) return ret; - } ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG, SC_IRQ_WDOG, From 670e51b0301e844d53da7e0a37c5063cdab81876 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:45 -0700 Subject: [PATCH 036/345] watchdog: ziirave_wdt: Add missing newline Add missing newline. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-2-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index dec660c509b3..6ec028fb2635 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -671,7 +671,7 @@ static int ziirave_wdt_probe(struct i2c_client *client, if (ret) return ret; - dev_info(&client->dev, "Timeout set to %ds.", + dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout); } From 4a9600c7e735c343cf723bf4a97bfb0435748e20 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:46 -0700 Subject: [PATCH 037/345] watchdog: ziirave_wdt: Be verbose about errors in probe() The driver is quite silent in case of probe failure, which makes it more difficult to diagnose problem from the kernel log. Add logging to all of the silent error paths ziirave_wdt_probe() to improve that. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-3-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 6ec028fb2635..8c71341a9c1c 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -658,8 +658,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, */ if (w_priv->wdd.timeout == 0) { val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_TIMEOUT); - if (val < 0) + if (val < 0) { + dev_err(&client->dev, "Failed to read timeout\n"); return val; + } if (val < ZIIRAVE_TIMEOUT_MIN) return -ENODEV; @@ -668,8 +670,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, } else { ret = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to set timeout\n"); return ret; + } dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout); @@ -681,34 +685,46 @@ static int ziirave_wdt_probe(struct i2c_client *client, /* If in unconfigured state, set to stopped */ val = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_STATE); - if (val < 0) + if (val < 0) { + dev_err(&client->dev, "Failed to read state\n"); return val; + } if (val == ZIIRAVE_STATE_INITIAL) ziirave_wdt_stop(&w_priv->wdd); ret = ziirave_wdt_init_duration(client); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to init duration\n"); return ret; + } ret = ziirave_wdt_revision(client, &w_priv->firmware_rev, ZIIRAVE_WDT_FIRM_VER_MAJOR); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to read firmware version\n"); return ret; + } ret = ziirave_wdt_revision(client, &w_priv->bootloader_rev, ZIIRAVE_WDT_BOOT_VER_MAJOR); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to read bootloader version\n"); return ret; + } w_priv->reset_reason = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_RESET_REASON); - if (w_priv->reset_reason < 0) + if (w_priv->reset_reason < 0) { + dev_err(&client->dev, "Failed to read reset reason\n"); return w_priv->reset_reason; + } if (w_priv->reset_reason >= ARRAY_SIZE(ziirave_reasons) || - !ziirave_reasons[w_priv->reset_reason]) + !ziirave_reasons[w_priv->reset_reason]) { + dev_err(&client->dev, "Invalid reset reason\n"); return -ENODEV; + } ret = watchdog_register_device(&w_priv->wdd); From b774fcef7dde27da8e8ed21571a254b1a78d200f Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:47 -0700 Subject: [PATCH 038/345] watchdog: ziirave_wdt: Be more verbose during firmware update Add more error logging to ziirave_firm_upload() for diagnostics. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-4-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 8c71341a9c1c..b3e255b40209 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -335,14 +335,18 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1, false); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to jump to bootloader\n"); return ret; + } msleep(500); ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true); - if (ret) + if (ret) { + dev_err(&client->dev, "Failed to start download\n"); return ret; + } msleep(500); @@ -438,14 +442,20 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, /* End download operation */ ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false); - if (ret) + if (ret) { + dev_err(&client->dev, + "Failed to end firmware download: %d\n", ret); return ret; + } /* Reset the processor */ ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1, false); - if (ret) + if (ret) { + dev_err(&client->dev, + "Failed to reset the watchdog: %d\n", ret); return ret; + } msleep(500); From 39d0387d5e5e9bbd1fd9cfaab19581939b05f1c8 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:48 -0700 Subject: [PATCH 039/345] watchdog: ziirave_wdt: Don't bail out on unexpected timeout value Reprogramming bootloader on watchdog MCU will result in reported default timeout value of "0". That in turn will be unnecessarily rejected by the driver as invalid device (-ENODEV). Simplify probe to read stored timeout value, set it to a sane default if it is bogus, and then program that value unconditionally. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-5-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index b3e255b40209..a11b92383c5f 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -23,6 +23,7 @@ #define ZIIRAVE_TIMEOUT_MIN 3 #define ZIIRAVE_TIMEOUT_MAX 255 +#define ZIIRAVE_TIMEOUT_DEFAULT 30 #define ZIIRAVE_PING_VALUE 0x0 @@ -673,22 +674,21 @@ static int ziirave_wdt_probe(struct i2c_client *client, return val; } - if (val < ZIIRAVE_TIMEOUT_MIN) - return -ENODEV; + if (val > ZIIRAVE_TIMEOUT_MAX || + val < ZIIRAVE_TIMEOUT_MIN) + val = ZIIRAVE_TIMEOUT_DEFAULT; w_priv->wdd.timeout = val; - } else { - ret = ziirave_wdt_set_timeout(&w_priv->wdd, - w_priv->wdd.timeout); - if (ret) { - dev_err(&client->dev, "Failed to set timeout\n"); - return ret; - } - - dev_info(&client->dev, "Timeout set to %ds\n", - w_priv->wdd.timeout); } + ret = ziirave_wdt_set_timeout(&w_priv->wdd, w_priv->wdd.timeout); + if (ret) { + dev_err(&client->dev, "Failed to set timeout\n"); + return ret; + } + + dev_info(&client->dev, "Timeout set to %ds\n", w_priv->wdd.timeout); + watchdog_set_nowayout(&w_priv->wdd, nowayout); i2c_set_clientdata(client, w_priv); From 42abc12464f74feb2e868fba439c713d56ef9573 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:49 -0700 Subject: [PATCH 040/345] watchdog: ziirave_wdt: Log bootloader/firmware info during probe Log bootloader/firmware info during probe. This information is available via sysfs already, but it's really helpful to have this in kernel log during startup as well. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-6-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index a11b92383c5f..75c066602c00 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -69,6 +69,9 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER 0x0c #define ZIIRAVE_CMD_DOWNLOAD_PACKET 0x0e +#define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u" +#define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u" + struct ziirave_wdt_rev { unsigned char major; unsigned char minor; @@ -489,7 +492,7 @@ static ssize_t ziirave_wdt_sysfs_show_firm(struct device *dev, if (ret) return ret; - ret = sprintf(buf, "02.%02u.%02u", w_priv->firmware_rev.major, + ret = sprintf(buf, ZIIRAVE_FW_VERSION_FMT, w_priv->firmware_rev.major, w_priv->firmware_rev.minor); mutex_unlock(&w_priv->sysfs_mutex); @@ -512,7 +515,7 @@ static ssize_t ziirave_wdt_sysfs_show_boot(struct device *dev, if (ret) return ret; - ret = sprintf(buf, "01.%02u.%02u", w_priv->bootloader_rev.major, + ret = sprintf(buf, ZIIRAVE_BL_VERSION_FMT, w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor); mutex_unlock(&w_priv->sysfs_mutex); @@ -579,7 +582,8 @@ static ssize_t ziirave_wdt_sysfs_store_firm(struct device *dev, goto unlock_mutex; } - dev_info(&client->dev, "Firmware updated to version 02.%02u.%02u\n", + dev_info(&client->dev, + "Firmware updated to version " ZIIRAVE_FW_VERSION_FMT "\n", w_priv->firmware_rev.major, w_priv->firmware_rev.minor); /* Restore the watchdog timeout */ @@ -716,6 +720,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, return ret; } + dev_info(&client->dev, + "Firmware version: " ZIIRAVE_FW_VERSION_FMT "\n", + w_priv->firmware_rev.major, w_priv->firmware_rev.minor); + ret = ziirave_wdt_revision(client, &w_priv->bootloader_rev, ZIIRAVE_WDT_BOOT_VER_MAJOR); if (ret) { @@ -723,6 +731,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, return ret; } + dev_info(&client->dev, + "Bootloader version: " ZIIRAVE_BL_VERSION_FMT "\n", + w_priv->bootloader_rev.major, w_priv->bootloader_rev.minor); + w_priv->reset_reason = i2c_smbus_read_byte_data(client, ZIIRAVE_WDT_RESET_REASON); if (w_priv->reset_reason < 0) { From 5870f4958ccf90f4cdf3911bade77cb72a3bf28c Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:50 -0700 Subject: [PATCH 041/345] watchdog: ziirave_wdt: Simplify ziirave_firm_write_pkt() There no reason why ziirave_firm_write_pkt() has to take firmware data via 'struct ihex_binrec' and it can just take address, data pointer and data length as individual arguments. Make this change to allow us to drastically simplify handling page crossing case by removing all of the extra code required to split 'struct ihex_binrec' into two. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-7-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 116 +++++++++++++-------------------- 1 file changed, 44 insertions(+), 72 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 75c066602c00..b2d5ff0c22fe 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -51,6 +51,7 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_FIRM_PKT_DATA_SIZE 16 #define ZIIRAVE_FIRM_FLASH_MEMORY_START 0x1600 #define ZIIRAVE_FIRM_FLASH_MEMORY_END 0x2bbf +#define ZIIRAVE_FIRM_PAGE_SIZE 128 /* Received and ready for next Download packet. */ #define ZIIRAVE_FIRM_DOWNLOAD_ACK 1 @@ -244,27 +245,26 @@ static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, * Data0 .. Data15: Array of 16 bytes of data. * Checksum: Checksum byte to verify data integrity. */ -static int ziirave_firm_write_pkt(struct watchdog_device *wdd, - const struct ihex_binrec *rec) +static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, + u32 addr, const u8 *data, u8 len) { + const u16 addr16 = (u16)addr / 2; struct i2c_client *client = to_i2c_client(wdd->parent); u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE]; int ret; - u16 addr; memset(packet, 0, ARRAY_SIZE(packet)); /* Packet length */ - packet[0] = (u8)be16_to_cpu(rec->len); + packet[0] = len; /* Packet address */ - addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; - packet[1] = addr & 0xff; - packet[2] = (addr & 0xff00) >> 8; + packet[1] = addr16 & 0xff; + packet[2] = (addr16 & 0xff00) >> 8; /* Packet data */ - if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) + if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) return -EMSGSIZE; - memcpy(packet + 3, rec->data, be16_to_cpu(rec->len)); + memcpy(packet + 3, data, len); /* Packet checksum */ for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++) @@ -276,11 +276,35 @@ static int ziirave_firm_write_pkt(struct watchdog_device *wdd, if (ret) dev_err(&client->dev, "Failed to write firmware packet at address 0x%04x: %d\n", - addr, ret); + addr16, ret); return ret; } +static int ziirave_firm_write_pkt(struct watchdog_device *wdd, + u32 addr, const u8 *data, u8 len) +{ + const u8 max_write_len = ZIIRAVE_FIRM_PAGE_SIZE - + (addr - ALIGN_DOWN(addr, ZIIRAVE_FIRM_PAGE_SIZE)); + int ret; + + if (len > max_write_len) { + /* + * If data crossed page boundary we need to split this + * write in two + */ + ret = __ziirave_firm_write_pkt(wdd, addr, data, max_write_len); + if (ret) + return ret; + + addr += max_write_len; + data += max_write_len; + len -= max_write_len; + } + + return __ziirave_firm_write_pkt(wdd, addr, data, len); +} + static int ziirave_firm_verify(struct watchdog_device *wdd, const struct firmware *fw) { @@ -333,9 +357,8 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, const struct firmware *fw) { struct i2c_client *client = to_i2c_client(wdd->parent); - int ret, words_till_page_break; const struct ihex_binrec *rec; - struct ihex_binrec *rec_new; + int ret; ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1, false); @@ -366,68 +389,17 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, return -EMSGSIZE; } - /* Calculate words till page break */ - words_till_page_break = (64 - ((be32_to_cpu(rec->addr) >> 1) & - 0x3f)); - if ((be16_to_cpu(rec->len) >> 1) > words_till_page_break) { - /* - * Data in passes page boundary, so we need to split in - * two blocks of data. Create a packet with the first - * block of data. - */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + - (words_till_page_break << 1), - GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - rec_new->len = cpu_to_be16(words_till_page_break << 1); - rec_new->addr = rec->addr; - memcpy(rec_new->data, rec->data, - be16_to_cpu(rec_new->len)); - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); - if (ret) - return ret; - - /* Create a packet with the second block of data */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + - be16_to_cpu(rec->len) - - (words_till_page_break << 1), - GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - /* Remaining bytes */ - rec_new->len = rec->len - - cpu_to_be16(words_till_page_break << 1); - - rec_new->addr = cpu_to_be32(be32_to_cpu(rec->addr) + - (words_till_page_break << 1)); - - memcpy(rec_new->data, - rec->data + (words_till_page_break << 1), - be16_to_cpu(rec_new->len)); - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); - if (ret) - return ret; - } else { - ret = ziirave_firm_write_pkt(wdd, rec); - if (ret) - return ret; - } + ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr), + rec->data, be16_to_cpu(rec->len)); + if (ret) + return ret; } - /* For end of download, the length field will be set to 0 */ - rec_new = kzalloc(sizeof(struct ihex_binrec) + 1, GFP_KERNEL); - if (!rec_new) - return -ENOMEM; - - ret = ziirave_firm_write_pkt(wdd, rec_new); - kfree(rec_new); + /* + * Finish firmware download process by sending a zero length + * payload + */ + ret = ziirave_firm_write_pkt(wdd, 0, NULL, 0); if (ret) { dev_err(&client->dev, "Failed to send EMPTY packet: %d\n", ret); return ret; From 08188e8dbc7587d58948efca493219515d6a5639 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:51 -0700 Subject: [PATCH 042/345] watchdog: ziirave_wdt: Check packet length only once We don't need to check for packet length more than once, so drop the extra check in ziirave_firm_upload(). While at it move the check at the very start of __ziirave_firm_write_pkt(), as to not waste any time preparing a packet we'll never use. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-8-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index b2d5ff0c22fe..9d9c669b8b47 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -253,6 +253,13 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, u8 i, checksum = 0, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE]; int ret; + /* Check max data size */ + if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) { + dev_err(&client->dev, "Firmware packet too long (%d)\n", + len); + return -EMSGSIZE; + } + memset(packet, 0, ARRAY_SIZE(packet)); /* Packet length */ @@ -261,9 +268,6 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, packet[1] = addr16 & 0xff; packet[2] = (addr16 & 0xff00) >> 8; - /* Packet data */ - if (len > ZIIRAVE_FIRM_PKT_DATA_SIZE) - return -EMSGSIZE; memcpy(packet + 3, data, len); /* Packet checksum */ @@ -382,13 +386,6 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, if (!be16_to_cpu(rec->len)) break; - /* Check max data size */ - if (be16_to_cpu(rec->len) > ZIIRAVE_FIRM_PKT_DATA_SIZE) { - dev_err(&client->dev, "Firmware packet too long (%d)\n", - be16_to_cpu(rec->len)); - return -EMSGSIZE; - } - ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr), rec->data, be16_to_cpu(rec->len)); if (ret) From dc0dd28951f16be9714e90468e14566d186a7388 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:52 -0700 Subject: [PATCH 043/345] watchdog: ziirave_wdt: Skip zeros when calculating checksum Zeros don't contribute anything to checksum value, so we can skip unused portion of the packet when calculating its checksum. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-9-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 9d9c669b8b47..19da0910c2d1 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -271,7 +271,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, memcpy(packet + 3, data, len); /* Packet checksum */ - for (i = 0; i < ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1; i++) + for (i = 0; i < len + 3; i++) checksum += packet[i]; packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum; From e6bd448653d61d17ddf9a663ca84d1f422846907 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:53 -0700 Subject: [PATCH 044/345] watchdog: ziirave_wdt: Fix incorrect use of ARRAY_SIZE Both memset() and ziirave_firm_write_block_data() expect length in bytes as an argument, not a number of elements in array. It just happens that in this particular case both values are equal. Modify the code to use sizeof() instead. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-10-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 19da0910c2d1..e0f55cbdc603 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -203,7 +203,7 @@ static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr) return i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR, - ARRAY_SIZE(address), address); + sizeof(address), address); } static int ziirave_firm_write_block_data(struct watchdog_device *wdd, @@ -260,7 +260,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, return -EMSGSIZE; } - memset(packet, 0, ARRAY_SIZE(packet)); + memset(packet, 0, sizeof(packet)); /* Packet length */ packet[0] = len; @@ -276,7 +276,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum; ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET, - ARRAY_SIZE(packet), packet, true); + sizeof(packet), packet, true); if (ret) dev_err(&client->dev, "Failed to write firmware packet at address 0x%04x: %d\n", From 10f98fef7ba65fcb74129296631adc99750f1a96 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:54 -0700 Subject: [PATCH 045/345] watchdog: ziirave_wdt: Zero out only what's necessary Instead of zeroing out all of the packet and then overwriting a significant portion of those zeros via memcpy(), zero out only a portion of the packet that is known to not contain any data. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-11-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index e0f55cbdc603..69694f2836d7 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -260,8 +260,6 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, return -EMSGSIZE; } - memset(packet, 0, sizeof(packet)); - /* Packet length */ packet[0] = len; /* Packet address */ @@ -269,6 +267,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, packet[2] = (addr16 & 0xff00) >> 8; memcpy(packet + 3, data, len); + memset(packet + 3 + len, 0, ZIIRAVE_FIRM_PKT_DATA_SIZE - len); /* Packet checksum */ for (i = 0; i < len + 3; i++) From 08f980a8ffc4c0891b3998f588c1b02fe57caec9 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:55 -0700 Subject: [PATCH 046/345] watchdog: ziirave_wdt: Make use of put_unaligned_le16 Instead of doing this explicitly use put_unaligned_le16() to place 16-bit address value into command payload. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-12-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 69694f2836d7..38cf3ca329d7 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -21,6 +21,8 @@ #include #include +#include + #define ZIIRAVE_TIMEOUT_MIN 3 #define ZIIRAVE_TIMEOUT_MAX 255 #define ZIIRAVE_TIMEOUT_DEFAULT 30 @@ -198,8 +200,7 @@ static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr) struct i2c_client *client = to_i2c_client(wdd->parent); u8 address[2]; - address[0] = addr & 0xff; - address[1] = (addr >> 8) & 0xff; + put_unaligned_le16(addr, address); return i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR, @@ -263,8 +264,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, /* Packet length */ packet[0] = len; /* Packet address */ - packet[1] = addr16 & 0xff; - packet[2] = (addr16 & 0xff00) >> 8; + put_unaligned_le16(addr16, packet + 1); memcpy(packet + 3, data, len); memset(packet + 3 + len, 0, ZIIRAVE_FIRM_PKT_DATA_SIZE - len); From d91bb8d9162586bc17afb7ffd28b03cce064e6f4 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:56 -0700 Subject: [PATCH 047/345] watchdog: ziirave_wdt: Don't check if ihex record length is zero Ihex_next_binrec() will return NULL if next record's 'len' is zero, so explicit checks for that in the driver are unnecessary. Drop them. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-13-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 38cf3ca329d7..4b95467bf239 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -318,10 +318,6 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, u16 addr; for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { - /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) - break; - addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || addr > ZIIRAVE_FIRM_FLASH_MEMORY_END) @@ -381,10 +377,6 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, msleep(500); for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { - /* Zero length marks end of records */ - if (!be16_to_cpu(rec->len)) - break; - ret = ziirave_firm_write_pkt(wdd, be32_to_cpu(rec->addr), rec->data, be16_to_cpu(rec->len)); if (ret) From de88053807d848a504a65fbe7ea03c9aedc16b90 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:57 -0700 Subject: [PATCH 048/345] watchdog: ziirave_wdt: Don't read out more than 'len' firmware bytes We only compare first 'len' bytes of read firmware, so we don't need to read more that that. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-14-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 4b95467bf239..f05095b08016 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -318,6 +318,8 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, u16 addr; for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { + const u16 len = be16_to_cpu(rec->len); + addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || addr > ZIIRAVE_FIRM_FLASH_MEMORY_END) @@ -331,7 +333,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, return ret; } - for (i = 0; i < ARRAY_SIZE(data); i++) { + for (i = 0; i < len; i++) { ret = i2c_smbus_read_byte_data(client, ZIIRAVE_CMD_DOWNLOAD_READ_BYTE); if (ret < 0) { @@ -342,7 +344,7 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, data[i] = ret; } - if (memcmp(data, rec->data, be16_to_cpu(rec->len))) { + if (memcmp(data, rec->data, len)) { dev_err(&client->dev, "Firmware mismatch at address 0x%04x\n", addr); return -EINVAL; From d2ddc4505ed268a57bfc07817bc0494aca6d8818 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:58 -0700 Subject: [PATCH 049/345] watchdog: ziirave_wdt: Don't try to program readonly flash Bootloader code will ignore any attempts to write data to any flash area outside of [ZIIRAVE_FIRM_FLASH_MEMORY_START; ZIIRAVE_FIRM_FLASH_MEMORY_END]. Firmware update code already have an appropriate check to skip those areas when validating updated firmware. Firmware programming code, OTOH, does not and will needlessly send no-op I2C traffic. Add an appropriate check to __ziirave_firm_write_pkt() so as to save all of that wasted effort. While at it, normalize all of the address handling code to use full 32-bit address in units of bytes and convert it to an appropriate value only in places where that is necessary. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-15-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index f05095b08016..a3cc936858ad 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -51,8 +51,8 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_FIRM_PKT_TOTAL_SIZE 20 #define ZIIRAVE_FIRM_PKT_DATA_SIZE 16 -#define ZIIRAVE_FIRM_FLASH_MEMORY_START 0x1600 -#define ZIIRAVE_FIRM_FLASH_MEMORY_END 0x2bbf +#define ZIIRAVE_FIRM_FLASH_MEMORY_START (2 * 0x1600) +#define ZIIRAVE_FIRM_FLASH_MEMORY_END (2 * 0x2bbf) #define ZIIRAVE_FIRM_PAGE_SIZE 128 /* Received and ready for next Download packet. */ @@ -195,12 +195,13 @@ static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd) return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 0 : -EIO; } -static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u16 addr) +static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u32 addr) { struct i2c_client *client = to_i2c_client(wdd->parent); + const u16 addr16 = (u16)addr / 2; u8 address[2]; - put_unaligned_le16(addr, address); + put_unaligned_le16(addr16, address); return i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_SET_READ_ADDR, @@ -234,6 +235,12 @@ static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, wait_for_ack); } +static bool ziirave_firm_addr_readonly(u32 addr) +{ + return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || + addr > ZIIRAVE_FIRM_FLASH_MEMORY_END; +} + /* * ziirave_firm_write_pkt() - Build and write a firmware packet * @@ -261,6 +268,16 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, return -EMSGSIZE; } + /* + * Ignore packets that are targeting program memory outisde of + * app partition, since they will be ignored by the + * bootloader. At the same time, we need to make sure we'll + * allow zero length packet that will be sent as the last step + * of firmware update + */ + if (len && ziirave_firm_addr_readonly(addr)) + return 0; + /* Packet length */ packet[0] = len; /* Packet address */ @@ -279,7 +296,7 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, if (ret) dev_err(&client->dev, "Failed to write firmware packet at address 0x%04x: %d\n", - addr16, ret); + addr, ret); return ret; } @@ -315,14 +332,12 @@ static int ziirave_firm_verify(struct watchdog_device *wdd, const struct ihex_binrec *rec; int i, ret; u8 data[ZIIRAVE_FIRM_PKT_DATA_SIZE]; - u16 addr; for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { const u16 len = be16_to_cpu(rec->len); + const u32 addr = be32_to_cpu(rec->addr); - addr = (be32_to_cpu(rec->addr) & 0xffff) >> 1; - if (addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || - addr > ZIIRAVE_FIRM_FLASH_MEMORY_END) + if (ziirave_firm_addr_readonly(addr)) continue; ret = ziirave_firm_set_read_addr(wdd, addr); From d2c1d4258f7fbcfe386795c83abc9e6261ed9397 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:08:59 -0700 Subject: [PATCH 050/345] watchdog: ziirave_wdt: Fix misleading error message Fix misleading error message in ziirave_wdt_init_duration(). Saying "unable to set ..." implies that an attempt at communication with watchdog device has taken palce and was not successful. In this case, however, all it indicates is that no reset pulse duration was specified either via kernel parameter or Device Tree. Re-phase the log message to be more clear about benign nature of this event. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-16-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index a3cc936858ad..0c150f3cf408 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -603,7 +603,7 @@ static int ziirave_wdt_init_duration(struct i2c_client *client) &reset_duration); if (ret) { dev_info(&client->dev, - "Unable to set reset pulse duration, using default\n"); + "No reset pulse duration specified, using default\n"); return 0; } } From 910d0f968727b9e456e440596540f4059f8e74cf Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:00 -0700 Subject: [PATCH 051/345] watchdog: ziirave_wdt: Fix JUMP_TO_BOOTLOADER payload Bootloader firmware expects the following traffic for JUMP_TO_BOOTLOADER: S Addr Wr [A] 0x0c [A] 0x01 [A] P using ziirave_firm_write_byte() will result in S Addr Wr [A] 0x0c [A] 0x01 [A] 0x01 [A] P which happens to work because firmware will ignore any extra bytes and expected magic value matches byte count sent by i2c_smbus_write_block_data(). Fix this by converting the code to use i2c_smbus_write_byte_data() instead. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-17-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 0c150f3cf408..0185b9175cc0 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -72,6 +72,8 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER 0x0c #define ZIIRAVE_CMD_DOWNLOAD_PACKET 0x0e +#define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC 1 + #define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u" #define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u" @@ -376,8 +378,9 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, const struct ihex_binrec *rec; int ret; - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, 1, - false); + ret = i2c_smbus_write_byte_data(client, + ZIIRAVE_CMD_JUMP_TO_BOOTLOADER, + ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC); if (ret) { dev_err(&client->dev, "Failed to jump to bootloader\n"); return ret; From c47825fb72ea660b7d15f40dc7f8a73dcdd1c670 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:01 -0700 Subject: [PATCH 052/345] watchdog: ziirave_wdt: Fix DOWNLOAD_END payload Bootloader firmware expects the following traffic for DOWNLOAD_END: S Addr Wr [A] 0x11 [A] P using ziirave_firm_write_byte() will result in S Addr Wr [A] 0x11 [A] 0x01 [A] 0x01 [A] P which happens to work because firmware will ignore any extra bytes sent. Fix this by converting the code to use i2c_smbus_write_byte() instead. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-18-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 0185b9175cc0..a598780d73d3 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -425,7 +425,7 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, } /* End download operation */ - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_END, 1, false); + ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_END); if (ret) { dev_err(&client->dev, "Failed to end firmware download: %d\n", ret); From 0007cbd517a23fe5e46328baec89ffee0269f780 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:02 -0700 Subject: [PATCH 053/345] watchdog: ziirave_wdt: Fix RESET_PROCESSOR payload Bootloader firmware expects the following traffic for RESET_PROCESSOR: S Addr Wr [A] 0x0b [A] 0x01 [A] P using ziirave_firm_write_byte() will result in S Addr Wr [A] 0x0b [A] 0x01 [A] 0x01 [A] P which happens to work because firmware will ignore any extra bytes and expected magic value matches byte count sent by i2c_smbus_write_block_data(). Fix this by converting the code to use i2c_smbus_write_byte_data() instead. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-19-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index a598780d73d3..92df27350e41 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -73,6 +73,7 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, #define ZIIRAVE_CMD_DOWNLOAD_PACKET 0x0e #define ZIIRAVE_CMD_JUMP_TO_BOOTLOADER_MAGIC 1 +#define ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC 1 #define ZIIRAVE_FW_VERSION_FMT "02.%02u.%02u" #define ZIIRAVE_BL_VERSION_FMT "01.%02u.%02u" @@ -433,8 +434,9 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, } /* Reset the processor */ - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_RESET_PROCESSOR, 1, - false); + ret = i2c_smbus_write_byte_data(client, + ZIIRAVE_CMD_RESET_PROCESSOR, + ZIIRAVE_CMD_RESET_PROCESSOR_MAGIC); if (ret) { dev_err(&client->dev, "Failed to reset the watchdog: %d\n", ret); From fe05178c7891c546e07e24836c0af967996766c6 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:03 -0700 Subject: [PATCH 054/345] watchdog: ziirave_wdt: Drop status polling code Bootloader firmware doesn't implement DOWNLOAD_START or DOWNLOAD_PACKET in a non-blocking way. It will stretch the clock of the first status byte read until the operation is complete. Polling for the status is not really necessary since it will always succed on the first try. Replace polling code with a simple single byte read to simplify things. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-20-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 92df27350e41..681f65349779 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -57,11 +57,6 @@ static char *ziirave_reasons[] = {"power cycle", "hw watchdog", NULL, NULL, /* Received and ready for next Download packet. */ #define ZIIRAVE_FIRM_DOWNLOAD_ACK 1 -/* Currently writing to flash. Retry Download status in a moment! */ -#define ZIIRAVE_FIRM_DOWNLOAD_BUSY 2 - -/* Wait for ACK timeout in ms */ -#define ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT 50 /* Firmware commands */ #define ZIIRAVE_CMD_DOWNLOAD_START 0x10 @@ -175,25 +170,16 @@ static unsigned int ziirave_wdt_get_timeleft(struct watchdog_device *wdd) return ret; } -static int ziirave_firm_wait_for_ack(struct watchdog_device *wdd) +static int ziirave_firm_read_ack(struct watchdog_device *wdd) { struct i2c_client *client = to_i2c_client(wdd->parent); int ret; - unsigned long timeout; - timeout = jiffies + msecs_to_jiffies(ZIIRAVE_FIRM_WAIT_FOR_ACK_TIMEOUT); - do { - if (time_after(jiffies, timeout)) - return -ETIMEDOUT; - - usleep_range(5000, 10000); - - ret = i2c_smbus_read_byte(client); - if (ret < 0) { - dev_err(&client->dev, "Failed to read byte\n"); - return ret; - } - } while (ret == ZIIRAVE_FIRM_DOWNLOAD_BUSY); + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, "Failed to read status byte\n"); + return ret; + } return ret == ZIIRAVE_FIRM_DOWNLOAD_ACK ? 0 : -EIO; } @@ -226,7 +212,7 @@ static int ziirave_firm_write_block_data(struct watchdog_device *wdd, } if (wait_for_ack) - ret = ziirave_firm_wait_for_ack(wdd); + ret = ziirave_firm_read_ack(wdd); return ret; } From fa0d2f44aa6879e21843d517138510ccb7e1e39f Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:04 -0700 Subject: [PATCH 055/345] watchdog: ziirave_wdt: Fix DOWNLOAD_START payload Bootloader firmware expects the following traffic for DOWNLOAD_END: S Addr Wr [A] 0x10 [A] P using ziirave_firm_write_byte() will result in S Addr Wr [A] 0x10 [A] 0x01 [A] 0x01 [A] P which happens to work because firmware will ignore any extra bytes sent. Fix this by converting the code to use i2c_smbus_write_byte() instead. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-21-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 681f65349779..ed69fa82e09c 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -217,13 +217,6 @@ static int ziirave_firm_write_block_data(struct watchdog_device *wdd, return ret; } -static int ziirave_firm_write_byte(struct watchdog_device *wdd, u8 command, - u8 byte, bool wait_for_ack) -{ - return ziirave_firm_write_block_data(wdd, command, 1, &byte, - wait_for_ack); -} - static bool ziirave_firm_addr_readonly(u32 addr) { return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || @@ -375,12 +368,18 @@ static int ziirave_firm_upload(struct watchdog_device *wdd, msleep(500); - ret = ziirave_firm_write_byte(wdd, ZIIRAVE_CMD_DOWNLOAD_START, 1, true); + ret = i2c_smbus_write_byte(client, ZIIRAVE_CMD_DOWNLOAD_START); if (ret) { dev_err(&client->dev, "Failed to start download\n"); return ret; } + ret = ziirave_firm_read_ack(wdd); + if (ret) { + dev_err(&client->dev, "No ACK for start download\n"); + return ret; + } + msleep(500); for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { From 08c913fe3ea67c54318b91ed3d29cd98d8012ab9 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:05 -0700 Subject: [PATCH 056/345] watchdog: ziirave_wdt: Drop ziirave_firm_write_block_data() There's only one user of ziirave_firm_write_block_data(), so we may as well inline it. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-22-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index ed69fa82e09c..48278034cda6 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -197,26 +197,6 @@ static int ziirave_firm_set_read_addr(struct watchdog_device *wdd, u32 addr) sizeof(address), address); } -static int ziirave_firm_write_block_data(struct watchdog_device *wdd, - u8 command, u8 length, const u8 *data, - bool wait_for_ack) -{ - struct i2c_client *client = to_i2c_client(wdd->parent); - int ret; - - ret = i2c_smbus_write_block_data(client, command, length, data); - if (ret) { - dev_err(&client->dev, - "Failed to send command 0x%02x: %d\n", command, ret); - return ret; - } - - if (wait_for_ack) - ret = ziirave_firm_read_ack(wdd); - - return ret; -} - static bool ziirave_firm_addr_readonly(u32 addr) { return addr < ZIIRAVE_FIRM_FLASH_MEMORY_START || @@ -273,8 +253,15 @@ static int __ziirave_firm_write_pkt(struct watchdog_device *wdd, checksum += packet[i]; packet[ZIIRAVE_FIRM_PKT_TOTAL_SIZE - 1] = checksum; - ret = ziirave_firm_write_block_data(wdd, ZIIRAVE_CMD_DOWNLOAD_PACKET, - sizeof(packet), packet, true); + ret = i2c_smbus_write_block_data(client, ZIIRAVE_CMD_DOWNLOAD_PACKET, + sizeof(packet), packet); + if (ret) { + dev_err(&client->dev, + "Failed to send DOWNLOAD_PACKET: %d\n", ret); + return ret; + } + + ret = ziirave_firm_read_ack(wdd); if (ret) dev_err(&client->dev, "Failed to write firmware packet at address 0x%04x: %d\n", From f676ac8305f776c31f347080d64a2474cffbcab1 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 12 Aug 2019 13:09:06 -0700 Subject: [PATCH 057/345] watchdog: ziirave_wdt: Update checked I2C functionality mask Update checked I2C functionality mask to reflect all of the SMBus primitives used by this driver. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Guenter Roeck Cc: Rick Ramstetter Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190812200906.31344-23-andrew.smirnov@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ziirave_wdt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/ziirave_wdt.c b/drivers/watchdog/ziirave_wdt.c index 48278034cda6..4a363a8b2d20 100644 --- a/drivers/watchdog/ziirave_wdt.c +++ b/drivers/watchdog/ziirave_wdt.c @@ -602,7 +602,10 @@ static int ziirave_wdt_probe(struct i2c_client *client, struct ziirave_wdt_data *w_priv; int val; - if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) return -ENODEV; w_priv = devm_kzalloc(&client->dev, sizeof(*w_priv), GFP_KERNEL); From ff45d87dd8a85ab66027540f7b9c61d3eee2c992 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Mon, 19 Aug 2019 14:47:37 +0930 Subject: [PATCH 058/345] dt-bindings: watchdog: Add ast2600 compatible This adds a compatible for the ast2600, a new ASPEED SoC. Signed-off-by: Joel Stanley Reviewed-by: Andrew Jeffery Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190819051738.17370-2-joel@jms.id.au Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt index c5077a1f5cb3..d78d4a8fb868 100644 --- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt @@ -4,6 +4,7 @@ Required properties: - compatible: must be one of: - "aspeed,ast2400-wdt" - "aspeed,ast2500-wdt" + - "aspeed,ast2600-wdt" - reg: physical base address of the controller and length of memory mapped region From b3528b4874480818e38e4da019d655413c233e6a Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Mon, 19 Aug 2019 14:47:38 +0930 Subject: [PATCH 059/345] watchdog: aspeed: Add support for AST2600 The ast2600 can be supported by the same code as the ast2500. Signed-off-by: Ryan Chen Signed-off-by: Joel Stanley Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190819051738.17370-3-joel@jms.id.au Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index cc71861e033a..5b64bc2e8788 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -34,6 +34,7 @@ static const struct aspeed_wdt_config ast2500_config = { static const struct of_device_id aspeed_wdt_of_table[] = { { .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config }, { .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config }, + { .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config }, { }, }; MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); @@ -259,7 +260,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev) set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); } - if (of_device_is_compatible(np, "aspeed,ast2500-wdt")) { + if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) || + (of_device_is_compatible(np, "aspeed,ast2600-wdt"))) { u32 reg = readl(wdt->base + WDT_RESET_WIDTH); reg &= config->ext_pulse_width_mask; From f9eaba57c183b216afe8fc2665fd885f1caa378d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 19 Aug 2019 20:20:34 +0200 Subject: [PATCH 060/345] dt-bindings: watchdog: Add YAML schemas for the generic watchdog bindings The watchdogs have a bunch of generic properties that are needed in a device tree. Add a YAML schemas for those. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190821143835.7294-1-mripard@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/watchdog.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/watchdog/watchdog.yaml diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml new file mode 100644 index 000000000000..187bf6cb62bf --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/watchdog.yaml @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/watchdog.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Watchdog Generic Bindings + +maintainers: + - Guenter Roeck + - Wim Van Sebroeck + +description: | + This document describes generic bindings which can be used to + describe watchdog devices in a device tree. + +properties: + $nodename: + pattern: "^watchdog(@.*|-[0-9a-f])?$" + + timeout-sec: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Contains the watchdog timeout in seconds. + +... From 2de4de20dd36daef97d550e5895ed6daebbc3254 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Aug 2019 16:38:31 +0200 Subject: [PATCH 061/345] dt-bindings: watchdog: Convert Allwinner watchdog to a schema The Allwinner SoCs have a watchdog supported in Linux, with a matching Device Tree binding. Now that we have the DT validation in place, let's convert the device tree bindings for that controller over to a YAML schemas. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190821143835.7294-2-mripard@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../watchdog/allwinner,sun4i-a10-wdt.yaml | 48 +++++++++++++++++++ .../bindings/watchdog/sunxi-wdt.txt | 22 --------- 2 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml delete mode 100644 Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml new file mode 100644 index 000000000000..dc7553f57708 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/allwinner,sun4i-a10-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Allwinner A10 Watchdog Device Tree Bindings + +allOf: + - $ref: "watchdog.yaml#" + +maintainers: + - Chen-Yu Tsai + - Maxime Ripard + +properties: + compatible: + oneOf: + - const: allwinner,sun4i-a10-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-a64-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,sun50i-h6-wdt + - const: allwinner,sun6i-a31-wdt + - items: + - const: allwinner,suniv-f1c100s-wdt + - const: allwinner,sun4i-a10-wdt + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + wdt: watchdog@1c20c90 { + compatible = "allwinner,sun4i-a10-wdt"; + reg = <0x01c20c90 0x10>; + timeout-sec = <10>; + }; + +... diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt deleted file mode 100644 index e65198d82a2b..000000000000 --- a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt +++ /dev/null @@ -1,22 +0,0 @@ -Allwinner SoCs Watchdog timer - -Required properties: - -- compatible : should be one of - "allwinner,sun4i-a10-wdt" - "allwinner,sun6i-a31-wdt" - "allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt" - "allwinner,sun50i-h6-wdt","allwinner,sun6i-a31-wdt" - "allwinner,suniv-f1c100s-wdt", "allwinner,sun4i-a10-wdt" -- reg : Specifies base physical address and size of the registers. - -Optional properties: -- timeout-sec : Contains the watchdog timeout in seconds - -Example: - -wdt: watchdog@1c20c90 { - compatible = "allwinner,sun4i-a10-wdt"; - reg = <0x01c20c90 0x10>; - timeout-sec = <10>; -}; From f285e78fb77806900d0f938b9dd791974fb5e694 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Aug 2019 16:38:32 +0200 Subject: [PATCH 062/345] dt-bindings: watchdog: sun4i: Add the watchdog interrupts The Allwinner watchdog has an interrupt, either shared or dedicated depending on the SoC, that has been described in some DT, but not all of them. The binding is also completely missing that description. Let's add that property to be consistent. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190821143835.7294-3-mripard@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/allwinner,sun4i-a10-wdt.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml index dc7553f57708..31c95c404619 100644 --- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -31,9 +31,13 @@ properties: reg: maxItems: 1 + interrupts: + maxItems: 1 + required: - compatible - reg + - interrupts unevaluatedProperties: false @@ -42,6 +46,7 @@ examples: wdt: watchdog@1c20c90 { compatible = "allwinner,sun4i-a10-wdt"; reg = <0x01c20c90 0x10>; + interrupts = <24>; timeout-sec = <10>; }; From 284ec100d0910b6789b6079a0718120b48a51f37 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Aug 2019 16:38:33 +0200 Subject: [PATCH 063/345] dt-bindings: watchdog: sun4i: Add the watchdog clock The Allwinner watchdog has a clock that has been described in some DT, but not all of them. The binding is also completely missing that description. Let's add that property to be consistent. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190821143835.7294-4-mripard@kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/allwinner,sun4i-a10-wdt.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml index 31c95c404619..3a54f58683a0 100644 --- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml @@ -31,12 +31,16 @@ properties: reg: maxItems: 1 + clocks: + maxItems: 1 + interrupts: maxItems: 1 required: - compatible - reg + - clocks - interrupts unevaluatedProperties: false @@ -47,6 +51,7 @@ examples: compatible = "allwinner,sun4i-a10-wdt"; reg = <0x01c20c90 0x10>; interrupts = <24>; + clocks = <&osc24M>; timeout-sec = <10>; }; From 69eb8b118631e5a6741edc90aac6d9139a43e9ec Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Aug 2019 09:35:00 -0400 Subject: [PATCH 064/345] dt-bindings: watchdog: Add i.MX7ULP bindings Add the watchdog bindings for Freescale i.MX7ULP. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1566999303-18795-1-git-send-email-Anson.Huang@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../bindings/watchdog/fsl-imx7ulp-wdt.txt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt new file mode 100644 index 000000000000..f902508d6cac --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.txt @@ -0,0 +1,22 @@ +* Freescale i.MX7ULP Watchdog Timer (WDT) Controller + +Required properties: +- compatible : Should be "fsl,imx7ulp-wdt" +- reg : Should contain WDT registers location and length +- interrupts : Should contain WDT interrupt +- clocks: Should contain a phandle pointing to the gated peripheral clock. + +Optional properties: +- timeout-sec : Contains the watchdog timeout in seconds + +Examples: + +wdog1: watchdog@403d0000 { + compatible = "fsl,imx7ulp-wdt"; + reg = <0x403d0000 0x10000>; + interrupts = ; + clocks = <&pcc2 IMX7ULP_CLK_WDG1>; + assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>; + assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; + timeout-sec = <40>; +}; From 41b630f41bf744b0eed92a53ff8c716cfc71920a Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 28 Aug 2019 09:35:01 -0400 Subject: [PATCH 065/345] watchdog: Add i.MX7ULP watchdog support The i.MX7ULP Watchdog Timer (WDOG) module is an independent timer that is available for system use. It provides a safety feature to ensure that software is executing as planned and that the CPU is not stuck in an infinite loop or executing unintended code. If the WDOG module is not serviced (refreshed) within a certain period, it resets the MCU. Add driver support for i.MX7ULP watchdog. Signed-off-by: Anson Huang Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/1566999303-18795-2-git-send-email-Anson.Huang@nxp.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 13 ++ drivers/watchdog/Makefile | 1 + drivers/watchdog/imx7ulp_wdt.c | 243 +++++++++++++++++++++++++++++++++ 3 files changed, 257 insertions(+) create mode 100644 drivers/watchdog/imx7ulp_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a8f5c8147d4b..d68e5b500aef 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -724,6 +724,19 @@ config IMX_SC_WDT To compile this driver as a module, choose M here: the module will be called imx_sc_wdt. +config IMX7ULP_WDT + tristate "IMX7ULP Watchdog" + depends on ARCH_MXC || COMPILE_TEST + select WATCHDOG_CORE + help + This is the driver for the hardware watchdog on the Freescale + IMX7ULP and later processors. If you have one of these + processors and wish to have watchdog support enabled, + say Y, otherwise say N. + + To compile this driver as a module, choose M here: the + module will be called imx7ulp_wdt. + config UX500_WATCHDOG tristate "ST-Ericsson Ux500 watchdog" depends on MFD_DB8500_PRCMU diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index b5a0aed537af..2ee352bf3372 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o obj-$(CONFIG_IMX_SC_WDT) += imx_sc_wdt.o +obj-$(CONFIG_IMX7ULP_WDT) += imx7ulp_wdt.o obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c new file mode 100644 index 000000000000..5ce51026989a --- /dev/null +++ b/drivers/watchdog/imx7ulp_wdt.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 NXP. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WDOG_CS 0x0 +#define WDOG_CS_CMD32EN BIT(13) +#define WDOG_CS_ULK BIT(11) +#define WDOG_CS_RCS BIT(10) +#define WDOG_CS_EN BIT(7) +#define WDOG_CS_UPDATE BIT(5) + +#define WDOG_CNT 0x4 +#define WDOG_TOVAL 0x8 + +#define REFRESH_SEQ0 0xA602 +#define REFRESH_SEQ1 0xB480 +#define REFRESH ((REFRESH_SEQ1 << 16) | REFRESH_SEQ0) + +#define UNLOCK_SEQ0 0xC520 +#define UNLOCK_SEQ1 0xD928 +#define UNLOCK ((UNLOCK_SEQ1 << 16) | UNLOCK_SEQ0) + +#define DEFAULT_TIMEOUT 60 +#define MAX_TIMEOUT 128 +#define WDOG_CLOCK_RATE 1000 + +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0000); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +struct imx7ulp_wdt_device { + struct notifier_block restart_handler; + struct watchdog_device wdd; + void __iomem *base; + struct clk *clk; +}; + +static inline void imx7ulp_wdt_enable(void __iomem *base, bool enable) +{ + u32 val = readl(base + WDOG_CS); + + writel(UNLOCK, base + WDOG_CNT); + if (enable) + writel(val | WDOG_CS_EN, base + WDOG_CS); + else + writel(val & ~WDOG_CS_EN, base + WDOG_CS); +} + +static inline bool imx7ulp_wdt_is_enabled(void __iomem *base) +{ + u32 val = readl(base + WDOG_CS); + + return val & WDOG_CS_EN; +} + +static int imx7ulp_wdt_ping(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + writel(REFRESH, wdt->base + WDOG_CNT); + + return 0; +} + +static int imx7ulp_wdt_start(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + imx7ulp_wdt_enable(wdt->base, true); + + return 0; +} + +static int imx7ulp_wdt_stop(struct watchdog_device *wdog) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + + imx7ulp_wdt_enable(wdt->base, false); + + return 0; +} + +static int imx7ulp_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int timeout) +{ + struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog); + u32 val = WDOG_CLOCK_RATE * timeout; + + writel(UNLOCK, wdt->base + WDOG_CNT); + writel(val, wdt->base + WDOG_TOVAL); + + wdog->timeout = timeout; + + return 0; +} + +static const struct watchdog_ops imx7ulp_wdt_ops = { + .owner = THIS_MODULE, + .start = imx7ulp_wdt_start, + .stop = imx7ulp_wdt_stop, + .ping = imx7ulp_wdt_ping, + .set_timeout = imx7ulp_wdt_set_timeout, +}; + +static const struct watchdog_info imx7ulp_wdt_info = { + .identity = "i.MX7ULP watchdog timer", + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, +}; + +static inline void imx7ulp_wdt_init(void __iomem *base, unsigned int timeout) +{ + u32 val; + + /* unlock the wdog for reconfiguration */ + writel_relaxed(UNLOCK_SEQ0, base + WDOG_CNT); + writel_relaxed(UNLOCK_SEQ1, base + WDOG_CNT); + + /* set an initial timeout value in TOVAL */ + writel(timeout, base + WDOG_TOVAL); + /* enable 32bit command sequence and reconfigure */ + val = BIT(13) | BIT(8) | BIT(5); + writel(val, base + WDOG_CS); +} + +static void imx7ulp_wdt_action(void *data) +{ + clk_disable_unprepare(data); +} + +static int imx7ulp_wdt_probe(struct platform_device *pdev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt; + struct device *dev = &pdev->dev; + struct watchdog_device *wdog; + int ret; + + imx7ulp_wdt = devm_kzalloc(dev, sizeof(*imx7ulp_wdt), GFP_KERNEL); + if (!imx7ulp_wdt) + return -ENOMEM; + + platform_set_drvdata(pdev, imx7ulp_wdt); + + imx7ulp_wdt->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(imx7ulp_wdt->base)) + return PTR_ERR(imx7ulp_wdt->base); + + imx7ulp_wdt->clk = devm_clk_get(dev, NULL); + if (IS_ERR(imx7ulp_wdt->clk)) { + dev_err(dev, "Failed to get watchdog clock\n"); + return PTR_ERR(imx7ulp_wdt->clk); + } + + ret = clk_prepare_enable(imx7ulp_wdt->clk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, imx7ulp_wdt_action, imx7ulp_wdt->clk); + if (ret) + return ret; + + wdog = &imx7ulp_wdt->wdd; + wdog->info = &imx7ulp_wdt_info; + wdog->ops = &imx7ulp_wdt_ops; + wdog->min_timeout = 1; + wdog->max_timeout = MAX_TIMEOUT; + wdog->parent = dev; + wdog->timeout = DEFAULT_TIMEOUT; + + watchdog_init_timeout(wdog, 0, dev); + watchdog_stop_on_reboot(wdog); + watchdog_stop_on_unregister(wdog); + watchdog_set_drvdata(wdog, imx7ulp_wdt); + imx7ulp_wdt_init(imx7ulp_wdt->base, wdog->timeout * WDOG_CLOCK_RATE); + + return devm_watchdog_register_device(dev, wdog); +} + +static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev); + + if (watchdog_active(&imx7ulp_wdt->wdd)) + imx7ulp_wdt_stop(&imx7ulp_wdt->wdd); + + clk_disable_unprepare(imx7ulp_wdt->clk); + + return 0; +} + +static int __maybe_unused imx7ulp_wdt_resume(struct device *dev) +{ + struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev); + u32 timeout = imx7ulp_wdt->wdd.timeout * WDOG_CLOCK_RATE; + int ret; + + ret = clk_prepare_enable(imx7ulp_wdt->clk); + if (ret) + return ret; + + if (imx7ulp_wdt_is_enabled(imx7ulp_wdt->base)) + imx7ulp_wdt_init(imx7ulp_wdt->base, timeout); + + if (watchdog_active(&imx7ulp_wdt->wdd)) + imx7ulp_wdt_start(&imx7ulp_wdt->wdd); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(imx7ulp_wdt_pm_ops, imx7ulp_wdt_suspend, + imx7ulp_wdt_resume); + +static const struct of_device_id imx7ulp_wdt_dt_ids[] = { + { .compatible = "fsl,imx7ulp-wdt", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, imx7ulp_wdt_dt_ids); + +static struct platform_driver imx7ulp_wdt_driver = { + .probe = imx7ulp_wdt_probe, + .driver = { + .name = "imx7ulp-wdt", + .pm = &imx7ulp_wdt_pm_ops, + .of_match_table = imx7ulp_wdt_dt_ids, + }, +}; +module_platform_driver(imx7ulp_wdt_driver); + +MODULE_AUTHOR("Anson Huang "); +MODULE_DESCRIPTION("Freescale i.MX7ULP watchdog driver"); +MODULE_LICENSE("GPL v2"); From e07a4c79ca75bf41d73ba74c06f7220cd2741bc9 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 30 Aug 2019 09:52:24 +1200 Subject: [PATCH 066/345] watchdog: orion_wdt: use timer1 as a pretimeout The orion watchdog can either reset the CPU or generate an interrupt. The interrupt would be useful for debugging as it provides panic() output about the watchdog expiry, however if the interrupt is used the watchdog can't reset the CPU in the event of being stuck in a loop with interrupts disabled or if the CPU is prevented from accessing memory (e.g. an unterminated DMA). The Armada SoCs have spare timers that aren't currently used by the Linux kernel. We can use timer1 to provide a pre-timeout ahead of the watchdog timer and provide the possibility of gathering debug before the reset triggers. Signed-off-by: Chris Packham Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190829215224.27956-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/orion_wdt.c | 66 +++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index cdb0d174c5e2..1cccf8eb1c5d 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -35,7 +35,15 @@ * Watchdog timer block registers. */ #define TIMER_CTRL 0x0000 -#define TIMER_A370_STATUS 0x04 +#define TIMER1_FIXED_ENABLE_BIT BIT(12) +#define WDT_AXP_FIXED_ENABLE_BIT BIT(10) +#define TIMER1_ENABLE_BIT BIT(2) + +#define TIMER_A370_STATUS 0x0004 +#define WDT_A370_EXPIRED BIT(31) +#define TIMER1_STATUS_BIT BIT(8) + +#define TIMER1_VAL_OFF 0x001c #define WDT_MAX_CYCLE_COUNT 0xffffffff @@ -43,9 +51,6 @@ #define WDT_A370_RATIO_SHIFT 5 #define WDT_A370_RATIO (1 << WDT_A370_RATIO_SHIFT) -#define WDT_AXP_FIXED_ENABLE_BIT BIT(10) -#define WDT_A370_EXPIRED BIT(31) - static bool nowayout = WATCHDOG_NOWAYOUT; static int heartbeat = -1; /* module parameter (seconds) */ @@ -158,6 +163,7 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, struct orion_watchdog *dev) { int ret; + u32 val; dev->clk = of_clk_get_by_name(pdev->dev.of_node, "fixed"); if (IS_ERR(dev->clk)) @@ -168,10 +174,9 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, return ret; } - /* Enable the fixed watchdog clock input */ - atomic_io_modify(dev->reg + TIMER_CTRL, - WDT_AXP_FIXED_ENABLE_BIT, - WDT_AXP_FIXED_ENABLE_BIT); + /* Fix the wdt and timer1 clock freqency to 25MHz */ + val = WDT_AXP_FIXED_ENABLE_BIT | TIMER1_FIXED_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, val, val); dev->clk_rate = clk_get_rate(dev->clk); return 0; @@ -183,6 +188,10 @@ static int orion_wdt_ping(struct watchdog_device *wdt_dev) /* Reload watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + TIMER1_VAL_OFF); + return 0; } @@ -194,13 +203,18 @@ static int armada375_start(struct watchdog_device *wdt_dev) /* Set watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + TIMER1_VAL_OFF); /* Clear the watchdog expiration bit */ atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0); /* Enable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, - dev->data->wdt_enable_bit); + reg = dev->data->wdt_enable_bit; + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + reg |= TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, reg, reg); /* Enable reset on watchdog */ reg = readl(dev->rstout); @@ -277,7 +291,7 @@ static int orion_stop(struct watchdog_device *wdt_dev) static int armada375_stop(struct watchdog_device *wdt_dev) { struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev); - u32 reg; + u32 reg, mask; /* Disable reset on watchdog */ atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, @@ -287,7 +301,10 @@ static int armada375_stop(struct watchdog_device *wdt_dev) writel(reg, dev->rstout); /* Disable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0); + mask = dev->data->wdt_enable_bit; + if (wdt_dev->info->options & WDIOF_PRETIMEOUT) + mask |= TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, mask, 0); return 0; } @@ -349,7 +366,7 @@ static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev) return readl(dev->reg + dev->data->wdt_counter_offset) / dev->clk_rate; } -static const struct watchdog_info orion_wdt_info = { +static struct watchdog_info orion_wdt_info = { .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, .identity = "Orion Watchdog", }; @@ -368,6 +385,16 @@ static irqreturn_t orion_wdt_irq(int irq, void *devid) return IRQ_HANDLED; } +static irqreturn_t orion_wdt_pre_irq(int irq, void *devid) +{ + struct orion_watchdog *dev = devid; + + atomic_io_modify(dev->reg + TIMER_A370_STATUS, + TIMER1_STATUS_BIT, 0); + watchdog_notify_pretimeout(&dev->wdt); + return IRQ_HANDLED; +} + /* * The original devicetree binding for this driver specified only * one memory resource, so in order to keep DT backwards compatibility @@ -589,6 +616,19 @@ static int orion_wdt_probe(struct platform_device *pdev) } } + /* Optional 2nd interrupt for pretimeout */ + irq = platform_get_irq(pdev, 1); + if (irq > 0) { + orion_wdt_info.options |= WDIOF_PRETIMEOUT; + ret = devm_request_irq(&pdev->dev, irq, orion_wdt_pre_irq, + 0, pdev->name, dev); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request IRQ\n"); + goto disable_clk; + } + } + + watchdog_set_nowayout(&dev->wdt, nowayout); ret = watchdog_register_device(&dev->wdt); if (ret) From 3d9e89bda9e9f01d55ff72f58d619e77d0c5b248 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 28 Aug 2019 13:24:01 +0300 Subject: [PATCH 067/345] watchdog: aspeed: add support for dual boot Set WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION into WDT_CLEAR_TIMEOUT_STATUS to clear out boot code source and re-enable access to the primary SPI flash chip while booted via wdt2 from the alternate chip. AST2400 datasheet says: "In the 2nd flash booting mode, all the address mapping to CS0# would be re-directed to CS1#. And CS0# is not accessible under this mode. To access CS0#, firmware should clear the 2nd boot mode register in the WDT2 status register WDT30.bit[1]." Signed-off-by: Ivan Mikhaylov Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190828102402.13155-4-i.mikhaylov@yadro.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/aspeed_wdt.c | 65 ++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index 5b64bc2e8788..4ec0906bf12c 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -54,6 +54,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_CTRL_ENABLE BIT(0) #define WDT_TIMEOUT_STATUS 0x10 #define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) +#define WDT_CLEAR_TIMEOUT_STATUS 0x14 +#define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0) /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -166,6 +168,60 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd, return 0; } +/* access_cs0 shows if cs0 is accessible, hence the reverted bit */ +static ssize_t access_cs0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct aspeed_wdt *wdt = dev_get_drvdata(dev); + u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS); + + return sprintf(buf, "%u\n", + !(status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY)); +} + +static ssize_t access_cs0_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t size) +{ + struct aspeed_wdt *wdt = dev_get_drvdata(dev); + unsigned long val; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + if (val) + writel(WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION, + wdt->base + WDT_CLEAR_TIMEOUT_STATUS); + + return size; +} + +/* + * This attribute exists only if the system has booted from the alternate + * flash with 'alt-boot' option. + * + * At alternate flash the 'access_cs0' sysfs node provides: + * ast2400: a way to get access to the primary SPI flash chip at CS0 + * after booting from the alternate chip at CS1. + * ast2500: a way to restore the normal address mapping from + * (CS0->CS1, CS1->CS0) to (CS0->CS0, CS1->CS1). + * + * Clearing the boot code selection and timeout counter also resets to the + * initial state the chip select line mapping. When the SoC is in normal + * mapping state (i.e. booted from CS0), clearing those bits does nothing for + * both versions of the SoC. For alternate boot mode (booted from CS1 due to + * wdt2 expiration) the behavior differs as described above. + * + * This option can be used with wdt2 (watchdog1) only. + */ +static DEVICE_ATTR_RW(access_cs0); + +static struct attribute *bswitch_attrs[] = { + &dev_attr_access_cs0.attr, + NULL +}; +ATTRIBUTE_GROUPS(bswitch); + static const struct watchdog_ops aspeed_wdt_ops = { .start = aspeed_wdt_start, .stop = aspeed_wdt_stop, @@ -308,9 +364,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev) } status = readl(wdt->base + WDT_TIMEOUT_STATUS); - if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) + if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) { wdt->wdd.bootstatus = WDIOF_CARDRESET; + if (of_device_is_compatible(np, "aspeed,ast2400-wdt") || + of_device_is_compatible(np, "aspeed,ast2500-wdt")) + wdt->wdd.groups = bswitch_groups; + } + + dev_set_drvdata(dev, wdt); + return devm_watchdog_register_device(dev, &wdt->wdd); } From ebdc0f5817321cd5a45e76c6653adf7c7b1d0aee Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 28 Aug 2019 13:24:02 +0300 Subject: [PATCH 068/345] watchdog: apseed: Add access_cs0 option for alt-boot The option for the ast2400/2500 to get access to CS0 at runtime. Signed-off-by: Ivan Mikhaylov Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190828102402.13155-5-i.mikhaylov@yadro.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- .../ABI/testing/sysfs-class-watchdog | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog index 6317ade5ad19..675f9b537661 100644 --- a/Documentation/ABI/testing/sysfs-class-watchdog +++ b/Documentation/ABI/testing/sysfs-class-watchdog @@ -72,3 +72,37 @@ Description: It is a read/write file. When read, the currently assigned pretimeout governor is returned. When written, it sets the pretimeout governor. + +What: /sys/class/watchdog/watchdog1/access_cs0 +Date: August 2019 +Contact: Ivan Mikhaylov , + Alexander Amelkin +Description: + It is a read/write file. This attribute exists only if the + system has booted from the alternate flash chip due to + expiration of a watchdog timer of AST2400/AST2500 when + alternate boot function was enabled with 'aspeed,alt-boot' + devicetree option for that watchdog or with an appropriate + h/w strapping (for WDT2 only). + + At alternate flash the 'access_cs0' sysfs node provides: + ast2400: a way to get access to the primary SPI flash + chip at CS0 after booting from the alternate + chip at CS1. + ast2500: a way to restore the normal address mapping + from (CS0->CS1, CS1->CS0) to (CS0->CS0, + CS1->CS1). + + Clearing the boot code selection and timeout counter also + resets to the initial state the chip select line mapping. When + the SoC is in normal mapping state (i.e. booted from CS0), + clearing those bits does nothing for both versions of the SoC. + For alternate boot mode (booted from CS1 due to wdt2 + expiration) the behavior differs as described above. + + This option can be used with wdt2 (watchdog1) only. + + When read, the current status of the boot code selection is + shown. When written with any non-zero value, it clears + the boot code selection and the timeout counter, which results + in chipselect reset for AST2400/AST2500. From 3b7c09fd645ba62b3d6346625db1fcd3803bdd33 Mon Sep 17 00:00:00 2001 From: Oliver Graute Date: Thu, 5 Sep 2019 14:36:49 +0000 Subject: [PATCH 069/345] watchdog: imx_sc: this patch just fixes whitespaces Fix only whitespace errors in imx_sc_wdt_probe() Signed-off-by: Oliver Graute Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190905143644.20952-1-oliver.graute@kococonnector.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imx_sc_wdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c index 9260475439eb..7ea5cf54e94a 100644 --- a/drivers/watchdog/imx_sc_wdt.c +++ b/drivers/watchdog/imx_sc_wdt.c @@ -176,8 +176,8 @@ static int imx_sc_wdt_probe(struct platform_device *pdev) ret = devm_watchdog_register_device(dev, wdog); if (ret) - return ret; - + return ret; + ret = imx_scu_irq_group_enable(SC_IRQ_GROUP_WDOG, SC_IRQ_WDOG, true); From 36375491a439565402d1cb2cf12955c11f2ed5a6 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Fri, 6 Sep 2019 22:54:10 +0200 Subject: [PATCH 070/345] watchdog: qcom: support pre-timeout when the bark irq is available Use the bark interrupt as the pre-timeout notifier whenever this interrupt is available. By default, the pretimeout notification shall occur one second earlier than the timeout. Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190906205411.31666-2-jorge.ramirez-ortiz@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/qcom-wdt.c | 69 ++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 7be7f87be28f..aa22e625144a 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2014, The Linux Foundation. All rights reserved. */ +#include #include #include +#include #include #include #include @@ -19,6 +21,9 @@ enum wdt_reg { WDT_BITE_TIME, }; +#define QCOM_WDT_ENABLE BIT(0) +#define QCOM_WDT_ENABLE_IRQ BIT(1) + static const u32 reg_offset_data_apcs_tmr[] = { [WDT_RST] = 0x38, [WDT_EN] = 0x40, @@ -54,15 +59,35 @@ struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd) return container_of(wdd, struct qcom_wdt, wdd); } +static inline int qcom_get_enable(struct watchdog_device *wdd) +{ + int enable = QCOM_WDT_ENABLE; + + if (wdd->pretimeout) + enable |= QCOM_WDT_ENABLE_IRQ; + + return enable; +} + +static irqreturn_t qcom_wdt_isr(int irq, void *arg) +{ + struct watchdog_device *wdd = arg; + + watchdog_notify_pretimeout(wdd); + + return IRQ_HANDLED; +} + static int qcom_wdt_start(struct watchdog_device *wdd) { struct qcom_wdt *wdt = to_qcom_wdt(wdd); + unsigned int bark = wdd->timeout - wdd->pretimeout; writel(0, wdt_addr(wdt, WDT_EN)); writel(1, wdt_addr(wdt, WDT_RST)); - writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME)); + writel(bark * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME)); writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BITE_TIME)); - writel(1, wdt_addr(wdt, WDT_EN)); + writel(qcom_get_enable(wdd), wdt_addr(wdt, WDT_EN)); return 0; } @@ -89,6 +114,13 @@ static int qcom_wdt_set_timeout(struct watchdog_device *wdd, return qcom_wdt_start(wdd); } +static int qcom_wdt_set_pretimeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->pretimeout = timeout; + return qcom_wdt_start(wdd); +} + static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, void *data) { @@ -105,7 +137,7 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action, writel(1, wdt_addr(wdt, WDT_RST)); writel(timeout, wdt_addr(wdt, WDT_BARK_TIME)); writel(timeout, wdt_addr(wdt, WDT_BITE_TIME)); - writel(1, wdt_addr(wdt, WDT_EN)); + writel(QCOM_WDT_ENABLE, wdt_addr(wdt, WDT_EN)); /* * Actually make sure the above sequence hits hardware before sleeping. @@ -121,6 +153,7 @@ static const struct watchdog_ops qcom_wdt_ops = { .stop = qcom_wdt_stop, .ping = qcom_wdt_ping, .set_timeout = qcom_wdt_set_timeout, + .set_pretimeout = qcom_wdt_set_pretimeout, .restart = qcom_wdt_restart, .owner = THIS_MODULE, }; @@ -133,6 +166,15 @@ static const struct watchdog_info qcom_wdt_info = { .identity = KBUILD_MODNAME, }; +static const struct watchdog_info qcom_wdt_pt_info = { + .options = WDIOF_KEEPALIVEPING + | WDIOF_MAGICCLOSE + | WDIOF_SETTIMEOUT + | WDIOF_PRETIMEOUT + | WDIOF_CARDRESET, + .identity = KBUILD_MODNAME, +}; + static void qcom_clk_disable_unprepare(void *data) { clk_disable_unprepare(data); @@ -146,7 +188,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; const u32 *regs; u32 percpu_offset; - int ret; + int irq, ret; regs = of_device_get_match_data(dev); if (!regs) { @@ -204,7 +246,24 @@ static int qcom_wdt_probe(struct platform_device *pdev) return -EINVAL; } - wdt->wdd.info = &qcom_wdt_info; + /* check if there is pretimeout support */ + irq = platform_get_irq(pdev, 0); + if (irq > 0) { + ret = devm_request_irq(dev, irq, qcom_wdt_isr, + IRQF_TRIGGER_RISING, + "wdt_bark", &wdt->wdd); + if (ret) + return ret; + + wdt->wdd.info = &qcom_wdt_pt_info; + wdt->wdd.pretimeout = 1; + } else { + if (irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + + wdt->wdd.info = &qcom_wdt_info; + } + wdt->wdd.ops = &qcom_wdt_ops; wdt->wdd.min_timeout = 1; wdt->wdd.max_timeout = 0x10000000U / wdt->rate; From 52a142140e14d30f99b6661bef8812a70a029124 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Fri, 6 Sep 2019 22:54:11 +0200 Subject: [PATCH 071/345] watchdog: qcom: remove unnecessary variable from private storage there is no need to continue keeping the clock in private storage. Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190906205411.31666-3-jorge.ramirez-ortiz@linaro.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/qcom-wdt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index aa22e625144a..a494543d3ae1 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -42,7 +42,6 @@ static const u32 reg_offset_data_kpss[] = { struct qcom_wdt { struct watchdog_device wdd; - struct clk *clk; unsigned long rate; void __iomem *base; const u32 *layout; @@ -189,6 +188,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) const u32 *regs; u32 percpu_offset; int irq, ret; + struct clk *clk; regs = of_device_get_match_data(dev); if (!regs) { @@ -215,19 +215,18 @@ static int qcom_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - wdt->clk = devm_clk_get(dev, NULL); - if (IS_ERR(wdt->clk)) { + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) { dev_err(dev, "failed to get input clock\n"); - return PTR_ERR(wdt->clk); + return PTR_ERR(clk); } - ret = clk_prepare_enable(wdt->clk); + ret = clk_prepare_enable(clk); if (ret) { dev_err(dev, "failed to setup clock\n"); return ret; } - ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, - wdt->clk); + ret = devm_add_action_or_reset(dev, qcom_clk_disable_unprepare, clk); if (ret) return ret; @@ -239,7 +238,7 @@ static int qcom_wdt_probe(struct platform_device *pdev) * that it would bite before a second elapses it's usefulness is * limited. Bail if this is the case. */ - wdt->rate = clk_get_rate(wdt->clk); + wdt->rate = clk_get_rate(clk); if (wdt->rate == 0 || wdt->rate > 0x10000000U) { dev_err(dev, "invalid clock rate\n"); From ca2fc5efffde5a3827adfb0ab6a51b6f1c64d5ff Mon Sep 17 00:00:00 2001 From: Jaret Cantu Date: Thu, 12 Sep 2019 13:55:50 -0400 Subject: [PATCH 072/345] watchdog: f71808e_wdt: Add F81803 support This adds watchdog support for the Fintek F81803 Super I/O chip. Testing was done on the Seneca XK-QUAD. Signed-off-by: Jaret Cantu Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190912175550.9340-1-jaret.cantu@timesys.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 4 ++-- drivers/watchdog/f71808e_wdt.c | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index d68e5b500aef..58e7c100b6ad 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1043,8 +1043,8 @@ config F71808E_WDT depends on X86 help This is the driver for the hardware watchdog on the Fintek F71808E, - F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866 - Super I/O controllers. + F71862FG, F71868, F71869, F71882FG, F71889FG, F81803, F81865, and + F81866 Super I/O controllers. You can compile this driver directly into the kernel, or use it as a module. The module will be called f71808e_wdt. diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index ff5cf1b48a4d..e46104c2fd94 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -31,8 +31,10 @@ #define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ #define SIO_REG_DEVREV 0x22 /* Device revision */ #define SIO_REG_MANID 0x23 /* Fintek ID (2 bytes) */ +#define SIO_REG_CLOCK_SEL 0x26 /* Clock select */ #define SIO_REG_ROM_ADDR_SEL 0x27 /* ROM address select */ #define SIO_F81866_REG_PORT_SEL 0x27 /* F81866 Multi-Function Register */ +#define SIO_REG_TSI_LEVEL_SEL 0x28 /* TSI Level select */ #define SIO_REG_MFUNCT1 0x29 /* Multi function select 1 */ #define SIO_REG_MFUNCT2 0x2a /* Multi function select 2 */ #define SIO_REG_MFUNCT3 0x2b /* Multi function select 3 */ @@ -49,6 +51,7 @@ #define SIO_F71869A_ID 0x1007 /* Chipset ID */ #define SIO_F71882_ID 0x0541 /* Chipset ID */ #define SIO_F71889_ID 0x0723 /* Chipset ID */ +#define SIO_F81803_ID 0x1210 /* Chipset ID */ #define SIO_F81865_ID 0x0704 /* Chipset ID */ #define SIO_F81866_ID 0x1010 /* Chipset ID */ @@ -108,7 +111,7 @@ MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with" " given initial timeout. Zero (default) disables this feature."); enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg, - f81865, f81866}; + f81803, f81865, f81866}; static const char *f71808e_names[] = { "f71808fg", @@ -118,6 +121,7 @@ static const char *f71808e_names[] = { "f71869", "f71882fg", "f71889fg", + "f81803", "f81865", "f81866", }; @@ -370,6 +374,14 @@ static int watchdog_start(void) superio_inb(watchdog.sioaddr, SIO_REG_MFUNCT3) & 0xcf); break; + case f81803: + /* Enable TSI Level register bank */ + superio_clear_bit(watchdog.sioaddr, SIO_REG_CLOCK_SEL, 3); + /* Set pin 27 to WDTRST# */ + superio_outb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL, 0x5f & + superio_inb(watchdog.sioaddr, SIO_REG_TSI_LEVEL_SEL)); + break; + case f81865: /* Set pin 70 to WDTRST# */ superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 5); @@ -809,6 +821,9 @@ static int __init f71808e_find(int sioaddr) /* Confirmed (by datasheet) not to have a watchdog. */ err = -ENODEV; goto exit; + case SIO_F81803_ID: + watchdog.type = f81803; + break; case SIO_F81865_ID: watchdog.type = f81865; break; From 59b263620c2102171a85f4518d65a571c352ab9a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 4 Sep 2019 17:23:11 -0400 Subject: [PATCH 073/345] drm/amd/display: Add stereo mux and dig programming calls for dcn21 [Why] The earlier patch "Hook up calls to do stereo mux and dig programming..." doesn't include update for dcn21. [How] Align dcn21 gpio settings with updated stereo control interface. Signed-off-by: Roman Li Acked-by: Aaron Liu Signed-off-by: Alex Deucher --- .../display/dc/gpio/dcn21/hw_factory_dcn21.c | 38 +++++++++++++++++-- .../dc/gpio/dcn21/hw_translate_dcn21.c | 3 +- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c index 34485d9de78a..8572678f8d4f 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c @@ -35,12 +35,10 @@ #include "hw_factory_dcn21.h" - #include "dcn/dcn_2_1_0_offset.h" #include "dcn/dcn_2_1_0_sh_mask.h" #include "renoir_ip_offset.h" - #include "reg_helper.h" #include "../hpd_regs.h" /* begin ********************* @@ -136,6 +134,39 @@ static const struct ddc_sh_mask ddc_mask[] = { DDC_MASK_SH_LIST_DCN2(_MASK, 6) }; +#include "../generic_regs.h" + +/* set field name */ +#define SF_GENERIC(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define generic_regs(id) \ +{\ + GENERIC_REG_LIST(id)\ +} + +static const struct generic_registers generic_regs[] = { + generic_regs(A), +}; + +static const struct generic_sh_mask generic_shift[] = { + GENERIC_MASK_SH_LIST(__SHIFT, A), +}; + +static const struct generic_sh_mask generic_mask[] = { + GENERIC_MASK_SH_LIST(_MASK, A), +}; + +static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en) +{ + struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin); + + generic->regs = &generic_regs[en]; + generic->shifts = &generic_shift[en]; + generic->masks = &generic_mask[en]; + generic->base.regs = &generic_regs[en].gpio; +} + static void define_ddc_registers( struct hw_gpio_pin *pin, uint32_t en) @@ -181,7 +212,8 @@ static const struct hw_factory_funcs funcs = { .get_hpd_pin = dal_hw_hpd_get_pin, .get_generic_pin = dal_hw_generic_get_pin, .define_hpd_registers = define_hpd_registers, - .define_ddc_registers = define_ddc_registers + .define_ddc_registers = define_ddc_registers, + .define_generic_registers = define_generic_registers }; /* * dal_hw_factory_dcn10_init diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c index ad7c43746291..fbb58fb8c318 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c @@ -58,7 +58,6 @@ #define SF_HPD(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix - /* macros to expend register list macro defined in HW object header file * end *********************/ @@ -71,7 +70,7 @@ static bool offset_to_id( { switch (offset) { /* GENERIC */ - case REG(DC_GENERICA): + case REG(DC_GPIO_GENERIC_A): *id = GPIO_ID_GENERIC; switch (mask) { case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK: From 5813f97a5969bf1e7e723397a74e00b5de7278d6 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 4 Sep 2019 11:47:48 +0800 Subject: [PATCH 074/345] drm/amdgpu: disable stutter mode for renoir With stutter mode enabled, NMI prints frequently. Disable stutter for the moment because NMI warning storm, and will enable it back till the issue is addressed Signed-off-by: Aaron Liu Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e1b09bb432bd..9590ca552a28 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2384,6 +2384,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; + if (adev->asic_type == CHIP_RENOIR) + dm->dc->debug.disable_stutter = true; return 0; fail: From f79e06bd44e5be98b75f71ac0cde6c16be278180 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 4 Sep 2019 13:21:42 +0800 Subject: [PATCH 075/345] drm/amd/display: update renoir_ip_offset.h This patch updates MP1_BASE in renoir_ip_offset.h Signed-off-by: Aaron Liu Acked-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/renoir_ip_offset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/renoir_ip_offset.h b/drivers/gpu/drm/amd/include/renoir_ip_offset.h index 554714c8e000..094648cac392 100644 --- a/drivers/gpu/drm/amd/include/renoir_ip_offset.h +++ b/drivers/gpu/drm/amd/include/renoir_ip_offset.h @@ -155,7 +155,7 @@ static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0x0243FC00, 0x00DC0000 { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE MP1_BASE ={ { { { 0x00016200, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } }, +static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0x02400400, 0x00E80000, 0x00EC0000, 0x00F00000 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0 } }, From 1963b7c3beda608501deaf9010b53ec6cb6adbf1 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 3 Sep 2019 16:47:40 -0400 Subject: [PATCH 076/345] drm/amdgpu: Add smu lock around in pp_smu_i2c_bus_access Protect from concurrent SMU accesses. Signed-off-by: Andrey Grodzovsky Reviewed-by: Tao Zhou Reviewed-and-tested-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index fa636cb462c1..fa8ad7db2b3a 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1531,6 +1531,7 @@ static int pp_asic_reset_mode_2(void *handle) static int pp_smu_i2c_bus_access(void *handle, bool acquire) { struct pp_hwmgr *hwmgr = handle; + int ret = 0; if (!hwmgr || !hwmgr->pm_en) return -EINVAL; @@ -1540,7 +1541,11 @@ static int pp_smu_i2c_bus_access(void *handle, bool acquire) return -EINVAL; } - return hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire); + mutex_lock(&hwmgr->smu_lock); + ret = hwmgr->hwmgr_func->smu_i2c_bus_access(hwmgr, acquire); + mutex_unlock(&hwmgr->smu_lock); + + return ret; } static const struct amd_pm_funcs pp_dpm_funcs = { From 103efdc1eaed0579e13d1778372575561b6a6ad9 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 4 Sep 2019 11:16:24 -0400 Subject: [PATCH 077/345] drm/amdgpu: Remove clock gating restore. Restoring clock gating break SMU opeartion afterwards, avoid this until this further invistigated with SMU. Signed-off-by: Andrey Grodzovsky Reviewed-by: Tao Zhou Reviewed-and-tested-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 4a5951036927..c44723c267c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -493,7 +493,15 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control) } /* Restore clock gating */ - smu_v11_0_i2c_set_clock_gating(control, true); + + /* + * TODO Reenabling clock gating seems to break subsequent SMU operation + * on the I2C bus. My guess is that SMU doesn't disable clock gating like + * we do here before working with the bus. So for now just don't restore + * it but later work with SMU to see if they have this issue and can + * update their code appropriately + */ + /* smu_v11_0_i2c_set_clock_gating(control, true); */ } From df794f679bbaf3b8848b4d726bffc49653fa4cb6 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 16 Sep 2019 09:26:28 +0800 Subject: [PATCH 078/345] drm/amdgpu: remove program of lbpw for renoir These is no LBPW on Renoir. So removing program of lbpw for renoir. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 83d45f98a461..dcadc73bffd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1650,7 +1650,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - case CHIP_RENOIR: gfx_v9_0_init_lbpw(adev); break; case CHIP_VEGA20: @@ -3026,7 +3025,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - case CHIP_RENOIR: if (amdgpu_lbpw == 0) gfx_v9_0_enable_lbpw(adev, false); else From c46e5df4ac898108da66a880c4e18f69c74f6c1b Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 20 Aug 2019 20:33:46 -0400 Subject: [PATCH 079/345] drm/amd/display: dce11.x /dce12 update formula input [Description] 1. OUTSTANDING_REQUEST_LIMIT update from 0xFF to 0x1F (HW doc update) 2. using memory type to convert UMC's MCLK to Yclk. Signed-off-by: Charlene Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 7 +++++-- .../gpu/drm/amd/display/dc/dce/dce_mem_input.c | 4 ++-- .../drm/amd/display/dc/dce112/dce112_resource.c | 16 ++++++++++------ .../drm/amd/display/dc/dce120/dce120_resource.c | 11 ++++++++--- drivers/gpu/drm/amd/display/dc/inc/resource.h | 2 ++ 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 5cc3acccda2a..ee32d2c19305 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -98,11 +98,14 @@ uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context) struct dc_stream_state *stream = context->streams[j]; uint32_t vertical_blank_in_pixels = 0; uint32_t vertical_blank_time = 0; + uint32_t vertical_total_min = stream->timing.v_total; + struct dc_crtc_timing_adjust adjust = stream->adjust; + if (adjust.v_total_max != adjust.v_total_min) + vertical_total_min = adjust.v_total_min; vertical_blank_in_pixels = stream->timing.h_total * - (stream->timing.v_total + (vertical_total_min - stream->timing.v_addressable); - vertical_blank_time = vertical_blank_in_pixels * 10000 / stream->timing.pix_clk_100hz; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 1488ffddf4e3..31b698bf9cfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -148,7 +148,7 @@ static void dce_mi_program_pte_vm( pte->min_pte_before_flip_horiz_scan; REG_UPDATE(GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, - GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0xff); + GRPH_PIPE_OUTSTANDING_REQUEST_LIMIT, 0x7f); REG_UPDATE_3(DVMM_PTE_CONTROL, DVMM_PAGE_WIDTH, page_width, @@ -157,7 +157,7 @@ static void dce_mi_program_pte_vm( REG_UPDATE_2(DVMM_PTE_ARB_CONTROL, DVMM_PTE_REQ_PER_CHUNK, pte->pte_req_per_chunk, - DVMM_MAX_PTE_REQ_OUTSTANDING, 0xff); + DVMM_MAX_PTE_REQ_OUTSTANDING, 0x7f); } static void program_urgency_watermark( diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 3ac4c7e73050..2b3a2917c168 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -987,6 +987,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels_with_latency mem_clks = {0}; struct dm_pp_wm_sets_with_clock_ranges clk_ranges = {0}; struct dm_pp_clock_levels clks = {0}; + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO PPLIB: after PPLIB implement, * then remove old way @@ -1026,12 +1030,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + clks.clocks_in_khz[0] * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, + clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, + clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, 1000); return; @@ -1067,12 +1071,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 7d08154e9662..236c4c0324b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -847,6 +847,8 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) int i; unsigned int clk; unsigned int latency; + /*original logic in dal3*/ + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; /*do system clock*/ if (!dm_pp_get_clock_levels_by_type_with_latency( @@ -905,13 +907,16 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * ALSO always convert UMA clock (from PPLIB) to YCLK (HW formula): * YCLK = UMACLK*m_memoryTypeMultiplier */ + if (dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; + dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); + mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 1cc1c8ce633b..bef224bf803e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -31,6 +31,8 @@ #include "dm_pp_smu.h" #define MEMORY_TYPE_MULTIPLIER_CZ 4 +#define MEMORY_TYPE_HBM 2 + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); From c02d6a161395dfc0c2fdabb9e976a229017288d8 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Thu, 22 Aug 2019 14:54:18 -0400 Subject: [PATCH 080/345] drm/amd/display: Add missing HBM support and raise Vega20's uclk. [Why] When more than 2 displays are connected to the graphics card, only the minimum memory clock is needed. However, when more displays are connected, the minimum memory clock is not sufficient enough to support the overwhelming bandwidth. System will hang under this circumstance. Also, the old code didn't address HBM cards, which has 2 pseudo channels. We need to add the HBM part here. [How] When graphics card connects to 2 or more displays, switch to high memory clock. Also, choose memory multiplier based on whether its regular DRAM or HBM. Signed-off-by: Zhan Liu Reviewed-by: Roman Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index ee32d2c19305..36277bca0326 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -174,6 +174,10 @@ void dce11_pplib_apply_display_requirements( struct dc_state *context) { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; + int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; + + if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + memory_type_multiplier = MEMORY_TYPE_HBM; pp_display_cfg->all_displays_in_sync = context->bw_ctx.bw.dce.all_displays_in_sync; @@ -186,8 +190,18 @@ void dce11_pplib_apply_display_requirements( pp_display_cfg->cpu_pstate_separation_time = context->bw_ctx.bw.dce.blackout_recovery_time_us; - pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz - / MEMORY_TYPE_MULTIPLIER_CZ; + /* + * TODO: determine whether the bandwidth has reached memory's limitation + * , then change minimum memory clock based on real-time bandwidth + * limitation. + */ + if (ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { + pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, + (uint32_t) (dc->bw_vbios->high_yclk.value / memory_type_multiplier / 10000)); + } else { + pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz + / memory_type_multiplier; + } pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box( dc, From 9dbc88d013b79c62bd845cb9e7c0256e660967c5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 7 Sep 2019 22:32:38 +0200 Subject: [PATCH 081/345] drm/radeon: Bail earlier when radeon.cik_/si_support=0 is passed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bail from the pci_driver probe function instead of from the drm_driver load function. This avoid /dev/dri/card0 temporarily getting registered and then unregistered again, sending unwanted add / remove udev events to userspace. Specifically this avoids triggering the (userspace) bug fixed by this plymouth merge-request: https://gitlab.freedesktop.org/plymouth/plymouth/merge_requests/59 Note that despite that being an userspace bug, not sending unnecessary udev events is a good idea in general. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1490490 Reviewed-by: Michel Dänzer Signed-off-by: Hans de Goede Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 31 +++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 25 ----------------------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 5838162f687f..4267cb55bc33 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -323,8 +323,39 @@ bool radeon_device_is_virtual(void); static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + unsigned long flags = 0; int ret; + if (!ent) + return -ENODEV; /* Avoid NULL-ptr deref in drm_get_pci_dev */ + + flags = ent->driver_data; + + if (!radeon_si_support) { + switch (flags & RADEON_FAMILY_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support disabled by module param\n"); + return -ENODEV; + } + } + if (!radeon_cik_support) { + switch (flags & RADEON_FAMILY_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support disabled by module param\n"); + return -ENODEV; + } + } + if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 07f7ace42c4b..e85c554eeaa9 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -100,31 +100,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) struct radeon_device *rdev; int r, acpi_status; - if (!radeon_si_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support disabled by module param\n"); - return -ENODEV; - } - } - if (!radeon_cik_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support disabled by module param\n"); - return -ENODEV; - } - } - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); if (rdev == NULL) { return -ENOMEM; From 73d8e6c7b841d9bf298c8928f228fb433676635c Mon Sep 17 00:00:00 2001 From: Trek Date: Sat, 31 Aug 2019 21:25:36 +0200 Subject: [PATCH 082/345] drm/amdgpu: Check for valid number of registers to read Do not try to allocate any amount of memory requested by the user. Instead limit it to 128 registers. Actually the longest series of consecutive allowed registers are 48, mmGB_TILE_MODE0-31 and mmGB_MACROTILE_MODE0-15 (0x2644-0x2673). Bug: https://bugs.freedesktop.org/show_bug.cgi?id=111273 Signed-off-by: Trek Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0e2ec608530b..f6147528be64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -677,6 +677,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; + if (info->read_mmr_reg.count > 128) + return -EINVAL; + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); if (!regs) return -ENOMEM; From 4f3a2c10772581840d11b76b5c1147a3767710f7 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 11 Sep 2019 13:15:17 +0800 Subject: [PATCH 083/345] drm/amd/amdgpu: power up sdma engine when S3 resume back The sdma_v4 should be ungated when the IP resume back, otherwise it will hang up and resume time out error. Signed-off-by: Prike Liang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 ++++++---- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 +++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 61bd10310604..5803fcbae22f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -948,6 +948,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_VCE: + case AMD_IP_BLOCK_TYPE_SDMA: if (swsmu) ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); else @@ -956,7 +957,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block break; case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: - case AMD_IP_BLOCK_TYPE_SDMA: ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ff18b3a57892..78452cf0115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1889,8 +1889,9 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); if (!amdgpu_sriov_vf(adev)) @@ -1917,8 +1918,9 @@ static int sdma_v4_0_hw_fini(void *handle) sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); - if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs - && adev->powerplay.pp_funcs->set_powergating_by_smu) + if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) || + adev->asic_type == CHIP_RENOIR) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); return 0; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 22f3c60d380f..33960fb38a5d 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -354,6 +354,9 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, case AMD_IP_BLOCK_TYPE_GFX: ret = smu_gfx_off_control(smu, gate); break; + case AMD_IP_BLOCK_TYPE_SDMA: + ret = smu_powergate_sdma(smu, gate); + break; default: break; } From dcafbd50f2e4d5cc964aae409fb5691b743fba23 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 5 Sep 2019 19:22:02 -0400 Subject: [PATCH 084/345] drm/amdgpu: Fix KFD-related kernel oops on Hawaii MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hawaii needs to flush caches explicitly, submitting an IB in a user VMID from kernel mode. There is no s_fence in this case. Fixes: eb3961a57424 ("drm/amdgpu: remove fence context from the job") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7850084a05e3..60655834d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - fence_ctx = job->base.s_fence->scheduled.context; + fence_ctx = job->base.s_fence ? + job->base.s_fence->scheduled.context : 0; } else { vm = NULL; fence_ctx = 0; From 8ad050e6a67822a4adf1e22f396e141b3deaf8ef Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 6 Sep 2019 16:31:21 -0400 Subject: [PATCH 085/345] drm/amd/display: add Asic ID for Dali Dali is a new asic revision based on raven2 Add the ID and ASICREV_IS_DALI define Signed-off-by: Bhawanpreet Lakha Reviewed-by: Huang Rui Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 1f16892f0add..1be6c44fd32f 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -137,10 +137,13 @@ #define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF +#define PICASSO_15D8_REV_E3 0xE3 +#define PICASSO_15D8_REV_E4 0xE4 + #define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) -#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) - +#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < PICASSO_15D8_REV_E3)) +#define ASICREV_IS_DALI(eChipRev) ((eChipRev >= PICASSO_15D8_REV_E3) && (eChipRev < RAVEN1_F0)) #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) From e42a34dec68950ebad7904f235ed2dfff5bb27b5 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 6 Sep 2019 16:32:44 -0400 Subject: [PATCH 086/345] drm/amd/display: Implement voltage limitation for dali [Why] we only want the lowest voltage to be available for dali. [How] Use the get_highest_allowed_voltage_level function to return 0 for dali Signed-off-by: Bhawanpreet Lakha Reviewed-by: Huang Rui Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 383f4f8db8f4..9b2cb57bf2ba 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -708,6 +708,10 @@ static void hack_bounding_box(struct dcn_bw_internal_vars *v, unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev) { + /* for dali, the highest voltage level we want is 0 */ + if (ASICREV_IS_DALI(hw_internal_rev)) + return 0; + /* we are ok with all levels */ return 4; } From bb264220d9316f6bd7c1fd84b8da398c93912931 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 2 Sep 2019 16:33:42 +0800 Subject: [PATCH 087/345] drm/amd/display: Restore backlight brightness after system resume Laptops with AMD APU doesn't restore display backlight brightness after system resume. This issue started when DC was introduced. Let's use BL_CORE_SUSPENDRESUME so the backlight core calls update_status callback after system resume to restore the backlight level. Tested on Dell Inspiron 3180 (Stoney Ridge) and Dell Latitude 5495 (Raven Ridge). Cc: Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9590ca552a28..d3f404f097eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2113,6 +2113,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) } static const struct backlight_ops amdgpu_dm_backlight_ops = { + .options = BL_CORE_SUSPENDRESUME, .get_brightness = amdgpu_dm_backlight_get_brightness, .update_status = amdgpu_dm_backlight_update_status, }; From 8b8031703bd75ebedfcae59d64f8c0c006300bcb Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 4 Sep 2019 16:34:39 +0800 Subject: [PATCH 088/345] drm/amd/powerplay: implement sysfs for getting dpm clock With the common interface print_clk_levels can get the following dpm clock: -pp_dpm_dcefclk -pp_dpm_fclk -pp_dpm_mclk -pp_dpm_sclk -pp_dpm_socclk Signed-off-by: Prike Liang Reviewed-by: Evan Quan Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 70 ++++++++++++++++++++++ drivers/gpu/drm/amd/powerplay/renoir_ppt.h | 25 ++++++++ 2 files changed, 95 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 2a6da546fb55..e62bfba51562 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -177,12 +177,82 @@ static int renoir_get_dpm_uclk_limited(struct smu_context *smu, uint32_t *clock, } +static int renoir_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, char *buf) +{ + int i, size = 0, ret = 0; + uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + SmuMetrics_t metrics = {0}; + + if (!clk_table || clk_type >= SMU_CLK_COUNT) + return -EINVAL; + + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, + (void *)&metrics, false); + if (ret) + return ret; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + /* retirve table returned paramters unit is MHz */ + cur_value = metrics.ClockFrequency[CLOCK_GFXCLK]; + ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max); + if (!ret) { + /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ + if (cur_value == max) + i = 2; + else if (cur_value == min) + i = 0; + else + i = 1; + + size += sprintf(buf + size, "0: %uMhz %s\n", min, + i == 0 ? "*" : ""); + size += sprintf(buf + size, "1: %uMhz %s\n", + i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK, + i == 1 ? "*" : ""); + size += sprintf(buf + size, "2: %uMhz %s\n", max, + i == 2 ? "*" : ""); + } + return size; + case SMU_SOCCLK: + count = NUM_SOCCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_SOCCLK]; + break; + case SMU_MCLK: + count = NUM_MEMCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_UMCCLK]; + break; + case SMU_DCEFCLK: + count = NUM_DCFCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_DCFCLK]; + break; + case SMU_FCLK: + count = NUM_FCLK_DPM_LEVELS; + cur_value = metrics.ClockFrequency[CLOCK_FCLK]; + break; + default: + return -EINVAL; + } + + for (i = 0; i < count; i++) { + GET_DPM_CUR_FREQ(clk_table, clk_type, i, value); + size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + cur_value == value ? "*" : ""); + } + + return size; +} + static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_msg_index = renoir_get_smu_msg_index, .get_smu_table_index = renoir_get_smu_table_index, .tables_init = renoir_tables_init, .set_power_state = NULL, .get_dpm_uclk_limited = renoir_get_dpm_uclk_limited, + .print_clk_levels = renoir_print_clk_levels, }; void renoir_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h index e9b7237c0f7f..2a390ddd37dd 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h @@ -25,4 +25,29 @@ extern void renoir_set_ppt_funcs(struct smu_context *smu); +/* UMD PState Renoir Msg Parameters in MHz */ +#define RENOIR_UMD_PSTATE_GFXCLK 700 +#define RENOIR_UMD_PSTATE_SOCCLK 678 +#define RENOIR_UMD_PSTATE_FCLK 800 + +#define GET_DPM_CUR_FREQ(table, clk_type, dpm_level, freq) \ + do { \ + switch (clk_type) { \ + case SMU_SOCCLK: \ + freq = table->SocClocks[dpm_level].Freq; \ + break; \ + case SMU_MCLK: \ + freq = table->MemClocks[dpm_level].Freq; \ + break; \ + case SMU_DCEFCLK: \ + freq = table->DcfClocks[dpm_level].Freq; \ + break; \ + case SMU_FCLK: \ + freq = table->FClocks[dpm_level].Freq; \ + break; \ + default: \ + break; \ + } \ + } while (0) + #endif From 8fde7784ecd35120a5ace851bcf384e90fb2b64b Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 12 Sep 2019 14:03:41 -0500 Subject: [PATCH 089/345] drm/amdkfd: Swap trap temporary registers in gfx10 trap handler ttmp[4:5] hold information useful to the debugger. Use ttmp[14:15] instead, aligning implementation with gfx9 trap handler. Signed-off-by: Jay Cornwall Reviewed-by: shaoyun liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 6 +++--- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index a8cf82d46109..901fe3590165 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -694,10 +694,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x003f8000, 0x8f6f896f, 0x88776f77, 0x8a6eff6e, 0x023f8000, 0xb9eef807, - 0xb970f812, 0xb971f813, - 0x8ff08870, 0xf4051bb8, + 0xb97af812, 0xb97bf813, + 0x8ffa887a, 0xf4051bbd, 0xfa000000, 0xbf8cc07f, - 0xf4051c38, 0xfa000008, + 0xf4051ebd, 0xfa000008, 0xbf8cc07f, 0x87ee6e6e, 0xbf840001, 0xbe80206e, 0xb971f803, 0x8771ff71, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 35986219ce5f..cdaa523ce6be 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -187,12 +187,12 @@ L_FETCH_2ND_TRAP: // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS - s_getreg_b32 ttmp4, hwreg(HW_REG_SHADER_TMA_LO) - s_getreg_b32 ttmp5, hwreg(HW_REG_SHADER_TMA_HI) - s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 - s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO) + s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI) + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) - s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set From a82e163bca624c7c222b33a95fb6ed7c9bcee801 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 5 Sep 2019 15:28:57 +0800 Subject: [PATCH 090/345] drm/amdgpu: add navi14 PCI ID for work station SKU Add the navi14 PCI device id. Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 48a2070e72f2..22a8d4c4c591 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1012,6 +1012,8 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, From 10e85054f986ce9d899cdb3efc8529f8016bf435 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Tue, 17 Sep 2019 10:35:34 +0800 Subject: [PATCH 091/345] drm/amdgpu: add navi12 pci id Add Navi12 PCI id support. Reviewed-by: Hawking Zhang Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 22a8d4c4c591..c68e54a27a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1018,6 +1018,9 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + /* Navi12 */ + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, + {0, 0, 0} }; From 314eed30ede02fa925990f535652254b5bad6b65 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Sep 2019 11:00:25 -0700 Subject: [PATCH 092/345] usercopy: Avoid HIGHMEM pfn warning When running on a system with >512MB RAM with a 32-bit kernel built with: CONFIG_DEBUG_VIRTUAL=y CONFIG_HIGHMEM=y CONFIG_HARDENED_USERCOPY=y all execve()s will fail due to argv copying into kmap()ed pages, and on usercopy checking the calls ultimately of virt_to_page() will be looking for "bad" kmap (highmem) pointers due to CONFIG_DEBUG_VIRTUAL=y: ------------[ cut here ]------------ kernel BUG at ../arch/x86/mm/physaddr.c:83! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.3.0-rc8 #6 Hardware name: Dell Inc. Inspiron 1318/0C236D, BIOS A04 01/15/2009 EIP: __phys_addr+0xaf/0x100 ... Call Trace: __check_object_size+0xaf/0x3c0 ? __might_sleep+0x80/0xa0 copy_strings+0x1c2/0x370 copy_strings_kernel+0x2b/0x40 __do_execve_file+0x4ca/0x810 ? kmem_cache_alloc+0x1c7/0x370 do_execve+0x1b/0x20 ... The check is from arch/x86/mm/physaddr.c: VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); Due to the kmap() in fs/exec.c: kaddr = kmap(kmapped_page); ... if (copy_from_user(kaddr+offset, str, bytes_to_copy)) ... Now we can fetch the correct page to avoid the pfn check. In both cases, hardened usercopy will need to walk the page-span checker (if enabled) to do sanity checking. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Fixes: f5509cc18daa ("mm: Hardened usercopy") Cc: Matthew Wilcox Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/201909171056.7F2FFD17@keescook --- mm/usercopy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 98e924864554..660717a1ea5c 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -227,7 +228,12 @@ static inline void check_heap_object(const void *ptr, unsigned long n, if (!virt_addr_valid(ptr)) return; - page = virt_to_head_page(ptr); + /* + * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the + * highmem page or fallback to virt_to_page(). The following + * is effectively a highmem-aware virt_to_head_page(). + */ + page = compound_head(kmap_to_page((void *)ptr)); if (PageSlab(page)) { /* Check slab allocator for flags and size. */ From e0f32f78e51b9989ee89f608fd0dd10e9c230652 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 17 Sep 2019 14:09:35 +0200 Subject: [PATCH 093/345] drm/kms: Duct-tape for mode object lifetime checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4f5368b5541a902f6596558b05f5c21a9770dd32 Author: Daniel Vetter Date: Fri Jun 14 08:17:23 2019 +0200 drm/kms: Catch mode_object lifetime errors uncovered a bit a mess in dp drivers. Most drivers (from a quick look, all except i915) register all the dp stuff in their init code, which is too early. With CONFIG_DRM_DP_AUX_CHARDEV this will blow up, because drm_dp_aux_register tries to add a child to a device in sysfs (the connector) which doesn't even exist yet. No one seems to have cared thus far. But with the above change I also moved the setting of dev->registered after the ->load callback, in an attempt to keep old drivers from hitting any WARN_ON backtraces. But that moved radeon.ko from the "working, by accident" to "now also broken" category. Since this is a huge mess I figured a revert would be simplest. But this check has already caught issues in i915: commit 1b9bd09630d4db4827cc04d358a41a16a6bc2cb0 Author: Ville Syrjälä Date: Tue Aug 20 19:16:57 2019 +0300 drm/i915: Do not create a new max_bpc prop for MST connectors Hence I'd like to retain it. Fix the radeon regression by moving the setting of dev->registered back to were it was, and stop the backtraces with an explicit check for dev->driver->load. Everyone else will stay as broken with CONFIG_DRM_DP_AUX_CHARDEV. The next patch will improve the kerneldoc and add a todo entry for this. Fixes: 4f5368b5541a ("drm/kms: Catch mode_object lifetime errors") Cc: Sean Paul Cc: Maarten Lankhorst Reported-by: Michel Dänzer Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer Cc: Michel Dänzer Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190917120936.7501-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_drv.c | 4 ++-- drivers/gpu/drm/drm_mode_object.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c456c3d3def2..769feefeeeef 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -976,14 +976,14 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) if (ret) goto err_minors; + dev->registered = true; + if (dev->driver->load) { ret = dev->driver->load(dev, flags); if (ret) goto err_minors; } - dev->registered = true; - if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_modeset_register_all(dev); diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index c355ba8e6d5d..6a23e36ed4fe 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -42,7 +42,7 @@ int __drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj, { int ret; - WARN_ON(dev->registered && !obj_free_cb); + WARN_ON(!dev->driver->load && dev->registered && !obj_free_cb); mutex_lock(&dev->mode_config.idr_mutex); ret = idr_alloc(&dev->mode_config.object_idr, register_obj ? obj : NULL, @@ -104,7 +104,7 @@ void drm_mode_object_register(struct drm_device *dev, void drm_mode_object_unregister(struct drm_device *dev, struct drm_mode_object *object) { - WARN_ON(dev->registered && !object->free_cb); + WARN_ON(!dev->driver->load && dev->registered && !object->free_cb); mutex_lock(&dev->mode_config.idr_mutex); if (object->id) { From 2d1d25d0a224dcd2021004d52342fc1727ccd85f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 29 Aug 2019 14:41:04 +0100 Subject: [PATCH 094/345] virtio-fs: add Documentation/filesystems/virtiofs.rst Add information about the new "virtiofs" file system. Signed-off-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi --- Documentation/filesystems/index.rst | 10 +++++ Documentation/filesystems/virtiofs.rst | 60 ++++++++++++++++++++++++++ MAINTAINERS | 12 ++++++ 3 files changed, 82 insertions(+) create mode 100644 Documentation/filesystems/virtiofs.rst diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 2de2fe2ab078..56e94bfc580f 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -32,3 +32,13 @@ filesystem implementations. journalling fscrypt + +Filesystems +=========== + +Documentation for filesystem implementations. + +.. toctree:: + :maxdepth: 2 + + virtiofs diff --git a/Documentation/filesystems/virtiofs.rst b/Documentation/filesystems/virtiofs.rst new file mode 100644 index 000000000000..4f338e3cb3f7 --- /dev/null +++ b/Documentation/filesystems/virtiofs.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================== +virtiofs: virtio-fs host<->guest shared file system +=================================================== + +- Copyright (C) 2019 Red Hat, Inc. + +Introduction +============ +The virtiofs file system for Linux implements a driver for the paravirtualized +VIRTIO "virtio-fs" device for guest<->host file system sharing. It allows a +guest to mount a directory that has been exported on the host. + +Guests often require access to files residing on the host or remote systems. +Use cases include making files available to new guests during installation, +booting from a root file system located on the host, persistent storage for +stateless or ephemeral guests, and sharing a directory between guests. + +Although it is possible to use existing network file systems for some of these +tasks, they require configuration steps that are hard to automate and they +expose the storage network to the guest. The virtio-fs device was designed to +solve these problems by providing file system access without networking. + +Furthermore the virtio-fs device takes advantage of the co-location of the +guest and host to increase performance and provide semantics that are not +possible with network file systems. + +Usage +===== +Mount file system with tag ``myfs`` on ``/mnt``: + +.. code-block:: sh + + guest# mount -t virtiofs myfs /mnt + +Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU +and the virtiofsd daemon. + +Internals +========= +Since the virtio-fs device uses the FUSE protocol for file system requests, the +virtiofs file system for Linux is integrated closely with the FUSE file system +client. The guest acts as the FUSE client while the host acts as the FUSE +server. The /dev/fuse interface between the kernel and userspace is replaced +with the virtio-fs device interface. + +FUSE requests are placed into a virtqueue and processed by the host. The +response portion of the buffer is filled in by the host and the guest handles +the request completion. + +Mapping /dev/fuse to virtqueues requires solving differences in semantics +between /dev/fuse and virtqueues. Each time the /dev/fuse device is read, the +FUSE client may choose which request to transfer, making it possible to +prioritize certain requests over others. Virtqueues have queue semantics and +it is not possible to change the order of requests that have been enqueued. +This is especially important if the virtqueue becomes full since it is then +impossible to add high priority requests. In order to address this difference, +the virtio-fs device uses a "hiprio" virtqueue specifically for requests that +have priority over normal requests. diff --git a/MAINTAINERS b/MAINTAINERS index 9cbcf167bdd0..953e8c60d1c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17117,6 +17117,18 @@ S: Supported F: drivers/s390/virtio/ F: arch/s390/include/uapi/asm/virtio-ccw.h +VIRTIO FILE SYSTEM +M: Vivek Goyal +M: Stefan Hajnoczi +M: Miklos Szeredi +L: virtualization@lists.linux-foundation.org +L: linux-fsdevel@vger.kernel.org +W: https://virtio-fs.gitlab.io/ +S: Supported +F: fs/fuse/virtio_fs.c +F: include/uapi/linux/virtio_fs.h +F: Documentation/filesystems/virtiofs.rst + VIRTIO GPU DRIVER M: David Airlie M: Gerd Hoffmann From e16a7cbced7110866dcde84e504909ea85099bbd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Sep 2019 14:49:53 -0500 Subject: [PATCH 095/345] drm/amdgpu: flag navi12 and 14 as experimental for 5.4 We can remove this later as things get closer to launch. Reviewed-by: Xiaojie Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c68e54a27a2c..5da72ca6f3e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1011,15 +1011,15 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, - {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, - {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, /* Navi12 */ - {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; From 26b1d3b527e7bf3e24b814d617866ac5199ce68d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 5 Sep 2019 20:53:18 +0200 Subject: [PATCH 096/345] drm/atomic: Take the atomic toys away from X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -modesetting ddx has a totally broken idea of how atomic works: - doesn't disable old connectors, assuming they get auto-disable like with the legacy setcrtc - assumes ASYNC_FLIP is wired through for the atomic ioctl - not a single call to TEST_ONLY Iow the implementation is a 1:1 translation of legacy ioctls to atomic, which is a) broken b) pointless. We already have bugs in both i915 and amdgpu-DC where this prevents us from enabling neat features. If anyone ever cares about atomic in X we can easily add a new atomic level (req->value == 2) for X to get back the shiny toys. Since these broken versions of -modesetting have been shipping, there's really no other way to get out of this bind. v2: - add an informational dmesg output (Rob, Ajax) - reorder after the DRIVER_ATOMIC check to avoid useless noise (Ilia) - allow req->value > 2 so that X can do another attempt at atomic in the future v3: Go with paranoid, insist that the X should be first (suggested by Rob) Cc: Ilia Mirkin References: https://gitlab.freedesktop.org/xorg/xserver/issues/629 References: https://gitlab.freedesktop.org/xorg/xserver/merge_requests/180 References: abbc0697d5fb ("drm/fb: revert the i915 Actually configure untiled displays from master") Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst (v1) Reviewed-by: Nicholas Kazlauskas (v1) Cc: Michel Dänzer Cc: Alex Deucher Cc: Adam Jackson Acked-by: Adam Jackson Cc: Sean Paul Cc: David Airlie Cc: Rob Clark Acked-by: Rob Clark Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190905185318.31363-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_ioctl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f675a3bb2c88..fcd728d7cf72 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -336,7 +336,12 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) case DRM_CLIENT_CAP_ATOMIC: if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) return -EOPNOTSUPP; - if (req->value > 1) + /* The modesetting DDX has a totally broken idea of atomic. */ + if (current->comm[0] == 'X' && req->value == 1) { + pr_info("broken atomic modeset userspace detected, disabling atomic\n"); + return -EOPNOTSUPP; + } + if (req->value > 2) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; From f2cbda2dba11de868759cae9c0d2bab5b8411406 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 3 Sep 2019 21:06:41 +0200 Subject: [PATCH 097/345] drm/atomic: Reject FLIP_ASYNC unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's never been wired up. Only userspace that tried to use it (and didn't actually check whether anything works, but hey it builds) is the -modesetting atomic implementation. And we just shut that up. If there's anyone else then we need to silently accept this flag no matter what, and find a new one. Because once a flag is tainted, it's lost. Reviewed-by: Maarten Lankhorst Reviewed-by: Nicholas Kazlauskas Cc: Maarten Lankhorst Cc: Michel Dänzer Cc: Alex Deucher Cc: Adam Jackson Cc: Sean Paul Cc: David Airlie Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20190903190642.32588-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_atomic_uapi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 5a5b42db6f2a..7a26bfb5329c 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1305,8 +1305,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, if (arg->reserved) return -EINVAL; - if ((arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) && - !dev->mode_config.async_page_flip) + if (arg->flags & DRM_MODE_PAGE_FLIP_ASYNC) return -EINVAL; /* can't test and expect an event at the same time. */ From 4d85f45c73a22bc0ee900c7505b7210a87a7966d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 3 Sep 2019 21:06:42 +0200 Subject: [PATCH 098/345] drm/atomic: Rename crtc_state->pageflip_flags to async_flip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the only flag anyone actually cares about. Plus if we're unlucky, the atomic ioctl might need a different flag for async flips. So better to abstract this away from the uapi a bit. Reviewed-by: Maarten Lankhorst Reviewed-by: Nicholas Kazlauskas Cc: Maarten Lankhorst Cc: Michel Dänzer Cc: Alex Deucher Cc: Adam Jackson Cc: Sean Paul Cc: David Airlie Signed-off-by: Daniel Vetter Cc: Maxime Ripard Cc: Daniel Vetter Cc: Nicholas Kazlauskas Cc: Leo Li Cc: Harry Wentland Cc: David Francis Cc: Mario Kleiner Cc: Bhawanpreet Lakha Cc: Ben Skeggs Cc: "Christian König" Cc: Ilia Mirkin Cc: Sam Ravnborg Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190903190642.32588-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++--- drivers/gpu/drm/drm_atomic_helper.c | 2 +- drivers/gpu/drm/drm_atomic_state_helper.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 4 ++-- include/drm/drm_crtc.h | 8 ++++---- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a71ed1e7762..2f0ef0820f00 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5756,8 +5756,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * change FB pitch, DCC state, rotation or mirroing. */ bundle->flip_addrs[planes_count].flip_immediate = - (crtc->state->pageflip_flags & - DRM_MODE_PAGE_FLIP_ASYNC) != 0 && + crtc->state->async_flip && acrtc_state->update_type == UPDATE_TYPE_FAST; timestamp_ns = ktime_get_ns(); @@ -6334,7 +6333,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_enable_crtc_interrupts(dev, state, true); for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) - if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) + if (new_crtc_state->async_flip) wait_for_vblank = false; /* update planes when needed per crtc*/ diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index aa16ea17ff9b..3a67f63c3146 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3275,7 +3275,7 @@ static int page_flip_common(struct drm_atomic_state *state, return PTR_ERR(crtc_state); crtc_state->event = event; - crtc_state->pageflip_flags = flags; + crtc_state->async_flip = flags & DRM_MODE_PAGE_FLIP_ASYNC; plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c index 46dc264a248b..d0a937fb0c56 100644 --- a/drivers/gpu/drm/drm_atomic_state_helper.c +++ b/drivers/gpu/drm/drm_atomic_state_helper.c @@ -128,7 +128,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc, state->zpos_changed = false; state->commit = NULL; state->event = NULL; - state->pageflip_flags = 0; + state->async_flip = false; /* Self refresh should be canceled when a new update is available */ state->active = drm_atomic_crtc_effectively_active(state); diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 2db029371c91..5193b6257061 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -267,7 +267,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset, asyw->image.pitch[0] = fb->base.pitches[0]; } - if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)) + if (!asyh->state.async_flip) asyw->image.interval = 1; else asyw->image.interval = 0; @@ -383,7 +383,7 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw, } /* Can't do an immediate flip while changing the LUT. */ - asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC; + asyh->state.async_flip = false; } static int diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 7d14c11bdc0a..c4528eb5d168 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -285,12 +285,12 @@ struct drm_crtc_state { u32 target_vblank; /** - * @pageflip_flags: + * @async_flip: * - * DRM_MODE_PAGE_FLIP_* flags, as passed to the page flip ioctl. - * Zero in any other case. + * This is set when DRM_MODE_PAGE_FLIP_ASYNC is set in the legacy + * PAGE_FLIP IOCTL. It's not wired up for the atomic IOCTL itself yet. */ - u32 pageflip_flags; + bool async_flip; /** * @vrr_enabled: From a62a8ef9d97da23762a588592c8b8eb50a8deb6a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 12 Jun 2018 09:41:17 +0100 Subject: [PATCH 099/345] virtio-fs: add virtiofs filesystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a basic file system module for virtio-fs. This does not yet contain shared data support between host and guest or metadata coherency speedups. However it is already significantly faster than virtio-9p. Design Overview =============== With the goal of designing something with better performance and local file system semantics, a bunch of ideas were proposed. - Use fuse protocol (instead of 9p) for communication between guest and host. Guest kernel will be fuse client and a fuse server will run on host to serve the requests. - For data access inside guest, mmap portion of file in QEMU address space and guest accesses this memory using dax. That way guest page cache is bypassed and there is only one copy of data (on host). This will also enable mmap(MAP_SHARED) between guests. - For metadata coherency, there is a shared memory region which contains version number associated with metadata and any guest changing metadata updates version number and other guests refresh metadata on next access. This is yet to be implemented. How virtio-fs differs from existing approaches ============================================== The unique idea behind virtio-fs is to take advantage of the co-location of the virtual machine and hypervisor to avoid communication (vmexits). DAX allows file contents to be accessed without communication with the hypervisor. The shared memory region for metadata avoids communication in the common case where metadata is unchanged. By replacing expensive communication with cheaper shared memory accesses, we expect to achieve better performance than approaches based on network file system protocols. In addition, this also makes it easier to achieve local file system semantics (coherency). These techniques are not applicable to network file system protocols since the communications channel is bypassed by taking advantage of shared memory on a local machine. This is why we decided to build virtio-fs rather than focus on 9P or NFS. Caching Modes ============= Like virtio-9p, different caching modes are supported which determine the coherency level as well. The “cache=FOO” and “writeback” options control the level of coherence between the guest and host filesystems. - cache=none metadata, data and pathname lookup are not cached in guest. They are always fetched from host and any changes are immediately pushed to host. - cache=always metadata, data and pathname lookup are cached in guest and never expire. - cache=auto metadata and pathname lookup cache expires after a configured amount of time (default is 1 second). Data is cached while the file is open (close to open consistency). - writeback/no_writeback These options control the writeback strategy. If writeback is disabled, then normal writes will immediately be synchronized with the host fs. If writeback is enabled, then writes may be cached in the guest until the file is closed or an fsync(2) performed. This option has no effect on mmap-ed writes or writes going through the DAX mechanism. Signed-off-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Acked-by: Michael S. Tsirkin Signed-off-by: Miklos Szeredi --- fs/fuse/Kconfig | 11 + fs/fuse/Makefile | 1 + fs/fuse/fuse_i.h | 9 + fs/fuse/inode.c | 4 + fs/fuse/virtio_fs.c | 1195 +++++++++++++++++++++++++++++++ include/uapi/linux/virtio_fs.h | 19 + include/uapi/linux/virtio_ids.h | 1 + 7 files changed, 1240 insertions(+) create mode 100644 fs/fuse/virtio_fs.c create mode 100644 include/uapi/linux/virtio_fs.h diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 24fc5a5c1b97..0635cba19971 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -27,3 +27,14 @@ config CUSE If you want to develop or use a userspace character device based on CUSE, answer Y or M. + +config VIRTIO_FS + tristate "Virtio Filesystem" + depends on FUSE_FS + select VIRTIO + help + The Virtio Filesystem allows guests to mount file systems from the + host. + + If you want to share files between guests or with the host, answer Y + or M. diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 9485019c2a14..6419a2b3510d 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o +obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc89cb40e874..956aeaf961ae 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -353,6 +353,10 @@ struct fuse_req { /** Used to wake up the task waiting for completion of request*/ wait_queue_head_t waitq; +#if IS_ENABLED(CONFIG_VIRTIO_FS) + /** virtio-fs's physically contiguous buffer for in and out args */ + void *argbuf; +#endif }; struct fuse_iqueue; @@ -383,6 +387,11 @@ struct fuse_iqueue_ops { */ void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) __releases(fiq->lock); + + /** + * Clean up when fuse_iqueue is destroyed + */ + void (*release)(struct fuse_iqueue *fiq); }; /** /dev/fuse input queue operations */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 10d193b24fb8..3d598a5bb5b5 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -630,6 +630,10 @@ EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { + struct fuse_iqueue *fiq = &fc->iq; + + if (fiq->ops->release) + fiq->ops->release(fiq); put_pid_ns(fc->pid_ns); put_user_ns(fc->user_ns); fc->release(fc); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c new file mode 100644 index 000000000000..6af3f131e468 --- /dev/null +++ b/fs/fuse/virtio_fs.c @@ -0,0 +1,1195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio-fs: Virtio Filesystem + * Copyright (C) 2018 Red Hat, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "fuse_i.h" + +/* List of virtio-fs device instances and a lock for the list. Also provides + * mutual exclusion in device removal and mounting path + */ +static DEFINE_MUTEX(virtio_fs_mutex); +static LIST_HEAD(virtio_fs_instances); + +enum { + VQ_HIPRIO, + VQ_REQUEST +}; + +/* Per-virtqueue state */ +struct virtio_fs_vq { + spinlock_t lock; + struct virtqueue *vq; /* protected by ->lock */ + struct work_struct done_work; + struct list_head queued_reqs; + struct delayed_work dispatch_work; + struct fuse_dev *fud; + bool connected; + long in_flight; + char name[24]; +} ____cacheline_aligned_in_smp; + +/* A virtio-fs device instance */ +struct virtio_fs { + struct kref refcount; + struct list_head list; /* on virtio_fs_instances */ + char *tag; + struct virtio_fs_vq *vqs; + unsigned int nvqs; /* number of virtqueues */ + unsigned int num_request_queues; /* number of request queues */ +}; + +struct virtio_fs_forget { + struct fuse_in_header ih; + struct fuse_forget_in arg; + /* This request can be temporarily queued on virt queue */ + struct list_head list; +}; + +static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) +{ + struct virtio_fs *fs = vq->vdev->priv; + + return &fs->vqs[vq->index]; +} + +static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) +{ + return &vq_to_fsvq(vq)->fud->pq; +} + +static void release_virtio_fs_obj(struct kref *ref) +{ + struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); + + kfree(vfs->vqs); + kfree(vfs); +} + +/* Make sure virtiofs_mutex is held */ +static void virtio_fs_put(struct virtio_fs *fs) +{ + kref_put(&fs->refcount, release_virtio_fs_obj); +} + +static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) +{ + struct virtio_fs *vfs = fiq->priv; + + mutex_lock(&virtio_fs_mutex); + virtio_fs_put(vfs); + mutex_unlock(&virtio_fs_mutex); +} + +static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) +{ + WARN_ON(fsvq->in_flight < 0); + + /* Wait for in flight requests to finish.*/ + while (1) { + spin_lock(&fsvq->lock); + if (!fsvq->in_flight) { + spin_unlock(&fsvq->lock); + break; + } + spin_unlock(&fsvq->lock); + /* TODO use completion instead of timeout */ + usleep_range(1000, 2000); + } + + flush_work(&fsvq->done_work); + flush_delayed_work(&fsvq->dispatch_work); +} + +static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq) +{ + struct virtio_fs_forget *forget; + + spin_lock(&fsvq->lock); + while (1) { + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) + break; + list_del(&forget->list); + kfree(forget); + } + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_drain_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + if (i == VQ_HIPRIO) + drain_hiprio_queued_reqs(fsvq); + + virtio_fs_drain_queue(fsvq); + } +} + +static void virtio_fs_start_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = true; + spin_unlock(&fsvq->lock); + } +} + +/* Add a new instance to the list or return -EEXIST if tag name exists*/ +static int virtio_fs_add_instance(struct virtio_fs *fs) +{ + struct virtio_fs *fs2; + bool duplicate = false; + + mutex_lock(&virtio_fs_mutex); + + list_for_each_entry(fs2, &virtio_fs_instances, list) { + if (strcmp(fs->tag, fs2->tag) == 0) + duplicate = true; + } + + if (!duplicate) + list_add_tail(&fs->list, &virtio_fs_instances); + + mutex_unlock(&virtio_fs_mutex); + + if (duplicate) + return -EEXIST; + return 0; +} + +/* Return the virtio_fs with a given tag, or NULL */ +static struct virtio_fs *virtio_fs_find_instance(const char *tag) +{ + struct virtio_fs *fs; + + mutex_lock(&virtio_fs_mutex); + + list_for_each_entry(fs, &virtio_fs_instances, list) { + if (strcmp(fs->tag, tag) == 0) { + kref_get(&fs->refcount); + goto found; + } + } + + fs = NULL; /* not found */ + +found: + mutex_unlock(&virtio_fs_mutex); + + return fs; +} + +static void virtio_fs_free_devs(struct virtio_fs *fs) +{ + unsigned int i; + + for (i = 0; i < fs->nvqs; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + if (!fsvq->fud) + continue; + + fuse_dev_free(fsvq->fud); + fsvq->fud = NULL; + } +} + +/* Read filesystem name from virtio config into fs->tag (must kfree()). */ +static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) +{ + char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; + char *end; + size_t len; + + virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), + &tag_buf, sizeof(tag_buf)); + end = memchr(tag_buf, '\0', sizeof(tag_buf)); + if (end == tag_buf) + return -EINVAL; /* empty tag */ + if (!end) + end = &tag_buf[sizeof(tag_buf)]; + + len = end - tag_buf; + fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); + if (!fs->tag) + return -ENOMEM; + memcpy(fs->tag, tag_buf, len); + fs->tag[len] = '\0'; + return 0; +} + +/* Work function for hiprio completion */ +static void virtio_fs_hiprio_done_work(struct work_struct *work) +{ + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + done_work); + struct virtqueue *vq = fsvq->vq; + + /* Free completed FUSE_FORGET requests */ + spin_lock(&fsvq->lock); + do { + unsigned int len; + void *req; + + virtqueue_disable_cb(vq); + + while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + kfree(req); + fsvq->in_flight--; + } + } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_dummy_dispatch_work(struct work_struct *work) +{ +} + +static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) +{ + struct virtio_fs_forget *forget; + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + dispatch_work.work); + struct virtqueue *vq = fsvq->vq; + struct scatterlist sg; + struct scatterlist *sgs[] = {&sg}; + bool notify; + int ret; + + pr_debug("virtio-fs: worker %s called.\n", __func__); + while (1) { + spin_lock(&fsvq->lock); + forget = list_first_entry_or_null(&fsvq->queued_reqs, + struct virtio_fs_forget, list); + if (!forget) { + spin_unlock(&fsvq->lock); + return; + } + + list_del(&forget->list); + if (!fsvq->connected) { + spin_unlock(&fsvq->lock); + kfree(forget); + continue; + } + + sg_init_one(&sg, forget, sizeof(*forget)); + + /* Enqueue the request */ + dev_dbg(&vq->vdev->dev, "%s\n", __func__); + ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", + ret); + list_add_tail(&forget->list, + &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + } else { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", + ret); + kfree(forget); + } + spin_unlock(&fsvq->lock); + return; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); + pr_debug("virtio-fs: worker %s dispatched one forget request.\n", + __func__); + } +} + +/* Allocate and copy args into req->argbuf */ +static int copy_args_to_argbuf(struct fuse_req *req) +{ + struct fuse_args *args = req->args; + unsigned int offset = 0; + unsigned int num_in; + unsigned int num_out; + unsigned int len; + unsigned int i; + + num_in = args->in_numargs - args->in_pages; + num_out = args->out_numargs - args->out_pages; + len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + + fuse_len_args(num_out, args->out_args); + + req->argbuf = kmalloc(len, GFP_ATOMIC); + if (!req->argbuf) + return -ENOMEM; + + for (i = 0; i < num_in; i++) { + memcpy(req->argbuf + offset, + args->in_args[i].value, + args->in_args[i].size); + offset += args->in_args[i].size; + } + + return 0; +} + +/* Copy args out of and free req->argbuf */ +static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) +{ + unsigned int remaining; + unsigned int offset; + unsigned int num_in; + unsigned int num_out; + unsigned int i; + + remaining = req->out.h.len - sizeof(req->out.h); + num_in = args->in_numargs - args->in_pages; + num_out = args->out_numargs - args->out_pages; + offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); + + for (i = 0; i < num_out; i++) { + unsigned int argsize = args->out_args[i].size; + + if (args->out_argvar && + i == args->out_numargs - 1 && + argsize > remaining) { + argsize = remaining; + } + + memcpy(args->out_args[i].value, req->argbuf + offset, argsize); + offset += argsize; + + if (i != args->out_numargs - 1) + remaining -= argsize; + } + + /* Store the actual size of the variable-length arg */ + if (args->out_argvar) + args->out_args[args->out_numargs - 1].size = remaining; + + kfree(req->argbuf); + req->argbuf = NULL; +} + +/* Work function for request completion */ +static void virtio_fs_requests_done_work(struct work_struct *work) +{ + struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, + done_work); + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct fuse_conn *fc = fsvq->fud->fc; + struct virtqueue *vq = fsvq->vq; + struct fuse_req *req; + struct fuse_args_pages *ap; + struct fuse_req *next; + struct fuse_args *args; + unsigned int len, i, thislen; + struct page *page; + LIST_HEAD(reqs); + + /* Collect completed requests off the virtqueue */ + spin_lock(&fsvq->lock); + do { + virtqueue_disable_cb(vq); + + while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + spin_lock(&fpq->lock); + list_move_tail(&req->list, &reqs); + spin_unlock(&fpq->lock); + } + } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); + spin_unlock(&fsvq->lock); + + /* End requests */ + list_for_each_entry_safe(req, next, &reqs, list) { + /* + * TODO verify that server properly follows FUSE protocol + * (oh.uniq, oh.len) + */ + args = req->args; + copy_args_from_argbuf(args, req); + + if (args->out_pages && args->page_zeroing) { + len = args->out_args[args->out_numargs - 1].size; + ap = container_of(args, typeof(*ap), args); + for (i = 0; i < ap->num_pages; i++) { + thislen = ap->descs[i].length; + if (len < thislen) { + WARN_ON(ap->descs[i].offset); + page = ap->pages[i]; + zero_user_segment(page, len, thislen); + len = 0; + } else { + len -= thislen; + } + } + } + + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); + spin_unlock(&fpq->lock); + + fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + fsvq->in_flight--; + spin_unlock(&fsvq->lock); + } +} + +/* Virtqueue interrupt handler */ +static void virtio_fs_vq_done(struct virtqueue *vq) +{ + struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); + + dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); + + schedule_work(&fsvq->done_work); +} + +/* Initialize virtqueues */ +static int virtio_fs_setup_vqs(struct virtio_device *vdev, + struct virtio_fs *fs) +{ + struct virtqueue **vqs; + vq_callback_t **callbacks; + const char **names; + unsigned int i; + int ret = 0; + + virtio_cread(vdev, struct virtio_fs_config, num_request_queues, + &fs->num_request_queues); + if (fs->num_request_queues == 0) + return -EINVAL; + + fs->nvqs = 1 + fs->num_request_queues; + fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); + if (!fs->vqs) + return -ENOMEM; + + vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); + callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), + GFP_KERNEL); + names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); + if (!vqs || !callbacks || !names) { + ret = -ENOMEM; + goto out; + } + + callbacks[VQ_HIPRIO] = virtio_fs_vq_done; + snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name), + "hiprio"); + names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; + INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work); + INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs); + INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work, + virtio_fs_hiprio_dispatch_work); + spin_lock_init(&fs->vqs[VQ_HIPRIO].lock); + + /* Initialize the requests virtqueues */ + for (i = VQ_REQUEST; i < fs->nvqs; i++) { + spin_lock_init(&fs->vqs[i].lock); + INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work); + INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work, + virtio_fs_dummy_dispatch_work); + INIT_LIST_HEAD(&fs->vqs[i].queued_reqs); + snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name), + "requests.%u", i - VQ_REQUEST); + callbacks[i] = virtio_fs_vq_done; + names[i] = fs->vqs[i].name; + } + + ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); + if (ret < 0) + goto out; + + for (i = 0; i < fs->nvqs; i++) + fs->vqs[i].vq = vqs[i]; + + virtio_fs_start_all_queues(fs); +out: + kfree(names); + kfree(callbacks); + kfree(vqs); + if (ret) + kfree(fs->vqs); + return ret; +} + +/* Free virtqueues (device must already be reset) */ +static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, + struct virtio_fs *fs) +{ + vdev->config->del_vqs(vdev); +} + +static int virtio_fs_probe(struct virtio_device *vdev) +{ + struct virtio_fs *fs; + int ret; + + fs = kzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + return -ENOMEM; + kref_init(&fs->refcount); + vdev->priv = fs; + + ret = virtio_fs_read_tag(vdev, fs); + if (ret < 0) + goto out; + + ret = virtio_fs_setup_vqs(vdev, fs); + if (ret < 0) + goto out; + + /* TODO vq affinity */ + + /* Bring the device online in case the filesystem is mounted and + * requests need to be sent before we return. + */ + virtio_device_ready(vdev); + + ret = virtio_fs_add_instance(fs); + if (ret < 0) + goto out_vqs; + + return 0; + +out_vqs: + vdev->config->reset(vdev); + virtio_fs_cleanup_vqs(vdev, fs); + +out: + vdev->priv = NULL; + kfree(fs); + return ret; +} + +static void virtio_fs_stop_all_queues(struct virtio_fs *fs) +{ + struct virtio_fs_vq *fsvq; + int i; + + for (i = 0; i < fs->nvqs; i++) { + fsvq = &fs->vqs[i]; + spin_lock(&fsvq->lock); + fsvq->connected = false; + spin_unlock(&fsvq->lock); + } +} + +static void virtio_fs_remove(struct virtio_device *vdev) +{ + struct virtio_fs *fs = vdev->priv; + + mutex_lock(&virtio_fs_mutex); + /* This device is going away. No one should get new reference */ + list_del_init(&fs->list); + virtio_fs_stop_all_queues(fs); + virtio_fs_drain_all_queues(fs); + vdev->config->reset(vdev); + virtio_fs_cleanup_vqs(vdev, fs); + + vdev->priv = NULL; + /* Put device reference on virtio_fs object */ + virtio_fs_put(fs); + mutex_unlock(&virtio_fs_mutex); +} + +#ifdef CONFIG_PM_SLEEP +static int virtio_fs_freeze(struct virtio_device *vdev) +{ + /* TODO need to save state here */ + pr_warn("virtio-fs: suspend/resume not yet supported\n"); + return -EOPNOTSUPP; +} + +static int virtio_fs_restore(struct virtio_device *vdev) +{ + /* TODO need to restore state here */ + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +const static struct virtio_device_id id_table[] = { + { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, + {}, +}; + +const static unsigned int feature_table[] = {}; + +static struct virtio_driver virtio_fs_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .feature_table = feature_table, + .feature_table_size = ARRAY_SIZE(feature_table), + .probe = virtio_fs_probe, + .remove = virtio_fs_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_fs_freeze, + .restore = virtio_fs_restore, +#endif +}; + +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + struct fuse_forget_link *link; + struct virtio_fs_forget *forget; + struct scatterlist sg; + struct scatterlist *sgs[] = {&sg}; + struct virtio_fs *fs; + struct virtqueue *vq; + struct virtio_fs_vq *fsvq; + bool notify; + u64 unique; + int ret; + + link = fuse_dequeue_forget(fiq, 1, NULL); + unique = fuse_get_unique(fiq); + + fs = fiq->priv; + fsvq = &fs->vqs[VQ_HIPRIO]; + spin_unlock(&fiq->lock); + + /* Allocate a buffer for the request */ + forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); + + forget->ih = (struct fuse_in_header){ + .opcode = FUSE_FORGET, + .nodeid = link->forget_one.nodeid, + .unique = unique, + .len = sizeof(*forget), + }; + forget->arg = (struct fuse_forget_in){ + .nlookup = link->forget_one.nlookup, + }; + + sg_init_one(&sg, forget, sizeof(*forget)); + + /* Enqueue the request */ + spin_lock(&fsvq->lock); + + if (!fsvq->connected) { + kfree(forget); + spin_unlock(&fsvq->lock); + goto out; + } + + vq = fsvq->vq; + dev_dbg(&vq->vdev->dev, "%s\n", __func__); + + ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n", + ret); + list_add_tail(&forget->list, &fsvq->queued_reqs); + schedule_delayed_work(&fsvq->dispatch_work, + msecs_to_jiffies(1)); + } else { + pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", + ret); + kfree(forget); + } + spin_unlock(&fsvq->lock); + goto out; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); +out: + kfree(link); +} + +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + /* + * TODO interrupts. + * + * Normal fs operations on a local filesystems aren't interruptible. + * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) + * with shared lock between host and guest. + */ + spin_unlock(&fiq->lock); +} + +/* Return the number of scatter-gather list elements required */ +static unsigned int sg_count_fuse_req(struct fuse_req *req) +{ + struct fuse_args *args = req->args; + struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); + unsigned int total_sgs = 1 /* fuse_in_header */; + + if (args->in_numargs - args->in_pages) + total_sgs += 1; + + if (args->in_pages) + total_sgs += ap->num_pages; + + if (!test_bit(FR_ISREPLY, &req->flags)) + return total_sgs; + + total_sgs += 1 /* fuse_out_header */; + + if (args->out_numargs - args->out_pages) + total_sgs += 1; + + if (args->out_pages) + total_sgs += ap->num_pages; + + return total_sgs; +} + +/* Add pages to scatter-gather list and return number of elements used */ +static unsigned int sg_init_fuse_pages(struct scatterlist *sg, + struct page **pages, + struct fuse_page_desc *page_descs, + unsigned int num_pages, + unsigned int total_len) +{ + unsigned int i; + unsigned int this_len; + + for (i = 0; i < num_pages && total_len; i++) { + sg_init_table(&sg[i], 1); + this_len = min(page_descs[i].length, total_len); + sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); + total_len -= this_len; + } + + return i; +} + +/* Add args to scatter-gather list and return number of elements used */ +static unsigned int sg_init_fuse_args(struct scatterlist *sg, + struct fuse_req *req, + struct fuse_arg *args, + unsigned int numargs, + bool argpages, + void *argbuf, + unsigned int *len_used) +{ + struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); + unsigned int total_sgs = 0; + unsigned int len; + + len = fuse_len_args(numargs - argpages, args); + if (len) + sg_init_one(&sg[total_sgs++], argbuf, len); + + if (argpages) + total_sgs += sg_init_fuse_pages(&sg[total_sgs], + ap->pages, ap->descs, + ap->num_pages, + args[numargs - 1].size); + + if (len_used) + *len_used = len; + + return total_sgs; +} + +/* Add a request to a virtqueue and kick the device */ +static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, + struct fuse_req *req) +{ + /* requests need at least 4 elements */ + struct scatterlist *stack_sgs[6]; + struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; + struct scatterlist **sgs = stack_sgs; + struct scatterlist *sg = stack_sg; + struct virtqueue *vq; + struct fuse_args *args = req->args; + unsigned int argbuf_used = 0; + unsigned int out_sgs = 0; + unsigned int in_sgs = 0; + unsigned int total_sgs; + unsigned int i; + int ret; + bool notify; + + /* Does the sglist fit on the stack? */ + total_sgs = sg_count_fuse_req(req); + if (total_sgs > ARRAY_SIZE(stack_sgs)) { + sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); + sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); + if (!sgs || !sg) { + ret = -ENOMEM; + goto out; + } + } + + /* Use a bounce buffer since stack args cannot be mapped */ + ret = copy_args_to_argbuf(req); + if (ret < 0) + goto out; + + /* Request elements */ + sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); + out_sgs += sg_init_fuse_args(&sg[out_sgs], req, + (struct fuse_arg *)args->in_args, + args->in_numargs, args->in_pages, + req->argbuf, &argbuf_used); + + /* Reply elements */ + if (test_bit(FR_ISREPLY, &req->flags)) { + sg_init_one(&sg[out_sgs + in_sgs++], + &req->out.h, sizeof(req->out.h)); + in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, + args->out_args, args->out_numargs, + args->out_pages, + req->argbuf + argbuf_used, NULL); + } + + WARN_ON(out_sgs + in_sgs != total_sgs); + + for (i = 0; i < total_sgs; i++) + sgs[i] = &sg[i]; + + spin_lock(&fsvq->lock); + + if (!fsvq->connected) { + spin_unlock(&fsvq->lock); + ret = -ENOTCONN; + goto out; + } + + vq = fsvq->vq; + ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); + if (ret < 0) { + spin_unlock(&fsvq->lock); + goto out; + } + + fsvq->in_flight++; + notify = virtqueue_kick_prepare(vq); + + spin_unlock(&fsvq->lock); + + if (notify) + virtqueue_notify(vq); + +out: + if (ret < 0 && req->argbuf) { + kfree(req->argbuf); + req->argbuf = NULL; + } + if (sgs != stack_sgs) { + kfree(sgs); + kfree(sg); + } + + return ret; +} + +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) +__releases(fiq->lock) +{ + unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ + struct virtio_fs *fs; + struct fuse_conn *fc; + struct fuse_req *req; + struct fuse_pqueue *fpq; + int ret; + + WARN_ON(list_empty(&fiq->pending)); + req = list_last_entry(&fiq->pending, struct fuse_req, list); + clear_bit(FR_PENDING, &req->flags); + list_del_init(&req->list); + WARN_ON(!list_empty(&fiq->pending)); + spin_unlock(&fiq->lock); + + fs = fiq->priv; + fc = fs->vqs[queue_id].fud->fc; + + pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", + __func__, req->in.h.opcode, req->in.h.unique, + req->in.h.nodeid, req->in.h.len, + fuse_len_args(req->args->out_numargs, req->args->out_args)); + + fpq = &fs->vqs[queue_id].fud->pq; + spin_lock(&fpq->lock); + if (!fpq->connected) { + spin_unlock(&fpq->lock); + req->out.h.error = -ENODEV; + pr_err("virtio-fs: %s disconnected\n", __func__); + fuse_request_end(fc, req); + return; + } + list_add_tail(&req->list, fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + +retry: + ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req); + if (ret < 0) { + if (ret == -ENOMEM || ret == -ENOSPC) { + /* Virtqueue full. Retry submission */ + /* TODO use completion instead of timeout */ + usleep_range(20, 30); + goto retry; + } + req->out.h.error = ret; + pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + list_del_init(&req->list); + spin_unlock(&fpq->lock); + fuse_request_end(fc, req); + return; + } +} + +const static struct fuse_iqueue_ops virtio_fs_fiq_ops = { + .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, + .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, + .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, + .release = virtio_fs_fiq_release, +}; + +static int virtio_fs_fill_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct virtio_fs *fs = fc->iq.priv; + unsigned int i; + int err; + struct fuse_fs_context ctx = { + .rootmode = S_IFDIR, + .default_permissions = 1, + .allow_other = 1, + .max_read = UINT_MAX, + .blksize = 512, + .destroy = true, + .no_control = true, + .no_force_umount = true, + }; + + mutex_lock(&virtio_fs_mutex); + + /* After holding mutex, make sure virtiofs device is still there. + * Though we are holding a reference to it, drive ->remove might + * still have cleaned up virtual queues. In that case bail out. + */ + err = -EINVAL; + if (list_empty(&fs->list)) { + pr_info("virtio-fs: tag <%s> not found\n", fs->tag); + goto err; + } + + err = -ENOMEM; + /* Allocate fuse_dev for hiprio and notification queues */ + for (i = 0; i < VQ_REQUEST; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + fsvq->fud = fuse_dev_alloc(); + if (!fsvq->fud) + goto err_free_fuse_devs; + } + + ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; + err = fuse_fill_super_common(sb, &ctx); + if (err < 0) + goto err_free_fuse_devs; + + fc = fs->vqs[VQ_REQUEST].fud->fc; + + for (i = 0; i < fs->nvqs; i++) { + struct virtio_fs_vq *fsvq = &fs->vqs[i]; + + if (i == VQ_REQUEST) + continue; /* already initialized */ + fuse_dev_install(fsvq->fud, fc); + } + + /* Previous unmount will stop all queues. Start these again */ + virtio_fs_start_all_queues(fs); + fuse_send_init(fc); + mutex_unlock(&virtio_fs_mutex); + return 0; + +err_free_fuse_devs: + virtio_fs_free_devs(fs); +err: + mutex_unlock(&virtio_fs_mutex); + return err; +} + +static void virtio_kill_sb(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct virtio_fs *vfs; + struct virtio_fs_vq *fsvq; + + /* If mount failed, we can still be called without any fc */ + if (!fc) + return fuse_kill_sb_anon(sb); + + vfs = fc->iq.priv; + fsvq = &vfs->vqs[VQ_HIPRIO]; + + /* Stop forget queue. Soon destroy will be sent */ + spin_lock(&fsvq->lock); + fsvq->connected = false; + spin_unlock(&fsvq->lock); + virtio_fs_drain_all_queues(vfs); + + fuse_kill_sb_anon(sb); + + /* fuse_kill_sb_anon() must have sent destroy. Stop all queues + * and drain one more time and free fuse devices. Freeing fuse + * devices will drop their reference on fuse_conn and that in + * turn will drop its reference on virtio_fs object. + */ + virtio_fs_stop_all_queues(vfs); + virtio_fs_drain_all_queues(vfs); + virtio_fs_free_devs(vfs); +} + +static int virtio_fs_test_super(struct super_block *sb, + struct fs_context *fsc) +{ + struct fuse_conn *fc = fsc->s_fs_info; + + return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv; +} + +static int virtio_fs_set_super(struct super_block *sb, + struct fs_context *fsc) +{ + int err; + + err = get_anon_bdev(&sb->s_dev); + if (!err) + fuse_conn_get(fsc->s_fs_info); + + return err; +} + +static int virtio_fs_get_tree(struct fs_context *fsc) +{ + struct virtio_fs *fs; + struct super_block *sb; + struct fuse_conn *fc; + int err; + + /* This gets a reference on virtio_fs object. This ptr gets installed + * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() + * to drop the reference to this object. + */ + fs = virtio_fs_find_instance(fsc->source); + if (!fs) { + pr_info("virtio-fs: tag <%s> not found\n", fsc->source); + return -EINVAL; + } + + fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); + if (!fc) { + mutex_lock(&virtio_fs_mutex); + virtio_fs_put(fs); + mutex_unlock(&virtio_fs_mutex); + return -ENOMEM; + } + + fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops, + fs); + fc->release = fuse_free_conn; + fc->delete_stale = true; + + fsc->s_fs_info = fc; + sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); + fuse_conn_put(fc); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + err = virtio_fs_fill_super(sb); + if (err) { + deactivate_locked_super(sb); + return err; + } + + sb->s_flags |= SB_ACTIVE; + } + + WARN_ON(fsc->root); + fsc->root = dget(sb->s_root); + return 0; +} + +static const struct fs_context_operations virtio_fs_context_ops = { + .get_tree = virtio_fs_get_tree, +}; + +static int virtio_fs_init_fs_context(struct fs_context *fsc) +{ + fsc->ops = &virtio_fs_context_ops; + return 0; +} + +static struct file_system_type virtio_fs_type = { + .owner = THIS_MODULE, + .name = "virtiofs", + .init_fs_context = virtio_fs_init_fs_context, + .kill_sb = virtio_kill_sb, +}; + +static int __init virtio_fs_init(void) +{ + int ret; + + ret = register_virtio_driver(&virtio_fs_driver); + if (ret < 0) + return ret; + + ret = register_filesystem(&virtio_fs_type); + if (ret < 0) { + unregister_virtio_driver(&virtio_fs_driver); + return ret; + } + + return 0; +} +module_init(virtio_fs_init); + +static void __exit virtio_fs_exit(void) +{ + unregister_filesystem(&virtio_fs_type); + unregister_virtio_driver(&virtio_fs_driver); +} +module_exit(virtio_fs_exit); + +MODULE_AUTHOR("Stefan Hajnoczi "); +MODULE_DESCRIPTION("Virtio Filesystem"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS(KBUILD_MODNAME); +MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h new file mode 100644 index 000000000000..b02eb2ac3d99 --- /dev/null +++ b/include/uapi/linux/virtio_fs.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ + +#ifndef _UAPI_LINUX_VIRTIO_FS_H +#define _UAPI_LINUX_VIRTIO_FS_H + +#include +#include +#include +#include + +struct virtio_fs_config { + /* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */ + __u8 tag[36]; + + /* Number of request queues */ + __u32 num_request_queues; +} __attribute__((packed)); + +#endif /* _UAPI_LINUX_VIRTIO_FS_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 348fd0176f75..585e07b27333 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,6 +44,7 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ From b45e0c30bc58fb6fcaa42f1d1d813cefb8ab4117 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Wed, 21 Aug 2019 14:53:40 +0530 Subject: [PATCH 100/345] riscv: dts: Add DT support for SiFive FU540 PWM driver Add the PWM DT node in SiFive FU540 soc-specific DT file. Enable the PWM nodes in HiFive Unleashed board-specific DT file. Signed-off-by: Yash Shah Cc: Palmer Dabbelt [paul.walmsley@sifive.com: added chip-specific compatible string; dropped reg-names string from pwm1] Signed-off-by: Paul Walmsley --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 18 ++++++++++++++++++ .../boot/dts/sifive/hifive-unleashed-a00.dts | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 42b5ec223100..5a29211d396e 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -230,6 +230,24 @@ #size-cells = <0>; status = "disabled"; }; + pwm0: pwm@10020000 { + compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; + reg = <0x0 0x10020000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <42 43 44 45>; + clocks = <&prci PRCI_CLK_TLCLK>; + #pwm-cells = <3>; + status = "disabled"; + }; + pwm1: pwm@10021000 { + compatible = "sifive,fu540-c000-pwm", "sifive,pwm0"; + reg = <0x0 0x10021000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <46 47 48 49>; + clocks = <&prci PRCI_CLK_TLCLK>; + #pwm-cells = <3>; + status = "disabled"; + }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 93d68cbd64fe..104d334511cd 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -85,3 +85,11 @@ reg = <0>; }; }; + +&pwm0 { + status = "okay"; +}; + +&pwm1 { + status = "okay"; +}; From b6f2b2e600a27b79801bae0bf73e3c34438dc0ae Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Wed, 18 Sep 2019 18:38:24 +0800 Subject: [PATCH 101/345] RISC-V: Fix building error when CONFIG_SPARSEMEM_MANUAL=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a build break by adjusting where VMALLOC_* and FIXADDR_* are defined. This fixes the definition of the MEMMAP_* macros. CC init/main.o In file included from ./include/linux/mm.h:99, from ./include/linux/ring_buffer.h:5, from ./include/linux/trace_events.h:6, from ./include/trace/syscall.h:7, from ./include/linux/syscalls.h:85, from init/main.c:21: ./arch/riscv/include/asm/pgtable.h: In function ‘pmd_page’: ./arch/riscv/include/asm/pgtable.h:95:24: error: ‘VMALLOC_START’ undeclared (first use in this function); did you mean ‘VMEMMAP_START’? #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) ^~~~~~~~~~~~~ Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Greentime Hu [paul.walmsley@sifive.com: minor patch description fix] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 80905b27ee98..4f4162d90586 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -83,6 +83,18 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define FIXADDR_TOP VMALLOC_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + /* * Roughly size the vmemmap space to be large enough to fit enough * struct pages to map half the virtual address space. Then @@ -429,18 +441,6 @@ static inline void pgtable_cache_init(void) /* No page table caches to initialize */ } -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define FIXADDR_TOP VMALLOC_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. From 1cec0ce2e97f3e48d9af52e69e21bc614b14dd16 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Wed, 4 Sep 2019 16:16:16 +0000 Subject: [PATCH 102/345] RISC-V: Enable VIRTIO drivers in RV64 and RV32 defconfig This patch enables more VIRTIO drivers (such as console, rpmsg, 9p, rng, etc.) which are usable on KVM RISC-V Guest and Xvisor RISC-V Guest. Signed-off-by: Anup Patel Acked-by: Paolo Bonzini Reviewed-by: Paolo Bonzini Reviewed-by: Alexander Graf Signed-off-by: Paul Walmsley --- arch/riscv/configs/defconfig | 11 +++++++++++ arch/riscv/configs/rv32_defconfig | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 3efff552a261..420a0dbef386 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -54,6 +57,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y @@ -61,6 +65,7 @@ CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -73,7 +78,12 @@ CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_MMC=y CONFIG_MMC_SPI=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -86,6 +96,7 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index 7da93e494445..87ee6e62b64b 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -29,6 +29,8 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -39,6 +41,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -54,11 +57,13 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -69,7 +74,12 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y +CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_SIFIVE_PLIC=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -83,6 +93,7 @@ CONFIG_NFS_V4=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y From 2d2e0b90a08f1e3a47b3ee852b27c219295423ef Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 18 Sep 2019 16:07:28 -0400 Subject: [PATCH 103/345] drm: Fix kerneldoc and remove unused struct member in self_refresh helper Artifacts of previous revisions. Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link to v1: https://patchwork.freedesktop.org/patch/msgid/20190917200443.64481-1-sean@poorly.run Link: https://patchwork.freedesktop.org/patch/msgid/20190918200734.149876-1-sean@poorly.run Changes in v2: - None --- drivers/gpu/drm/drm_self_refresh_helper.c | 1 - include/drm/drm_crtc.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 4b9424a8f1f1..9095cebf2147 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -53,7 +53,6 @@ struct drm_self_refresh_data { struct drm_crtc *crtc; struct delayed_work entry_work; - struct drm_atomic_state *save_state; unsigned int entry_delay_ms; }; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c4528eb5d168..408b6f4e63c0 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1108,7 +1108,7 @@ struct drm_crtc { /** * @self_refresh_data: Holds the state for the self refresh helpers * - * Initialized via drm_self_refresh_helper_register(). + * Initialized via drm_self_refresh_helper_init(). */ struct drm_self_refresh_data *self_refresh_data; }; From d4da4e33341c5e6159543acc03559cb24f520bc2 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 18 Sep 2019 16:07:29 -0400 Subject: [PATCH 104/345] drm: Measure Self Refresh Entry/Exit times to avoid thrashing Currently the self refresh idle timer is a const set by the crtc. This is fine if the self refresh entry/exit times are well-known for all panels used on that crtc. However panels and workloads can vary quite a bit, and a timeout which works well for one doesn't work well for another. In the extreme, if the timeout is too short we could get in a situation where the self refresh exits are taking so long we queue up a self refresh entry before the exit commit is even finished. This patch changes the idle timeout to a moving average of the entry times + a moving average of exit times + the crtc constant. This patch was tested on rockchip, with a kevin CrOS panel the idle delay averages out to about ~235ms (35 entry + 100 exit + 100 const). On the same board, the bob panel idle delay lands around ~340ms (90 entry + 150 exit + 100 const). WRT the dedicated mutex in self_refresh_data, it would be nice if we could rely on drm_crtc.mutex to protect the average times, but there are a few reasons why a separate lock is a better choice: - We can't rely on drm_crtc.mutex being held if we're doing a nonblocking commit - We can't grab drm_crtc.mutex since drm_modeset_lock() doesn't tell us whether the lock was already held in the acquire context (it eats -EALREADY), so we can't tell if we should drop it or not - We don't need such a heavy-handed lock for what we're trying to do, commit ordering doesn't matter, so a point-of-use lock will be less contentious Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link to v1: https://patchwork.freedesktop.org/patch/msgid/20190917200443.64481-2-sean@poorly.run Link: https://patchwork.freedesktop.org/patch/msgid/20190918200734.149876-2-sean@poorly.run Changes in v2: - Migrate locking explanation from comment to commit msg (Daniel) - Turf constant entry delay and multiply the avg times by 2 (Daniel) --- drivers/gpu/drm/drm_atomic_helper.c | 20 ++++++ drivers/gpu/drm/drm_self_refresh_helper.c | 72 +++++++++++++++++++-- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 5 +- include/drm/drm_self_refresh_helper.h | 6 +- 4 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 3a67f63c3146..3ef2ac52ce94 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -26,6 +26,7 @@ */ #include +#include #include #include @@ -1580,9 +1581,23 @@ static void commit_tail(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; const struct drm_mode_config_helper_funcs *funcs; + ktime_t start; + s64 commit_time_ms; funcs = dev->mode_config.helper_private; + /* + * We're measuring the _entire_ commit, so the time will vary depending + * on how many fences and objects are involved. For the purposes of self + * refresh, this is desirable since it'll give us an idea of how + * congested things are. This will inform our decision on how often we + * should enter self refresh after idle. + * + * These times will be averaged out in the self refresh helpers to avoid + * overreacting over one outlier frame + */ + start = ktime_get(); + drm_atomic_helper_wait_for_fences(dev, old_state, false); drm_atomic_helper_wait_for_dependencies(old_state); @@ -1592,6 +1607,11 @@ static void commit_tail(struct drm_atomic_state *old_state) else drm_atomic_helper_commit_tail(old_state); + commit_time_ms = ktime_ms_delta(ktime_get(), start); + if (commit_time_ms > 0) + drm_self_refresh_helper_update_avg_times(old_state, + (unsigned long)commit_time_ms); + drm_atomic_helper_commit_cleanup_done(old_state); drm_atomic_state_put(old_state); diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c index 9095cebf2147..68f4765a5896 100644 --- a/drivers/gpu/drm/drm_self_refresh_helper.c +++ b/drivers/gpu/drm/drm_self_refresh_helper.c @@ -5,6 +5,7 @@ * Authors: * Sean Paul */ +#include #include #include #include @@ -50,10 +51,17 @@ * atomic_check when &drm_crtc_state.self_refresh_active is true. */ +#define SELF_REFRESH_AVG_SEED_MS 200 + +DECLARE_EWMA(psr_time, 4, 4) + struct drm_self_refresh_data { struct drm_crtc *crtc; struct delayed_work entry_work; - unsigned int entry_delay_ms; + + struct mutex avg_mutex; + struct ewma_psr_time entry_avg_ms; + struct ewma_psr_time exit_avg_ms; }; static void drm_self_refresh_helper_entry_work(struct work_struct *work) @@ -121,6 +129,44 @@ out_drop_locks: drm_modeset_acquire_fini(&ctx); } +/** + * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages + * @state: the state which has just been applied to hardware + * @commit_time_ms: the amount of time in ms that this commit took to complete + * + * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will + * update the average entry/exit self refresh times on self refresh transitions. + * These averages will be used when calculating how long to delay before + * entering self refresh mode after activity. + */ +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { + struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; + struct ewma_psr_time *time; + + if (old_crtc_state->self_refresh_active == + new_crtc_state->self_refresh_active) + continue; + + if (new_crtc_state->self_refresh_active) + time = &sr_data->entry_avg_ms; + else + time = &sr_data->exit_avg_ms; + + mutex_lock(&sr_data->avg_mutex); + ewma_psr_time_add(time, commit_time_ms); + mutex_unlock(&sr_data->avg_mutex); + } +} +EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times); + /** * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit * @state: the state currently being checked @@ -152,6 +198,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state) for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct drm_self_refresh_data *sr_data; + unsigned int delay; /* Don't trigger the entry timer when we're already in SR */ if (crtc_state->self_refresh_active) @@ -161,8 +208,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state) if (!sr_data) continue; + mutex_lock(&sr_data->avg_mutex); + delay = (ewma_psr_time_read(&sr_data->entry_avg_ms) + + ewma_psr_time_read(&sr_data->exit_avg_ms)) * 2; + mutex_unlock(&sr_data->avg_mutex); + mod_delayed_work(system_wq, &sr_data->entry_work, - msecs_to_jiffies(sr_data->entry_delay_ms)); + msecs_to_jiffies(delay)); } } EXPORT_SYMBOL(drm_self_refresh_helper_alter_state); @@ -170,12 +222,10 @@ EXPORT_SYMBOL(drm_self_refresh_helper_alter_state); /** * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc * @crtc: the crtc which supports self refresh supported displays - * @entry_delay_ms: amount of inactivity to wait before entering self refresh * * Returns zero if successful or -errno on failure */ -int drm_self_refresh_helper_init(struct drm_crtc *crtc, - unsigned int entry_delay_ms) +int drm_self_refresh_helper_init(struct drm_crtc *crtc) { struct drm_self_refresh_data *sr_data = crtc->self_refresh_data; @@ -189,8 +239,18 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc, INIT_DELAYED_WORK(&sr_data->entry_work, drm_self_refresh_helper_entry_work); - sr_data->entry_delay_ms = entry_delay_ms; sr_data->crtc = crtc; + mutex_init(&sr_data->avg_mutex); + ewma_psr_time_init(&sr_data->entry_avg_ms); + ewma_psr_time_init(&sr_data->exit_avg_ms); + + /* + * Seed the averages so they're non-zero (and sufficiently large + * for even poorly performing panels). As time goes on, this will be + * averaged out and the values will trend to their true value. + */ + ewma_psr_time_add(&sr_data->entry_avg_ms, SELF_REFRESH_AVG_SEED_MS); + ewma_psr_time_add(&sr_data->exit_avg_ms, SELF_REFRESH_AVG_SEED_MS); crtc->self_refresh_data = sr_data; return 0; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 2f821c58007c..613404f86668 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -39,8 +39,6 @@ #include "rockchip_drm_vop.h" #include "rockchip_rgb.h" -#define VOP_SELF_REFRESH_ENTRY_DELAY_MS 100 - #define VOP_WIN_SET(vop, win, name, v) \ vop_reg_set(vop, &win->phy->name, win->base, ~0, v, #name) #define VOP_SCL_SET(vop, win, name, v) \ @@ -1563,8 +1561,7 @@ static int vop_create_crtc(struct vop *vop) init_completion(&vop->line_flag_completion); crtc->port = port; - ret = drm_self_refresh_helper_init(crtc, - VOP_SELF_REFRESH_ENTRY_DELAY_MS); + ret = drm_self_refresh_helper_init(crtc); if (ret) DRM_DEV_DEBUG_KMS(vop->dev, "Failed to init %s with SR helpers %d, ignoring\n", diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h index 397a583ccca7..5b79d253fb46 100644 --- a/include/drm/drm_self_refresh_helper.h +++ b/include/drm/drm_self_refresh_helper.h @@ -12,9 +12,9 @@ struct drm_atomic_state; struct drm_crtc; void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state); +void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state, + unsigned int commit_time_ms); -int drm_self_refresh_helper_init(struct drm_crtc *crtc, - unsigned int entry_delay_ms); - +int drm_self_refresh_helper_init(struct drm_crtc *crtc); void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc); #endif From e430d802d6a3aaf61bd3ed03d9404888a29b9bf9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 19 Sep 2019 20:04:47 +0800 Subject: [PATCH 105/345] timer: Read jiffies once when forwarding base clk The timer delayed for more than 3 seconds warning was triggered during testing. Workqueue: events_unbound sched_tick_remote RIP: 0010:sched_tick_remote+0xee/0x100 ... Call Trace: process_one_work+0x18c/0x3a0 worker_thread+0x30/0x380 kthread+0x113/0x130 ret_from_fork+0x22/0x40 The reason is that the code in collect_expired_timers() uses jiffies unprotected: if (next_event > jiffies) base->clk = jiffies; As the compiler is allowed to reload the value base->clk can advance between the check and the store and in the worst case advance farther than next event. That causes the timer expiry to be delayed until the wheel pointer wraps around. Convert the code to use READ_ONCE() Fixes: 236968383cf5 ("timers: Optimize collect_expired_timers() for NOHZ") Signed-off-by: Li RongQing Signed-off-by: Liang ZhiCheng Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1568894687-14499-1-git-send-email-lirongqing@baidu.com --- kernel/time/timer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0e315a2e77ae..4820823515e9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1678,24 +1678,26 @@ void timer_clear_idle(void) static int collect_expired_timers(struct timer_base *base, struct hlist_head *heads) { + unsigned long now = READ_ONCE(jiffies); + /* * NOHZ optimization. After a long idle sleep we need to forward the * base to current jiffies. Avoid a loop by searching the bitfield for * the next expiring timer. */ - if ((long)(jiffies - base->clk) > 2) { + if ((long)(now - base->clk) > 2) { unsigned long next = __next_timer_interrupt(base); /* * If the next timer is ahead of time forward to current * jiffies, otherwise forward to the next expiry time: */ - if (time_after(next, jiffies)) { + if (time_after(next, now)) { /* * The call site will increment base->clk and then * terminate the expiry loop immediately. */ - base->clk = jiffies; + base->clk = now; return 0; } base->clk = next; From edfa07504c5bb188b262fb2f4fc487de966f89a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 4 Sep 2019 13:30:32 +0100 Subject: [PATCH 106/345] drm/panfrost: Fix regulator_get_optional() misuse The panfrost driver requests a supply using regulator_get_optional() but both the name of the supply and the usage pattern suggest that it is being used for the main power for the device and is not at all optional for the device for function, there is no meaningful handling for absent supplies. Such regulators should use the vanilla regulator_get() interface, it will ensure that even if a supply is not described in the system integration one will be provided in software. Signed-off-by: Mark Brown Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190904123032.23263-1-broonie@kernel.org --- drivers/gpu/drm/panfrost/panfrost_device.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 46b0b02e4289..238fb6d54df4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -89,12 +89,9 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev) { int ret; - pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali"); + pfdev->regulator = devm_regulator_get(pfdev->dev, "mali"); if (IS_ERR(pfdev->regulator)) { ret = PTR_ERR(pfdev->regulator); - pfdev->regulator = NULL; - if (ret == -ENODEV) - return 0; dev_err(pfdev->dev, "failed to get regulator: %d\n", ret); return ret; } @@ -110,8 +107,7 @@ static int panfrost_regulator_init(struct panfrost_device *pfdev) static void panfrost_regulator_fini(struct panfrost_device *pfdev) { - if (pfdev->regulator) - regulator_disable(pfdev->regulator); + regulator_disable(pfdev->regulator); } int panfrost_device_init(struct panfrost_device *pfdev) From d18a96620411298f7925bf9a9e26e9e7add8ea2b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 6 Sep 2019 15:20:53 +0100 Subject: [PATCH 107/345] drm/panfrost: Remove NULL checks for regulator devm_regulator_get() is now used to populate pfdev->regulator which ensures that this cannot be NULL (a dummy regulator will be returned if necessary). So remove the checks in panfrost_devfreq_target(). Signed-off-by: Steven Price Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/3e3a2c8a-b4fc-8af6-39e1-b26160db2c7c@arm.com --- drivers/gpu/drm/panfrost/panfrost_devfreq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index a1f5fa6a742a..12ff77dacc95 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -39,7 +39,7 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. */ - if (old_clk_rate < target_rate && pfdev->regulator) { + if (old_clk_rate < target_rate) { err = regulator_set_voltage(pfdev->regulator, target_volt, target_volt); if (err) { @@ -53,14 +53,12 @@ static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, if (err) { dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate, err); - if (pfdev->regulator) - regulator_set_voltage(pfdev->regulator, - pfdev->devfreq.cur_volt, - pfdev->devfreq.cur_volt); + regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt, + pfdev->devfreq.cur_volt); return err; } - if (old_clk_rate > target_rate && pfdev->regulator) { + if (old_clk_rate > target_rate) { err = regulator_set_voltage(pfdev->regulator, target_volt, target_volt); if (err) From 65e51e30d8625c82ddfe405da46124e9bbffaa71 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 13 Sep 2019 17:03:10 +0100 Subject: [PATCH 108/345] drm/panfrost: Prevent race when handling page fault When handling a GPU page fault addr_to_drm_mm_node() is used to translate the GPU address to a buffer object. However it is possible for the buffer object to be freed after the function has returned resulting in a use-after-free of the BO. Change addr_to_drm_mm_node to return the panfrost_gem_object with an extra reference on it, preventing the BO from being freed until after the page fault has been handled. Signed-off-by: Steven Price Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20190913160310.50444-1-steven.price@arm.com --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 55 ++++++++++++++++--------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 387d830cb7cf..6e7891ded464 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -386,28 +386,40 @@ void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv) free_io_pgtable_ops(mmu->pgtbl_ops); } -static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) +static struct panfrost_gem_object * +addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) { - struct drm_mm_node *node = NULL; + struct panfrost_gem_object *bo = NULL; + struct panfrost_file_priv *priv; + struct drm_mm_node *node; u64 offset = addr >> PAGE_SHIFT; struct panfrost_mmu *mmu; spin_lock(&pfdev->as_lock); list_for_each_entry(mmu, &pfdev->as_lru_list, list) { - struct panfrost_file_priv *priv; - if (as != mmu->as) - continue; + if (as == mmu->as) + break; + } + if (as != mmu->as) + goto out; - priv = container_of(mmu, struct panfrost_file_priv, mmu); - drm_mm_for_each_node(node, &priv->mm) { - if (offset >= node->start && offset < (node->start + node->size)) - goto out; + priv = container_of(mmu, struct panfrost_file_priv, mmu); + + spin_lock(&priv->mm_lock); + + drm_mm_for_each_node(node, &priv->mm) { + if (offset >= node->start && + offset < (node->start + node->size)) { + bo = drm_mm_node_to_panfrost_bo(node); + drm_gem_object_get(&bo->base.base); + break; } } + spin_unlock(&priv->mm_lock); out: spin_unlock(&pfdev->as_lock); - return node; + return bo; } #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) @@ -415,29 +427,28 @@ out: int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) { int ret, i; - struct drm_mm_node *node; struct panfrost_gem_object *bo; struct address_space *mapping; pgoff_t page_offset; struct sg_table *sgt; struct page **pages; - node = addr_to_drm_mm_node(pfdev, as, addr); - if (!node) + bo = addr_to_drm_mm_node(pfdev, as, addr); + if (!bo) return -ENOENT; - bo = drm_mm_node_to_panfrost_bo(node); if (!bo->is_heap) { dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)", - node->start << PAGE_SHIFT); - return -EINVAL; + bo->node.start << PAGE_SHIFT); + ret = -EINVAL; + goto err_bo; } WARN_ON(bo->mmu->as != as); /* Assume 2MB alignment and size multiple */ addr &= ~((u64)SZ_2M - 1); page_offset = addr >> PAGE_SHIFT; - page_offset -= node->start; + page_offset -= bo->node.start; mutex_lock(&bo->base.pages_lock); @@ -446,7 +457,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO); if (!bo->sgts) { mutex_unlock(&bo->base.pages_lock); - return -ENOMEM; + ret = -ENOMEM; + goto err_bo; } pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT, @@ -455,7 +467,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) kfree(bo->sgts); bo->sgts = NULL; mutex_unlock(&bo->base.pages_lock); - return -ENOMEM; + ret = -ENOMEM; + goto err_bo; } bo->base.pages = pages; bo->base.pages_use_count = 1; @@ -493,12 +506,16 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr); + drm_gem_object_put_unlocked(&bo->base.base); + return 0; err_map: sg_free_table(sgt); err_pages: drm_gem_shmem_put_pages(&bo->base); +err_bo: + drm_gem_object_put_unlocked(&bo->base.base); return ret; } From 91d99a724e9c60e14332c26ab2284bf696b94c8e Mon Sep 17 00:00:00 2001 From: Wang Xiayang Date: Fri, 20 Sep 2019 14:55:47 +0800 Subject: [PATCH 109/345] nios2: force the string buffer NULL-terminated strncpy() does not ensure NULL-termination when the input string size equals to the destination buffer size COMMAND_LINE_SIZE. Besides, grep under arch/ with 'boot_command_line' shows no other arch-specific code uses strncpy() when copying boot_command_line. Use strlcpy() instead. This issue is identified by a Coccinelle script. Signed-off-by: Wang Xiayang Signed-off-by: Ley Foon Tan --- arch/nios2/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 6bbd4ae2beb0..4cf35b09c0ec 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -123,7 +123,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, dtb_passed = r6; if (r7) - strncpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); + strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); } #endif @@ -131,10 +131,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, #ifndef CONFIG_CMDLINE_FORCE if (cmdline_passed[0]) - strncpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB else - strncpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif #endif From 79743bc927f695e5a24f26c31cfe2d69f6ee00f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Sep 2019 15:37:21 -0300 Subject: [PATCH 110/345] perf jvmti: Link against tools/lib/string.o to have weak strlcpy() That is needed in systems such some S/390 distros. $ readelf -s /tmp/build/perf/jvmti/jvmti-in.o | grep strlcpy 452: 0000000000002990 125 FUNC WEAK DEFAULT 119 strlcpy $ Thanks to Jiri Olsa for fixing up my initial stab at this, I forgot how Makefiles are picky about spaces versus tabs. Reported-by: Thomas Richter Cc: Adrian Hunter Cc: Andreas Krebbel Cc: Jiri Olsa Cc: Namhyung Kim Cc: Sergey Melnikov Link: https://lkml.kernel.org/n/tip-x8vg9sffgb2t1tzqmhkrulh7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Build | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build index eaeb8cb5379b..1e148bbdf820 100644 --- a/tools/perf/jvmti/Build +++ b/tools/perf/jvmti/Build @@ -1,8 +1,17 @@ jvmti-y += libjvmti.o jvmti-y += jvmti_agent.o +# For strlcpy +jvmti-y += libstring.o + CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux CFLAGS_REMOVE_jvmti = -Wmissing-declarations CFLAGS_REMOVE_jvmti += -Wstrict-prototypes CFLAGS_REMOVE_jvmti += -Wextra CFLAGS_REMOVE_jvmti += -Wwrite-strings + +CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" + +$(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) From 9e282b739466dd319667788417426145da62e381 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 2 Sep 2019 16:08:19 +0000 Subject: [PATCH 111/345] perf tools: Add PMU event JSON files for ARM Cortex-A76 and, Neoverse N1. The source of the event codes and description text was the Neoverse N1 technical reference manual at: http://infocenter.arm.com/help/topic/com.arm.doc.100616_0301_01_en/neoverse_n1_trm_100616_0301_01_en.pdf The Cortex-A76 shares the same event IDs as the Neoverse N1 and they can be viewed at: https://static.docs.arm.com/100798/0400/cortex_a76_trm_100798_0400_00_en.pdf Signed-off-by: James Clark Cc: "linux-perf-users@vger.kernel.org" Cc: Alexander Shishkin Cc: Jeremy Linton Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: james clark Cc: nd Link: http://lore.kernel.org/lkml/20190902160713.1425-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/cortex-a76-n1/branch.json | 14 ++ .../arch/arm64/arm/cortex-a76-n1/bus.json | 24 ++ .../arch/arm64/arm/cortex-a76-n1/cache.json | 207 ++++++++++++++++++ .../arm64/arm/cortex-a76-n1/exception.json | 52 +++++ .../arm64/arm/cortex-a76-n1/instruction.json | 108 +++++++++ .../arch/arm64/arm/cortex-a76-n1/memory.json | 23 ++ .../arch/arm64/arm/cortex-a76-n1/other.json | 7 + .../arm64/arm/cortex-a76-n1/pipeline.json | 14 ++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 2 + 9 files changed, 451 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json new file mode 100644 index 000000000000..b5e5d055c70d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -0,0 +1,14 @@ +[ + { + "PublicDescription": "Mispredicted or not predicted branch speculatively executed. This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken.", + "EventCode": "0x10", + "EventName": "BR_MIS_PRED", + "BriefDescription": "Mispredicted or not predicted branch speculatively executed." + }, + { + "PublicDescription": "Predictable branch speculatively executed. This event counts all predictable branches.", + "EventCode": "0x12", + "EventName": "BR_PRED", + "BriefDescription": "Predictable branch speculatively executed." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json new file mode 100644 index 000000000000..fce7309ae624 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -0,0 +1,24 @@ +[ + { + "EventCode": "0x11", + "EventName": "CPU_CYCLES", + "BriefDescription": "The number of core clock cycles." + }, + { + "PublicDescription": "Bus access. This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", + "EventCode": "0x19", + "EventName": "BUS_ACCESS", + "BriefDescription": "Bus access." + }, + { + "EventCode": "0x1D", + "EventName": "BUS_CYCLES", + "BriefDescription": "Bus cycles. This event duplicates CPU_CYCLES." + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json new file mode 100644 index 000000000000..24594081c199 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -0,0 +1,207 @@ +[ + { + "PublicDescription": "L1 instruction cache refill. This event counts any instruction fetch which misses in the cache.", + "EventCode": "0x01", + "EventName": "L1I_CACHE_REFILL", + "BriefDescription": "L1 instruction cache refill" + }, + { + "PublicDescription": "L1 instruction TLB refill. This event counts any refill of the instruction L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "EventCode": "0x02", + "EventName": "L1I_TLB_REFILL", + "BriefDescription": "L1 instruction TLB refill" + }, + { + "PublicDescription": "L1 data cache refill. This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", + "EventCode": "0x03", + "EventName": "L1D_CACHE_REFILL", + "BriefDescription": "L1 data cache refill" + }, + { + "PublicDescription": "L1 data cache access. This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", + "EventCode": "0x04", + "EventName": "L1D_CACHE", + "BriefDescription": "L1 data cache access" + }, + { + "PublicDescription": "L1 data TLB refill. This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", + "EventCode": "0x05", + "EventName": "L1D_TLB_REFILL", + "BriefDescription": "L1 data TLB refill" + }, + { + "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", + "EventCode": "0x14", + "EventName": "L1I_CACHE", + "BriefDescription": "L1 instruction cache access" + }, + { + "PublicDescription": "L1 data cache Write-Back. This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", + "EventCode": "0x15", + "EventName": "L1D_CACHE_WB", + "BriefDescription": "L1 data cache Write-Back" + }, + { + "PublicDescription": "L2 data cache access. This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", + "EventCode": "0x16", + "EventName": "L2D_CACHE", + "BriefDescription": "L2 data cache access" + }, + { + "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. L2 refills caused by stashes into L2 should not be counted", + "EventCode": "0x17", + "EventName": "L2D_CACHE_REFILL", + "BriefDescription": "L2 data cache refill" + }, + { + "PublicDescription": "L2 data cache write-back. This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", + "EventCode": "0x18", + "EventName": "L2D_CACHE_WB", + "BriefDescription": "L2 data cache write-back" + }, + { + "PublicDescription": "L2 data cache allocation without refill. This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", + "EventCode": "0x20", + "EventName": "L2D_CACHE_ALLOCATE", + "BriefDescription": "L2 data cache allocation without refill" + }, + { + "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", + "EventCode": "0x25", + "EventName": "L1D_TLB", + "BriefDescription": "Level 1 data TLB access." + }, + { + "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB.This event counts regardless of whether the MMU is enabled.", + "EventCode": "0x26", + "EventName": "L1I_TLB", + "BriefDescription": "Level 1 instruction TLB access" + }, + { + "PublicDescription": "This event counts any full cache line write into the L3 cache which does not cause a linefill, including write-backs from L2 to L3 and full-line writes which do not allocate into L2", + "EventCode": "0x29", + "EventName": "L3D_CACHE_ALLOCATE", + "BriefDescription": "Allocation without refill" + }, + { + "PublicDescription": "Attributable Level 3 unified cache refill. This event counts for any cacheable read transaction returning datafrom the SCU for which the data source was outside the cluster. Transactions such as ReadUnique are counted here as 'read' transactions, even though they can be generated by store instructions.", + "EventCode": "0x2A", + "EventName": "L3D_CACHE_REFILL", + "BriefDescription": "Attributable Level 3 unified cache refill." + }, + { + "PublicDescription": "Attributable Level 3 unified cache access. This event counts for any cacheable read transaction returning datafrom the SCU, or for any cacheable write to the SCU.", + "EventCode": "0x2B", + "EventName": "L3D_CACHE", + "BriefDescription": "Attributable Level 3 unified cache access." + }, + { + "PublicDescription": "Attributable L2 data or unified TLB refill. This event counts on anyrefill of the L2 TLB, caused by either an instruction or data access.This event does not count if the MMU is disabled.", + "EventCode": "0x2D", + "EventName": "L2D_TLB_REFILL", + "BriefDescription": "Attributable L2 data or unified TLB refill" + }, + { + "PublicDescription": "Attributable L2 data or unified TLB access. This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). This event does not count if the MMU is disabled.", + "EventCode": "0x2F", + "EventName": "L2D_TLB", + "BriefDescription": "Attributable L2 data or unified TLB access" + }, + { + "PublicDescription": "Access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count.", + "EventCode": "0x34", + "EventName": "DTLB_WALK", + "BriefDescription": "Access to data TLB that caused a page table walk." + }, + { + "PublicDescription": "Access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2D_TLB_REFILL to count.", + "EventCode": "0x35", + "EventName": "ITLB_WALK", + "BriefDescription": "Access to instruction TLB that caused a page table walk." + }, + { + "EventCode": "0x36", + "EventName": "LL_CACHE_RD", + "BriefDescription": "Last level cache access, read" + }, + { + "EventCode": "0x37", + "EventName": "LL_CACHE_MISS_RD", + "BriefDescription": "Last level cache miss, read" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json new file mode 100644 index 000000000000..98d29c862320 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -0,0 +1,52 @@ +[ + { + "EventCode": "0x09", + "EventName": "EXC_TAKEN", + "BriefDescription": "Exception taken." + }, + { + "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", + "EventCode": "0x1A", + "EventName": "MEMORY_ERROR", + "BriefDescription": "Local memory error." + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_UNDEF" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json new file mode 100644 index 000000000000..c153ac706d8d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -0,0 +1,108 @@ +[ + { + "PublicDescription": "Software increment. Instruction architecturally executed (condition code check pass).", + "EventCode": "0x00", + "EventName": "SW_INCR", + "BriefDescription": "Software increment." + }, + { + "PublicDescription": "Instruction architecturally executed. This event counts all retired instructions, including those that fail their condition check.", + "EventCode": "0x08", + "EventName": "INST_RETIRED", + "BriefDescription": "Instruction architecturally executed." + }, + { + "EventCode": "0x0A", + "EventName": "EXC_RETURN", + "BriefDescription": "Instruction architecturally executed, condition code check pass, exception return." + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR. This event only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", + "EventCode": "0x0B", + "EventName": "CID_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR." + }, + { + "EventCode": "0x1B", + "EventName": "INST_SPEC", + "BriefDescription": "Operation speculatively executed" + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to TTBR. This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", + "EventCode": "0x1C", + "EventName": "TTBR_WRITE_RETIRED", + "BriefDescription": "Instruction architecturally executed, condition code check pass, write to TTBR" + }, + { + "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", + "EventCode": "0x21", + "EventName": "BR_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch." + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", + "EventCode": "0x22", + "EventName": "BR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch." + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json new file mode 100644 index 000000000000..b86643253f19 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -0,0 +1,23 @@ +[ + { + "PublicDescription": "Data memory access. This event counts memory accesses due to load or store instructions. This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", + "EventCode": "0x13", + "EventName": "MEM_ACCESS", + "BriefDescription": "Data memory access" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json new file mode 100644 index 000000000000..8bde029a62d5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -0,0 +1,7 @@ +[ + { + "EventCode": "0x31", + "EventName": "REMOTE_ACCESS", + "BriefDescription": "Access to another socket in a multi-socket system" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json new file mode 100644 index 000000000000..010a647f9d02 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -0,0 +1,14 @@ +[ + { + "PublicDescription": "No operation issued because of the frontend. The counter counts on any cycle when there are no fetched instructions available to dispatch.", + "EventCode": "0x23", + "EventName": "STALL_FRONTEND", + "BriefDescription": "No operation issued because of the frontend." + }, + { + "PublicDescription": "No operation issued because of the backend. The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", + "EventCode": "0x24", + "EventName": "STALL_BACKEND", + "BriefDescription": "No operation issued because of the backend." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 927fcddcb4aa..0d609149b82a 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -16,6 +16,8 @@ 0x00000000420f1000,v1,arm/cortex-a53,core 0x00000000410fd070,v1,arm/cortex-a57-a72,core 0x00000000410fd080,v1,arm/cortex-a57-a72,core +0x00000000410fd0b0,v1,arm/cortex-a76-n1,core +0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000480fd010,v1,hisilicon/hip08,core From 8fcbeae44fde9756036147664d1e74fab7c9902c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2019 09:47:53 -0300 Subject: [PATCH 112/345] perf tools: Remove needless builtin.h include directives Now that builtin.h isn't included by any other header, we can check where it is really needed, i.e. we can remove it and be sure that it isn't being obtained indirectly. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-mn7jheex85iw9qo6tlv26hb2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 1 - tools/perf/bench/sched-messaging.c | 1 - tools/perf/bench/sched-pipe.c | 1 - tools/perf/util/jitdump.c | 1 - 4 files changed, 4 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 62b8ef4bcb1f..5797253b9700 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -9,7 +9,6 @@ /* For the CLR_() macros */ #include -#include "../builtin.h" #include #include "../util/cloexec.h" diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index c63eb9a46346..6e499b32bf00 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -12,7 +12,6 @@ #include "../util/util.h" #include -#include "../builtin.h" #include "bench.h" /* Test groups of 20 processes spraying to 20 receivers */ diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 35b07f197d48..edd40aafa318 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -11,7 +11,6 @@ */ #include "../util/util.h" #include -#include "../builtin.h" #include "bench.h" #include diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index b80f29bfc7bb..00db9957fdb0 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -27,7 +27,6 @@ #include "jit.h" #include "jitdump.h" #include "genelf.h" -#include "../builtin.h" #include #include From b22bb139dcb370cd3a7f3c5ae29d55bb69235ace Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2019 09:57:50 -0300 Subject: [PATCH 113/345] perf debug: No need to include ui/util.h Nothing from that file is used in util/debug.h, it is only needed in some places that get it indirectly via including util/debug.h, remove that entanglement. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-hn9v4jdova2nt018fqsjyzun@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/scripts.c | 1 + tools/perf/ui/gtk/setup.c | 3 ++- tools/perf/util/debug.h | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 586a21acc13d..3b81075b8da8 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -2,6 +2,7 @@ #include "../../builtin.h" #include "../../perf.h" #include "../../util/util.h" +#include "../util.h" #include "../../util/hist.h" #include "../../util/debug.h" #include "../../util/symbol.h" diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 1a2616b97b5c..f5eee4d66873 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "gtk.h" -#include "../../util/debug.h" +#include +#include "../util.h" extern struct perf_error_ops perf_gtk_eops; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index b2deee987ffa..d25ae1c4cee9 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -3,9 +3,9 @@ #ifndef __PERF_DEBUG_H #define __PERF_DEBUG_H +#include #include #include -#include "../ui/util.h" extern int verbose; extern bool quiet, dump_trace; From 4a903c2e151423be9af19c7eb35d4667be21c4c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2019 10:11:53 -0300 Subject: [PATCH 114/345] perf tools: Remove debug.h from places where it is not needed Pruning a bit more the includes dependency tree. Building this thing on lots of containers takes time, we better reduce the time per build, each container is doing 6 builds when clang and clang-devel are available, and the plan is to do a 'make -C tools/perf build-test' that have many more. Also helps when doing normal development, as touching some random file will have a much reduced chance of triggering lots of rebuilds. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-r889ur2cxe16m91m2a4pl15p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/unwind-libunwind.c | 2 +- tools/perf/arch/powerpc/util/sym-handling.c | 1 - tools/perf/tests/clang.c | 1 - tools/perf/ui/browsers/header.c | 1 - tools/perf/ui/gtk/helpline.c | 1 - tools/perf/ui/gtk/util.c | 1 - tools/perf/ui/helpline.c | 1 - tools/perf/ui/tui/helpline.c | 1 - tools/perf/ui/tui/util.c | 1 - tools/perf/util/branch.c | 1 - tools/perf/util/demangle-java.c | 1 - tools/perf/util/libunwind/arm64.c | 1 - tools/perf/util/libunwind/x86_32.c | 1 - tools/perf/util/target.c | 1 - tools/perf/util/usage.c | 1 - tools/perf/util/zlib.c | 2 -- 16 files changed, 1 insertion(+), 17 deletions(-) diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 002520d4036b..1495a9523a23 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -5,8 +5,8 @@ #include #include "perf_regs.h" #include "../../util/unwind.h" -#include "../../util/debug.h" #endif +#include "../../util/debug.h" int LIBUNWIND__ARCH_REG_ID(int regnum) { diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 8a4b717e0a53..abb7a12d8f93 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -4,7 +4,6 @@ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation */ -#include "debug.h" #include "dso.h" #include "symbol.h" #include "map.h" diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index f45fe11dcf50..ff2711a40940 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "tests.h" -#include "debug.h" #include "util.h" #include "c++/clang-c.h" #include diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index 0f59a7001479..57e6e4332f74 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "util/debug.h" #include "ui/browser.h" #include "ui/keysyms.h" #include "ui/ui.h" diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c index e166da9ec767..e40a006aead8 100644 --- a/tools/perf/ui/gtk/helpline.c +++ b/tools/perf/ui/gtk/helpline.c @@ -6,7 +6,6 @@ #include "gtk.h" #include "../ui.h" #include "../helpline.h" -#include "../../util/debug.h" static void gtk_helpline_pop(void) { diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index c2c558958b9c..c47f5c387838 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "../util.h" -#include "../../util/debug.h" #include "gtk.h" #include diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 54bcd08df87e..e00901450f91 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -3,7 +3,6 @@ #include #include -#include "../util/debug.h" #include "helpline.h" #include "ui.h" #include "../util/util.h" diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 5f188f678c55..298d6af82fdd 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -6,7 +6,6 @@ #include #include -#include "../../util/debug.h" #include "../helpline.h" #include "../ui.h" #include "../libslang.h" diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index 087d9ab054c8..b98dd0e31dc1 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -5,7 +5,6 @@ #include #include -#include "../../util/debug.h" #include "../browser.h" #include "../keysyms.h" #include "../helpline.h" diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 9d1e090084a2..52261e5f718a 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -1,5 +1,4 @@ #include "util/util.h" -#include "util/debug.h" #include "util/map_symbol.h" #include "util/branch.h" #include diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 763328c151e9..6fb7f34c0814 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -3,7 +3,6 @@ #include #include #include -#include "debug.h" #include "symbol.h" #include "demangle-java.h" diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 66756e6be111..6b4e5a0892f8 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -22,7 +22,6 @@ #define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP #include "unwind.h" -#include "debug.h" #include "libunwind-aarch64.h" #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> #include "../../arch/arm64/util/unwind-libunwind.c" diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index c5e568188e19..21c216c40a3b 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -22,7 +22,6 @@ #define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP #include "unwind.h" -#include "debug.h" #include "libunwind-x86.h" #include <../../../../arch/x86/include/uapi/asm/perf_regs.h> diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 565f7aef7e6c..e152d2bbe3a3 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -7,7 +7,6 @@ #include "target.h" #include "util.h" -#include "debug.h" #include #include diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 3949a60b00ae..196438ee4c9d 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -8,7 +8,6 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "util.h" -#include "debug.h" #include #include #include diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 59d456f716e9..deb6e69adb5a 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -10,8 +10,6 @@ #include "util/compress.h" #include "util/util.h" -#include "util/debug.h" - #define CHUNK_SIZE 16384 From fb71c86cc804b8f490fce1b9140014043ec41858 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Sep 2019 10:56:06 -0300 Subject: [PATCH 115/345] perf tools: Remove util.h from where it is not needed Check that it is not needed and remove, fixing up some fallout for places where it was only serving to get something else. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-9h6dg6lsqe2usyqjh5rrues4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/arm64/util/dwarf-regs.c | 1 - tools/perf/arch/powerpc/util/dwarf-regs.c | 1 - tools/perf/arch/powerpc/util/header.c | 1 - tools/perf/arch/s390/util/machine.c | 2 +- tools/perf/arch/x86/tests/intel-cqm.c | 1 - tools/perf/arch/x86/tests/rdpmc.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/arch/x86/util/machine.c | 2 +- tools/perf/bench/sched-messaging.c | 1 - tools/perf/bench/sched-pipe.c | 1 - tools/perf/builtin-config.c | 1 - tools/perf/builtin-evlist.c | 2 -- tools/perf/builtin-report.c | 2 +- tools/perf/perf.c | 2 +- tools/perf/tests/clang.c | 1 - tools/perf/tests/dso-data.c | 1 - tools/perf/tests/event-times.c | 1 - tools/perf/tests/llvm.c | 1 - tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/tests/parse-events.c | 1 - tools/perf/tests/parse-no-sample-id-all.c | 2 -- tools/perf/tests/perf-hooks.c | 1 - tools/perf/tests/pmu.c | 1 - tools/perf/tests/sample-parsing.c | 1 - tools/perf/tests/topology.c | 1 - tools/perf/tests/vmlinux-kallsyms.c | 2 +- tools/perf/ui/browser.c | 1 - tools/perf/ui/browsers/annotate.c | 1 - tools/perf/ui/browsers/map.c | 1 - tools/perf/ui/browsers/res_sample.c | 2 +- tools/perf/ui/browsers/scripts.c | 2 +- tools/perf/ui/gtk/progress.c | 1 - tools/perf/ui/helpline.c | 1 - tools/perf/ui/hist.c | 1 - tools/perf/ui/setup.c | 2 +- tools/perf/ui/tui/setup.c | 2 +- tools/perf/util/annotate.c | 2 +- tools/perf/util/auxtrace.c | 4 +++- tools/perf/util/branch.c | 1 - tools/perf/util/branch.h | 9 ++++++++- tools/perf/util/build-id.c | 2 +- tools/perf/util/cloexec.c | 2 +- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 1 - tools/perf/util/cs-etm.c | 1 - tools/perf/util/data.c | 3 ++- tools/perf/util/debug.c | 1 - tools/perf/util/demangle-rust.c | 1 - tools/perf/util/dwarf-regs.c | 1 - tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 1 + tools/perf/util/header.c | 3 ++- tools/perf/util/jitdump.c | 1 - tools/perf/util/llvm-utils.c | 1 + tools/perf/util/lzma.c | 2 +- tools/perf/util/perf-hooks.c | 1 - tools/perf/util/record.c | 1 - tools/perf/util/rwsem.c | 1 + tools/perf/util/s390-sample-raw.c | 1 - tools/perf/util/scripting-engines/trace-event-python.c | 1 - tools/perf/util/session.c | 1 + tools/perf/util/srccode.c | 2 +- tools/perf/util/symbol-elf.c | 2 ++ tools/perf/util/symbol-minimal.c | 3 +-- tools/perf/util/symbol.c | 2 +- tools/perf/util/target.c | 1 - tools/perf/util/trace-event-info.c | 2 +- tools/perf/util/trace-event-read.c | 1 - tools/perf/util/trace-event.c | 1 - tools/perf/util/unwind-libdw.c | 1 - tools/perf/util/unwind-libunwind-local.c | 1 - tools/perf/util/vdso.c | 2 +- tools/perf/util/zlib.c | 2 +- 75 files changed, 47 insertions(+), 73 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c32db09baf0d..5d120b1e35ed 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -25,7 +25,7 @@ #include "../../util/evsel.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" -#include "../../util/util.h" +#include "../../util/util.h" // page_size #include "../../util/session.h" #include diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 4b364692da67..eebbf31f995c 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -16,7 +16,7 @@ #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/session.h" -#include "../../util/util.h" +#include "../../util/util.h" // page_size #include "../../util/pmu.h" #include "../../util/debug.h" #include "../../util/auxtrace.h" diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index b047b882c5b1..917b97d7c5d3 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -11,7 +11,6 @@ #include #include /* for struct user_pt_regs */ #include -#include "util.h" struct pt_regs_dwarfnum { const char *name; diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 4952890b9428..0c4f4caf53ac 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -12,7 +12,6 @@ #include #include #include -#include "util.h" struct pt_regs_dwarfnum { const char *name; diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 0b242664f5ea..b6b7bc7e31a1 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -6,7 +6,6 @@ #include #include #include "header.h" -#include "util.h" #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index c8c86a0c9b79..df099d6cc3f5 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -2,7 +2,7 @@ #include #include #include -#include "util.h" +#include "util.h" // page_size #include "machine.h" #include "api/fs/fs.h" #include "debug.h" diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 3b5cc3373821..111c0ab2e7b5 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -5,7 +5,6 @@ #include "evlist.h" #include "evsel.h" #include "arch-tests.h" -#include "util.h" #include #include diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 6e67cee792b1..e7640fb047de 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -13,7 +13,7 @@ #include "tests/tests.h" #include "cloexec.h" #include "event.h" -#include "util.h" +#include "util.h" // page_size #include "arch-tests.h" static u64 rdpmc(unsigned int counter) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d263430c045f..090d90e093df 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -22,7 +22,7 @@ #include "../../util/tsc.h" #include "../../util/auxtrace.h" #include "../../util/intel-bts.h" -#include "../../util/util.h" +#include "../../util/util.h" // page_size #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index cb7cf16af79c..3d041b89f018 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -26,7 +26,7 @@ #include "../../util/record.h" #include "../../util/target.h" #include "../../util/tsc.h" -#include "../../util/util.h" +#include "../../util/util.h" // page_size #include "../../util/intel-pt.h" #define KiB(x) ((x) * 1024) diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index 1e9ec783b9a1..42418040bc07 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -3,7 +3,7 @@ #include #include -#include "../../util/util.h" +#include "../../util/util.h" // page_size #include "../../util/machine.h" #include "../../util/map.h" #include "../../util/symbol.h" diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 6e499b32bf00..97e4a4fb3362 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -10,7 +10,6 @@ * */ -#include "../util/util.h" #include #include "bench.h" diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index edd40aafa318..3c88d1f201f1 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -9,7 +9,6 @@ * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c * Ported to perf by Hitoshi Mitake */ -#include "../util/util.h" #include #include "bench.h" diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 42d8157e047a..2603015f98be 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -9,7 +9,6 @@ #include "util/cache.h" #include -#include "util/util.h" #include "util/debug.h" #include "util/config.h" #include diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 238fa3876805..294494e60e48 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -5,8 +5,6 @@ */ #include "builtin.h" -#include "util/util.h" - #include #include "perf.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b18fab94d38d..3047e5169d9d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -48,7 +48,7 @@ #include "util/auxtrace.h" #include "util/units.h" #include "util/branch.h" -#include "util/util.h" +#include "util/util.h" // perf_tip() #include "ui/ui.h" #include "ui/progress.h" diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1193b923e801..bb40a108b8f7 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -20,7 +20,7 @@ #include "util/bpf-loader.h" #include "util/debug.h" #include "util/event.h" -#include "util/util.h" +#include "util/util.h" // page_size, usage() #include "ui/ui.h" #include "perf-sys.h" #include diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index ff2711a40940..2577d3ed1531 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "tests.h" -#include "util.h" #include "c++/clang-c.h" #include diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index a4874d4ce7ef..627c1aaf1c9e 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -10,7 +10,6 @@ #include #include #include "dso.h" -#include "util.h" #include "machine.h" #include "symbol.h" #include "tests.h" diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index d824a726906c..0228ba435a2a 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -9,7 +9,6 @@ #include "tests.h" #include "evlist.h" #include "evsel.h" -#include "util.h" #include "debug.h" #include "parse-events.h" #include "thread_map.h" diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 022e4c9cf092..ae6cda81c209 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -7,7 +7,6 @@ #include "llvm.h" #include "tests.h" #include "debug.h" -#include "util.h" #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 360d70deb855..f72889c13538 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -14,7 +14,7 @@ #include "map.h" #include "symbol.h" #include "thread.h" -#include "util.h" +#include "util.h" // page_size #define THREADS 4 diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 02ba696fb87f..c25c8e7b41e5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -6,7 +6,6 @@ #include "tests.h" #include "debug.h" #include "pmu.h" -#include "util.h" #include #include #include diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 8284752a60c8..adf3c9c4a416 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0 #include #include #include @@ -8,7 +7,6 @@ #include "event.h" #include "evlist.h" #include "header.h" -#include "util.h" #include "debug.h" static int process_event(struct evlist **pevlist, union perf_event *event) diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index a693bcf017ea..dbc27199c65e 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -4,7 +4,6 @@ #include "tests.h" #include "debug.h" -#include "util.h" #include "perf-hooks.h" static void sigsegv_handler(int sig __maybe_unused) diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 14a78898d79e..74379ff1f7fa 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "parse-events.h" #include "pmu.h" -#include "util.h" #include "tests.h" #include #include diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 5fcc06817076..e3b965e7a233 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -9,7 +9,6 @@ #include "map_symbol.h" #include "branch.h" -#include "util.h" #include "event.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index a4f9f5182b47..c963f301d15e 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -4,7 +4,6 @@ #include #include #include "tests.h" -#include "util.h" #include "session.h" #include "evlist.h" #include "debug.h" diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 01f434c067c6..7f28775875c2 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -7,7 +7,7 @@ #include "dso.h" #include "map.h" #include "symbol.h" -#include "util.h" +#include "util.h" // page_size #include "tests.h" #include "debug.h" #include "machine.h" diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index f93d40b1c203..781afe42e90e 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../util/util.h" #include "../util/string2.h" #include "../util/config.h" #include "libslang.h" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ac74ed2c23a0..82207db8f97c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -2,7 +2,6 @@ #include "../browser.h" #include "../helpline.h" #include "../ui.h" -#include "../util.h" #include "../../util/annotate.h" #include "../../util/debug.h" #include "../../util/dso.h" diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 893b065971f6..3d49b916c9e4 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -5,7 +5,6 @@ #include #include #include -#include "../../util/util.h" #include "../../util/debug.h" #include "../../util/map.h" #include "../../util/dso.h" diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c index f16a38fea45e..76d356a18790 100644 --- a/tools/perf/ui/browsers/res_sample.c +++ b/tools/perf/ui/browsers/res_sample.c @@ -7,7 +7,7 @@ #include "config.h" #include "time-utils.h" #include "../util.h" -#include "../../util/util.h" +#include "../../util/util.h" // perf_exe() #include "../../perf.h" #include #include diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 3b81075b8da8..fc733a6354d4 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../builtin.h" #include "../../perf.h" -#include "../../util/util.h" +#include "../../util/util.h" // perf_exe() #include "../util.h" #include "../../util/hist.h" #include "../../util/debug.h" diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c index b6ad8857da78..eea6fcde518a 100644 --- a/tools/perf/ui/gtk/progress.c +++ b/tools/perf/ui/gtk/progress.c @@ -3,7 +3,6 @@ #include "gtk.h" #include "../progress.h" -#include "util.h" static GtkWidget *dialog; static GtkWidget *progress; diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index e00901450f91..911182b3f5e6 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -5,7 +5,6 @@ #include "helpline.h" #include "ui.h" -#include "../util/util.h" char ui_helpline__current[512]; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 3e533de7d852..f73675500061 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -8,7 +8,6 @@ #include "../util/callchain.h" #include "../util/debug.h" #include "../util/hist.h" -#include "../util/util.h" #include "../util/sort.h" #include "../util/evsel.h" #include "../util/evlist.h" diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index c7a86b4be9f5..700335cde618 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include "../util/debug.h" #include "../util/hist.h" -#include "../util/util.h" #include "ui.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 56651a4f5aa0..e9bfe856a5de 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -2,13 +2,13 @@ #include #include #include +#include #include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif #include "../../util/debug.h" -#include "../../util/util.h" #include "../../perf.h" #include "../browser.h" #include "../helpline.h" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1748f528b6e9..d441cca6a517 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -14,7 +14,7 @@ #include #include #include -#include "util.h" +#include "util.h" // hex_width() #include "ui/ui.h" #include "sort.h" #include "build-id.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 6f25224a3def..7ec0a6caa6cd 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -50,10 +50,12 @@ #include "intel-bts.h" #include "arm-spe.h" #include "s390-cpumsf.h" -#include "util.h" +#include "util.h" // page_size #include +#include #include "symbol/kallsyms.h" +#include static bool auxtrace__dont_decode(struct perf_session *session) { diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 52261e5f718a..2285b1eb3128 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -1,4 +1,3 @@ -#include "util/util.h" #include "util/map_symbol.h" #include "util/branch.h" #include diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 06f66dad0b79..88e00d268f6f 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -1,8 +1,15 @@ #ifndef _PERF_BRANCH_H #define _PERF_BRANCH_H 1 - +/* + * The linux/stddef.h isn't need here, but is needed for __always_inline used + * in files included from uapi/linux/perf_event.h such as + * /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h, + * detected in at least musl libc, used in Alpine Linux. -acme + */ #include #include +#include +#include #include #include diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e5fb77755d9e..7928c398a063 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,7 +7,7 @@ * Copyright (C) 2009, 2010 Red Hat Inc. * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ -#include "util.h" +#include "util.h" // copyfile_ns(), lsdir(), mkdir_p(), rm_rf() #include #include #include diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 4e904fcb2783..a12872f2856a 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "util.h" +#include "util.h" // for sched_getcpu() #include "../perf-sys.h" #include "cloexec.h" #include "event.h" diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 37d7c492b155..cd92a99eb89d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -17,7 +17,6 @@ #include "cs-etm.h" #include "cs-etm-decoder.h" #include "intlist.h" -#include "util.h" /* use raw logging */ #ifdef CS_DEBUG_RAW diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 707afdbd9529..f87b9c1c9f9a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -35,7 +35,6 @@ #include "thread.h" #include "thread-stack.h" #include -#include "util.h" #define MAX_TIMESTAMP (~0ULL) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e75c3a279fe8..88fba2ba549f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -13,9 +13,10 @@ #include #include "data.h" -#include "util.h" +#include "util.h" // rm_rf_perf_data() #include "debug.h" #include "header.h" +#include static void close_dir(struct perf_data_file *files, int nr) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index a1b59bd35519..e55114f0336f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -17,7 +17,6 @@ #include "event.h" #include "debug.h" #include "print_binary.h" -#include "util.h" #include "target.h" #include "ui/helpline.h" #include "ui/ui.h" diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c index 423afbbd386b..a659fc69f73a 100644 --- a/tools/perf/util/demangle-rust.c +++ b/tools/perf/util/demangle-rust.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "util.h" #include "debug.h" #include "demangle-rust.h" diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index db55eddce8cd..1b49ecee5aff 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,7 +5,6 @@ * Written by: Masami Hiramatsu */ -#include #include #include #include diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 095924aa186b..ea9517d506d8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -16,7 +16,7 @@ #include "evsel.h" #include "debug.h" #include "units.h" -#include "util.h" +#include "util.h" // page_size #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 85825384f9e8..c194ec787f96 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -45,6 +45,7 @@ #include "../perf-sys.h" #include "util/parse-branch-options.h" #include +#include #include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b0c34dda30a0..d85827de1b60 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -42,11 +42,12 @@ #include "tool.h" #include "time-utils.h" #include "units.h" -#include "util.h" +#include "util.h" // page_size, perf_exe() #include "cputopo.h" #include "bpf-event.h" #include +#include /* * magic2 = "PERFILE2" diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 00db9957fdb0..9f9c6c6a2fd3 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -15,7 +15,6 @@ #include #include "build-id.h" -#include "util.h" #include "event.h" #include "debug.h" #include "evlist.h" diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 55fb4b3b1157..8d04e3d070b1 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 397447066033..39062df02629 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -7,10 +7,10 @@ #include #include #include "compress.h" -#include "util.h" #include "debug.h" #include #include +#include #define BUFSIZE 8192 diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c index e635c594f773..7a0ab3507bd5 100644 --- a/tools/perf/util/perf-hooks.c +++ b/tools/perf/util/perf-hooks.c @@ -12,7 +12,6 @@ #include #include #include -#include "util/util.h" #include "util/debug.h" #include "util/perf-hooks.h" diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 286fe816c0f3..860c48895ab6 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,7 +10,6 @@ #include #include #include -#include "util.h" #include "cloexec.h" #include "record.h" #include "../perf-sys.h" diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index 5e52e7baa7b6..f3d29d8ddc99 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "util.h" #include "rwsem.h" diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 4d9593e331ea..05b43ab4eeef 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -22,7 +22,6 @@ #include #include "debug.h" -#include "util.h" #include "session.h" #include "evlist.h" #include "color.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 666a56e88d8e..bb6828532741 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -37,7 +37,6 @@ #include "../dso.h" #include "../callchain.h" #include "../evsel.h" -#include "../util.h" #include "../event.h" #include "../thread.h" #include "../comm.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e9e4a04f15db..2b133bc22caa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -34,6 +34,7 @@ #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" +#include #ifdef HAVE_ZSTD_SUPPORT static int perf_session__process_compressed_event(struct perf_session *session, diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index adfcf1ff464c..b402f9ca89ab 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -15,7 +15,7 @@ #include #include "srccode.h" #include "debug.h" -#include "util.h" +#include "util.h" // page_size #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9428639872a6..8b9199c343dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -18,8 +18,10 @@ #include "debug.h" #include "util.h" #include +#include #include #include +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7e2813ec9498..d6e99af263ec 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,8 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 #include "dso.h" #include "symbol.h" #include "symsrc.h" -#include "util.h" #include #include @@ -13,6 +11,7 @@ #include #include #include +#include static bool check_need_swap(int file_endian) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 765c75df2904..a8f80e427674 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,7 +19,7 @@ #include "build-id.h" #include "cap.h" #include "dso.h" -#include "util.h" +#include "util.h" // lsdir() #include "debug.h" #include "event.h" #include "machine.h" diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index e152d2bbe3a3..a3db13dea937 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -6,7 +6,6 @@ */ #include "target.h" -#include "util.h" #include #include diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d63d542b2cde..fe07e7550930 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2008,2009, Steven Rostedt */ -#include "util.h" +#include "util.h" // page_size #include #include #include diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index b6c0db068be0..8593d3c200c6 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -15,7 +15,6 @@ #include #include -#include "util.h" #include "trace-event.h" #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 01b9d89bf5bf..b3ee651e3d91 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -14,7 +14,6 @@ #include #include "trace-event.h" #include "machine.h" -#include "util.h" /* * global trace_event object used by trace_event__tp_format diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 9ece188ae48a..15f6e46d7124 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -17,7 +17,6 @@ #include "event.h" #include "perf_regs.h" #include "callchain.h" -#include "util.h" static char *debuginfo_path; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index ebdbb056510c..1800887b2255 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -37,7 +37,6 @@ #include "unwind.h" #include "map.h" #include "symbol.h" -#include "util.h" #include "debug.h" #include "asm/bug.h" #include "dso.h" diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index e5e6599603f4..ba4b4395f35d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -11,7 +11,7 @@ #include "vdso.h" #include "dso.h" -#include "util.h" +#include #include "map.h" #include "symbol.h" #include "machine.h" diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index deb6e69adb5a..78d2297c1b67 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -7,9 +7,9 @@ #include #include #include +#include #include "util/compress.h" -#include "util/util.h" #define CHUNK_SIZE 16384 From 36f3f450a8dc02d9ba0d31bc31d5b329759ed855 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 16:16:27 +0100 Subject: [PATCH 116/345] perf probe: Add missing build-id.h header. It uses things defined in that header and was getting it only indirectly, thru dso.h, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-7u3sf4j5huhi3mqa1q77524b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index d13db55a2feb..b659466ea498 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -16,6 +16,7 @@ #include "strlist.h" #include "strfilter.h" #include "debug.h" +#include "build-id.h" #include "dso.h" #include "color.h" #include "symbol.h" From 09aa3b002c8cfcea65b2925ec4ff5a6ccdc44d87 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 16:17:19 +0100 Subject: [PATCH 117/345] perf symbols: Add missing dso.h header This was being obtained only indirectly, by luck. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xeolxwr3iftwfw9kmw26shfe@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8b9199c343dd..6fbfdf8bf61f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,6 +7,7 @@ #include #include +#include "dso.h" #include "map.h" #include "map_groups.h" #include "symbol.h" From 87ffb6c6407023419ae6b2770142b0754d9cbaa1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 16:29:02 +0100 Subject: [PATCH 118/345] perf env: Remove needless cpumap.h header Only a 'struct perf_cmp_map' forward allocation is necessary, fix the places that need the header but were getting it indirectly, by luck, from env.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-3sj3n534zghxhk7ygzeaqlx9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/header.c | 4 +++- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 1 - tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/bench/futex-hash.c | 2 +- tools/perf/bench/futex-lock-pi.c | 2 +- tools/perf/bench/futex-requeue.c | 2 +- tools/perf/bench/futex-wake-parallel.c | 3 ++- tools/perf/bench/futex-wake.c | 2 +- tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-sched.c | 2 ++ tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/code-reading.c | 1 - tools/perf/tests/event_update.c | 1 + tools/perf/tests/keep-tracking.c | 3 +-- tools/perf/tests/mem2node.c | 2 +- tools/perf/tests/mmap-basic.c | 3 +-- tools/perf/tests/openat-syscall-all-cpus.c | 5 +++-- tools/perf/tests/switch-tracking.c | 1 - tools/perf/tests/task-exit.c | 2 +- tools/perf/tests/topology.c | 1 + tools/perf/util/arm-spe.c | 1 - tools/perf/util/auxtrace.c | 1 - tools/perf/util/env.h | 3 ++- tools/perf/util/event.c | 2 ++ tools/perf/util/evsel.c | 2 +- tools/perf/util/intel-bts.c | 1 - tools/perf/util/parse-events.c | 1 - tools/perf/util/pmu.c | 1 - tools/perf/util/python.c | 1 - tools/perf/util/record.c | 1 - tools/perf/util/s390-cpumsf.c | 1 - tools/perf/util/scripting-engines/trace-event-python.c | 1 - tools/perf/util/session.c | 1 - tools/perf/util/stat.c | 1 + tools/perf/util/svghelper.c | 2 +- tools/perf/util/top.c | 1 - 37 files changed, 31 insertions(+), 34 deletions(-) diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index e41defaaa2e6..a32e4b72a98f 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include "debug.h" #include "header.h" @@ -29,7 +31,7 @@ char *get_cpuid_str(struct perf_pmu *pmu) /* read midr from list of cpus mapped to this pmu */ cpus = perf_cpu_map__get(pmu->cpus); - for (cpu = 0; cpu < cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, sysfs, cpus->map[cpu]); diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index eb3635941c2b..0a4570b340fa 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -15,7 +15,6 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "record.h" #include "tsc.h" #include "tests/tests.h" diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index d1caa4a0a12a..bb617e568841 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -21,12 +21,12 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index f6b4472847d2..7af694437f4e 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -76,12 +76,12 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 80e138904c66..8ba0c3330a9a 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -20,13 +20,13 @@ #include #include #include +#include #include #include "../util/stat.h" #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index c5d6d0abbaa9..d0cae8125423 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -14,10 +14,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include #include diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 75d3418c1a88..a00a6891447a 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include #include diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 163fe16c275a..a053cf2b7039 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -29,7 +29,8 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe #include #include #include "futex.h" -#include "cpumap.h" +#include +#include #include #include diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 77dcdc13618a..df810096abfe 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include "bench.h" #include "futex.h" -#include "cpumap.h" #include #include diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b09b12e0976b..61aaacc2aedd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -20,6 +20,7 @@ #include #include "debug.h" #include "builtin.h" +#include #include #include #include "map_symbol.h" diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ec96d64aec69..511e19a7aafa 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3,6 +3,7 @@ #include "perf.h" #include "perf-sys.h" +#include "util/cpumap.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/symbol.h" @@ -36,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index db2aadff3708..96c137360918 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -2,8 +2,8 @@ #include #include #include +#include #include "tests.h" -#include "cpumap.h" #include "debug.h" #define NBITS 100 diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index c1c29e08e7fb..8d9020c46ca9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -19,7 +19,6 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "machine.h" #include "map.h" #include "symbol.h" diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index cac4290e233a..317eb8c5ccd4 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -2,6 +2,7 @@ #include #include #include +#include "cpumap.h" #include "evlist.h" #include "evsel.h" #include "header.h" diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 9f0762d987fa..df0fd5a44e04 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -12,7 +12,6 @@ #include "evsel.h" #include "record.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" #define CHECK__(x) { \ @@ -143,7 +142,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un found = find_comm(evlist, comm); if (found != 1) { - pr_debug("Seconf time, failed to find tracking event.\n"); + pr_debug("Second time, failed to find tracking event.\n"); goto out_err; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 7672ade70f20..a258bd51f1a4 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -4,7 +4,7 @@ #include #include #include -#include "cpumap.h" +#include #include "debug.h" #include "env.h" #include "mem2node.h" diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 85e1d7337dc0..042757629e90 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -10,7 +10,6 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" #include #include @@ -53,7 +52,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse cpus = perf_cpu_map__new(NULL); if (cpus == NULL) { - pr_debug("cpu_map__new\n"); + pr_debug("perf_cpu_map__new\n"); goto out_free_threads; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 9171f77cd9cd..93c176523e38 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -14,7 +14,8 @@ #include "evsel.h" #include "tests.h" #include "thread_map.h" -#include "cpumap.h" +#include +#include #include "debug.h" #include "stat.h" #include "util/counts.h" @@ -37,7 +38,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int cpus = perf_cpu_map__new(NULL); if (cpus == NULL) { - pr_debug("cpu_map__new\n"); + pr_debug("perf_cpu_map__new\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 1a60fa1219f5..3fb1ff7b8a2f 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -14,7 +14,6 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#include "cpumap.h" #include "record.h" #include "tests.h" diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index f610e8c0a083..088c7708b03a 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -4,12 +4,12 @@ #include "evsel.h" #include "target.h" #include "thread_map.h" -#include "cpumap.h" #include "tests.h" #include #include #include +#include #include static int exited; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index c963f301d15e..7d845d913d7d 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -3,6 +3,7 @@ #include #include #include +#include "cpumap.h" #include "tests.h" #include "session.h" #include "evlist.h" diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 8a7340f6a2a2..53be12b23ff4 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -16,7 +16,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "machine.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 7ec0a6caa6cd..1c0ff5acff83 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,7 +31,6 @@ #include "map.h" #include "pmu.h" #include "evsel.h" -#include "cpumap.h" #include "symbol.h" #include "thread_map.h" #include "asm/bug.h" diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d8e083d42610..db40906e2937 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,9 +4,10 @@ #include #include -#include "cpumap.h" #include "rwsem.h" +struct perf_cpu_map; + struct cpu_topology_map { int socket_id; int die_id; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f4afbb858ebb..d65ae7cf9316 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,7 @@ #include #include #include +#include "cpumap.h" #include "dso.h" #include "event.h" #include "debug.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c194ec787f96..5b40b840624c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -31,7 +31,7 @@ #include "event.h" #include "evsel.h" #include "evlist.h" -#include "cpumap.h" +#include #include "thread_map.h" #include "target.h" #include "perf_regs.h" diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index aacffa2b0362..15f87a09f4fe 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -14,7 +14,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "evlist.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5ec21d21113c..a33a1d5059a2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -30,7 +30,6 @@ #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" -#include "cpumap.h" #include "probe-file.h" #include "asm/bug.h" #include "util/parse-branch-options.h" diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index fb597fa94234..5608da82ad23 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -20,7 +20,6 @@ #include "debug.h" #include "pmu.h" #include "parse-events.h" -#include "cpumap.h" #include "header.h" #include "pmu-events/pmu-events.h" #include "string2.h" diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 07ca4535e6f7..96dd3333c911 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -11,7 +11,6 @@ #include "callchain.h" #include "evsel.h" #include "event.h" -#include "cpumap.h" #include "print_binary.h" #include "thread_map.h" #include "trace-event.h" diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 860c48895ab6..8a015fc0aba0 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,7 +2,6 @@ #include "debug.h" #include "evlist.h" #include "evsel.h" -#include "cpumap.h" #include "parse-events.h" #include #include diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 24a99909d8b3..6785cd87aa4d 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -151,7 +151,6 @@ #include #include -#include "cpumap.h" #include "color.h" #include "evsel.h" #include "evlist.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index bb6828532741..5d341efc3237 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -48,7 +48,6 @@ #include "map.h" #include "symbol.h" #include "thread_map.h" -#include "cpumap.h" #include "print_binary.h" #include "stat.h" #include "mem-events.h" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2b133bc22caa..2b583e6adb49 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -22,7 +22,6 @@ #include "symbol.h" #include "session.h" #include "tool.h" -#include "cpumap.h" #include "perf_regs.h" #include "asm/bug.h" #include "auxtrace.h" diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8f1ea27f976f..d309c1cc13db 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -4,6 +4,7 @@ #include #include #include "counts.h" +#include "cpumap.h" #include "debug.h" #include "header.h" #include "stat.h" diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 582f4a69cd48..96f941e01681 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -17,11 +17,11 @@ #include #include #include +#include #include #include "env.h" #include "svghelper.h" -#include "cpumap.h" static u64 first_time, last_time; static u64 turbo_frequency, max_freq; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 51fb574998bb..ef96e3dd6902 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -5,7 +5,6 @@ * Refactored from builtin-top.c, see that files for further copyright notes. */ -#include "cpumap.h" #include "event.h" #include "evlist.h" #include "evsel.h" From 278306163882a3557fb2c69fd2cc632a2f9ef601 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 16:42:52 +0100 Subject: [PATCH 119/345] perf event: Move perf_event__synthesize* to event.h Where is the perf_event__handler_t typedef they need, which was the only reason for header.h to be including event.h, untangle that. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-outjyzh1o29ndcv9lsqyzt87@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 36 ++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 42 +++------------------------------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 47ad81d47b1a..4e6d33c76d57 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,6 +279,9 @@ enum { void perf_event__print_totals(void); +struct evlist; +struct evsel; +struct perf_session; struct perf_tool; struct perf_thread_map; struct perf_cpu_map; @@ -290,6 +293,39 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine); +int perf_event__synthesize_attr(struct perf_tool *tool, + struct perf_event_attr *attr, u32 ids, u64 *id, + perf_event__handler_t process); +int perf_event__synthesize_attrs(struct perf_tool *tool, + struct evlist *evlist, + perf_event__handler_t process); +int perf_event__synthesize_build_id(struct perf_tool *tool, + struct dso *pos, u16 misc, + perf_event__handler_t process, + struct machine *machine); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, + struct evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_name(struct perf_tool *tool, + struct evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, + struct evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, + struct evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_features(struct perf_tool *tool, + struct perf_session *session, + struct evlist *evlist, + perf_event__handler_t process); +int perf_event__synthesize_tracing_data(struct perf_tool *tool, + int fd, struct evlist *evlist, + perf_event__handler_t process); int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 3e48ae3c49b1..999dac41871e 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -5,10 +5,10 @@ #include #include #include +#include // FILE #include #include #include -#include "event.h" #include "env.h" #include "pmu.h" @@ -94,6 +94,8 @@ struct perf_header { struct evlist; struct perf_session; +struct perf_tool; +union perf_event; int perf_session__read_header(struct perf_session *session); int perf_session__write_header(struct perf_session *session, @@ -115,54 +117,16 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); -int perf_event__synthesize_features(struct perf_tool *tool, - struct perf_session *session, - struct evlist *evlist, - perf_event__handler_t process); - -int perf_event__synthesize_extra_attr(struct perf_tool *tool, - struct evlist *evsel_list, - perf_event__handler_t process, - bool is_pipe); - int perf_event__process_feature(struct perf_session *session, union perf_event *event); - -int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u32 ids, u64 *id, - perf_event__handler_t process); -int perf_event__synthesize_attrs(struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process); -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_name(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); - -int perf_event__synthesize_tracing_data(struct perf_tool *tool, - int fd, struct evlist *evlist, - perf_event__handler_t process); int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event); - -int perf_event__synthesize_build_id(struct perf_tool *tool, - struct dso *pos, u16 misc, - perf_event__handler_t process, - struct machine *machine); int perf_event__process_build_id(struct perf_session *session, union perf_event *event); bool is_perf_magic(u64 magic); From b251892d6ceafa3c8f8e6835a664e248766b1b3e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 17:17:33 +0100 Subject: [PATCH 120/345] perf stat: Move perf_stat_synthesize_config() to event.h Together with the other synthsizers, and rename it to perf_event__synthesize_stat_events(). This allows us to stop including event.h in util/stat.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-q5ebhrp44txboobs86htu5r9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++-- tools/perf/util/event.h | 5 +++++ tools/perf/util/stat.c | 10 +++++----- tools/perf/util/stat.h | 8 ++------ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5bc0c570b7b6..b55e8060810b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -540,8 +540,8 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, - process_synthesized_event, is_pipe); + err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, + process_synthesized_event, is_pipe); if (err < 0) return err; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 4e6d33c76d57..89a2404170a0 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -293,6 +293,11 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine); +int perf_event__synthesize_stat_events(struct perf_stat_config *config, + struct perf_tool *tool, + struct evlist *evlist, + perf_event__handler_t process, + bool attrs); int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index d309c1cc13db..2e318d95c528 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -495,11 +495,11 @@ int create_perf_stat_counter(struct evsel *evsel, return perf_evsel__open_per_thread(evsel, evsel->core.threads); } -int perf_stat_synthesize_config(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs) +int perf_event__synthesize_stat_events(struct perf_stat_config *config, + struct perf_tool *tool, + struct evlist *evlist, + perf_event__handler_t process, + bool attrs) { int err; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 14fe3e548229..0f9c9f6e2041 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,8 +7,9 @@ #include #include #include "rblist.h" -#include "event.h" +struct perf_cpu_map; +struct perf_stat_config; struct timespec; struct stats { @@ -210,11 +211,6 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target); -int perf_stat_synthesize_config(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs); void perf_evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, From 9c9e754fb804828473c5131bb3e7df78bde396e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Sep 2019 17:24:07 +0100 Subject: [PATCH 121/345] perf callchain: Remove needless event.h include All we need is a bunch of struct forward declarations and then add event.h to the only place that was getting it indirectly via callchain.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-qq2xhyuxcvx5vmxha9otjd8d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 1 + tools/perf/util/callchain.h | 5 ++++- tools/perf/util/evsel_fprintf.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index fc9c2f5fcd52..3018a054526a 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -13,6 +13,7 @@ #include "util/callchain.h" #include "util/debug.h" #include "util/dso.h" +#include "util/event.h" // struct ip_callchain #include "util/map.h" #include "util/symbol.h" diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b042ceef4114..83398e5bbe4b 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,12 +4,15 @@ #include #include -#include "event.h" #include "map_symbol.h" #include "branch.h" +struct addr_location; struct evsel; +struct ip_callchain; struct map; +struct perf_sample; +struct thread; #define HELP_PAD "\t\t\t\t" diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 496fec01f5d1..a8cbdf4c2753 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -4,6 +4,7 @@ #include #include #include "evsel.h" +#include "util/event.h" #include "callchain.h" #include "map.h" #include "strlist.h" From 5939cacc60d22161adba3d6c8b3fe88b76e0f6c0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Sep 2019 12:54:33 +0100 Subject: [PATCH 122/345] perf python: Remove debug.h We only need to have the prototype for the eprintf() replacement we use in the python binding, provide it and avoid dragging debug.h as a dependency. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-s0gy4ur3drmhsknsddwjco59@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 96dd3333c911..0ba19dd75510 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,7 +6,6 @@ #include #include #include -#include "debug.h" #include "evlist.h" #include "callchain.h" #include "evsel.h" @@ -60,6 +59,8 @@ int parse_callchain_record(const char *arg __maybe_unused, */ int verbose; +int eprintf(int level, int var, const char *fmt, ...); + int eprintf(int level, int var, const char *fmt, ...) { va_list args; From 3793d4de06fac8dcd25bf91386cf88415dbe99ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 10:09:54 -0300 Subject: [PATCH 123/345] perf hist: Add missing 'struct branch_stack' forward declaration Its needed, was being obtained indirectly, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-srzphk0ehptfn3zqmpkgsi65@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 34803e33dc80..6a186b668303 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -15,6 +15,7 @@ struct addr_location; struct map_symbol; struct mem_info; struct branch_info; +struct branch_stack; struct block_info; struct symbol; struct ui_progress; From 3f79132a47035dd4609cc5f47715331dfafaa1fa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 10:10:43 -0300 Subject: [PATCH 124/345] perf annotate: Add missing machine.h include directive We use what is defined there, were getting it by luck, indirectly, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-56g4jshmktniundmiw7h845k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4e4d2e76232e..553c651fa0a2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -27,6 +27,7 @@ #include "util/sort.h" #include "util/hist.h" #include "util/dso.h" +#include "util/machine.h" #include "util/map.h" #include "util/session.h" #include "util/tool.h" From f12be047d981bb802d8cf78eb220db3ee97f0513 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 10:11:20 -0300 Subject: [PATCH 125/345] perf sched: Add missing event.h include directive We use what is defined there, were getting it by luck, indirectly, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-e1cdt9557ctpvs3jb9c16qe6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 511e19a7aafa..f0b828c201cc 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -24,6 +24,7 @@ #include "util/trace-event.h" #include "util/debug.h" +#include "util/event.h" #include #include From bd23ac11fe9312bab40e129b402757fd7a23dc8e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 10:12:07 -0300 Subject: [PATCH 126/345] perf auxtrace: Add missing 'struct perf_sample' forward declaration Its needed, was being obtained indirectly, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-c3k1il7sm28old4e22nwlm7l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 37e70dc01436..1ed902a24a39 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -24,6 +24,7 @@ struct perf_session; struct evlist; struct perf_tool; struct perf_mmap; +struct perf_sample; struct option; struct record_opts; struct perf_record_auxtrace_info; From ea49e01cfabd73c94a61649cd04fa524a2beff3c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 11:36:13 -0300 Subject: [PATCH 127/345] perf tools: Move event synthesizing routines to separate header Those are the only routines using the perf_event__handler_t typedef and are all related, so move to a separate header to reduce the header dependency tree, lots of places were getting event.h and even stdio.h, limits.h indirectly, so fix those as well. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-yvx9u1mf7baq6cu1abfhbqgs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/archinsn.c | 1 + tools/perf/arch/x86/util/event.c | 2 + tools/perf/arch/x86/util/machine.c | 1 + tools/perf/arch/x86/util/tsc.c | 2 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/cpumap.c | 1 + tools/perf/tests/dwarf-unwind.c | 1 + tools/perf/tests/event_update.c | 1 + tools/perf/tests/hists_common.c | 2 + tools/perf/tests/mmap-thread-lookup.c | 2 + tools/perf/tests/sample-parsing.c | 1 + tools/perf/tests/stat.c | 1 + tools/perf/tests/thread-map.c | 1 + tools/perf/ui/stdio/hist.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/auxtrace.h | 17 +--- tools/perf/util/bpf-event.c | 1 + tools/perf/util/bpf-event.h | 15 +--- tools/perf/util/callchain.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/event.h | 118 +------------------------- tools/perf/util/evsel.c | 1 + tools/perf/util/header.c | 1 + tools/perf/util/intel-bts.c | 1 + tools/perf/util/intel-pt.c | 1 + tools/perf/util/machine.c | 10 +++ tools/perf/util/machine.h | 15 ---- tools/perf/util/session.c | 1 + tools/perf/util/session.h | 5 -- tools/perf/util/stat.c | 1 + tools/perf/util/synthetic-events.h | 103 ++++++++++++++++++++++ tools/perf/util/tsc.h | 14 +-- 38 files changed, 154 insertions(+), 177 deletions(-) create mode 100644 tools/perf/util/synthetic-events.h diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 9876c7a7ed7c..3e6791531ca5 100644 --- a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../../../arch/x86/include/asm/insn.h" #include "archinsn.h" +#include "event.h" #include "machine.h" #include "thread.h" #include "symbol.h" diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a3a0b6884779..d357c625c09f 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,6 +3,8 @@ #include #include +#include "../../util/event.h" +#include "../../util/synthetic-events.h" #include "../../util/machine.h" #include "../../util/tool.h" #include "../../util/map.h" diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index 42418040bc07..f0c289862f9f 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include "../../util/util.h" // page_size diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index c5197a15119b..2f55afb14e1f 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -8,6 +8,8 @@ #include #include #include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" #include "../../../util/tsc.h" int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c14f40b858bc..23a76cf3846f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -21,6 +21,7 @@ #include "util/auxtrace.h" #include "util/jit.h" #include "util/symbol.h" +#include "util/synthetic-events.h" #include "util/thread.h" #include diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0a4fcbe32bf6..ac6d6e061dc5 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -17,6 +17,7 @@ #include "util/debug.h" #include "util/tool.h" #include "util/stat.h" +#include "util/synthetic-events.h" #include "util/top.h" #include "util/data.h" #include "util/ordered-events.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1447004eee8a..907d4d4677a3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -38,6 +38,7 @@ #include "util/trigger.h" #include "util/perf-hooks.h" #include "util/cpu-set-sched.h" +#include "util/synthetic-events.h" #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b55e8060810b..eece3d1e429a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -61,6 +61,7 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" #include "util/top.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 726e3f2dd8c7..b052470f89b4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -32,6 +32,7 @@ #include "util/map.h" #include "util/session.h" #include "util/symbol.h" +#include "util/synthetic-events.h" #include "util/top.h" #include "util/util.h" #include diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0f633f0d6be8..f0f735093e21 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -28,6 +28,7 @@ #include "util/dso.h" #include "util/env.h" #include "util/event.h" +#include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" #include diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 8d9020c46ca9..fd02c1f1d976 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -24,6 +24,7 @@ #include "symbol.h" #include "event.h" #include "record.h" +#include "util/synthetic-events.h" #include "thread.h" #include "tests.h" diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 39493de50117..8a0d236202b0 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -3,6 +3,7 @@ #include #include "cpumap.h" #include "event.h" +#include "util/synthetic-events.h" #include #include #include diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 4125255ff637..4f4ecbcbe87e 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -15,6 +15,7 @@ #include "symbol.h" #include "thread.h" #include "callchain.h" +#include "util/synthetic-events.h" #if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__) #include "arch-tests.h" diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 317eb8c5ccd4..4bb772e2b73d 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -7,6 +7,7 @@ #include "evsel.h" #include "header.h" #include "machine.h" +#include "util/synthetic-events.h" #include "tool.h" #include "tests.h" #include "debug.h" diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index de110d8f169b..6f34d08b84e5 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -2,6 +2,7 @@ #include #include "util/debug.h" #include "util/dso.h" +#include "util/event.h" // struct perf_sample #include "util/map.h" #include "util/symbol.h" #include "util/sort.h" @@ -10,6 +11,7 @@ #include "util/thread.h" #include "tests/hists_common.h" #include +#include static struct { u32 pid; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index f72889c13538..33b496d194f4 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -8,11 +8,13 @@ #include #include #include "debug.h" +#include "event.h" #include "tests.h" #include "machine.h" #include "thread_map.h" #include "map.h" #include "symbol.h" +#include "util/synthetic-events.h" #include "thread.h" #include "util.h" // page_size diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index e3b965e7a233..3a02426db9a6 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -12,6 +12,7 @@ #include "event.h" #include "evsel.h" #include "debug.h" +#include "util/synthetic-events.h" #include "tests.h" diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index cc10b4116c9f..c1911501c39c 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -5,6 +5,7 @@ #include "stat.h" #include "counts.h" #include "debug.h" +#include "util/synthetic-events.h" static bool has_term(struct perf_record_stat_config *config, u64 tag, u64 val) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 39168c57943b..28f51c4bd373 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -8,6 +8,7 @@ #include "thread_map.h" #include "debug.h" #include "event.h" +#include "util/synthetic-events.h" #include #include diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 832ca6cfbe30..5365606e9dad 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -5,6 +5,7 @@ #include "../../util/callchain.h" #include "../../util/debug.h" +#include "../../util/event.h" #include "../../util/hist.h" #include "../../util/map.h" #include "../../util/map_groups.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 1c0ff5acff83..0e8c89cf7cad 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -32,6 +32,7 @@ #include "pmu.h" #include "evsel.h" #include "symbol.h" +#include "util/synthetic-events.h" #include "thread_map.h" #include "asm/bug.h" #include "auxtrace.h" diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 1ed902a24a39..b110aec1da4d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -11,14 +11,13 @@ #include #include #include +#include // FILE #include #include #include #include #include -#include "event.h" - union perf_event; struct perf_session; struct evlist; @@ -27,6 +26,7 @@ struct perf_mmap; struct perf_sample; struct option; struct record_opts; +struct perf_record_auxtrace_error; struct perf_record_auxtrace_info; struct events_stats; @@ -525,10 +525,6 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int int code, int cpu, pid_t pid, pid_t tid, u64 ip, const char *msg, u64 timestamp); -int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, - struct perf_tool *tool, - struct perf_session *session, - perf_event__handler_t process); int perf_event__process_auxtrace_info(struct perf_session *session, union perf_event *event); s64 perf_event__process_auxtrace(struct perf_session *session, @@ -605,15 +601,6 @@ void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) { } -static inline int -perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, - struct perf_tool *tool __maybe_unused, - struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused) -{ - return -EINVAL; -} - static inline int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused, diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 7a3d4b125323..f7ed5d122e22 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -16,6 +16,7 @@ #include "map.h" #include "evlist.h" #include "record.h" +#include "util/synthetic-events.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index a01c2fd68c03..81fdc88e6c1a 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -6,9 +6,9 @@ #include #include #include -#include "event.h" #include +struct bpf_prog_info; struct machine; union perf_event; struct perf_env; @@ -33,11 +33,6 @@ struct btf_node { #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); - -int perf_event__synthesize_bpf_events(struct perf_session *session, - perf_event__handler_t process, - struct machine *machine, - struct record_opts *opts); int bpf_event__add_sb_event(struct evlist **evlist, struct perf_env *env); void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, @@ -51,14 +46,6 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused, return 0; } -static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused, - struct record_opts *opts __maybe_unused) -{ - return 0; -} - static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused, struct perf_env *env __maybe_unused) { diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c14646c1f2eb..9a9b56ed3f0a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -23,6 +23,7 @@ #include "debug.h" #include "dso.h" +#include "event.h" #include "hist.h" #include "sort.h" #include "machine.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index d65ae7cf9316..043a08fc7398 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -32,6 +32,7 @@ #include "stat.h" #include "session.h" #include "bpf-event.h" +#include "synthetic-events.h" #include "tool.h" #include "../perf.h" diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 89a2404170a0..a0a0c91cde4a 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,95 +279,13 @@ enum { void perf_event__print_totals(void); -struct evlist; -struct evsel; -struct perf_session; -struct perf_tool; -struct perf_thread_map; struct perf_cpu_map; +struct perf_record_stat_config; struct perf_stat_config; -struct perf_counts_values; +struct perf_tool; -typedef int (*perf_event__handler_t)(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct machine *machine); - -int perf_event__synthesize_stat_events(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs); -int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u32 ids, u64 *id, - perf_event__handler_t process); -int perf_event__synthesize_attrs(struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process); -int perf_event__synthesize_build_id(struct perf_tool *tool, - struct dso *pos, u16 misc, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_extra_attr(struct perf_tool *tool, - struct evlist *evsel_list, - perf_event__handler_t process, - bool is_pipe); -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_name(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process); -int perf_event__synthesize_features(struct perf_tool *tool, - struct perf_session *session, - struct evlist *evlist, - perf_event__handler_t process); -int perf_event__synthesize_tracing_data(struct perf_tool *tool, - int fd, struct evlist *evlist, - perf_event__handler_t process); -int perf_event__synthesize_thread_map(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine, bool mmap_data); -int perf_event__synthesize_thread_map2(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_cpu_map(struct perf_tool *tool, - struct perf_cpu_map *cpus, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, bool mmap_data, - unsigned int nr_threads_synthesize); -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_stat_config(struct perf_tool *tool, - struct perf_stat_config *config, - perf_event__handler_t process, - struct machine *machine); void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event); -int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, - struct perf_counts_values *count, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_stat_round(struct perf_tool *tool, - u64 time, u64 type, - perf_event__handler_t process, - struct machine *machine); -int perf_event__synthesize_modules(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); int perf_event__process_comm(struct perf_tool *tool, union perf_event *event, @@ -421,10 +339,6 @@ int perf_event__process_bpf(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - perf_event__handler_t process); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -446,34 +360,6 @@ void thread__resolve(struct thread *thread, struct addr_location *al, const char *perf_event__name(unsigned int id); -size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 read_format); -int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 read_format, - const struct perf_sample *sample); - -pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine); - -int perf_event__synthesize_namespaces(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine); - -int perf_event__synthesize_mmap_events(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data); - -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); - size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5b40b840624c..5af025c80ec5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -40,6 +40,7 @@ #include "trace-event.h" #include "stat.h" #include "string2.h" +#include "util/synthetic-events.h" #include "memswap.h" #include "util.h" #include "../perf-sys.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d85827de1b60..a4a8342eba98 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -45,6 +45,7 @@ #include "util.h" // page_size, perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "util/synthetic-events.h" #include #include diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 15f87a09f4fe..3888d4cd3ed1 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -28,6 +28,7 @@ #include "auxtrace.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" #include "intel-bts.h" +#include "util/synthetic-events.h" #define MAX_TIMESTAMP (~0ULL) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 9b56fb74bedf..bcdc0359f7cf 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/synthetic-events.h" #include "time-utils.h" #include "../arch/x86/include/uapi/asm/perf_regs.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b4749d3eed08..132de5cfb9b9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,6 +20,7 @@ #include "symbol.h" #include "sort.h" #include "strlist.h" +#include "util/synthetic-events.h" #include "target.h" #include "thread.h" #include "util.h" @@ -2624,6 +2625,15 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too return 0; } +int machine__synthesize_threads(struct machine *machine, struct target *target, + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize) +{ + return __machine__synthesize_threads(machine, NULL, target, threads, + perf_event__process, data_mmap, + nr_threads_synthesize); +} + pid_t machine__get_current_tid(struct machine *machine, int cpu) { int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index ffd391a925a6..18e13c0ccd6a 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -6,7 +6,6 @@ #include #include "map_groups.h" #include "dsos.h" -#include "event.h" #include "rwsem.h" struct addr_location; @@ -252,20 +251,6 @@ int machines__for_each_thread(struct machines *machines, int (*fn)(struct thread *thread, void *p), void *priv); -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, - struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize); -static inline -int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) -{ - return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); -} - pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2b583e6adb49..6267613b551d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,6 +29,7 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" +#include "util/synthetic-events.h" #include "util.h" #include "ui/progress.h" #include "../perf.h" diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b7aa076ab6fd..b4c9428c18f0 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -138,9 +138,4 @@ int perf_session__deliver_synth_event(struct perf_session *session, int perf_event__process_id_index(struct perf_session *session, union perf_event *event); -int perf_event__synthesize_id_index(struct perf_tool *tool, - perf_event__handler_t process, - struct evlist *evlist, - struct machine *machine); - #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2e318d95c528..46c8a5027e12 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -12,6 +12,7 @@ #include "target.h" #include "evlist.h" #include "evsel.h" +#include "util/synthetic-events.h" #include "thread_map.h" #include diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h new file mode 100644 index 000000000000..baead0cdc381 --- /dev/null +++ b/tools/perf/util/synthetic-events.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SYNTHETIC_EVENTS_H +#define __PERF_SYNTHETIC_EVENTS_H + +#include +#include // pid_t +#include +#include + +struct auxtrace_record; +struct dso; +struct evlist; +struct evsel; +struct machine; +struct perf_counts_values; +struct perf_cpu_map; +struct perf_event_attr; +struct perf_event_mmap_page; +struct perf_sample; +struct perf_session; +struct perf_stat_config; +struct perf_thread_map; +struct perf_tool; +struct record_opts; +struct target; + +union perf_event; + +typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine); + +int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); +int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cpu_map(struct perf_tool *tool, struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine); +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); +int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); +int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); + +int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); + +size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); + +int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, + struct target *target, struct perf_thread_map *threads, + perf_event__handler_t process, bool data_mmap, + unsigned int nr_threads_synthesize); +int machine__synthesize_threads(struct machine *machine, struct target *target, + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize); + +#ifdef HAVE_AUXTRACE_SUPPORT +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, + struct perf_session *session, perf_event__handler_t process); + +#else // HAVE_AUXTRACE_SUPPORT + +#include + +static inline int +perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, + struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + perf_event__handler_t process __maybe_unused) +{ + return -EINVAL; +} +#endif // HAVE_AUXTRACE_SUPPORT + +#ifdef HAVE_LIBBPF_SUPPORT +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, + struct machine *machine, struct record_opts *opts); +#else // HAVE_LIBBPF_SUPPORT +static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + +#endif // __PERF_SYNTHETIC_EVENTS_H diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index e0c3af34ac8d..3c5a632ee57c 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -4,13 +4,12 @@ #include -#include "event.h" - struct perf_tsc_conversion { u16 time_shift; u32 time_mult; u64 time_zero; }; + struct perf_event_mmap_page; int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, @@ -20,13 +19,4 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -struct perf_event_mmap_page; -struct perf_tool; -struct machine; - -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine); - -#endif +#endif // __PERF_TSC_H From 5cac8ea3e6e736664ee272f94d9099891e25f782 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 12:28:41 -0300 Subject: [PATCH 128/345] perf memswap: Adopt 'struct u64_swap' from evsel.h As it is not used in evsel.h and is a memory swap struct, so fits better in memswap.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-wvzxu7a5l3m868ywwphrnnqo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 5 ----- tools/perf/util/memswap.h | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 68321d10eb2d..74df298acb31 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -179,11 +179,6 @@ struct evsel { } side_band; }; -union u64_swap { - u64 val64; - u32 val32[2]; -}; - struct perf_missing_features { bool sample_id_all; bool exclude_guest; diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h index 1e29ff903ca9..2c38e8c2d548 100644 --- a/tools/perf/util/memswap.h +++ b/tools/perf/util/memswap.h @@ -2,6 +2,13 @@ #ifndef PERF_MEMSWAP_H_ #define PERF_MEMSWAP_H_ +#include + +union u64_swap { + u64 val64; + u32 val32[2]; +}; + void mem_bswap_64(void *src, int byte_size); void mem_bswap_32(void *src, int byte_size); From 055c67ed39887c5563e9540470a4617c1b772aec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Sep 2019 16:08:52 -0300 Subject: [PATCH 129/345] perf tools: Move event synthesizing routines to separate .c file For better grouping, in time we may end up making most of these static, i.e. generalizing the 'perf record' synthesizing code so that based on the target it can do the right thing and call the needed synthesizers. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-s9zxxhk40s95pjng9panet16@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 - tools/perf/util/Build | 1 + tools/perf/util/cs-etm.c | 1 + tools/perf/util/event.c | 1108 +--------------- tools/perf/util/evsel.c | 278 ---- tools/perf/util/header.c | 393 +----- tools/perf/util/header.h | 18 + tools/perf/util/machine.c | 25 - tools/perf/util/namespaces.c | 18 + tools/perf/util/namespaces.h | 2 + tools/perf/util/session.c | 71 -- tools/perf/util/stat.c | 43 - tools/perf/util/synthetic-events.c | 1884 ++++++++++++++++++++++++++++ 13 files changed, 1928 insertions(+), 1923 deletions(-) create mode 100644 tools/perf/util/synthetic-events.c diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 907d4d4677a3..4bd11c918e73 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1181,15 +1181,6 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); static void alarm_sig_handler(int sig); -int __weak -perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, - struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - static const struct perf_event_mmap_page * perf_evlist__pick_pc(struct evlist *evlist) { diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 0b4d8e0d474c..fd89d6a8cd65 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -86,6 +86,7 @@ perf-y += stat-display.o perf-y += record.o perf-y += srcline.o perf-y += srccode.o +perf-y += synthetic-events.o perf-y += data.o perf-y += tsc.o perf-y += cloexec.o diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f87b9c1c9f9a..6021974577d5 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -35,6 +35,7 @@ #include "thread.h" #include "thread-stack.h" #include +#include "util/synthetic-events.h" #define MAX_TIMESTAMP (~0ULL) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 043a08fc7398..fc1e5a991008 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -9,7 +8,6 @@ #include #include #include /* To get things like MAP_HUGETLB even on older libc headers */ -#include #include #include #include "cpumap.h" @@ -26,18 +24,16 @@ #include "time-utils.h" #include #include "map.h" +#include "util/namespaces.h" #include "symbol.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" #include "session.h" #include "bpf-event.h" -#include "synthetic-events.h" #include "tool.h" #include "../perf.h" -#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 - static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", @@ -78,18 +74,6 @@ static const char *perf_event__names[] = { [PERF_RECORD_COMPRESSED] = "COMPRESSED", }; -static const char *perf_ns__names[] = { - [NET_NS_INDEX] = "net", - [UTS_NS_INDEX] = "uts", - [IPC_NS_INDEX] = "ipc", - [PID_NS_INDEX] = "pid", - [USER_NS_INDEX] = "user", - [MNT_NS_INDEX] = "mnt", - [CGROUP_NS_INDEX] = "cgroup", -}; - -unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; - const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) @@ -99,775 +83,6 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } -static const char *perf_ns__name(unsigned int id) -{ - if (id >= ARRAY_SIZE(perf_ns__names)) - return "UNKNOWN"; - return perf_ns__names[id]; -} - -int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - perf_event__handler_t process) -{ - struct perf_sample synth_sample = { - .pid = -1, - .tid = -1, - .time = -1, - .stream_id = -1, - .cpu = -1, - .period = 1, - .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, - }; - - return process(tool, event, &synth_sample, machine); -}; - -/* - * Assumes that the first 4095 bytes of /proc/pid/stat contains - * the comm, tgid and ppid. - */ -static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, - pid_t *tgid, pid_t *ppid) -{ - char filename[PATH_MAX]; - char bf[4096]; - int fd; - size_t size = 0; - ssize_t n; - char *name, *tgids, *ppids; - - *tgid = -1; - *ppid = -1; - - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - pr_debug("couldn't open %s\n", filename); - return -1; - } - - n = read(fd, bf, sizeof(bf) - 1); - close(fd); - if (n <= 0) { - pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", - pid); - return -1; - } - bf[n] = '\0'; - - name = strstr(bf, "Name:"); - tgids = strstr(bf, "Tgid:"); - ppids = strstr(bf, "PPid:"); - - if (name) { - char *nl; - - name = skip_spaces(name + 5); /* strlen("Name:") */ - nl = strchr(name, '\n'); - if (nl) - *nl = '\0'; - - size = strlen(name); - if (size >= len) - size = len - 1; - memcpy(comm, name, size); - comm[size] = '\0'; - } else { - pr_debug("Name: string not found for pid %d\n", pid); - } - - if (tgids) { - tgids += 5; /* strlen("Tgid:") */ - *tgid = atoi(tgids); - } else { - pr_debug("Tgid: string not found for pid %d\n", pid); - } - - if (ppids) { - ppids += 5; /* strlen("PPid:") */ - *ppid = atoi(ppids); - } else { - pr_debug("PPid: string not found for pid %d\n", pid); - } - - return 0; -} - -static int perf_event__prepare_comm(union perf_event *event, pid_t pid, - struct machine *machine, - pid_t *tgid, pid_t *ppid) -{ - size_t size; - - *ppid = -1; - - memset(&event->comm, 0, sizeof(event->comm)); - - if (machine__is_host(machine)) { - if (perf_event__get_comm_ids(pid, event->comm.comm, - sizeof(event->comm.comm), - tgid, ppid) != 0) { - return -1; - } - } else { - *tgid = machine->pid; - } - - if (*tgid < 0) - return -1; - - event->comm.pid = *tgid; - event->comm.header.type = PERF_RECORD_COMM; - - size = strlen(event->comm.comm) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - memset(event->comm.comm + size, 0, machine->id_hdr_size); - event->comm.header.size = (sizeof(event->comm) - - (sizeof(event->comm.comm) - size) + - machine->id_hdr_size); - event->comm.tid = pid; - - return 0; -} - -pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine) -{ - pid_t tgid, ppid; - - if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) - return -1; - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return tgid; -} - -static void perf_event__get_ns_link_info(pid_t pid, const char *ns, - struct perf_ns_link_info *ns_link_info) -{ - struct stat64 st; - char proc_ns[128]; - - sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); - if (stat64(proc_ns, &st) == 0) { - ns_link_info->dev = st.st_dev; - ns_link_info->ino = st.st_ino; - } -} - -int perf_event__synthesize_namespaces(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine) -{ - u32 idx; - struct perf_ns_link_info *ns_link_info; - - if (!tool || !tool->namespace_events) - return 0; - - memset(&event->namespaces, 0, (sizeof(event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size)); - - event->namespaces.pid = tgid; - event->namespaces.tid = pid; - - event->namespaces.nr_namespaces = NR_NAMESPACES; - - ns_link_info = event->namespaces.link_info; - - for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) - perf_event__get_ns_link_info(pid, perf_ns__name(idx), - &ns_link_info[idx]); - - event->namespaces.header.type = PERF_RECORD_NAMESPACES; - - event->namespaces.header.size = (sizeof(event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return 0; -} - -static int perf_event__synthesize_fork(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, pid_t ppid, - perf_event__handler_t process, - struct machine *machine) -{ - memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - - /* - * for main thread set parent to ppid from status file. For other - * threads set parent pid to main thread. ie., assume main thread - * spawns all threads in a process - */ - if (tgid == pid) { - event->fork.ppid = ppid; - event->fork.ptid = ppid; - } else { - event->fork.ppid = tgid; - event->fork.ptid = tgid; - } - event->fork.pid = tgid; - event->fork.tid = pid; - event->fork.header.type = PERF_RECORD_FORK; - event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; - - event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) - return -1; - - return 0; -} - -int perf_event__synthesize_mmap_events(struct perf_tool *tool, - union perf_event *event, - pid_t pid, pid_t tgid, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data) -{ - char filename[PATH_MAX]; - FILE *fp; - unsigned long long t; - bool truncation = false; - unsigned long long timeout = proc_map_timeout * 1000000ULL; - int rc = 0; - const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); - int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0; - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", - machine->root_dir, pid, pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - /* - * We raced with a task exiting - just return: - */ - pr_debug("couldn't open %s\n", filename); - return -1; - } - - event->header.type = PERF_RECORD_MMAP2; - t = rdclock(); - - while (1) { - char bf[BUFSIZ]; - char prot[5]; - char execname[PATH_MAX]; - char anonstr[] = "//anon"; - unsigned int ino; - size_t size; - ssize_t n; - - if (fgets(bf, sizeof(bf), fp) == NULL) - break; - - if ((rdclock() - t) > timeout) { - pr_warning("Reading %s time out. " - "You may want to increase " - "the time limit by --proc-map-timeout\n", - filename); - truncation = true; - goto out; - } - - /* ensure null termination since stack will be reused. */ - strcpy(execname, ""); - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - /* - * Anon maps don't have the execname. - */ - if (n < 7) - continue; - - event->mmap2.ino = (u64)ino; - - /* - * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_USER; - else - event->header.misc = PERF_RECORD_MISC_GUEST_USER; - - /* map protection and flags bits */ - event->mmap2.prot = 0; - event->mmap2.flags = 0; - if (prot[0] == 'r') - event->mmap2.prot |= PROT_READ; - if (prot[1] == 'w') - event->mmap2.prot |= PROT_WRITE; - if (prot[2] == 'x') - event->mmap2.prot |= PROT_EXEC; - - if (prot[3] == 's') - event->mmap2.flags |= MAP_SHARED; - else - event->mmap2.flags |= MAP_PRIVATE; - - if (prot[2] != 'x') { - if (!mmap_data || prot[0] != 'r') - continue; - - event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; - } - -out: - if (truncation) - event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; - - if (!strcmp(execname, "")) - strcpy(execname, anonstr); - - if (hugetlbfs_mnt_len && - !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { - strcpy(execname, anonstr); - event->mmap2.flags |= MAP_HUGETLB; - } - - size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap2.len -= event->mmap.start; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.pid = tgid; - event->mmap2.tid = pid; - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - - if (truncation) - break; - } - - fclose(fp); - return rc; -} - -int perf_event__synthesize_modules(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - int rc = 0; - struct map *pos; - struct maps *maps = machine__kernel_maps(machine); - union perf_event *event = zalloc((sizeof(event->mmap) + - machine->id_hdr_size)); - if (event == NULL) { - pr_debug("Not enough memory synthesizing mmap event " - "for kernel modules\n"); - return -1; - } - - event->header.type = PERF_RECORD_MMAP; - - /* - * kernel uses 0 for user space maps, see kernel/perf_event.c - * __perf_event_mmap - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; - else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - for (pos = maps__first(maps); pos; pos = map__next(pos)) { - size_t size; - - if (!__map__is_kmodule(pos)) - continue; - - size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = pos->start; - event->mmap.len = pos->end - pos->start; - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, pos->dso->long_name, - pos->dso->long_name_len + 1); - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); - return rc; -} - -static int __event__synthesize_thread(union perf_event *comm_event, - union perf_event *mmap_event, - union perf_event *fork_event, - union perf_event *namespaces_event, - pid_t pid, int full, - perf_event__handler_t process, - struct perf_tool *tool, - struct machine *machine, - bool mmap_data) -{ - char filename[PATH_MAX]; - DIR *tasks; - struct dirent *dirent; - pid_t tgid, ppid; - int rc = 0; - - /* special case: only send one comm event using passed in pid */ - if (!full) { - tgid = perf_event__synthesize_comm(tool, comm_event, pid, - process, machine); - - if (tgid == -1) - return -1; - - if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, - tgid, process, machine) < 0) - return -1; - - /* - * send mmap only for thread group leader - * see thread__init_map_groups - */ - if (pid == tgid && - perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data)) - return -1; - - return 0; - } - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(filename, sizeof(filename), "%s/proc/%d/task", - machine->root_dir, pid); - - tasks = opendir(filename); - if (tasks == NULL) { - pr_debug("couldn't open %s\n", filename); - return 0; - } - - while ((dirent = readdir(tasks)) != NULL) { - char *end; - pid_t _pid; - - _pid = strtol(dirent->d_name, &end, 10); - if (*end) - continue; - - rc = -1; - if (perf_event__prepare_comm(comm_event, _pid, machine, - &tgid, &ppid) != 0) - break; - - if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - ppid, process, machine) < 0) - break; - - if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, - tgid, process, machine) < 0) - break; - - /* - * Send the prepared comm event - */ - if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) - break; - - rc = 0; - if (_pid == pid) { - /* process the parent's maps too */ - rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data); - if (rc) - break; - } - } - - closedir(tasks); - return rc; -} - -int perf_event__synthesize_thread_map(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data) -{ - union perf_event *comm_event, *mmap_event, *fork_event; - union perf_event *namespaces_event; - int err = -1, thread, j; - - comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); - if (comm_event == NULL) - goto out; - - mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); - if (mmap_event == NULL) - goto out_free_comm; - - fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); - if (fork_event == NULL) - goto out_free_mmap; - - namespaces_event = malloc(sizeof(namespaces_event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - if (namespaces_event == NULL) - goto out_free_fork; - - err = 0; - for (thread = 0; thread < threads->nr; ++thread) { - if (__event__synthesize_thread(comm_event, mmap_event, - fork_event, namespaces_event, - perf_thread_map__pid(threads, thread), 0, - process, tool, machine, - mmap_data)) { - err = -1; - break; - } - - /* - * comm.pid is set to thread group id by - * perf_event__synthesize_comm - */ - if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) { - bool need_leader = true; - - /* is thread group leader in thread_map? */ - for (j = 0; j < threads->nr; ++j) { - if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) { - need_leader = false; - break; - } - } - - /* if not, generate events for it */ - if (need_leader && - __event__synthesize_thread(comm_event, mmap_event, - fork_event, namespaces_event, - comm_event->comm.pid, 0, - process, tool, machine, - mmap_data)) { - err = -1; - break; - } - } - } - free(namespaces_event); -out_free_fork: - free(fork_event); -out_free_mmap: - free(mmap_event); -out_free_comm: - free(comm_event); -out: - return err; -} - -static int __perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - struct dirent **dirent, - int start, - int num) -{ - union perf_event *comm_event, *mmap_event, *fork_event; - union perf_event *namespaces_event; - int err = -1; - char *end; - pid_t pid; - int i; - - comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); - if (comm_event == NULL) - goto out; - - mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); - if (mmap_event == NULL) - goto out_free_comm; - - fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); - if (fork_event == NULL) - goto out_free_mmap; - - namespaces_event = malloc(sizeof(namespaces_event->namespaces) + - (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + - machine->id_hdr_size); - if (namespaces_event == NULL) - goto out_free_fork; - - for (i = start; i < start + num; i++) { - if (!isdigit(dirent[i]->d_name[0])) - continue; - - pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); - /* only interested in proper numerical dirents */ - if (*end) - continue; - /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. - */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, - namespaces_event, pid, 1, process, - tool, machine, mmap_data); - } - err = 0; - - free(namespaces_event); -out_free_fork: - free(fork_event); -out_free_mmap: - free(mmap_event); -out_free_comm: - free(comm_event); -out: - return err; -} - -struct synthesize_threads_arg { - struct perf_tool *tool; - perf_event__handler_t process; - struct machine *machine; - bool mmap_data; - struct dirent **dirent; - int num; - int start; -}; - -static void *synthesize_threads_worker(void *arg) -{ - struct synthesize_threads_arg *args = arg; - - __perf_event__synthesize_threads(args->tool, args->process, - args->machine, args->mmap_data, - args->dirent, - args->start, args->num); - return NULL; -} - -int perf_event__synthesize_threads(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine, - bool mmap_data, - unsigned int nr_threads_synthesize) -{ - struct synthesize_threads_arg *args = NULL; - pthread_t *synthesize_threads = NULL; - char proc_path[PATH_MAX]; - struct dirent **dirent; - int num_per_thread; - int m, n, i, j; - int thread_nr; - int base = 0; - int err = -1; - - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, 0, alphasort); - if (n < 0) - return err; - - if (nr_threads_synthesize == UINT_MAX) - thread_nr = sysconf(_SC_NPROCESSORS_ONLN); - else - thread_nr = nr_threads_synthesize; - - if (thread_nr <= 1) { - err = __perf_event__synthesize_threads(tool, process, - machine, mmap_data, - dirent, base, n); - goto free_dirent; - } - if (thread_nr > n) - thread_nr = n; - - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); - if (synthesize_threads == NULL) - goto free_dirent; - - args = calloc(sizeof(*args), thread_nr); - if (args == NULL) - goto free_threads; - - num_per_thread = n / thread_nr; - m = n % thread_nr; - for (i = 0; i < thread_nr; i++) { - args[i].tool = tool; - args[i].process = process; - args[i].machine = machine; - args[i].mmap_data = mmap_data; - args[i].dirent = dirent; - } - for (i = 0; i < m; i++) { - args[i].num = num_per_thread + 1; - args[i].start = i * args[i].num; - } - if (i != 0) - base = args[i-1].start + args[i-1].num; - for (j = i; j < thread_nr; j++) { - args[j].num = num_per_thread; - args[j].start = base + (j - i) * args[i].num; - } - - for (i = 0; i < thread_nr; i++) { - if (pthread_create(&synthesize_threads[i], NULL, - synthesize_threads_worker, &args[i])) - goto out_join; - } - err = 0; -out_join: - for (i = 0; i < thread_nr; i++) - pthread_join(synthesize_threads[i], NULL); - free(args); -free_threads: - free(synthesize_threads); -free_dirent: - for (i = 0; i < n; i++) - zfree(&dirent[i]); - free(dirent); - - return err; -} - struct process_symbol_args { const char *name; u64 start; @@ -902,327 +117,6 @@ int kallsyms__get_function_start(const char *kallsyms_filename, return 0; } -int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - -static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - size_t size; - struct map *map = machine__kernel_map(machine); - struct kmap *kmap; - int err; - union perf_event *event; - - if (map == NULL) - return -1; - - kmap = map__kmap(map); - if (!kmap->ref_reloc_sym) - return -1; - - /* - * We should get this from /sys/kernel/sections/.text, but till that is - * available use this, and after it is use this as a fallback for older - * kernels. - */ - event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); - if (event == NULL) { - pr_debug("Not enough memory synthesizing mmap event " - "for kernel modules\n"); - return -1; - } - - if (machine__is_host(machine)) { - /* - * kernel uses PERF_RECORD_MISC_USER for user space maps, - * see kernel/perf_event.c __perf_event_mmap - */ - event->header.misc = PERF_RECORD_MISC_KERNEL; - } else { - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - } - - size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = kmap->ref_reloc_sym->addr; - event->mmap.start = map->start; - event->mmap.len = map->end - event->mmap.start; - event->mmap.pid = machine->pid; - - err = perf_tool__process_synth_event(tool, event, machine, process); - free(event); - - return err; -} - -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - int err; - - err = __perf_event__synthesize_kernel_mmap(tool, process, machine); - if (err < 0) - return err; - - return perf_event__synthesize_extra_kmaps(tool, process, machine); -} - -int perf_event__synthesize_thread_map2(struct perf_tool *tool, - struct perf_thread_map *threads, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event *event; - int i, err, size; - - size = sizeof(event->thread_map); - size += threads->nr * sizeof(event->thread_map.entries[0]); - - event = zalloc(size); - if (!event) - return -ENOMEM; - - event->header.type = PERF_RECORD_THREAD_MAP; - event->header.size = size; - event->thread_map.nr = threads->nr; - - for (i = 0; i < threads->nr; i++) { - struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i]; - char *comm = perf_thread_map__comm(threads, i); - - if (!comm) - comm = (char *) ""; - - entry->pid = perf_thread_map__pid(threads, i); - strncpy((char *) &entry->comm, comm, sizeof(entry->comm)); - } - - err = process(tool, event, NULL, machine); - - free(event); - return err; -} - -static void synthesize_cpus(struct cpu_map_entries *cpus, - struct perf_cpu_map *map) -{ - int i; - - cpus->nr = map->nr; - - for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; -} - -static void synthesize_mask(struct perf_record_record_cpu_map *mask, - struct perf_cpu_map *map, int max) -{ - int i; - - mask->nr = BITS_TO_LONGS(max); - mask->long_size = sizeof(long); - - for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); -} - -static size_t cpus_size(struct perf_cpu_map *map) -{ - return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); -} - -static size_t mask_size(struct perf_cpu_map *map, int *max) -{ - int i; - - *max = 0; - - for (i = 0; i < map->nr; i++) { - /* bit possition of the cpu is + 1 */ - int bit = map->map[i] + 1; - - if (bit > *max) - *max = bit; - } - - return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long); -} - -void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max) -{ - size_t size_cpus, size_mask; - bool is_dummy = perf_cpu_map__empty(map); - - /* - * Both array and mask data have variable size based - * on the number of cpus and their actual values. - * The size of the 'struct perf_record_cpu_map_data' is: - * - * array = size of 'struct cpu_map_entries' + - * number of cpus * sizeof(u64) - * - * mask = size of 'struct perf_record_record_cpu_map' + - * maximum cpu bit converted to size of longs - * - * and finaly + the size of 'struct perf_record_cpu_map_data'. - */ - size_cpus = cpus_size(map); - size_mask = mask_size(map, max); - - if (is_dummy || (size_cpus < size_mask)) { - *size += size_cpus; - *type = PERF_CPU_MAP__CPUS; - } else { - *size += size_mask; - *type = PERF_CPU_MAP__MASK; - } - - *size += sizeof(struct perf_record_cpu_map_data); - *size = PERF_ALIGN(*size, sizeof(u64)); - return zalloc(*size); -} - -void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, - u16 type, int max) -{ - data->type = type; - - switch (type) { - case PERF_CPU_MAP__CPUS: - synthesize_cpus((struct cpu_map_entries *) data->data, map); - break; - case PERF_CPU_MAP__MASK: - synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); - default: - break; - }; -} - -static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) -{ - size_t size = sizeof(struct perf_record_cpu_map); - struct perf_record_cpu_map *event; - int max; - u16 type; - - event = cpu_map_data__alloc(map, &size, &type, &max); - if (!event) - return NULL; - - event->header.type = PERF_RECORD_CPU_MAP; - event->header.size = size; - event->data.type = type; - - cpu_map_data__synthesize(&event->data, map, type, max); - return event; -} - -int perf_event__synthesize_cpu_map(struct perf_tool *tool, - struct perf_cpu_map *map, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_cpu_map *event; - int err; - - event = cpu_map_event__new(map); - if (!event) - return -ENOMEM; - - err = process(tool, (union perf_event *) event, NULL, machine); - - free(event); - return err; -} - -int perf_event__synthesize_stat_config(struct perf_tool *tool, - struct perf_stat_config *config, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat_config *event; - int size, i = 0, err; - - size = sizeof(*event); - size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0])); - - event = zalloc(size); - if (!event) - return -ENOMEM; - - event->header.type = PERF_RECORD_STAT_CONFIG; - event->header.size = size; - event->nr = PERF_STAT_CONFIG_TERM__MAX; - -#define ADD(__term, __val) \ - event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \ - event->data[i].val = __val; \ - i++; - - ADD(AGGR_MODE, config->aggr_mode) - ADD(INTERVAL, config->interval) - ADD(SCALE, config->scale) - - WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, - "stat config terms unbalanced\n"); -#undef ADD - - err = process(tool, (union perf_event *) event, NULL, machine); - - free(event); - return err; -} - -int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, - struct perf_counts_values *count, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat event; - - event.header.type = PERF_RECORD_STAT; - event.header.size = sizeof(event); - event.header.misc = 0; - - event.id = id; - event.cpu = cpu; - event.thread = thread; - event.val = count->val; - event.ena = count->ena; - event.run = count->run; - - return process(tool, (union perf_event *) &event, NULL, machine); -} - -int perf_event__synthesize_stat_round(struct perf_tool *tool, - u64 evtime, u64 type, - perf_event__handler_t process, - struct machine *machine) -{ - struct perf_record_stat_round event; - - event.header.type = PERF_RECORD_STAT_ROUND; - event.header.size = sizeof(event); - event.header.misc = 0; - - event.time = evtime; - event.type = type; - - return process(tool, (union perf_event *) &event, NULL, machine); -} - void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5af025c80ec5..8e335d168503 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -40,7 +40,6 @@ #include "trace-event.h" #include "stat.h" #include "string2.h" -#include "util/synthetic-events.h" #include "memswap.h" #include "util.h" #include "../perf-sys.h" @@ -2421,283 +2420,6 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel, return 0; } -size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 read_format) -{ - size_t sz, result = sizeof(struct perf_record_sample); - - if (type & PERF_SAMPLE_IDENTIFIER) - result += sizeof(u64); - - if (type & PERF_SAMPLE_IP) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TIME) - result += sizeof(u64); - - if (type & PERF_SAMPLE_ADDR) - result += sizeof(u64); - - if (type & PERF_SAMPLE_ID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_STREAM_ID) - result += sizeof(u64); - - if (type & PERF_SAMPLE_CPU) - result += sizeof(u64); - - if (type & PERF_SAMPLE_PERIOD) - result += sizeof(u64); - - if (type & PERF_SAMPLE_READ) { - result += sizeof(u64); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - result += sizeof(u64); - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - result += sizeof(u64); - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_CALLCHAIN) { - sz = (sample->callchain->nr + 1) * sizeof(u64); - result += sz; - } - - if (type & PERF_SAMPLE_RAW) { - result += sizeof(u32); - result += sample->raw_size; - } - - if (type & PERF_SAMPLE_BRANCH_STACK) { - sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); - result += sz; - } - - if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - result += sizeof(u64); - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_STACK_USER) { - sz = sample->user_stack.size; - result += sizeof(u64); - if (sz) { - result += sz; - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_WEIGHT) - result += sizeof(u64); - - if (type & PERF_SAMPLE_DATA_SRC) - result += sizeof(u64); - - if (type & PERF_SAMPLE_TRANSACTION) - result += sizeof(u64); - - if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - result += sizeof(u64); - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - result += sz; - } else { - result += sizeof(u64); - } - } - - if (type & PERF_SAMPLE_PHYS_ADDR) - result += sizeof(u64); - - return result; -} - -int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 read_format, - const struct perf_sample *sample) -{ - __u64 *array; - size_t sz; - /* - * used for cross-endian analysis. See git commit 65014ab3 - * for why this goofiness is needed. - */ - union u64_swap u; - - array = event->sample.array; - - if (type & PERF_SAMPLE_IDENTIFIER) { - *array = sample->id; - array++; - } - - if (type & PERF_SAMPLE_IP) { - *array = sample->ip; - array++; - } - - if (type & PERF_SAMPLE_TID) { - u.val32[0] = sample->pid; - u.val32[1] = sample->tid; - *array = u.val64; - array++; - } - - if (type & PERF_SAMPLE_TIME) { - *array = sample->time; - array++; - } - - if (type & PERF_SAMPLE_ADDR) { - *array = sample->addr; - array++; - } - - if (type & PERF_SAMPLE_ID) { - *array = sample->id; - array++; - } - - if (type & PERF_SAMPLE_STREAM_ID) { - *array = sample->stream_id; - array++; - } - - if (type & PERF_SAMPLE_CPU) { - u.val32[0] = sample->cpu; - u.val32[1] = 0; - *array = u.val64; - array++; - } - - if (type & PERF_SAMPLE_PERIOD) { - *array = sample->period; - array++; - } - - if (type & PERF_SAMPLE_READ) { - if (read_format & PERF_FORMAT_GROUP) - *array = sample->read.group.nr; - else - *array = sample->read.one.value; - array++; - - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - *array = sample->read.time_enabled; - array++; - } - - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - *array = sample->read.time_running; - array++; - } - - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_GROUP) { - sz = sample->read.group.nr * - sizeof(struct sample_read_value); - memcpy(array, sample->read.group.values, sz); - array = (void *)array + sz; - } else { - *array = sample->read.one.id; - array++; - } - } - - if (type & PERF_SAMPLE_CALLCHAIN) { - sz = (sample->callchain->nr + 1) * sizeof(u64); - memcpy(array, sample->callchain, sz); - array = (void *)array + sz; - } - - if (type & PERF_SAMPLE_RAW) { - u.val32[0] = sample->raw_size; - *array = u.val64; - array = (void *)array + sizeof(u32); - - memcpy(array, sample->raw_data, sample->raw_size); - array = (void *)array + sample->raw_size; - } - - if (type & PERF_SAMPLE_BRANCH_STACK) { - sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); - memcpy(array, sample->branch_stack, sz); - array = (void *)array + sz; - } - - if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - *array++ = sample->user_regs.abi; - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - memcpy(array, sample->user_regs.regs, sz); - array = (void *)array + sz; - } else { - *array++ = 0; - } - } - - if (type & PERF_SAMPLE_STACK_USER) { - sz = sample->user_stack.size; - *array++ = sz; - if (sz) { - memcpy(array, sample->user_stack.data, sz); - array = (void *)array + sz; - *array++ = sz; - } - } - - if (type & PERF_SAMPLE_WEIGHT) { - *array = sample->weight; - array++; - } - - if (type & PERF_SAMPLE_DATA_SRC) { - *array = sample->data_src; - array++; - } - - if (type & PERF_SAMPLE_TRANSACTION) { - *array = sample->transaction; - array++; - } - - if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - *array++ = sample->intr_regs.abi; - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - memcpy(array, sample->intr_regs.regs, sz); - array = (void *)array + sz; - } else { - *array++ = 0; - } - } - - if (type & PERF_SAMPLE_PHYS_ADDR) { - *array = sample->phys_addr; - array++; - } - - return 0; -} - struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a4a8342eba98..5722ff717777 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -45,7 +45,6 @@ #include "util.h" // page_size, perf_exe() #include "cputopo.h" #include "bpf-event.h" -#include "util/synthetic-events.h" #include #include @@ -72,15 +71,6 @@ struct perf_file_attr { struct perf_file_section ids; }; -struct feat_fd { - struct perf_header *ph; - int fd; - void *buf; /* Either buf != NULL or fd >= 0 */ - ssize_t offset; - size_t size; - struct evsel *events; -}; - void perf_header__set_feat(struct perf_header *header, int feat) { set_bit(feat, header->adds_features); @@ -2825,15 +2815,6 @@ static int process_compressed(struct feat_fd *ff, return 0; } -struct feature_ops { - int (*write)(struct feat_fd *ff, struct evlist *evlist); - void (*print)(struct feat_fd *ff, FILE *fp); - int (*process)(struct feat_fd *ff, void *data); - const char *name; - bool full_only; - bool synthesize; -}; - #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2860,8 +2841,10 @@ struct feature_ops { #define process_branch_stack NULL #define process_stat NULL +// Only used in util/synthetic-events.c +const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; -static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { +const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(TRACING_DATA, tracing_data, false), FEAT_OPN(BUILD_ID, build_id, false), FEAT_OPR(HOSTNAME, hostname, false), @@ -3658,105 +3641,6 @@ out_delete_evlist: return -ENOMEM; } -int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u32 ids, u64 *id, - perf_event__handler_t process) -{ - union perf_event *ev; - size_t size; - int err; - - size = sizeof(struct perf_event_attr); - size = PERF_ALIGN(size, sizeof(u64)); - size += sizeof(struct perf_event_header); - size += ids * sizeof(u64); - - ev = zalloc(size); - - if (ev == NULL) - return -ENOMEM; - - ev->attr.attr = *attr; - memcpy(ev->attr.id, id, ids * sizeof(u64)); - - ev->attr.header.type = PERF_RECORD_HEADER_ATTR; - ev->attr.header.size = (u16)size; - - if (ev->attr.header.size == size) - err = process(tool, ev, NULL, NULL); - else - err = -E2BIG; - - free(ev); - - return err; -} - -int perf_event__synthesize_features(struct perf_tool *tool, - struct perf_session *session, - struct evlist *evlist, - perf_event__handler_t process) -{ - struct perf_header *header = &session->header; - struct feat_fd ff; - struct perf_record_header_feature *fe; - size_t sz, sz_hdr; - int feat, ret; - - sz_hdr = sizeof(fe->header); - sz = sizeof(union perf_event); - /* get a nice alignment */ - sz = PERF_ALIGN(sz, page_size); - - memset(&ff, 0, sizeof(ff)); - - ff.buf = malloc(sz); - if (!ff.buf) - return -ENOMEM; - - ff.size = sz - sz_hdr; - ff.ph = &session->header; - - for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { - if (!feat_ops[feat].synthesize) { - pr_debug("No record header feature for header :%d\n", feat); - continue; - } - - ff.offset = sizeof(*fe); - - ret = feat_ops[feat].write(&ff, evlist); - if (ret || ff.offset <= (ssize_t)sizeof(*fe)) { - pr_debug("Error writing feature\n"); - continue; - } - /* ff.buf may have changed due to realloc in do_write() */ - fe = ff.buf; - memset(fe, 0, sizeof(*fe)); - - fe->feat_id = feat; - fe->header.type = PERF_RECORD_HEADER_FEATURE; - fe->header.size = ff.offset; - - ret = process(tool, ff.buf, NULL, NULL); - if (ret) { - free(ff.buf); - return ret; - } - } - - /* Send HEADER_LAST_FEATURE mark. */ - fe = ff.buf; - fe->feat_id = HEADER_LAST_FEATURE; - fe->header.type = PERF_RECORD_HEADER_FEATURE; - fe->header.size = sizeof(*fe); - - ret = process(tool, ff.buf, NULL, NULL); - - free(ff.buf); - return ret; -} - int perf_event__process_feature(struct perf_session *session, union perf_event *event) { @@ -3799,113 +3683,6 @@ int perf_event__process_feature(struct perf_session *session, return 0; } -static struct perf_record_event_update * -event_update_event__new(size_t size, u64 type, u64 id) -{ - struct perf_record_event_update *ev; - - size += sizeof(*ev); - size = PERF_ALIGN(size, sizeof(u64)); - - ev = zalloc(size); - if (ev) { - ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; - ev->type = type; - ev->id = id; - } - return ev; -} - -int -perf_event__synthesize_event_update_unit(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - size_t size = strlen(evsel->unit); - int err; - - ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - strlcpy(ev->data, evsel->unit, size + 1); - err = process(tool, (union perf_event *)ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_scale(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - struct perf_record_event_update_scale *ev_data; - int err; - - ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - ev_data = (struct perf_record_event_update_scale *)ev->data; - ev_data->scale = evsel->scale; - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_name(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - struct perf_record_event_update *ev; - size_t len = strlen(evsel->name); - int err; - - ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]); - if (ev == NULL) - return -ENOMEM; - - strlcpy(ev->data, evsel->name, len + 1); - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - -int -perf_event__synthesize_event_update_cpus(struct perf_tool *tool, - struct evsel *evsel, - perf_event__handler_t process) -{ - size_t size = sizeof(struct perf_record_event_update); - struct perf_record_event_update *ev; - int max, err; - u16 type; - - if (!evsel->core.own_cpus) - return 0; - - ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); - if (!ev) - return -ENOMEM; - - ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; - ev->type = PERF_EVENT_UPDATE__CPUS; - ev->id = evsel->id[0]; - - cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, - evsel->core.own_cpus, - type, max); - - err = process(tool, (union perf_event*) ev, NULL, NULL); - free(ev); - return err; -} - size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) { struct perf_record_event_update *ev = &event->event_update; @@ -3945,93 +3722,6 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) return ret; } -int perf_event__synthesize_attrs(struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process) -{ - struct evsel *evsel; - int err = 0; - - evlist__for_each_entry(evlist, evsel) { - err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids, - evsel->id, process); - if (err) { - pr_debug("failed to create perf header attribute\n"); - return err; - } - } - - return err; -} - -static bool has_unit(struct evsel *counter) -{ - return counter->unit && *counter->unit; -} - -static bool has_scale(struct evsel *counter) -{ - return counter->scale != 1; -} - -int perf_event__synthesize_extra_attr(struct perf_tool *tool, - struct evlist *evsel_list, - perf_event__handler_t process, - bool is_pipe) -{ - struct evsel *counter; - int err; - - /* - * Synthesize other events stuff not carried within - * attr event - unit, scale, name - */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->supported) - continue; - - /* - * Synthesize unit and scale only if it's defined. - */ - if (has_unit(counter)) { - err = perf_event__synthesize_event_update_unit(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel unit.\n"); - return err; - } - } - - if (has_scale(counter)) { - err = perf_event__synthesize_event_update_scale(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel counter.\n"); - return err; - } - } - - if (counter->core.own_cpus) { - err = perf_event__synthesize_event_update_cpus(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel cpus.\n"); - return err; - } - } - - /* - * Name is needed only for pipe output, - * perf.data carries event names. - */ - if (is_pipe) { - err = perf_event__synthesize_event_update_name(tool, counter, process); - if (err < 0) { - pr_err("Couldn't synthesize evsel name.\n"); - return err; - } - } - } - return 0; -} - int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) @@ -4116,55 +3806,6 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, return 0; } -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, - struct evlist *evlist, - perf_event__handler_t process) -{ - union perf_event ev; - struct tracing_data *tdata; - ssize_t size = 0, aligned_size = 0, padding; - struct feat_fd ff; - int err __maybe_unused = 0; - - /* - * We are going to store the size of the data followed - * by the data contents. Since the fd descriptor is a pipe, - * we cannot seek back to store the size of the data once - * we know it. Instead we: - * - * - write the tracing data to the temp file - * - get/write the data size to pipe - * - write the tracing data from the temp file - * to the pipe - */ - tdata = tracing_data_get(&evlist->core.entries, fd, true); - if (!tdata) - return -1; - - memset(&ev, 0, sizeof(ev)); - - ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; - size = tdata->size; - aligned_size = PERF_ALIGN(size, sizeof(u64)); - padding = aligned_size - size; - ev.tracing_data.header.size = sizeof(ev.tracing_data); - ev.tracing_data.size = aligned_size; - - process(tool, &ev, NULL, NULL); - - /* - * The put function will copy all the tracing data - * stored in temp file to the pipe. - */ - tracing_data_put(tdata); - - ff = (struct feat_fd){ .fd = fd }; - if (write_padded(&ff, NULL, 0, padding)) - return -1; - - return aligned_size; -} - int perf_event__process_tracing_data(struct perf_session *session, union perf_event *event) { @@ -4204,34 +3845,6 @@ int perf_event__process_tracing_data(struct perf_session *session, return size_read + padding; } -int perf_event__synthesize_build_id(struct perf_tool *tool, - struct dso *pos, u16 misc, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event ev; - size_t len; - int err = 0; - - if (!pos->hit) - return err; - - memset(&ev, 0, sizeof(ev)); - - len = pos->long_name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); - ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; - ev.build_id.header.misc = misc; - ev.build_id.pid = machine->pid; - ev.build_id.header.size = sizeof(ev.build_id) + len; - memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); - - err = process(tool, &ev, NULL, machine); - - return err; -} - int perf_event__process_build_id(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 999dac41871e..ca53a929e9fd 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -92,6 +92,24 @@ struct perf_header { struct perf_env env; }; +struct feat_fd { + struct perf_header *ph; + int fd; + void *buf; /* Either buf != NULL or fd >= 0 */ + ssize_t offset; + size_t size; + struct evsel *events; +}; + +struct perf_header_feature_ops { + int (*write)(struct feat_fd *ff, struct evlist *evlist); + void (*print)(struct feat_fd *ff, FILE *fp); + int (*process)(struct feat_fd *ff, void *data); + const char *name; + bool full_only; + bool synthesize; +}; + struct evlist; struct perf_session; struct perf_tool; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 132de5cfb9b9..0535338f2d7a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,7 +20,6 @@ #include "symbol.h" #include "sort.h" #include "strlist.h" -#include "util/synthetic-events.h" #include "target.h" #include "thread.h" #include "util.h" @@ -2610,30 +2609,6 @@ int machines__for_each_thread(struct machines *machines, return rc; } -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, - struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize) -{ - if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); - else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, - machine, data_mmap, - nr_threads_synthesize); - /* command specified */ - return 0; -} - -int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) -{ - return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); -} - pid_t machine__get_current_tid(struct machine *machine, int cpu) { int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 99be15dd2b6b..285d6f30d912 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -17,8 +17,26 @@ #include #include #include +#include #include +static const char *perf_ns__names[] = { + [NET_NS_INDEX] = "net", + [UTS_NS_INDEX] = "uts", + [IPC_NS_INDEX] = "ipc", + [PID_NS_INDEX] = "pid", + [USER_NS_INDEX] = "user", + [MNT_NS_INDEX] = "mnt", + [CGROUP_NS_INDEX] = "cgroup", +}; + +const char *perf_ns__name(unsigned int id) +{ + if (id >= ARRAY_SIZE(perf_ns__names)) + return "UNKNOWN"; + return perf_ns__names[id]; +} + struct namespaces *namespaces__new(struct perf_record_namespaces *event) { struct namespaces *namespaces; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 40edef56cb52..4b33f684eddd 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -66,4 +66,6 @@ static inline void __nsinfo__zput(struct nsinfo **nsip) #define nsinfo__zput(nsi) __nsinfo__zput(&nsi) +const char *perf_ns__name(unsigned int id); + #endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6267613b551d..58b5bc34ba12 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,7 +29,6 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" -#include "util/synthetic-events.h" #include "util.h" #include "ui/progress.h" #include "../perf.h" @@ -2413,73 +2412,3 @@ int perf_event__process_id_index(struct perf_session *session, } return 0; } - -int perf_event__synthesize_id_index(struct perf_tool *tool, - perf_event__handler_t process, - struct evlist *evlist, - struct machine *machine) -{ - union perf_event *ev; - struct evsel *evsel; - size_t nr = 0, i = 0, sz, max_nr, n; - int err; - - pr_debug2("Synthesizing id index\n"); - - max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / - sizeof(struct id_index_entry); - - evlist__for_each_entry(evlist, evsel) - nr += evsel->ids; - - n = nr > max_nr ? max_nr : nr; - sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); - ev = zalloc(sz); - if (!ev) - return -ENOMEM; - - ev->id_index.header.type = PERF_RECORD_ID_INDEX; - ev->id_index.header.size = sz; - ev->id_index.nr = n; - - evlist__for_each_entry(evlist, evsel) { - u32 j; - - for (j = 0; j < evsel->ids; j++) { - struct id_index_entry *e; - struct perf_sample_id *sid; - - if (i >= n) { - err = process(tool, ev, NULL, machine); - if (err) - goto out_err; - nr -= n; - i = 0; - } - - e = &ev->id_index.entries[i++]; - - e->id = evsel->id[j]; - - sid = perf_evlist__id2sid(evlist, e->id); - if (!sid) { - free(ev); - return -ENOENT; - } - - e->idx = sid->idx; - e->cpu = sid->cpu; - e->tid = sid->tid; - } - } - - sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry); - ev->id_index.header.size = sz; - ev->id_index.nr = nr; - - err = process(tool, ev, NULL, machine); -out_err: - free(ev); - - return err; -} diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 46c8a5027e12..06571209cb0b 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -12,7 +12,6 @@ #include "target.h" #include "evlist.h" #include "evsel.h" -#include "util/synthetic-events.h" #include "thread_map.h" #include @@ -495,45 +494,3 @@ int create_perf_stat_counter(struct evsel *evsel, return perf_evsel__open_per_thread(evsel, evsel->core.threads); } - -int perf_event__synthesize_stat_events(struct perf_stat_config *config, - struct perf_tool *tool, - struct evlist *evlist, - perf_event__handler_t process, - bool attrs) -{ - int err; - - if (attrs) { - err = perf_event__synthesize_attrs(tool, evlist, process); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - return err; - } - } - - err = perf_event__synthesize_extra_attr(tool, evlist, process, - attrs); - - err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, - process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_stat_config(tool, config, process, NULL); - if (err < 0) { - pr_err("Couldn't synthesize config.\n"); - return err; - } - - return 0; -} diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c new file mode 100644 index 000000000000..8322028a9a97 --- /dev/null +++ b/tools/perf/util/synthetic-events.c @@ -0,0 +1,1884 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/dso.h" +#include "util/event.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/map.h" +#include "util/map_symbol.h" +#include "util/branch.h" +#include "util/memswap.h" +#include "util/namespaces.h" +#include "util/session.h" +#include "util/stat.h" +#include "util/symbol.h" +#include "util/synthetic-events.h" +#include "util/target.h" +#include "util/time-utils.h" +#include "util/util.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* To get things like MAP_HUGETLB even on older libc headers */ +#include +#include +#include +#include +#include + +#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 + +unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; + +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) +{ + struct perf_sample synth_sample = { + .pid = -1, + .tid = -1, + .time = -1, + .stream_id = -1, + .cpu = -1, + .period = 1, + .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK, + }; + + return process(tool, event, &synth_sample, machine); +}; + +/* + * Assumes that the first 4095 bytes of /proc/pid/stat contains + * the comm, tgid and ppid. + */ +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid) +{ + char filename[PATH_MAX]; + char bf[4096]; + int fd; + size_t size = 0; + ssize_t n; + char *name, *tgids, *ppids; + + *tgid = -1; + *ppid = -1; + + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + pr_debug("couldn't open %s\n", filename); + return -1; + } + + n = read(fd, bf, sizeof(bf) - 1); + close(fd); + if (n <= 0) { + pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", + pid); + return -1; + } + bf[n] = '\0'; + + name = strstr(bf, "Name:"); + tgids = strstr(bf, "Tgid:"); + ppids = strstr(bf, "PPid:"); + + if (name) { + char *nl; + + name = skip_spaces(name + 5); /* strlen("Name:") */ + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + size = strlen(name); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else { + pr_debug("Name: string not found for pid %d\n", pid); + } + + if (tgids) { + tgids += 5; /* strlen("Tgid:") */ + *tgid = atoi(tgids); + } else { + pr_debug("Tgid: string not found for pid %d\n", pid); + } + + if (ppids) { + ppids += 5; /* strlen("PPid:") */ + *ppid = atoi(ppids); + } else { + pr_debug("PPid: string not found for pid %d\n", pid); + } + + return 0; +} + +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, + struct machine *machine, + pid_t *tgid, pid_t *ppid) +{ + size_t size; + + *ppid = -1; + + memset(&event->comm, 0, sizeof(event->comm)); + + if (machine__is_host(machine)) { + if (perf_event__get_comm_ids(pid, event->comm.comm, + sizeof(event->comm.comm), + tgid, ppid) != 0) { + return -1; + } + } else { + *tgid = machine->pid; + } + + if (*tgid < 0) + return -1; + + event->comm.pid = *tgid; + event->comm.header.type = PERF_RECORD_COMM; + + size = strlen(event->comm.comm) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + memset(event->comm.comm + size, 0, machine->id_hdr_size); + event->comm.header.size = (sizeof(event->comm) - + (sizeof(event->comm.comm) - size) + + machine->id_hdr_size); + event->comm.tid = pid; + + return 0; +} + +pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid, ppid; + + if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + return -1; + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return tgid; +} + +static void perf_event__get_ns_link_info(pid_t pid, const char *ns, + struct perf_ns_link_info *ns_link_info) +{ + struct stat64 st; + char proc_ns[128]; + + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); + if (stat64(proc_ns, &st) == 0) { + ns_link_info->dev = st.st_dev; + ns_link_info->ino = st.st_ino; + } +} + +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine) +{ + u32 idx; + struct perf_ns_link_info *ns_link_info; + + if (!tool || !tool->namespace_events) + return 0; + + memset(&event->namespaces, 0, (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size)); + + event->namespaces.pid = tgid; + event->namespaces.tid = pid; + + event->namespaces.nr_namespaces = NR_NAMESPACES; + + ns_link_info = event->namespaces.link_info; + + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) + perf_event__get_ns_link_info(pid, perf_ns__name(idx), + &ns_link_info[idx]); + + event->namespaces.header.type = PERF_RECORD_NAMESPACES; + + event->namespaces.header.size = (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + +static int perf_event__synthesize_fork(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, pid_t ppid, + perf_event__handler_t process, + struct machine *machine) +{ + memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); + + /* + * for main thread set parent to ppid from status file. For other + * threads set parent pid to main thread. ie., assume main thread + * spawns all threads in a process + */ + if (tgid == pid) { + event->fork.ppid = ppid; + event->fork.ptid = ppid; + } else { + event->fork.ppid = tgid; + event->fork.ptid = tgid; + } + event->fork.pid = tgid; + event->fork.tid = pid; + event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; + + event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + +int perf_event__synthesize_mmap_events(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data) +{ + char filename[PATH_MAX]; + FILE *fp; + unsigned long long t; + bool truncation = false; + unsigned long long timeout = proc_map_timeout * 1000000ULL; + int rc = 0; + const char *hugetlbfs_mnt = hugetlbfs__mountpoint(); + int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0; + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return -1; + } + + event->header.type = PERF_RECORD_MMAP2; + t = rdclock(); + + while (1) { + char bf[BUFSIZ]; + char prot[5]; + char execname[PATH_MAX]; + char anonstr[] = "//anon"; + unsigned int ino; + size_t size; + ssize_t n; + + if (fgets(bf, sizeof(bf), fp) == NULL) + break; + + if ((rdclock() - t) > timeout) { + pr_warning("Reading %s time out. " + "You may want to increase " + "the time limit by --proc-map-timeout\n", + filename); + truncation = true; + goto out; + } + + /* ensure null termination since stack will be reused. */ + strcpy(execname, ""); + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + + /* + * Anon maps don't have the execname. + */ + if (n < 7) + continue; + + event->mmap2.ino = (u64)ino; + + /* + * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_USER; + else + event->header.misc = PERF_RECORD_MISC_GUEST_USER; + + /* map protection and flags bits */ + event->mmap2.prot = 0; + event->mmap2.flags = 0; + if (prot[0] == 'r') + event->mmap2.prot |= PROT_READ; + if (prot[1] == 'w') + event->mmap2.prot |= PROT_WRITE; + if (prot[2] == 'x') + event->mmap2.prot |= PROT_EXEC; + + if (prot[3] == 's') + event->mmap2.flags |= MAP_SHARED; + else + event->mmap2.flags |= MAP_PRIVATE; + + if (prot[2] != 'x') { + if (!mmap_data || prot[0] != 'r') + continue; + + event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; + } + +out: + if (truncation) + event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; + + if (!strcmp(execname, "")) + strcpy(execname, anonstr); + + if (hugetlbfs_mnt_len && + !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { + strcpy(execname, anonstr); + event->mmap2.flags |= MAP_HUGETLB; + } + + size = strlen(execname) + 1; + memcpy(event->mmap2.filename, execname, size); + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { + rc = -1; + break; + } + + if (truncation) + break; + } + + fclose(fp); + return rc; +} + +int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, + struct machine *machine) +{ + int rc = 0; + struct map *pos; + struct maps *maps = machine__kernel_maps(machine); + union perf_event *event = zalloc((sizeof(event->mmap) + + machine->id_hdr_size)); + if (event == NULL) { + pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } + + event->header.type = PERF_RECORD_MMAP; + + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + + for (pos = maps__first(maps); pos; pos = map__next(pos)) { + size_t size; + + if (!__map__is_kmodule(pos)) + continue; + + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pid = machine->pid; + + memcpy(event->mmap.filename, pos->dso->long_name, + pos->dso->long_name_len + 1); + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { + rc = -1; + break; + } + } + + free(event); + return rc; +} + +static int __event__synthesize_thread(union perf_event *comm_event, + union perf_event *mmap_event, + union perf_event *fork_event, + union perf_event *namespaces_event, + pid_t pid, int full, perf_event__handler_t process, + struct perf_tool *tool, struct machine *machine, bool mmap_data) +{ + char filename[PATH_MAX]; + DIR *tasks; + struct dirent *dirent; + pid_t tgid, ppid; + int rc = 0; + + /* special case: only send one comm event using passed in pid */ + if (!full) { + tgid = perf_event__synthesize_comm(tool, comm_event, pid, + process, machine); + + if (tgid == -1) + return -1; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, + tgid, process, machine) < 0) + return -1; + + /* + * send mmap only for thread group leader + * see thread__init_map_groups + */ + if (pid == tgid && + perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data)) + return -1; + + return 0; + } + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(filename, sizeof(filename), "%s/proc/%d/task", + machine->root_dir, pid); + + tasks = opendir(filename); + if (tasks == NULL) { + pr_debug("couldn't open %s\n", filename); + return 0; + } + + while ((dirent = readdir(tasks)) != NULL) { + char *end; + pid_t _pid; + + _pid = strtol(dirent->d_name, &end, 10); + if (*end) + continue; + + rc = -1; + if (perf_event__prepare_comm(comm_event, _pid, machine, + &tgid, &ppid) != 0) + break; + + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + ppid, process, machine) < 0) + break; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, + tgid, process, machine) < 0) + break; + + /* + * Send the prepared comm event + */ + if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0) + break; + + rc = 0; + if (_pid == pid) { + /* process the parent's maps too */ + rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data); + if (rc) + break; + } + } + + closedir(tasks); + return rc; +} + +int perf_event__synthesize_thread_map(struct perf_tool *tool, + struct perf_thread_map *threads, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data) +{ + union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; + int err = -1, thread, j; + + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; + + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + + err = 0; + for (thread = 0; thread < threads->nr; ++thread) { + if (__event__synthesize_thread(comm_event, mmap_event, + fork_event, namespaces_event, + perf_thread_map__pid(threads, thread), 0, + process, tool, machine, + mmap_data)) { + err = -1; + break; + } + + /* + * comm.pid is set to thread group id by + * perf_event__synthesize_comm + */ + if ((int) comm_event->comm.pid != perf_thread_map__pid(threads, thread)) { + bool need_leader = true; + + /* is thread group leader in thread_map? */ + for (j = 0; j < threads->nr; ++j) { + if ((int) comm_event->comm.pid == perf_thread_map__pid(threads, j)) { + need_leader = false; + break; + } + } + + /* if not, generate events for it */ + if (need_leader && + __event__synthesize_thread(comm_event, mmap_event, + fork_event, namespaces_event, + comm_event->comm.pid, 0, + process, tool, machine, + mmap_data)) { + err = -1; + break; + } + } + } + free(namespaces_event); +out_free_fork: + free(fork_event); +out_free_mmap: + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; +} + +static int __perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + struct dirent **dirent, + int start, + int num) +{ + union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; + int err = -1; + char *end; + pid_t pid; + int i; + + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; + + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + + for (i = start; i < start + num; i++) { + if (!isdigit(dirent[i]->d_name[0])) + continue; + + pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); + /* only interested in proper numerical dirents */ + if (*end) + continue; + /* + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data); + } + err = 0; + + free(namespaces_event); +out_free_fork: + free(fork_event); +out_free_mmap: + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; +} + +struct synthesize_threads_arg { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + bool mmap_data; + struct dirent **dirent; + int num; + int start; +}; + +static void *synthesize_threads_worker(void *arg) +{ + struct synthesize_threads_arg *args = arg; + + __perf_event__synthesize_threads(args->tool, args->process, + args->machine, args->mmap_data, + args->dirent, + args->start, args->num); + return NULL; +} + +int perf_event__synthesize_threads(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + bool mmap_data, + unsigned int nr_threads_synthesize) +{ + struct synthesize_threads_arg *args = NULL; + pthread_t *synthesize_threads = NULL; + char proc_path[PATH_MAX]; + struct dirent **dirent; + int num_per_thread; + int m, n, i, j; + int thread_nr; + int base = 0; + int err = -1; + + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); + n = scandir(proc_path, &dirent, 0, alphasort); + if (n < 0) + return err; + + if (nr_threads_synthesize == UINT_MAX) + thread_nr = sysconf(_SC_NPROCESSORS_ONLN); + else + thread_nr = nr_threads_synthesize; + + if (thread_nr <= 1) { + err = __perf_event__synthesize_threads(tool, process, + machine, mmap_data, + dirent, base, n); + goto free_dirent; + } + if (thread_nr > n) + thread_nr = n; + + synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + if (synthesize_threads == NULL) + goto free_dirent; + + args = calloc(sizeof(*args), thread_nr); + if (args == NULL) + goto free_threads; + + num_per_thread = n / thread_nr; + m = n % thread_nr; + for (i = 0; i < thread_nr; i++) { + args[i].tool = tool; + args[i].process = process; + args[i].machine = machine; + args[i].mmap_data = mmap_data; + args[i].dirent = dirent; + } + for (i = 0; i < m; i++) { + args[i].num = num_per_thread + 1; + args[i].start = i * args[i].num; + } + if (i != 0) + base = args[i-1].start + args[i-1].num; + for (j = i; j < thread_nr; j++) { + args[j].num = num_per_thread; + args[j].start = base + (j - i) * args[i].num; + } + + for (i = 0; i < thread_nr; i++) { + if (pthread_create(&synthesize_threads[i], NULL, + synthesize_threads_worker, &args[i])) + goto out_join; + } + err = 0; +out_join: + for (i = 0; i < thread_nr; i++) + pthread_join(synthesize_threads[i], NULL); + free(args); +free_threads: + free(synthesize_threads); +free_dirent: + for (i = 0; i < n; i++) + zfree(&dirent[i]); + free(dirent); + + return err; +} + +int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + size_t size; + struct map *map = machine__kernel_map(machine); + struct kmap *kmap; + int err; + union perf_event *event; + + if (map == NULL) + return -1; + + kmap = map__kmap(map); + if (!kmap->ref_reloc_sym) + return -1; + + /* + * We should get this from /sys/kernel/sections/.text, but till that is + * available use this, and after it is use this as a fallback for older + * kernels. + */ + event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); + if (event == NULL) { + pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } + + if (machine__is_host(machine)) { + /* + * kernel uses PERF_RECORD_MISC_USER for user space maps, + * see kernel/perf_event.c __perf_event_mmap + */ + event->header.misc = PERF_RECORD_MISC_KERNEL; + } else { + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + } + + size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; + size = PERF_ALIGN(size, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); + event->mmap.pgoff = kmap->ref_reloc_sym->addr; + event->mmap.start = map->start; + event->mmap.len = map->end - event->mmap.start; + event->mmap.pid = machine->pid; + + err = perf_tool__process_synth_event(tool, event, machine, process); + free(event); + + return err; +} + +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int err; + + err = __perf_event__synthesize_kernel_mmap(tool, process, machine); + if (err < 0) + return err; + + return perf_event__synthesize_extra_kmaps(tool, process, machine); +} + +int perf_event__synthesize_thread_map2(struct perf_tool *tool, + struct perf_thread_map *threads, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event *event; + int i, err, size; + + size = sizeof(event->thread_map); + size += threads->nr * sizeof(event->thread_map.entries[0]); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = PERF_RECORD_THREAD_MAP; + event->header.size = size; + event->thread_map.nr = threads->nr; + + for (i = 0; i < threads->nr; i++) { + struct perf_record_thread_map_entry *entry = &event->thread_map.entries[i]; + char *comm = perf_thread_map__comm(threads, i); + + if (!comm) + comm = (char *) ""; + + entry->pid = perf_thread_map__pid(threads, i); + strncpy((char *) &entry->comm, comm, sizeof(entry->comm)); + } + + err = process(tool, event, NULL, machine); + + free(event); + return err; +} + +static void synthesize_cpus(struct cpu_map_entries *cpus, + struct perf_cpu_map *map) +{ + int i; + + cpus->nr = map->nr; + + for (i = 0; i < map->nr; i++) + cpus->cpu[i] = map->map[i]; +} + +static void synthesize_mask(struct perf_record_record_cpu_map *mask, + struct perf_cpu_map *map, int max) +{ + int i; + + mask->nr = BITS_TO_LONGS(max); + mask->long_size = sizeof(long); + + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], mask->mask); +} + +static size_t cpus_size(struct perf_cpu_map *map) +{ + return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); +} + +static size_t mask_size(struct perf_cpu_map *map, int *max) +{ + int i; + + *max = 0; + + for (i = 0; i < map->nr; i++) { + /* bit possition of the cpu is + 1 */ + int bit = map->map[i] + 1; + + if (bit > *max) + *max = bit; + } + + return sizeof(struct perf_record_record_cpu_map) + BITS_TO_LONGS(*max) * sizeof(long); +} + +void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int *max) +{ + size_t size_cpus, size_mask; + bool is_dummy = perf_cpu_map__empty(map); + + /* + * Both array and mask data have variable size based + * on the number of cpus and their actual values. + * The size of the 'struct perf_record_cpu_map_data' is: + * + * array = size of 'struct cpu_map_entries' + + * number of cpus * sizeof(u64) + * + * mask = size of 'struct perf_record_record_cpu_map' + + * maximum cpu bit converted to size of longs + * + * and finaly + the size of 'struct perf_record_cpu_map_data'. + */ + size_cpus = cpus_size(map); + size_mask = mask_size(map, max); + + if (is_dummy || (size_cpus < size_mask)) { + *size += size_cpus; + *type = PERF_CPU_MAP__CPUS; + } else { + *size += size_mask; + *type = PERF_CPU_MAP__MASK; + } + + *size += sizeof(struct perf_record_cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); + return zalloc(*size); +} + +void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf_cpu_map *map, + u16 type, int max) +{ + data->type = type; + + switch (type) { + case PERF_CPU_MAP__CPUS: + synthesize_cpus((struct cpu_map_entries *) data->data, map); + break; + case PERF_CPU_MAP__MASK: + synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); + default: + break; + }; +} + +static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) +{ + size_t size = sizeof(struct perf_record_cpu_map); + struct perf_record_cpu_map *event; + int max; + u16 type; + + event = cpu_map_data__alloc(map, &size, &type, &max); + if (!event) + return NULL; + + event->header.type = PERF_RECORD_CPU_MAP; + event->header.size = size; + event->data.type = type; + + cpu_map_data__synthesize(&event->data, map, type, max); + return event; +} + +int perf_event__synthesize_cpu_map(struct perf_tool *tool, + struct perf_cpu_map *map, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_cpu_map *event; + int err; + + event = cpu_map_event__new(map); + if (!event) + return -ENOMEM; + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat_config(struct perf_tool *tool, + struct perf_stat_config *config, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat_config *event; + int size, i = 0, err; + + size = sizeof(*event); + size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0])); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = PERF_RECORD_STAT_CONFIG; + event->header.size = size; + event->nr = PERF_STAT_CONFIG_TERM__MAX; + +#define ADD(__term, __val) \ + event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \ + event->data[i].val = __val; \ + i++; + + ADD(AGGR_MODE, config->aggr_mode) + ADD(INTERVAL, config->interval) + ADD(SCALE, config->scale) + + WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, + "stat config terms unbalanced\n"); +#undef ADD + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat(struct perf_tool *tool, + u32 cpu, u32 thread, u64 id, + struct perf_counts_values *count, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat event; + + event.header.type = PERF_RECORD_STAT; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.id = id; + event.cpu = cpu; + event.thread = thread; + event.val = count->val; + event.ena = count->ena; + event.run = count->run; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +int perf_event__synthesize_stat_round(struct perf_tool *tool, + u64 evtime, u64 type, + perf_event__handler_t process, + struct machine *machine) +{ + struct perf_record_stat_round event; + + event.header.type = PERF_RECORD_STAT_ROUND; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.time = evtime; + event.type = type; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) +{ + size_t sz, result = sizeof(struct perf_record_sample); + + if (type & PERF_SAMPLE_IDENTIFIER) + result += sizeof(u64); + + if (type & PERF_SAMPLE_IP) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TID) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TIME) + result += sizeof(u64); + + if (type & PERF_SAMPLE_ADDR) + result += sizeof(u64); + + if (type & PERF_SAMPLE_ID) + result += sizeof(u64); + + if (type & PERF_SAMPLE_STREAM_ID) + result += sizeof(u64); + + if (type & PERF_SAMPLE_CPU) + result += sizeof(u64); + + if (type & PERF_SAMPLE_PERIOD) + result += sizeof(u64); + + if (type & PERF_SAMPLE_READ) { + result += sizeof(u64); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + result += sizeof(u64); + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + result += sizeof(u64); + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_GROUP) { + sz = sample->read.group.nr * + sizeof(struct sample_read_value); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_CALLCHAIN) { + sz = (sample->callchain->nr + 1) * sizeof(u64); + result += sz; + } + + if (type & PERF_SAMPLE_RAW) { + result += sizeof(u32); + result += sample->raw_size; + } + + if (type & PERF_SAMPLE_BRANCH_STACK) { + sz = sample->branch_stack->nr * sizeof(struct branch_entry); + sz += sizeof(u64); + result += sz; + } + + if (type & PERF_SAMPLE_REGS_USER) { + if (sample->user_regs.abi) { + result += sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + sz = sample->user_stack.size; + result += sizeof(u64); + if (sz) { + result += sz; + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_WEIGHT) + result += sizeof(u64); + + if (type & PERF_SAMPLE_DATA_SRC) + result += sizeof(u64); + + if (type & PERF_SAMPLE_TRANSACTION) + result += sizeof(u64); + + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + result += sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + + if (type & PERF_SAMPLE_PHYS_ADDR) + result += sizeof(u64); + + return result; +} + +int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, + const struct perf_sample *sample) +{ + __u64 *array; + size_t sz; + /* + * used for cross-endian analysis. See git commit 65014ab3 + * for why this goofiness is needed. + */ + union u64_swap u; + + array = event->sample.array; + + if (type & PERF_SAMPLE_IDENTIFIER) { + *array = sample->id; + array++; + } + + if (type & PERF_SAMPLE_IP) { + *array = sample->ip; + array++; + } + + if (type & PERF_SAMPLE_TID) { + u.val32[0] = sample->pid; + u.val32[1] = sample->tid; + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_TIME) { + *array = sample->time; + array++; + } + + if (type & PERF_SAMPLE_ADDR) { + *array = sample->addr; + array++; + } + + if (type & PERF_SAMPLE_ID) { + *array = sample->id; + array++; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + *array = sample->stream_id; + array++; + } + + if (type & PERF_SAMPLE_CPU) { + u.val32[0] = sample->cpu; + u.val32[1] = 0; + *array = u.val64; + array++; + } + + if (type & PERF_SAMPLE_PERIOD) { + *array = sample->period; + array++; + } + + if (type & PERF_SAMPLE_READ) { + if (read_format & PERF_FORMAT_GROUP) + *array = sample->read.group.nr; + else + *array = sample->read.one.value; + array++; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + *array = sample->read.time_enabled; + array++; + } + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + *array = sample->read.time_running; + array++; + } + + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_GROUP) { + sz = sample->read.group.nr * + sizeof(struct sample_read_value); + memcpy(array, sample->read.group.values, sz); + array = (void *)array + sz; + } else { + *array = sample->read.one.id; + array++; + } + } + + if (type & PERF_SAMPLE_CALLCHAIN) { + sz = (sample->callchain->nr + 1) * sizeof(u64); + memcpy(array, sample->callchain, sz); + array = (void *)array + sz; + } + + if (type & PERF_SAMPLE_RAW) { + u.val32[0] = sample->raw_size; + *array = u.val64; + array = (void *)array + sizeof(u32); + + memcpy(array, sample->raw_data, sample->raw_size); + array = (void *)array + sample->raw_size; + } + + if (type & PERF_SAMPLE_BRANCH_STACK) { + sz = sample->branch_stack->nr * sizeof(struct branch_entry); + sz += sizeof(u64); + memcpy(array, sample->branch_stack, sz); + array = (void *)array + sz; + } + + if (type & PERF_SAMPLE_REGS_USER) { + if (sample->user_regs.abi) { + *array++ = sample->user_regs.abi; + sz = hweight64(sample->user_regs.mask) * sizeof(u64); + memcpy(array, sample->user_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + sz = sample->user_stack.size; + *array++ = sz; + if (sz) { + memcpy(array, sample->user_stack.data, sz); + array = (void *)array + sz; + *array++ = sz; + } + } + + if (type & PERF_SAMPLE_WEIGHT) { + *array = sample->weight; + array++; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + *array = sample->data_src; + array++; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + *array = sample->transaction; + array++; + } + + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + *array++ = sample->intr_regs.abi; + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + memcpy(array, sample->intr_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + + if (type & PERF_SAMPLE_PHYS_ADDR) { + *array = sample->phys_addr; + array++; + } + + return 0; +} + +int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, + struct evlist *evlist, struct machine *machine) +{ + union perf_event *ev; + struct evsel *evsel; + size_t nr = 0, i = 0, sz, max_nr, n; + int err; + + pr_debug2("Synthesizing id index\n"); + + max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / + sizeof(struct id_index_entry); + + evlist__for_each_entry(evlist, evsel) + nr += evsel->ids; + + n = nr > max_nr ? max_nr : nr; + sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); + ev = zalloc(sz); + if (!ev) + return -ENOMEM; + + ev->id_index.header.type = PERF_RECORD_ID_INDEX; + ev->id_index.header.size = sz; + ev->id_index.nr = n; + + evlist__for_each_entry(evlist, evsel) { + u32 j; + + for (j = 0; j < evsel->ids; j++) { + struct id_index_entry *e; + struct perf_sample_id *sid; + + if (i >= n) { + err = process(tool, ev, NULL, machine); + if (err) + goto out_err; + nr -= n; + i = 0; + } + + e = &ev->id_index.entries[i++]; + + e->id = evsel->id[j]; + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) { + free(ev); + return -ENOENT; + } + + e->idx = sid->idx; + e->cpu = sid->cpu; + e->tid = sid->tid; + } + } + + sz = sizeof(struct perf_record_id_index) + nr * sizeof(struct id_index_entry); + ev->id_index.header.size = sz; + ev->id_index.nr = nr; + + err = process(tool, ev, NULL, machine); +out_err: + free(ev); + + return err; +} + +int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, + struct target *target, struct perf_thread_map *threads, + perf_event__handler_t process, bool data_mmap, + unsigned int nr_threads_synthesize) +{ + if (target__has_task(target)) + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + else if (target__has_cpu(target)) + return perf_event__synthesize_threads(tool, process, + machine, data_mmap, + nr_threads_synthesize); + /* command specified */ + return 0; +} + +int machine__synthesize_threads(struct machine *machine, struct target *target, + struct perf_thread_map *threads, bool data_mmap, + unsigned int nr_threads_synthesize) +{ + return __machine__synthesize_threads(machine, NULL, target, threads, + perf_event__process, data_mmap, + nr_threads_synthesize); +} + +static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) +{ + struct perf_record_event_update *ev; + + size += sizeof(*ev); + size = PERF_ALIGN(size, sizeof(u64)); + + ev = zalloc(size); + if (ev) { + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = type; + ev->id = id; + } + return ev; +} + +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + size_t size = strlen(evsel->unit); + struct perf_record_event_update *ev; + int err; + + ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + strlcpy(ev->data, evsel->unit, size + 1); + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + struct perf_record_event_update *ev; + struct perf_record_event_update_scale *ev_data; + int err; + + ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + ev_data = (struct perf_record_event_update_scale *)ev->data; + ev_data->scale = evsel->scale; + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + struct perf_record_event_update *ev; + size_t len = strlen(evsel->name); + int err; + + ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + strlcpy(ev->data, evsel->name, len + 1); + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, + perf_event__handler_t process) +{ + size_t size = sizeof(struct perf_record_event_update); + struct perf_record_event_update *ev; + int max, err; + u16 type; + + if (!evsel->core.own_cpus) + return 0; + + ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); + if (!ev) + return -ENOMEM; + + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = PERF_EVENT_UPDATE__CPUS; + ev->id = evsel->id[0]; + + cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, + evsel->core.own_cpus, type, max); + + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, + perf_event__handler_t process) +{ + struct evsel *evsel; + int err = 0; + + evlist__for_each_entry(evlist, evsel) { + err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids, + evsel->id, process); + if (err) { + pr_debug("failed to create perf header attribute\n"); + return err; + } + } + + return err; +} + +static bool has_unit(struct evsel *evsel) +{ + return evsel->unit && *evsel->unit; +} + +static bool has_scale(struct evsel *evsel) +{ + return evsel->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, + perf_event__handler_t process, bool is_pipe) +{ + struct evsel *evsel; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, evsel) { + if (!evsel->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(evsel)) { + err = perf_event__synthesize_event_update_unit(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(evsel)) { + err = perf_event__synthesize_event_update_scale(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel evsel.\n"); + return err; + } + } + + if (evsel->core.own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, evsel, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + +int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, + u32 ids, u64 *id, perf_event__handler_t process) +{ + union perf_event *ev; + size_t size; + int err; + + size = sizeof(struct perf_event_attr); + size = PERF_ALIGN(size, sizeof(u64)); + size += sizeof(struct perf_event_header); + size += ids * sizeof(u64); + + ev = zalloc(size); + + if (ev == NULL) + return -ENOMEM; + + ev->attr.attr = *attr; + memcpy(ev->attr.id, id, ids * sizeof(u64)); + + ev->attr.header.type = PERF_RECORD_HEADER_ATTR; + ev->attr.header.size = (u16)size; + + if (ev->attr.header.size == size) + err = process(tool, ev, NULL, NULL); + else + err = -E2BIG; + + free(ev); + + return err; +} + +int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, + perf_event__handler_t process) +{ + union perf_event ev; + struct tracing_data *tdata; + ssize_t size = 0, aligned_size = 0, padding; + struct feat_fd ff; + + /* + * We are going to store the size of the data followed + * by the data contents. Since the fd descriptor is a pipe, + * we cannot seek back to store the size of the data once + * we know it. Instead we: + * + * - write the tracing data to the temp file + * - get/write the data size to pipe + * - write the tracing data from the temp file + * to the pipe + */ + tdata = tracing_data_get(&evlist->core.entries, fd, true); + if (!tdata) + return -1; + + memset(&ev, 0, sizeof(ev)); + + ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; + size = tdata->size; + aligned_size = PERF_ALIGN(size, sizeof(u64)); + padding = aligned_size - size; + ev.tracing_data.header.size = sizeof(ev.tracing_data); + ev.tracing_data.size = aligned_size; + + process(tool, &ev, NULL, NULL); + + /* + * The put function will copy all the tracing data + * stored in temp file to the pipe. + */ + tracing_data_put(tdata); + + ff = (struct feat_fd){ .fd = fd }; + if (write_padded(&ff, NULL, 0, padding)) + return -1; + + return aligned_size; +} + +int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, + perf_event__handler_t process, struct machine *machine) +{ + union perf_event ev; + size_t len; + + if (!pos->hit) + return 0; + + memset(&ev, 0, sizeof(ev)); + + len = pos->long_name_len + 1; + len = PERF_ALIGN(len, NAME_ALIGN); + memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); + ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; + ev.build_id.header.misc = misc; + ev.build_id.pid = machine->pid; + ev.build_id.header.size = sizeof(ev.build_id) + len; + memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); + + return process(tool, &ev, NULL, machine); +} + +int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, + struct evlist *evlist, perf_event__handler_t process, bool attrs) +{ + int err; + + if (attrs) { + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + err = perf_event__synthesize_extra_attr(tool, evlist, process, attrs); + err = perf_event__synthesize_thread_map2(tool, evlist->core.threads, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(tool, config, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + return err; + } + + return 0; +} + +int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, + struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; + +int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, + struct evlist *evlist, perf_event__handler_t process) +{ + struct perf_header *header = &session->header; + struct perf_record_header_feature *fe; + struct feat_fd ff; + size_t sz, sz_hdr; + int feat, ret; + + sz_hdr = sizeof(fe->header); + sz = sizeof(union perf_event); + /* get a nice alignment */ + sz = PERF_ALIGN(sz, page_size); + + memset(&ff, 0, sizeof(ff)); + + ff.buf = malloc(sz); + if (!ff.buf) + return -ENOMEM; + + ff.size = sz - sz_hdr; + ff.ph = &session->header; + + for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { + if (!feat_ops[feat].synthesize) { + pr_debug("No record header feature for header :%d\n", feat); + continue; + } + + ff.offset = sizeof(*fe); + + ret = feat_ops[feat].write(&ff, evlist); + if (ret || ff.offset <= (ssize_t)sizeof(*fe)) { + pr_debug("Error writing feature\n"); + continue; + } + /* ff.buf may have changed due to realloc in do_write() */ + fe = ff.buf; + memset(fe, 0, sizeof(*fe)); + + fe->feat_id = feat; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = ff.offset; + + ret = process(tool, ff.buf, NULL, NULL); + if (ret) { + free(ff.buf); + return ret; + } + } + + /* Send HEADER_LAST_FEATURE mark. */ + fe = ff.buf; + fe->feat_id = HEADER_LAST_FEATURE; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = sizeof(*fe); + + ret = process(tool, ff.buf, NULL, NULL); + + free(ff.buf); + return ret; +} From b295c3e39c1383e06ba1db4dd836018502e2ff3a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 18 Sep 2019 16:34:07 +0300 Subject: [PATCH 130/345] tools lib traceevent: Convert remaining %p[fF] users to %p[sS] There are no in-kernel %p[fF] users left. Convert the traceevent tool, too, to align with the kernel. Signed-off-by: Sakari Ailus Cc: Andy Shevchenko Cc: devicetree@vger.kernel.org Cc: Heikki Krogerus Cc: Jiri Olsa Cc: Joe Perches Cc: linux-acpi@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Cc: Namhyung Kim Cc: Petr Mladek Cc: Rafael J. Wysocki Cc: Rob Herring Cc: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Link: http://lore.kernel.org/lkml/20190918133419.7969-2-sakari.ailus@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/libtraceevent-func_apis.txt | 10 +++++----- tools/lib/traceevent/event-parse.c | 18 ++++++++++++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt index 38bfea30a5f6..f6aca0df2151 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt @@ -59,12 +59,12 @@ parser context. The _tep_register_function()_ function registers a function name mapped to an address and (optional) module. This mapping is used in case the function tracer -or events have "%pF" or "%pS" parameter in its format string. It is common to -pass in the kallsyms function names with their corresponding addresses with this +or events have "%pS" parameter in its format string. It is common to pass in +the kallsyms function names with their corresponding addresses with this function. The _tep_ argument is the trace event parser context. The _name_ is -the name of the function, the string is copied internally. The _addr_ is -the start address of the function. The _mod_ is the kernel module -the function may be in (NULL for none). +the name of the function, the string is copied internally. The _addr_ is the +start address of the function. The _mod_ is the kernel module the function may +be in (NULL for none). The _tep_register_print_string()_ function registers a string by the address it was stored in the kernel. Some strings internal to the kernel with static diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index bb22238debfe..6f842af4550b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4367,10 +4367,20 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s switch (*ptr) { case 's': case 'S': - case 'f': - case 'F': case 'x': break; + case 'f': + case 'F': + /* + * Pre-5.5 kernels use %pf and + * %pF for printing symbols + * while kernels since 5.5 use + * %pfw for fwnodes. So check + * %p[fF] isn't followed by 'w'. + */ + if (ptr[1] != 'w') + break; + /* fall through */ default: /* * Older kernels do not process @@ -4487,12 +4497,12 @@ get_bprint_format(void *data, int size __maybe_unused, printk = find_printk(tep, addr); if (!printk) { - if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) + if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0) return NULL; return format; } - if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0) + if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0) return NULL; return format; From b63fd11cced17fcb8e133def29001b0f6aaa5e06 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 4 Sep 2019 15:17:37 +0530 Subject: [PATCH 131/345] perf stat: Reset previous counts on repeat with interval When using 'perf stat' with repeat and interval option, it shows wrong values for events. The wrong values will be shown for the first interval on the second and subsequent repetitions. Without the fix: # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 2.000282489 53 faults 2.000282489 513 sched:sched_switch 4.005478208 3,721 faults 4.005478208 2,666 sched:sched_switch 5.025470933 395 faults 5.025470933 1,307 sched:sched_switch 2.009602825 1,84,46,74,40,73,70,95,47,520 faults <------ 2.009602825 1,84,46,74,40,73,70,95,49,568 sched:sched_switch <------ 4.019612206 4,730 faults 4.019612206 2,746 sched:sched_switch 5.039615484 3,953 faults 5.039615484 1,496 sched:sched_switch 2.000274620 1,84,46,74,40,73,70,95,47,520 faults <------ 2.000274620 1,84,46,74,40,73,70,95,47,520 sched:sched_switch <------ 4.000480342 4,282 faults 4.000480342 2,303 sched:sched_switch 5.000916811 1,322 faults 5.000916811 1,064 sched:sched_switch # prev_raw_counts is allocated when using intervals. This is used when calculating the difference in the counts of events when using interval. The current counts are stored in prev_raw_counts to calculate the differences in the next iteration. On the first interval of the second and subsequent repetitions, prev_raw_counts would be the values stored in the last interval of the previous repetitions, while the current counts will only be for the first interval of the current repetition. Hence there is a possibility of events showing up as big number. Fix this by resetting prev_raw_counts whenever perf stat repeats the command. With the fix: # perf stat -r 3 -I 2000 -e faults -e sched:sched_switch -a sleep 5 2.019349347 2,597 faults 2.019349347 2,753 sched:sched_switch 4.019577372 3,098 faults 4.019577372 2,532 sched:sched_switch 5.019415481 1,879 faults 5.019415481 1,356 sched:sched_switch 2.000178813 8,468 faults 2.000178813 2,254 sched:sched_switch 4.000404621 7,440 faults 4.000404621 1,266 sched:sched_switch 5.040196079 2,458 faults 5.040196079 556 sched:sched_switch 2.000191939 6,870 faults 2.000191939 1,170 sched:sched_switch 4.000414103 541 faults 4.000414103 902 sched:sched_switch 5.000809863 450 faults 5.000809863 364 sched:sched_switch # Committer notes: This was broken since the cset introducing the --interval feature, i.e. --repeat + --interval wasn't tested at that point, add the Fixes tag so that automatic scripts can pick this up. Fixes: 13370a9b5bb8 ("perf stat: Add interval printing") Signed-off-by: Srikar Dronamraju Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Stephane Eranian Cc: stable@vger.kernel.org # v3.9+ Link: http://lore.kernel.org/lkml/20190904094738.9558-2-srikar@linux.vnet.ibm.com [ Fixed up conflicts with libperf, i.e. some perf_{evsel,evlist} lost the 'perf' prefix ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +++ tools/perf/util/stat.c | 17 +++++++++++++++++ tools/perf/util/stat.h | 1 + 3 files changed, 21 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index eece3d1e429a..fa4b148ecfca 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1952,6 +1952,9 @@ int cmd_stat(int argc, const char **argv) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); + if (run_idx != 0) + perf_evlist__reset_prev_raw_counts(evsel_list); + status = run_perf_stat(argc, argv, run_idx); if (forever && status != -1) { print_counters(NULL, argc, argv); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 06571209cb0b..fcd54342c04c 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -162,6 +162,15 @@ static void perf_evsel__free_prev_raw_counts(struct evsel *evsel) evsel->prev_raw_counts = NULL; } +static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) +{ + if (evsel->prev_raw_counts) { + evsel->prev_raw_counts->aggr.val = 0; + evsel->prev_raw_counts->aggr.ena = 0; + evsel->prev_raw_counts->aggr.run = 0; + } +} + static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); @@ -212,6 +221,14 @@ void perf_evlist__reset_stats(struct evlist *evlist) } } +void perf_evlist__reset_prev_raw_counts(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + perf_evsel__reset_prev_raw_counts(evsel); +} + static void zero_per_pkg(struct evsel *counter) { if (counter->per_pkg_mask) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0f9c9f6e2041..edbeb2f63e8d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -193,6 +193,7 @@ void perf_stat__collect_metric_expr(struct evlist *); int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct evlist *evlist); void perf_evlist__reset_stats(struct evlist *evlist); +void perf_evlist__reset_prev_raw_counts(struct evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter); From 443f2d5ba13d65ccfd879460f77941875159d154 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 4 Sep 2019 15:17:38 +0530 Subject: [PATCH 132/345] perf stat: Fix a segmentation fault when using repeat forever Observe a segmentation fault when 'perf stat' is asked to repeat forever with the interval option. Without fix: # perf stat -r 0 -I 5000 -e cycles -a sleep 10 # time counts unit events 5.000211692 3,13,89,82,34,157 cycles 10.000380119 1,53,98,52,22,294 cycles 10.040467280 17,16,79,265 cycles Segmentation fault This problem was only observed when we use forever option aka -r 0 and works with limited repeats. Calling print_counter with ts being set to NULL, is not a correct option when interval is set. Hence avoid print_counter(NULL,..) if interval is set. With fix: # perf stat -r 0 -I 5000 -e cycles -a sleep 10 # time counts unit events 5.019866622 3,15,14,43,08,697 cycles 10.039865756 3,15,16,31,95,261 cycles 10.059950628 1,26,05,47,158 cycles 5.009902655 3,14,52,62,33,932 cycles 10.019880228 3,14,52,22,89,154 cycles 10.030543876 66,90,18,333 cycles 5.009848281 3,14,51,98,25,437 cycles 10.029854402 3,15,14,93,04,918 cycles 5.009834177 3,14,51,95,92,316 cycles Committer notes: Did the 'git bisect' to find the cset introducing the problem to add the Fixes tag below, and at that time the problem reproduced as: (gdb) run stat -r0 -I500 sleep 1 Program received signal SIGSEGV, Segmentation fault. print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866 866 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); (gdb) bt #0 print_interval (prefix=prefix@entry=0x7fffffffc8d0 "", ts=ts@entry=0x0) at builtin-stat.c:866 #1 0x000000000041860a in print_counters (ts=ts@entry=0x0, argc=argc@entry=2, argv=argv@entry=0x7fffffffd640) at builtin-stat.c:938 #2 0x0000000000419a7f in cmd_stat (argc=2, argv=0x7fffffffd640, prefix=) at builtin-stat.c:1411 #3 0x000000000045c65a in run_builtin (p=p@entry=0x6291b8 , argc=argc@entry=5, argv=argv@entry=0x7fffffffd640) at perf.c:370 #4 0x000000000045c893 in handle_internal_command (argc=5, argv=0x7fffffffd640) at perf.c:429 #5 0x000000000045c8f1 in run_argv (argcp=argcp@entry=0x7fffffffd4ac, argv=argv@entry=0x7fffffffd4a0) at perf.c:473 #6 0x000000000045cac9 in main (argc=, argv=) at perf.c:588 (gdb) Mostly the same as just before this patch: Program received signal SIGSEGV, Segmentation fault. 0x00000000005874a7 in print_interval (config=0xa1f2a0 , evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964 964 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); (gdb) bt #0 0x00000000005874a7 in print_interval (config=0xa1f2a0 , evlist=0xbc9b90, prefix=0x7fffffffd1c0 "`", ts=0x0) at util/stat-display.c:964 #1 0x0000000000588047 in perf_evlist__print_counters (evlist=0xbc9b90, config=0xa1f2a0 , _target=0xa1f0c0 , ts=0x0, argc=2, argv=0x7fffffffd670) at util/stat-display.c:1172 #2 0x000000000045390f in print_counters (ts=0x0, argc=2, argv=0x7fffffffd670) at builtin-stat.c:656 #3 0x0000000000456bb5 in cmd_stat (argc=2, argv=0x7fffffffd670) at builtin-stat.c:1960 #4 0x00000000004dd2e0 in run_builtin (p=0xa30e00 , argc=5, argv=0x7fffffffd670) at perf.c:310 #5 0x00000000004dd54d in handle_internal_command (argc=5, argv=0x7fffffffd670) at perf.c:362 #6 0x00000000004dd694 in run_argv (argcp=0x7fffffffd4cc, argv=0x7fffffffd4c0) at perf.c:406 #7 0x00000000004dda11 in main (argc=5, argv=0x7fffffffd670) at perf.c:531 (gdb) Fixes: d4f63a4741a8 ("perf stat: Introduce print_counters function") Signed-off-by: Srikar Dronamraju Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Namhyung Kim Cc: Naveen N. Rao Cc: stable@vger.kernel.org # v4.2+ Link: http://lore.kernel.org/lkml/20190904094738.9558-3-srikar@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fa4b148ecfca..60cdd383af81 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1956,7 +1956,7 @@ int cmd_stat(int argc, const char **argv) perf_evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1) { + if (forever && status != -1 && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } From ce095c9ac293d1683f9fedb214811727dff0d508 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 16:21:48 +0100 Subject: [PATCH 133/345] perf test: Fix spelling mistake "allos" -> "allocate" There is a spelling mistake in a TEST_ASSERT_VAL message. Fix it. Signed-off-by: Colin King Reviewed-by: Mukesh Ojha Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lore.kernel.org/lkml/20190911152148.17031-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/event_update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 4bb772e2b73d..0497d900ced2 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -94,7 +94,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu evsel = perf_evlist__first(evlist); - TEST_ASSERT_VAL("failed to allos ids", + TEST_ASSERT_VAL("failed to allocate ids", !perf_evsel__alloc_id(evsel, 1, 1)); perf_evlist__id_add(evlist, evsel, 0, 0, 123); From 8067b3da970baa12e6045400fdf009673b8dd3c2 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 18 Jul 2019 23:47:47 +0530 Subject: [PATCH 134/345] perf kvm: Move kvm-stat header file from conditional inclusion to common include section Move kvm-stat header file to the common include section, and make the definitions in the header file under the conditional inclusion `#ifdef HAVE_KVM_STAT_SUPPORT`. This helps to define other 'perf kvm' related function prototypes in kvm-stat header file, which may not need kvm-stat support. Signed-off-by: Anju T Sudhakar Reviewed-By: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20190718181749.30612-1-anju@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/util/kvm-stat.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index ac6d6e061dc5..2b822be87673 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -21,6 +21,7 @@ #include "util/top.h" #include "util/data.h" #include "util/ordered-events.h" +#include "util/kvm-stat.h" #include "ui/ui.h" #include @@ -59,7 +60,6 @@ static const char *get_filename_for_perf_kvm(void) } #ifdef HAVE_KVM_STAT_SUPPORT -#include "util/kvm-stat.h" void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 46913637085b..8fd6ec20662c 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -2,6 +2,8 @@ #ifndef __PERF_KVM_STAT_H #define __PERF_KVM_STAT_H +#ifdef HAVE_KVM_STAT_SUPPORT + #include "tool.h" #include "stat.h" #include "record.h" @@ -144,5 +146,6 @@ extern const int decode_str_len; extern const char *kvm_exit_reason; extern const char *kvm_entry_trace; extern const char *kvm_exit_trace; +#endif /* HAVE_KVM_STAT_SUPPORT */ #endif /* __PERF_KVM_STAT_H */ From 124eb5f82bf9395419b20205c4dcc1b8fcda7f29 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 18 Jul 2019 23:47:48 +0530 Subject: [PATCH 135/345] perf kvm: Add arch neutral function to choose event for perf kvm record 'perf kvm record' uses 'cycles'(if the user did not specify any event) as the default event to profile the guest. This will not provide any proper samples from the guest incase of powerpc architecture, since in powerpc the PMUs are controlled by the guest rather than the host. Patch adds a function to pick an arch specific event for 'perf kvm record', instead of selecting 'cycles' as a default event for all architectures. For powerpc this function checks for any user specified event, and if there isn't any it returns invalid instead of proceeding with 'cycles' event. Signed-off-by: Anju T Sudhakar Reviewed-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20190718181749.30612-2-anju@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/kvm-stat.c | 37 +++++++++++++++++++++++++ tools/perf/builtin-kvm.c | 12 +++++++- tools/perf/util/kvm-stat.h | 1 + 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index f0dbf7b075c8..ec5b771029e4 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -8,6 +8,7 @@ #include "book3s_hv_exits.h" #include "book3s_hcalls.h" +#include #define NR_TPS 4 @@ -172,3 +173,39 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) return ret; } + +/* + * Incase of powerpc architecture, pmu registers are programmable + * by guest kernel. So monitoring guest via host may not provide + * valid samples. It is better to fail the "perf kvm record" + * with default "cycles" event to monitor guest in powerpc. + * + * Function to parse the arguments and return appropriate values. + */ +int kvm_add_default_arch_event(int *argc, const char **argv) +{ + const char **tmp; + bool event = false; + int i, j = *argc; + + const struct option event_options[] = { + OPT_BOOLEAN('e', "event", &event, NULL), + OPT_END() + }; + + tmp = calloc(j + 1, sizeof(char *)); + if (!tmp) + return -EINVAL; + + for (i = 0; i < j; i++) + tmp[i] = argv[i]; + + parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN); + if (!event) { + free(tmp); + return -EINVAL; + } + + free(tmp); + return 0; +} diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 2b822be87673..6e3e36658900 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1514,11 +1514,21 @@ perf_stat: } #endif /* HAVE_KVM_STAT_SUPPORT */ +int __weak kvm_add_default_arch_event(int *argc __maybe_unused, + const char **argv __maybe_unused) +{ + return 0; +} + static int __cmd_record(const char *file_name, int argc, const char **argv) { - int rec_argc, i = 0, j; + int rec_argc, i = 0, j, ret; const char **rec_argv; + ret = kvm_add_default_arch_event(&argc, argv); + if (ret) + return -EINVAL; + rec_argc = argc + 2; rec_argv = calloc(rec_argc + 1, sizeof(char *)); rec_argv[i++] = strdup("record"); diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 8fd6ec20662c..6f0fa05b62b6 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -148,4 +148,5 @@ extern const char *kvm_entry_trace; extern const char *kvm_exit_trace; #endif /* HAVE_KVM_STAT_SUPPORT */ +extern int kvm_add_default_arch_event(int *argc, const char **argv); #endif /* __PERF_KVM_STAT_H */ From 2bff2b828502b5e5d5ea5a52643d3542053df03f Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 18 Jul 2019 23:47:49 +0530 Subject: [PATCH 136/345] perf kvm stat: Set 'trace_cycles' as default event for 'perf kvm record' in powerpc Use 'trace_imc/trace_cycles' as the default event for 'perf kvm record' in powerpc. Signed-off-by: Anju T Sudhakar Reviewed-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20190718181749.30612-3-anju@linux.vnet.ibm.com [ Add missing pmu.h header, needed because this patch uses pmu_have_event() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/kvm-stat.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index ec5b771029e4..9cc1c4a9dec4 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -5,6 +5,7 @@ #include "util/debug.h" #include "util/evsel.h" #include "util/evlist.h" +#include "util/pmu.h" #include "book3s_hv_exits.h" #include "book3s_hcalls.h" @@ -177,8 +178,9 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) /* * Incase of powerpc architecture, pmu registers are programmable * by guest kernel. So monitoring guest via host may not provide - * valid samples. It is better to fail the "perf kvm record" - * with default "cycles" event to monitor guest in powerpc. + * valid samples with default 'cycles' event. It is better to use + * 'trace_imc/trace_cycles' event for guest profiling, since it + * can track the guest instruction pointer in the trace-record. * * Function to parse the arguments and return appropriate values. */ @@ -202,8 +204,14 @@ int kvm_add_default_arch_event(int *argc, const char **argv) parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN); if (!event) { - free(tmp); - return -EINVAL; + if (pmu_have_event("trace_imc", "trace_cycles")) { + argv[j++] = strdup("-e"); + argv[j++] = strdup("trace_imc/trace_cycles/"); + *argc += 2; + } else { + free(tmp); + return -EINVAL; + } } free(tmp); From b47613da3b71ca59cc6924bad2d74974f86fab92 Mon Sep 17 00:00:00 2001 From: Xiang Wang Date: Fri, 6 Sep 2019 11:56:09 +0800 Subject: [PATCH 137/345] arch/riscv: disable excess harts before picking main boot hart Harts with id greater than or equal to CONFIG_NR_CPUS need to be disabled. But the kernel can pick any hart as the main hart. So, before picking the main hart, the kernel must disable harts with ids greater than or equal to CONFIG_NR_CPUS. Signed-off-by: Xiang Wang Reviewed-by: Palmer Dabbelt Reviewed-by: Anup Patel [paul.walmsley@sifive.com: updated to apply; cleaned up patch description] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/head.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 15a9189f91ad..72f89b7590dd 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -63,6 +63,11 @@ _start_kernel: li t0, SR_FS csrc CSR_SSTATUS, t0 +#ifdef CONFIG_SMP + li t0, CONFIG_NR_CPUS + bgeu a0, t0, .Lsecondary_park +#endif + /* Pick one hart to run the main boot sequence */ la a3, hart_lottery li a2, 1 @@ -154,9 +159,6 @@ relocate: .Lsecondary_start: #ifdef CONFIG_SMP - li a1, CONFIG_NR_CPUS - bgeu a0, a1, .Lsecondary_park - /* Set trap vector to spin forever to help debug */ la a3, .Lsecondary_park csrw CSR_STVEC, a3 From dee04eee9182dae91801d0db5bb2acfd5365a749 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Wed, 4 Sep 2019 16:13:26 +0000 Subject: [PATCH 138/345] KVM: RISC-V: Add KVM_REG_RISCV for ONE_REG interface We will be using ONE_REG interface accessing VCPU registers from user-space hence we add KVM_REG_RISCV for RISC-V VCPU registers. Signed-off-by: Anup Patel Acked-by: Paolo Bonzini Reviewed-by: Paolo Bonzini Reviewed-by: Alexander Graf Signed-off-by: Paul Walmsley --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 233efbb1c81c..18a2b43097f8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1145,6 +1145,7 @@ struct kvm_dirty_tlb { #define KVM_REG_S390 0x5000000000000000ULL #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL +#define KVM_REG_RISCV 0x8000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL From d3d7a0ce020e2d14967159b5351158c80b681760 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 4 Sep 2019 16:14:06 +0000 Subject: [PATCH 139/345] RISC-V: Export kernel symbols for kvm Export a few symbols used by kvm module. Without this, kvm cannot be compiled as a module. Signed-off-by: Atish Patra Signed-off-by: Anup Patel Acked-by: Paolo Bonzini Reviewed-by: Paolo Bonzini Reviewed-by: Alexander Graf [paul.walmsley@sifive.com: updated to apply; clarified short patch description] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/smp.c | 1 + arch/riscv/kernel/time.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 3836760d7aaf..b18cd6c8e8fb 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -206,3 +206,4 @@ void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); } +EXPORT_SYMBOL_GPL(smp_send_reschedule); diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 541a2b885814..9dd1f2e64db1 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -9,6 +9,7 @@ #include unsigned long riscv_timebase; +EXPORT_SYMBOL_GPL(riscv_timebase); void __init time_init(void) { From 3bcca2a5a933e05db628ba731567de86ba7ed372 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Thu, 5 Sep 2019 05:46:14 -0700 Subject: [PATCH 140/345] riscv: dts: sifive: Add ethernet0 to the aliases node U-Boot expects this alias to be in place in order to fix up the mac address of the ethernet node. Signed-off-by: Bin Meng Reviewed-by: Christoph Hellwig Signed-off-by: Paul Walmsley --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 5a29211d396e..ae5c42d6943a 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -13,6 +13,7 @@ aliases { serial0 = &uart0; serial1 = &uart1; + ethernet0 = ð0; }; chosen { From c81007116bd23e9e2103c267184dc38d3acc1099 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Thu, 5 Sep 2019 05:45:53 -0700 Subject: [PATCH 141/345] riscv: dts: sifive: Drop "clock-frequency" property of cpu nodes The "clock-frequency" property of cpu nodes isn't required. Drop it. Signed-off-by: Bin Meng Reviewed-by: Christoph Hellwig Signed-off-by: Paul Walmsley --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index ae5c42d6943a..afa43c7ea369 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -61,7 +61,6 @@ }; }; cpu2: cpu@2 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -85,7 +84,6 @@ }; }; cpu3: cpu@3 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; @@ -109,7 +107,6 @@ }; }; cpu4: cpu@4 { - clock-frequency = <0>; compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; d-cache-block-size = <64>; d-cache-sets = <64>; From c82dd6d078a2bb29d41eda032bb96d05699a524d Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 16 Sep 2019 16:47:41 +0800 Subject: [PATCH 142/345] riscv: Avoid interrupts being erroneously enabled in handle_exception() When the handle_exception function addresses an exception, the interrupts will be unconditionally enabled after finishing the context save. However, It may erroneously enable the interrupts if the interrupts are disabled before entering the handle_exception. For example, one of the WARN_ON() condition is satisfied in the scheduling where the interrupt is disabled and rq.lock is locked. The WARN_ON will trigger a break exception and the handle_exception function will enable the interrupts before entering do_trap_break function. During the procedure, if a timer interrupt is pending, it will be taken when interrupts are enabled. In this case, it may cause a deadlock problem if the rq.lock is locked again in the timer ISR. Hence, the handle_exception() can only enable interrupts when the state of sstatus.SPIE is 1. This patch is tested on HiFive Unleashed board. Signed-off-by: Vincent Chen Reviewed-by: Palmer Dabbelt [paul.walmsley@sifive.com: updated to apply] Fixes: bcae803a21317 ("RISC-V: Enable IRQ during exception handling") Cc: David Abdurachmanov Cc: stable@vger.kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/kernel/entry.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 74ccfd464071..da7aa88113c2 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -166,9 +166,13 @@ ENTRY(handle_exception) move a0, sp /* pt_regs */ tail do_IRQ 1: - /* Exceptions run with interrupts enabled */ + /* Exceptions run with interrupts enabled or disabled + depending on the state of sstatus.SR_SPIE */ + andi t0, s1, SR_SPIE + beqz t0, 1f csrs CSR_SSTATUS, SR_SIE +1: /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall From b117b9b48b24cbb090dd3844b563b464d60a6278 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 5 Sep 2019 11:09:24 +0200 Subject: [PATCH 143/345] perf tests: Fix static build test Disable the potentional shared library features, which breaks static build if they are enabled and detected: jvmti and vdso libraries. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190905090924.GA1949@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 6b3afed5d910..c850d1664c56 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -100,7 +100,7 @@ make_install_info := install-info make_install_pdf := install-pdf make_install_prefix := install prefix=/tmp/krava make_install_prefix_slash := install prefix=/tmp/krava/ -make_static := LDFLAGS=-static +make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX32=1 NO_JVMTI=1 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 From 7b678ccdf5f608c408e1368e525ed191ca456bcf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2019 14:40:30 -0300 Subject: [PATCH 144/345] tools headers uapi: Sync prctl.h with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the changes in: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") that introduces prctl options that then automagically gets catched by the prctl cmd table generator, and thus supported in the 'perf trace' prctl beautifier for the 'option' argument: $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2019-09-20 14:38:41.386720870 -0300 +++ after 2019-09-20 14:40:02.583990802 -0300 @@ -49,6 +49,8 @@ [52] = "GET_SPECULATION_CTRL", [53] = "SET_SPECULATION_CTRL", [54] = "PAC_RESET_KEYS", + [55] = "SET_TAGGED_ADDR_CTRL", + [56] = "GET_TAGGED_ADDR_CTRL", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ For now just the translation of 55 and 56 to the respecting strings are done, more work needed to allow for filters to be used using strings. This, for instance, already works: # perf record -e syscalls:sys_enter_close --filter="fd==4" # perf script | head -5 gpm 1018 [006] 21327.171436: syscalls:sys_enter_close: fd: 0x00000004 gpm 1018 [006] 21329.171583: syscalls:sys_enter_close: fd: 0x00000004 bash 4882 [002] 21330.785496: syscalls:sys_enter_close: fd: 0x00000004 bash 20672 [001] 21330.785719: syscalls:sys_enter_close: fd: 0x00000004 find 20672 [001] 21330.789082: syscalls:sys_enter_close: fd: 0x00000004 # perf record -e syscalls:sys_enter_close --filter="fd>=4" ^C[ perf record: Woken up 1 times to write data ] # perf script | head -5 gpm 1018 [005] 21401.178501: syscalls:sys_enter_close: fd: 0x00000004 gsd-housekeepin 2287 [006] 21402.225365: syscalls:sys_enter_close: fd: 0x0000000b gsd-housekeepin 2287 [006] 21402.226234: syscalls:sys_enter_close: fd: 0x0000000b gsd-housekeepin 2287 [006] 21402.227255: syscalls:sys_enter_close: fd: 0x0000000b gsd-housekeepin 2287 [006] 21402.228088: syscalls:sys_enter_close: fd: 0x0000000b # Being able to pass something like: # perf record -e syscalls:sys_enter_prctl --filter="option=*TAGGED_ADDR*" Should be easy enough, first using tracepoint filters, then via the augmented_raw_syscalls.c BPF method. This addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Cc: Adrian Hunter Cc: Brendan Gregg Cc: Catalin Marinas Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-y8u8kvflooyo9x0if1g3jska@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 094bb03b9cc2..7da1b37b27aa 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -181,7 +181,7 @@ struct prctl_mm_map { #define PR_GET_THP_DISABLE 42 /* - * Tell the kernel to start/stop helping userspace manage bounds tables. + * No longer implemented, but left here to ensure the numbers stay reserved: */ #define PR_MPX_ENABLE_MANAGEMENT 43 #define PR_MPX_DISABLE_MANAGEMENT 44 @@ -229,4 +229,9 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) + #endif /* _LINUX_PRCTL_H */ From 9846f1366489e1a30d871a5c3198b14394365be7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2019 14:52:24 -0300 Subject: [PATCH 145/345] tools uapi asm-generic: Sync unistd.h with the kernel sources To pick the change from: 78e05972c5e6 ("ipc: fix semtimedop for generic 32-bit architectures") Which doesn't trigger any change in tooling and silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-hpnjuyjzoudltqe7dvbokqdt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 1be0e798e362..1fc8faa6e973 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget) __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) +__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop) #endif #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) From 761830a03c5c99fc0a7142e16cf0811cb68bd7af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2019 14:56:47 -0300 Subject: [PATCH 146/345] tools arch x86 uapi: Synch asm/unistd.h with the kernel sources To pick up the change in: 45e29d119e99 ("x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long") That doesn't trigger any changes in tooling and silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/unistd.h' differs from latest version at 'arch/x86/include/uapi/asm/unistd.h' diff -u tools/arch/x86/include/uapi/asm/unistd.h arch/x86/include/uapi/asm/unistd.h Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h index 30d7d04d72d6..196fdd02b8b1 100644 --- a/tools/arch/x86/include/uapi/asm/unistd.h +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -3,7 +3,7 @@ #define _UAPI_ASM_X86_UNISTD_H /* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000 +#define __X32_SYSCALL_BIT 0x40000000UL #ifndef __KERNEL__ # ifdef __i386__ From 40f1c039c7c6de913ee04eac585102e2fce7f6f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Sep 2019 15:00:49 -0300 Subject: [PATCH 147/345] tools arch x86: Sync asm/cpufeatures.h with the kernel sources To pick up the changes from: b4dd4f6e3648 ("x86/vmware: Add a header file for hypercall definitions") f36cf386e3fe ("x86/speculation/swapgs: Exclude ATOMs from speculation through SWAPGS") be261ffce6f1 ("x86: Remove X86_FEATURE_MFENCE_RDTSC") 018ebca8bd70 ("x86/cpufeatures: Enable a new AVX512 CPU feature") These don't cause any changes in tooling, just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h To clarify, updating those files cause these bits of tools/perf to rebuild: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o INSTALL GTK UI LD /tmp/build/perf/bench/perf-in.o Those use just: $ grep FEATURE tools/arch/x86/lib/mem*.S tools/arch/x86/lib/memcpy_64.S: ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ tools/arch/x86/lib/memcpy_64.S: "jmp memcpy_erms", X86_FEATURE_ERMS tools/arch/x86/lib/memset_64.S: ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ tools/arch/x86/lib/memset_64.S: "jmp memset_erms", X86_FEATURE_ERMS $ I.e. none of the feature defines added/removed by the patches above. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Gayatri Kammela Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Thomas Hellstrom Link: https://lkml.kernel.org/n/tip-pq63abgknsaeov23p80d8gjv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 5171b9c7ca3e..0652d3eed9bd 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -231,6 +231,8 @@ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -354,6 +356,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ From 0216234c2eed1367a318daeb9f4a97d8217412a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Sep 2019 12:52:35 +0200 Subject: [PATCH 148/345] perf tools: Fix segfault in cpu_cache_level__read() We release wrong pointer on error path in cpu_cache_level__read function, leading to segfault: (gdb) r record ls Starting program: /root/perf/tools/perf/perf record ls ... [ perf record: Woken up 1 times to write data ] double free or corruption (out) Thread 1 "perf" received signal SIGABRT, Aborted. 0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6 (gdb) bt #0 0x00007ffff7463798 in raise () from /lib64/power9/libc.so.6 #1 0x00007ffff7443bac in abort () from /lib64/power9/libc.so.6 #2 0x00007ffff74af8bc in __libc_message () from /lib64/power9/libc.so.6 #3 0x00007ffff74b92b8 in malloc_printerr () from /lib64/power9/libc.so.6 #4 0x00007ffff74bb874 in _int_free () from /lib64/power9/libc.so.6 #5 0x0000000010271260 in __zfree (ptr=0x7fffffffa0b0) at ../../lib/zalloc.. #6 0x0000000010139340 in cpu_cache_level__read (cache=0x7fffffffa090, cac.. #7 0x0000000010143c90 in build_caches (cntp=0x7fffffffa118, size= Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org: # v4.6+ Link: http://lore.kernel.org/lkml/20190912105235.10689-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5722ff717777..0167f9697172 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1073,7 +1073,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); if (sysfs__read_str(file, &cache->map, &len)) { - zfree(&cache->map); + zfree(&cache->size); zfree(&cache->type); return -1; } From 1a375ae7659ab740d4c885ea98c1659b8a6e2071 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Sep 2019 12:41:10 +0900 Subject: [PATCH 149/345] perf probe: Skip same probe address for a given line Fix to skip making a same probe address on given line. Since a DWARF line info contains several entries for one line with different column, perf probe will make a different probe on same address if user specifies a probe point by "function:line" or "file:line". e.g. $ perf probe -D kernel_read:8 p:probe/kernel_read_L8 kernel_read+39 p:probe/kernel_read_L8_1 kernel_read+39 This skips such duplicated probe addresses. Committer testing: # uname -a Linux quaco 5.3.0+ #2 SMP Thu Sep 19 16:13:22 -03 2019 x86_64 x86_64 x86_64 GNU/Linux # Before: # perf probe -D kernel_read:8 p:probe/kernel_read _text+3115191 p:probe/kernel_read_1 _text+3115191 # After: # perf probe -D kernel_read:8 p:probe/kernel_read _text+3115191 # Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lore.kernel.org/lkml/156886447061.10772.4261569305869149178.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 505905fc21c5..cd9f95e5044e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1245,6 +1245,17 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, return n; } +static bool trace_event_finder_overlap(struct trace_event_finder *tf) +{ + int i; + + for (i = 0; i < tf->ntevs; i++) { + if (tf->pf.addr == tf->tevs[i].point.address) + return true; + } + return false; +} + /* Add a found probe point into trace event list */ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { @@ -1255,6 +1266,14 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) struct perf_probe_arg *args = NULL; int ret, i; + /* + * For some reason (e.g. different column assigned to same address) + * This callback can be called with the address which already passed. + * Ignore it first. + */ + if (trace_event_finder_overlap(tf)) + return 0; + /* Check number of tevs */ if (tf->ntevs == tf->max_tevs) { pr_warning("Too many( > %d) probe point found.\n", From 9e6124d9d635957b56717f85219a88701617253f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Sep 2019 01:44:40 +0900 Subject: [PATCH 150/345] perf probe: Fix to clear tev->nargs in clear_probe_trace_event() Since add_probe_trace_event() can reuse tf->tevs[i] after calling clear_probe_trace_event(), this can make perf-probe crash if the 1st attempt of probe event finding fails to find an event argument, and the 2nd attempt fails to find probe point. E.g. $ perf probe -D "task_pid_nr tsk" Failed to find 'tsk' in this function. Failed to get entry address of warn_bad_vsyscall Segmentation fault (core dumped) Committer testing: After the patch: $ perf probe -D "task_pid_nr tsk" Failed to find 'tsk' in this function. Failed to get entry address of warn_bad_vsyscall Failed to get entry address of signal_fault Failed to get entry address of show_signal Failed to get entry address of umip_printk Failed to get entry address of __bad_area_nosemaphore Failed to get entry address of sock_set_timeout Failed to get entry address of tcp_recvmsg Probe point 'task_pid_nr' not found. Error: Failed to add events. $ Fixes: 092b1f0b5f9f ("perf probe: Clear probe_trace_event when add_probe_trace_event() fails") Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lore.kernel.org/lkml/156856587999.25775.5145779959474477595.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b8e0967c5c21..91cab5f669d2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2331,6 +2331,7 @@ void clear_probe_trace_event(struct probe_trace_event *tev) } } zfree(&tev->args); + tev->nargs = 0; } struct kprobe_blacklist_node { From 6ef81c55a2b6584cb642917f5fdf3632ef44b670 Mon Sep 17 00:00:00 2001 From: Mamatha Inamdar Date: Thu, 22 Aug 2019 12:50:49 +0530 Subject: [PATCH 151/345] perf session: Return error code for perf_session__new() function on failure This patch is to return error code of perf_new_session function on failure instead of NULL. Test Results: Before Fix: $ perf c2c report -input failed to open nput: No such file or directory $ echo $? 0 $ After Fix: $ perf c2c report -input failed to open nput: No such file or directory $ echo $? 254 $ Committer notes: Fix 'perf tests topology' case, where we use that TEST_ASSERT_VAL(..., session), i.e. we need to pass zero in case of failure, which was the case before when NULL was returned by perf_session__new() for failure, but now we need to negate the result of IS_ERR(session) to respect that TEST_ASSERT_VAL) expectation of zero meaning failure. Reported-by: Nageswara R Sastry Signed-off-by: Mamatha Inamdar Tested-by: Arnaldo Carvalho de Melo Tested-by: Nageswara R Sastry Acked-by: Ravi Bangoria Reviewed-by: Jiri Olsa Reviewed-by: Mukesh Ojha Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Greg Kroah-Hartman Cc: Jeremie Galarneau Cc: Kate Stewart Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shawn Landden Cc: Song Liu Cc: Thomas Gleixner Cc: Tzvetomir Stoyanov Link: http://lore.kernel.org/lkml/20190822071223.17892.45782.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 5 +++-- tools/perf/builtin-buildid-cache.c | 5 +++-- tools/perf/builtin-buildid-list.c | 5 +++-- tools/perf/builtin-c2c.c | 6 ++++-- tools/perf/builtin-diff.c | 9 +++++---- tools/perf/builtin-evlist.c | 5 +++-- tools/perf/builtin-inject.c | 5 +++-- tools/perf/builtin-kmem.c | 5 +++-- tools/perf/builtin-kvm.c | 9 +++++---- tools/perf/builtin-lock.c | 5 +++-- tools/perf/builtin-mem.c | 5 +++-- tools/perf/builtin-record.c | 5 +++-- tools/perf/builtin-report.c | 4 ++-- tools/perf/builtin-sched.c | 11 ++++++----- tools/perf/builtin-script.c | 9 +++++---- tools/perf/builtin-stat.c | 11 ++++++----- tools/perf/builtin-timechart.c | 5 +++-- tools/perf/builtin-top.c | 5 +++-- tools/perf/builtin-trace.c | 4 ++-- tools/perf/tests/topology.c | 5 +++-- tools/perf/util/data-convert-bt.c | 5 ++++- tools/perf/util/session.c | 15 +++++++++++---- 22 files changed, 86 insertions(+), 57 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 553c651fa0a2..8db8fc9bddef 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -40,6 +40,7 @@ #include #include #include +#include struct perf_annotate { struct perf_tool tool; @@ -584,8 +585,8 @@ int cmd_annotate(int argc, const char **argv) data.path = input_name; annotate.session = perf_session__new(&data, false, &annotate.tool); - if (annotate.session == NULL) - return -1; + if (IS_ERR(annotate.session)) + return PTR_ERR(annotate.session); annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, HEADER_BRANCH_STACK); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 1a69eb565dc0..39efa51d7fb3 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -28,6 +28,7 @@ #include "util/util.h" #include "util/probe-file.h" #include +#include static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { @@ -422,8 +423,8 @@ int cmd_buildid_cache(int argc, const char **argv) data.force = force; session = perf_session__new(&data, false, NULL); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); } if (symbol__init(session ? &session->header.env : NULL) < 0) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 5a0d8b378cb5..e3ef75583514 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -18,6 +18,7 @@ #include "util/symbol.h" #include "util/data.h" #include +#include static int sysfs__fprintf_build_id(FILE *fp) { @@ -65,8 +66,8 @@ static int perf_session__list_build_ids(bool force, bool with_hits) goto out; session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); /* * We take all buildids when the file contains AUX area tracing data diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 61aaacc2aedd..3542b6ab9813 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -2781,8 +2782,9 @@ static int perf_c2c__report(int argc, const char **argv) } session = perf_session__new(&data, 0, &c2c.tool); - if (session == NULL) { - pr_debug("No memory for session\n"); + if (IS_ERR(session)) { + err = PTR_ERR(session); + pr_debug("Error creating perf session\n"); goto out; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 827e4800d862..c37a78677955 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,6 +23,7 @@ #include "util/time-utils.h" #include "util/annotate.h" #include "util/map.h" +#include #include #include #include @@ -1153,9 +1154,9 @@ static int check_file_brstack(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &pdiff.tool); - if (!d->session) { + if (IS_ERR(d->session)) { pr_err("Failed to open %s\n", d->data.path); - return -1; + return PTR_ERR(d->session); } has_br_stack = perf_header__has_feat(&d->session->header, @@ -1185,9 +1186,9 @@ static int __cmd_diff(void) data__for_each_file(i, d) { d->session = perf_session__new(&d->data, false, &pdiff.tool); - if (!d->session) { + if (IS_ERR(d->session)) { + ret = PTR_ERR(d->session); pr_err("Failed to open %s\n", d->data.path); - ret = -1; goto out_delete; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 294494e60e48..60509ce4dd28 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -15,6 +15,7 @@ #include "util/session.h" #include "util/data.h" #include "util/debug.h" +#include static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { @@ -28,8 +29,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details bool has_tracepoint = false; session = perf_session__new(&data, 0, NULL); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); evlist__for_each_entry(session->evlist, pos) { perf_evsel__fprintf(pos, details, stdout); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 23a76cf3846f..372ecb3e2c06 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -23,6 +23,7 @@ #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/thread.h" +#include #include @@ -835,8 +836,8 @@ int cmd_inject(int argc, const char **argv) data.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); - if (inject.session == NULL) - return -1; + if (IS_ERR(inject.session)) + return PTR_ERR(inject.session); if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b5682beaad72..1e61e353f579 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -14,6 +14,7 @@ #include "util/tool.h" #include "util/callchain.h" #include "util/time-utils.h" +#include #include #include @@ -1956,8 +1957,8 @@ int cmd_kmem(int argc, const char **argv) data.path = input_name; kmem_session = session = perf_session__new(&data, false, &perf_kmem); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); ret = -1; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 6e3e36658900..c4b22e1b0a40 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -1091,9 +1092,9 @@ static int read_events(struct perf_kvm_stat *kvm) kvm->tool = eops; kvm->session = perf_session__new(&file, false, &kvm->tool); - if (!kvm->session) { + if (IS_ERR(kvm->session)) { pr_err("Initializing perf session failed\n"); - return -1; + return PTR_ERR(kvm->session); } symbol__init(&kvm->session->header.env); @@ -1446,8 +1447,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, * perf session */ kvm->session = perf_session__new(&data, false, &kvm->tool); - if (kvm->session == NULL) { - err = -1; + if (IS_ERR(kvm->session)) { + err = PTR_ERR(kvm->session); goto out; } kvm->session->evlist = kvm->evlist; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 4c2b7f437cdf..474dfd59d7eb 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -30,6 +30,7 @@ #include #include #include +#include static struct perf_session *session; @@ -872,9 +873,9 @@ static int __cmd_report(bool display_info) }; session = perf_session__new(&data, false, &eops); - if (!session) { + if (IS_ERR(session)) { pr_err("Initializing perf session failed\n"); - return -1; + return PTR_ERR(session); } symbol__init(&session->header.env); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 27d2bde943a8..a13f5817d6fc 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -17,6 +17,7 @@ #include "util/dso.h" #include "util/map.h" #include "util/symbol.h" +#include #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -249,8 +250,8 @@ static int report_raw_events(struct perf_mem *mem) struct perf_session *session = perf_session__new(&data, false, &mem->tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4bd11c918e73..3f66a49a997f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1354,9 +1355,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } session = perf_session__new(data, false, tool); - if (session == NULL) { + if (IS_ERR(session)) { pr_err("Perf session creation failed.\n"); - return -1; + return PTR_ERR(session); } fd = perf_data__fd(data); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3047e5169d9d..aae0e57c60fb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1269,8 +1269,8 @@ int cmd_report(int argc, const char **argv) repeat: session = perf_session__new(&data, false, &report.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); ret = evswitch__init(&report.evswitch, session->evlist, stderr); if (ret) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f0b828c201cc..079e67a36904 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -1797,9 +1798,9 @@ static int perf_sched__read_events(struct perf_sched *sched) int rc = -1; session = perf_session__new(&data, false, &sched->tool); - if (session == NULL) { - pr_debug("No Memory for session\n"); - return -1; + if (IS_ERR(session)) { + pr_debug("Error creating perf session"); + return PTR_ERR(session); } symbol__init(&session->header.env); @@ -2989,8 +2990,8 @@ static int perf_sched__timehist(struct perf_sched *sched) symbol_conf.use_callchain = sched->show_callchain; session = perf_session__new(&data, false, &sched->tool); - if (session == NULL) - return -ENOMEM; + if (IS_ERR(session)) + return PTR_ERR(session); evlist = session->evlist; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e079b34201f2..a17a9306bdf6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "util/record.h" #include "util/util.h" #include "perf.h" @@ -3083,8 +3084,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num, int i = 0; session = perf_session__new(&data, false, NULL); - if (!session) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); @@ -3754,8 +3755,8 @@ int cmd_script(int argc, const char **argv) } session = perf_session__new(&data, false, &script.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (header || header_only) { script.tool.show_feat_hdr = SHOW_FEAT_HEADER; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 60cdd383af81..f7d13326b830 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include @@ -1436,9 +1437,9 @@ static int __cmd_record(int argc, const char **argv) } session = perf_session__new(data, false, NULL); - if (session == NULL) { - pr_err("Perf session creation failed.\n"); - return -1; + if (IS_ERR(session)) { + pr_err("Perf session creation failed\n"); + return PTR_ERR(session); } init_features(session); @@ -1635,8 +1636,8 @@ static int __cmd_report(int argc, const char **argv) perf_stat.data.mode = PERF_DATA_MODE_READ; session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); perf_stat.session = session; stat_config.output = stderr; diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e0e822695a29..9e84fae9b096 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -35,6 +35,7 @@ #include "util/tool.h" #include "util/data.h" #include "util/debug.h" +#include #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -1601,8 +1602,8 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) &tchart->tool); int ret = -EINVAL; - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); symbol__init(&session->header.env); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b052470f89b4..8da3c939e6b0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -77,6 +77,7 @@ #include #include #include +#include #include @@ -1672,8 +1673,8 @@ int cmd_top(int argc, const char **argv) } top.session = perf_session__new(NULL, false, NULL); - if (top.session == NULL) { - status = -1; + if (IS_ERR(top.session)) { + status = PTR_ERR(top.session); goto out_delete_evlist; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f0f735093e21..a292658b4232 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3585,8 +3585,8 @@ static int trace__replay(struct trace *trace) trace->multiple_threads = true; session = perf_session__new(&data, false, &trace->tool); - if (session == NULL) - return -1; + if (IS_ERR(session)) + return PTR_ERR(session); if (trace->opts.target.pid) symbol_conf.pid_list_str = strdup(trace->opts.target.pid); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 7d845d913d7d..4a800499d7c3 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -8,6 +8,7 @@ #include "session.h" #include "evlist.h" #include "debug.h" +#include #define TEMPL "/tmp/perf-test-XXXXXX" #define DATA_SIZE 10 @@ -39,7 +40,7 @@ static int session_write_header(char *path) }; session = perf_session__new(&data, false, NULL); - TEST_ASSERT_VAL("can't get session", session); + TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); session->evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("can't get evlist", session->evlist); @@ -70,7 +71,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) int i; session = perf_session__new(&data, false, NULL); - TEST_ASSERT_VAL("can't get session", session); + TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); /* On platforms with large numbers of CPUs process_cpu_topology() * might issue an error while reading the perf.data file section diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 0c268449959c..dbc772bfb04e 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -30,6 +30,7 @@ #include "machine.h" #include "config.h" #include +#include #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1619,8 +1620,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, err = -1; /* perf.data session */ session = perf_session__new(&data, 0, &c.tool); - if (!session) + if (IS_ERR(session)) { + err = PTR_ERR(session); goto free_writer; + } if (c.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 58b5bc34ba12..a621c73bad42 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -34,6 +34,7 @@ #include "../perf.h" #include "arch/common.h" #include +#include #ifdef HAVE_ZSTD_SUPPORT static int perf_session__process_compressed_event(struct perf_session *session, @@ -187,6 +188,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe, struct perf_session *perf_session__new(struct perf_data *data, bool repipe, struct perf_tool *tool) { + int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); if (!session) @@ -201,13 +203,15 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_env__init(&session->header.env); if (data) { - if (perf_data__open(data)) + ret = perf_data__open(data); + if (ret < 0) goto out_delete; session->data = data; if (perf_data__is_read(data)) { - if (perf_session__open(session) < 0) + ret = perf_session__open(session); + if (ret < 0) goto out_delete; /* @@ -222,8 +226,11 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_evlist__init_trace_event_sample_raw(session->evlist); /* Open the directory data. */ - if (data->is_dir && perf_data__open_dir(data)) + if (data->is_dir) { + ret = perf_data__open_dir(data); + if (ret) goto out_delete; + } } } else { session->machines.host.env = &perf_env; @@ -256,7 +263,7 @@ struct perf_session *perf_session__new(struct perf_data *data, out_delete: perf_session__delete(session); out: - return NULL; + return ERR_PTR(ret); } static void perf_session__delete_threads(struct perf_session *session) From 9f014e3a66bc936412b6614304a4e6c70c70230e Mon Sep 17 00:00:00 2001 From: Roy Ben Shlomo Date: Fri, 20 Sep 2019 20:12:53 +0300 Subject: [PATCH 152/345] perf/core: Fix several typos in comments Fix typos in a few functions' documentation comments. Signed-off-by: Roy Ben Shlomo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: royb@sentinelone.com Link: http://lore.kernel.org/lkml/20190920171254.31373-1-royb@sentinelone.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4f08b17d6426..275eae05af20 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2239,7 +2239,7 @@ static void __perf_event_disable(struct perf_event *event, * * If event->ctx is a cloned context, callers must make sure that * every task struct that event->ctx->task could possibly point to - * remains valid. This condition is satisifed when called through + * remains valid. This condition is satisfied when called through * perf_event_for_each_child or perf_event_for_each because they * hold the top-level event's child_mutex, so any descendant that * goes to exit will block in perf_event_exit_event(). @@ -6054,7 +6054,7 @@ static void perf_sample_regs_intr(struct perf_regs *regs_intr, * Get remaining task size from user stack pointer. * * It'd be better to take stack vma map and limit this more - * precisly, but there's no way to get it safely under interrupt, + * precisely, but there's no way to get it safely under interrupt, * so using TASK_SIZE as limit. */ static u64 perf_ustack_task_size(struct pt_regs *regs) @@ -6616,7 +6616,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_STACK_USER) { /* - * Either we need PERF_SAMPLE_STACK_USER bit to be allways + * Either we need PERF_SAMPLE_STACK_USER bit to be always * processed as the last one or have additional check added * in case new sample type is added, because we could eat * up the rest of the sample size. From a315614b68993318f6913cc28b9b4e472ebbdf26 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Thu, 18 Jul 2019 09:32:05 +0800 Subject: [PATCH 153/345] pwm: mxs: Use devm_platform_ioremap_resource() to simplify code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the new helper devm_platform_ioremap_resource() which wraps the platform_get_resource() and devm_ioremap_resource() together, to simplify the code. Signed-off-by: Anson Huang Reviewed-by: Dong Aisheng Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mxs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 04c0f6b95c1a..b14376b47ac8 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -126,15 +126,13 @@ static int mxs_pwm_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct mxs_pwm_chip *mxs; - struct resource *res; int ret; mxs = devm_kzalloc(&pdev->dev, sizeof(*mxs), GFP_KERNEL); if (!mxs) return -ENOMEM; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mxs->base = devm_ioremap_resource(&pdev->dev, res); + mxs->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(mxs->base)) return PTR_ERR(mxs->base); From f6abac0379b8368519f28016c8c8821b8bd17f5e Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Thu, 18 Jul 2019 15:51:11 +0800 Subject: [PATCH 154/345] pwm: sifive: Remove redundant error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_ioremap_resource() already outputs an error message, so remove the extra error message on failure. Signed-off-by: Ding Xiang Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sifive.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index a7c107f19e66..bb4f02ce4f94 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -250,10 +250,8 @@ static int pwm_sifive_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ddata->regs = devm_ioremap_resource(dev, res); - if (IS_ERR(ddata->regs)) { - dev_err(dev, "Unable to map IO resources\n"); + if (IS_ERR(ddata->regs)) return PTR_ERR(ddata->regs); - } ddata->clk = devm_clk_get(dev, NULL); if (IS_ERR(ddata->clk)) { From 3b442c60cf9766c76bc3c1e44e3e387853074a08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 30 Jul 2019 14:32:29 +0200 Subject: [PATCH 155/345] pwm: jz4740: Document known limitations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The JZ4740 PWM implementation doesn't fulfill the (up to now insufficiently documented) requirements of the PWM API. At least document them in the driver. Signed-off-by: Uwe Kleine-König Reviewed-by: Paul Cercueil Signed-off-by: Thierry Reding --- drivers/pwm/pwm-jz4740.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index f901e8a0d33d..9d444d012f92 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -2,6 +2,11 @@ /* * Copyright (C) 2010, Lars-Peter Clausen * JZ4740 platform PWM support + * + * Limitations: + * - The .apply callback doesn't complete the currently running period before + * reconfiguring the hardware. + * - Each period starts with the inactive part. */ #include From f6960976c465179cd1b64dfae2edfb647820af1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 30 Jul 2019 14:45:27 +0200 Subject: [PATCH 156/345] pwm: imx: Document known limitations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx27.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 434a351fb626..91c23cbbc167 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -3,6 +3,10 @@ * simple driver for PWM (Pulse Width Modulator) controller * * Derived from pxa PWM driver by eric miao + * + * Limitations: + * - When disabled the output is driven to 0 independent of the configured + * polarity. */ #include From fb5a35dbee8d6bd2ff638b7ebaf540b86860ee0a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 30 Jul 2019 11:15:36 -0700 Subject: [PATCH 157/345] pwm: Remove dev_err() usage after platform_get_irq() We don't need dev_err() messages when platform_get_irq() fails now that platform_get_irq() prints an error message itself when something goes wrong. Let's remove these prints with a simple semantic patch. // @@ expression ret; struct platform_device *E; @@ ret = ( platform_get_irq(E, ...) | platform_get_irq_byname(E, ...) ); if ( \( ret < 0 \| ret <= 0 \) ) { ( -if (ret != -EPROBE_DEFER) -{ ... -dev_err(...); -... } | ... -dev_err(...); ) ... } // While we're here, remove braces on if statements that only have one statement (manually). Cc: Thierry Reding Cc: linux-pwm@vger.kernel.org Cc: Greg Kroah-Hartman Signed-off-by: Stephen Boyd Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sti.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index 20450e34ad57..1508616d794c 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -564,10 +564,8 @@ static int sti_pwm_probe(struct platform_device *pdev) return PTR_ERR(pc->regmap); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "Failed to obtain IRQ\n"); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, sti_pwm_interrupt, 0, pdev->name, pc); From 4b046497341c034c29405295e7b37f2547f44f71 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Mon, 5 Aug 2019 14:58:47 +0200 Subject: [PATCH 158/345] dt-bindings: pwm: mediatek: Add documentation for MT8516 Add the device-tree documentation for the PWM IP on the MediaTek MT8516 SoCs. Signed-off-by: Fabien Parent Reviewed-by: Rob Herring Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/pwm/pwm-mediatek.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt index 991728cb46cb..9152bf5afe56 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt @@ -6,6 +6,7 @@ Required properties: - "mediatek,mt7622-pwm": found on mt7622 SoC. - "mediatek,mt7623-pwm": found on mt7623 SoC. - "mediatek,mt7628-pwm": found on mt7628 SoC. + - "mediatek,mt8516-pwm": found on mt8516 SoC. - reg: physical base address and length of the controller's registers. - #pwm-cells: must be 2. See pwm.txt in this directory for a description of the cell format. From 8d190728fd8e272b733e1575e000fc1982b5d9b2 Mon Sep 17 00:00:00 2001 From: Fabien Parent Date: Mon, 5 Aug 2019 14:58:48 +0200 Subject: [PATCH 159/345] pwm: mediatek: Add MT8516 SoC support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the compatible and the platform data to support PWM on the MT8516 SoC. Signed-off-by: Fabien Parent Reviewed-by: Matthias Brugger Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index eb6674ce995f..6697e30811e7 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -302,11 +302,18 @@ static const struct mtk_pwm_platform_data mt7628_pwm_data = { .has_clks = false, }; +static const struct mtk_pwm_platform_data mt8516_pwm_data = { + .num_pwms = 5, + .pwm45_fixup = false, + .has_clks = true, +}; + static const struct of_device_id mtk_pwm_of_match[] = { { .compatible = "mediatek,mt2712-pwm", .data = &mt2712_pwm_data }, { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data }, { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data }, { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data }, + { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data }, { }, }; MODULE_DEVICE_TABLE(of, mtk_pwm_of_match); From bdaadd5948179f07ca8b508a62a8f8b00ece51ed Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 14 Aug 2019 20:46:10 +0800 Subject: [PATCH 160/345] dt-bindings: pwm: sprd: Add Spreadtrum PWM documentation Add Spreadtrum PWM controller documentation. Signed-off-by: Baolin Wang Reviewed-by: Rob Herring Signed-off-by: Thierry Reding --- .../devicetree/bindings/pwm/pwm-sprd.txt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 Documentation/devicetree/bindings/pwm/pwm-sprd.txt diff --git a/Documentation/devicetree/bindings/pwm/pwm-sprd.txt b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt new file mode 100644 index 000000000000..16fa5a096206 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-sprd.txt @@ -0,0 +1,40 @@ +Spreadtrum PWM controller + +Spreadtrum SoCs PWM controller provides 4 PWM channels. + +Required properties: +- compatible : Should be "sprd,ums512-pwm". +- reg: Physical base address and length of the controller's registers. +- clocks: The phandle and specifier referencing the controller's clocks. +- clock-names: Should contain following entries: + "pwmn": used to derive the functional clock for PWM channel n (n range: 0 ~ 3). + "enablen": for PWM channel n enable clock (n range: 0 ~ 3). +- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of + the cells format. + +Optional properties: +- assigned-clocks: Reference to the PWM clock entries. +- assigned-clock-parents: The phandle of the parent clock of PWM clock. + +Example: + pwms: pwm@32260000 { + compatible = "sprd,ums512-pwm"; + reg = <0 0x32260000 0 0x10000>; + clock-names = "pwm0", "enable0", + "pwm1", "enable1", + "pwm2", "enable2", + "pwm3", "enable3"; + clocks = <&aon_clk CLK_PWM0>, <&aonapb_gate CLK_PWM0_EB>, + <&aon_clk CLK_PWM1>, <&aonapb_gate CLK_PWM1_EB>, + <&aon_clk CLK_PWM2>, <&aonapb_gate CLK_PWM2_EB>, + <&aon_clk CLK_PWM3>, <&aonapb_gate CLK_PWM3_EB>; + assigned-clocks = <&aon_clk CLK_PWM0>, + <&aon_clk CLK_PWM1>, + <&aon_clk CLK_PWM2>, + <&aon_clk CLK_PWM3>; + assigned-clock-parents = <&ext_26m>, + <&ext_26m>, + <&ext_26m>, + <&ext_26m>; + #pwm-cells = <2>; + }; From 8aae4b02e8a6dd138afd2b54d9984d17685b0364 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 14 Aug 2019 20:46:11 +0800 Subject: [PATCH 161/345] pwm: sprd: Add Spreadtrum PWM support This patch adds the Spreadtrum PWM support, which provides maximum 4 channels. Signed-off-by: Neo Hou Signed-off-by: Baolin Wang Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 11 ++ drivers/pwm/Makefile | 1 + drivers/pwm/pwm-sprd.c | 309 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 drivers/pwm/pwm-sprd.c diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index a7e57516959e..31dfc88ef362 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -423,6 +423,17 @@ config PWM_SPEAR To compile this driver as a module, choose M here: the module will be called pwm-spear. +config PWM_SPRD + tristate "Spreadtrum PWM support" + depends on ARCH_SPRD || COMPILE_TEST + depends on HAS_IOMEM + help + Generic PWM framework driver for the PWM controller on + Spreadtrum SoCs. + + To compile this driver as a module, choose M here: the module + will be called pwm-sprd. + config PWM_STI tristate "STiH4xx PWM support" depends on ARCH_STI diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index 76b555b51887..26326adf71d7 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o obj-$(CONFIG_PWM_SIFIVE) += pwm-sifive.o obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o +obj-$(CONFIG_PWM_SPRD) += pwm-sprd.o obj-$(CONFIG_PWM_STI) += pwm-sti.o obj-$(CONFIG_PWM_STM32) += pwm-stm32.o obj-$(CONFIG_PWM_STM32_LP) += pwm-stm32-lp.o diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c new file mode 100644 index 000000000000..68c2d9f0411b --- /dev/null +++ b/drivers/pwm/pwm-sprd.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Spreadtrum Communications Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SPRD_PWM_PRESCALE 0x0 +#define SPRD_PWM_MOD 0x4 +#define SPRD_PWM_DUTY 0x8 +#define SPRD_PWM_ENABLE 0x18 + +#define SPRD_PWM_MOD_MAX GENMASK(7, 0) +#define SPRD_PWM_DUTY_MSK GENMASK(15, 0) +#define SPRD_PWM_PRESCALE_MSK GENMASK(7, 0) +#define SPRD_PWM_ENABLE_BIT BIT(0) + +#define SPRD_PWM_CHN_NUM 4 +#define SPRD_PWM_REGS_SHIFT 5 +#define SPRD_PWM_CHN_CLKS_NUM 2 +#define SPRD_PWM_CHN_OUTPUT_CLK 1 + +struct sprd_pwm_chn { + struct clk_bulk_data clks[SPRD_PWM_CHN_CLKS_NUM]; + u32 clk_rate; +}; + +struct sprd_pwm_chip { + void __iomem *base; + struct device *dev; + struct pwm_chip chip; + int num_pwms; + struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM]; +}; + +/* + * The list of clocks required by PWM channels, and each channel has 2 clocks: + * enable clock and pwm clock. + */ +static const char * const sprd_pwm_clks[] = { + "enable0", "pwm0", + "enable1", "pwm1", + "enable2", "pwm2", + "enable3", "pwm3", +}; + +static u32 sprd_pwm_read(struct sprd_pwm_chip *spc, u32 hwid, u32 reg) +{ + u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT); + + return readl_relaxed(spc->base + offset); +} + +static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid, + u32 reg, u32 val) +{ + u32 offset = reg + (hwid << SPRD_PWM_REGS_SHIFT); + + writel_relaxed(val, spc->base + offset); +} + +static void sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct sprd_pwm_chip *spc = + container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + u32 val, duty, prescale; + u64 tmp; + int ret; + + /* + * The clocks to PWM channel has to be enabled first before + * reading to the registers. + */ + ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, chn->clks); + if (ret) { + dev_err(spc->dev, "failed to enable pwm%u clocks\n", + pwm->hwpwm); + return; + } + + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_ENABLE); + if (val & SPRD_PWM_ENABLE_BIT) + state->enabled = true; + else + state->enabled = false; + + /* + * The hardware provides a counter that is feed by the source clock. + * The period length is (PRESCALE + 1) * MOD counter steps. + * The duty cycle length is (PRESCALE + 1) * DUTY counter steps. + * Thus the period_ns and duty_ns calculation formula should be: + * period_ns = NSEC_PER_SEC * (prescale + 1) * mod / clk_rate + * duty_ns = NSEC_PER_SEC * (prescale + 1) * duty / clk_rate + */ + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_PRESCALE); + prescale = val & SPRD_PWM_PRESCALE_MSK; + tmp = (prescale + 1) * NSEC_PER_SEC * SPRD_PWM_MOD_MAX; + state->period = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + + val = sprd_pwm_read(spc, pwm->hwpwm, SPRD_PWM_DUTY); + duty = val & SPRD_PWM_DUTY_MSK; + tmp = (prescale + 1) * NSEC_PER_SEC * duty; + state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + + /* Disable PWM clocks if the PWM channel is not in enable state. */ + if (!state->enabled) + clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks); +} + +static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm, + int duty_ns, int period_ns) +{ + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + u32 prescale, duty; + u64 tmp; + + /* + * The hardware provides a counter that is feed by the source clock. + * The period length is (PRESCALE + 1) * MOD counter steps. + * The duty cycle length is (PRESCALE + 1) * DUTY counter steps. + * + * To keep the maths simple we're always using MOD = SPRD_PWM_MOD_MAX. + * The value for PRESCALE is selected such that the resulting period + * gets the maximal length not bigger than the requested one with the + * given settings (MOD = SPRD_PWM_MOD_MAX and input clock). + */ + duty = duty_ns * SPRD_PWM_MOD_MAX / period_ns; + + tmp = (u64)chn->clk_rate * period_ns; + do_div(tmp, NSEC_PER_SEC); + prescale = DIV_ROUND_CLOSEST_ULL(tmp, SPRD_PWM_MOD_MAX) - 1; + if (prescale > SPRD_PWM_PRESCALE_MSK) + prescale = SPRD_PWM_PRESCALE_MSK; + + /* + * Note: Writing DUTY triggers the hardware to actually apply the + * values written to MOD and DUTY to the output, so must keep writing + * DUTY last. + * + * The hardware can ensures that current running period is completed + * before changing a new configuration to avoid mixed settings. + */ + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_PRESCALE, prescale); + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_MOD, SPRD_PWM_MOD_MAX); + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_DUTY, duty); + + return 0; +} + +static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + struct sprd_pwm_chip *spc = + container_of(chip, struct sprd_pwm_chip, chip); + struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm]; + struct pwm_state *cstate = &pwm->state; + int ret; + + if (state->enabled) { + if (!cstate->enabled) { + /* + * The clocks to PWM channel has to be enabled first + * before writing to the registers. + */ + ret = clk_bulk_prepare_enable(SPRD_PWM_CHN_CLKS_NUM, + chn->clks); + if (ret) { + dev_err(spc->dev, + "failed to enable pwm%u clocks\n", + pwm->hwpwm); + return ret; + } + } + + if (state->period != cstate->period || + state->duty_cycle != cstate->duty_cycle) { + ret = sprd_pwm_config(spc, pwm, state->duty_cycle, + state->period); + if (ret) + return ret; + } + + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 1); + } else if (cstate->enabled) { + /* + * Note: After setting SPRD_PWM_ENABLE to zero, the controller + * will not wait for current period to be completed, instead it + * will stop the PWM channel immediately. + */ + sprd_pwm_write(spc, pwm->hwpwm, SPRD_PWM_ENABLE, 0); + + clk_bulk_disable_unprepare(SPRD_PWM_CHN_CLKS_NUM, chn->clks); + } + + return 0; +} + +static const struct pwm_ops sprd_pwm_ops = { + .apply = sprd_pwm_apply, + .get_state = sprd_pwm_get_state, + .owner = THIS_MODULE, +}; + +static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) +{ + struct clk *clk_pwm; + int ret, i; + + for (i = 0; i < SPRD_PWM_CHN_NUM; i++) { + struct sprd_pwm_chn *chn = &spc->chn[i]; + int j; + + for (j = 0; j < SPRD_PWM_CHN_CLKS_NUM; ++j) + chn->clks[j].id = + sprd_pwm_clks[i * SPRD_PWM_CHN_CLKS_NUM + j]; + + ret = devm_clk_bulk_get(spc->dev, SPRD_PWM_CHN_CLKS_NUM, + chn->clks); + if (ret) { + if (ret == -ENOENT) + break; + + if (ret != -EPROBE_DEFER) + dev_err(spc->dev, + "failed to get channel clocks\n"); + + return ret; + } + + clk_pwm = chn->clks[SPRD_PWM_CHN_OUTPUT_CLK].clk; + chn->clk_rate = clk_get_rate(clk_pwm); + } + + if (!i) { + dev_err(spc->dev, "no available PWM channels\n"); + return -ENODEV; + } + + spc->num_pwms = i; + + return 0; +} + +static int sprd_pwm_probe(struct platform_device *pdev) +{ + struct sprd_pwm_chip *spc; + int ret; + + spc = devm_kzalloc(&pdev->dev, sizeof(*spc), GFP_KERNEL); + if (!spc) + return -ENOMEM; + + spc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(spc->base)) + return PTR_ERR(spc->base); + + spc->dev = &pdev->dev; + platform_set_drvdata(pdev, spc); + + ret = sprd_pwm_clk_init(spc); + if (ret) + return ret; + + spc->chip.dev = &pdev->dev; + spc->chip.ops = &sprd_pwm_ops; + spc->chip.base = -1; + spc->chip.npwm = spc->num_pwms; + + ret = pwmchip_add(&spc->chip); + if (ret) + dev_err(&pdev->dev, "failed to add PWM chip\n"); + + return ret; +} + +static int sprd_pwm_remove(struct platform_device *pdev) +{ + struct sprd_pwm_chip *spc = platform_get_drvdata(pdev); + + return pwmchip_remove(&spc->chip); +} + +static const struct of_device_id sprd_pwm_of_match[] = { + { .compatible = "sprd,ums512-pwm", }, + { }, +}; +MODULE_DEVICE_TABLE(of, sprd_pwm_of_match); + +static struct platform_driver sprd_pwm_driver = { + .driver = { + .name = "sprd-pwm", + .of_match_table = sprd_pwm_of_match, + }, + .probe = sprd_pwm_probe, + .remove = sprd_pwm_remove, +}; + +module_platform_driver(sprd_pwm_driver); + +MODULE_DESCRIPTION("Spreadtrum PWM Driver"); +MODULE_LICENSE("GPL v2"); From c79468b8955b4924b5aca858247395265201ac42 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 19 Aug 2019 15:20:12 +0900 Subject: [PATCH 162/345] pwm: rcar: Remove a redundant condition in rcar_pwm_apply() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the rcar_pwm_apply() has already checked whether state->enabled is set or not, this patch removes a redundant condition. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Reviewed-by: Uwe Kleine-König Reviewed-by: Simon Horman Signed-off-by: Thierry Reding --- drivers/pwm/pwm-rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 5b2b8ecc354c..c8cd43f91efc 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -187,7 +187,7 @@ static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, /* The SYNC should be set to 0 even if rcar_pwm_set_counter failed */ rcar_pwm_update(rp, RCAR_PWMCR_SYNC, 0, RCAR_PWMCR); - if (!ret && state->enabled) + if (!ret) ret = rcar_pwm_enable(rp); return ret; From 4537e52a526698f503dcb151234414cfa46d534b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 24 Aug 2019 16:09:46 +0200 Subject: [PATCH 163/345] pwm: bcm2835: Suppress error message for invalid period_ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PWM config can be triggered via sysfs, so we better suppress the error message in case of an invalid period to avoid kernel log spamming. Signed-off-by: Stefan Wahren Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm2835.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index f6fe0b922e1e..52763063c100 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -72,11 +72,8 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate); - if (period_ns <= MIN_PERIOD) { - dev_err(pc->dev, "period %d not supported, minimum %d\n", - period_ns, MIN_PERIOD); + if (period_ns <= MIN_PERIOD) return -EINVAL; - } writel(DIV_ROUND_CLOSEST(duty_ns, scaler), pc->base + DUTY(pwm->hwpwm)); From 7e9713af316187f323b25ebf7eb1a8f03425e7e9 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 24 Aug 2019 16:09:47 +0200 Subject: [PATCH 164/345] pwm: bcm2835: Fix period_ns range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The range check for period_ns was written under assumption of a fixed PWM clock. With clk-bcm2835 driver the PWM clock is a dynamic one. So fix this by doing the range check on the period register value. Signed-off-by: Stefan Wahren Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm2835.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index 52763063c100..2c82386c0ebf 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -21,7 +21,7 @@ #define PERIOD(x) (((x) * 0x10) + 0x10) #define DUTY(x) (((x) * 0x10) + 0x14) -#define MIN_PERIOD 108 /* 9.2 MHz max. PWM clock */ +#define PERIOD_MIN 0x2 struct bcm2835_pwm { struct pwm_chip chip; @@ -64,6 +64,7 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, struct bcm2835_pwm *pc = to_bcm2835_pwm(chip); unsigned long rate = clk_get_rate(pc->clk); unsigned long scaler; + u32 period; if (!rate) { dev_err(pc->dev, "failed to get clock rate\n"); @@ -71,14 +72,14 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate); + period = DIV_ROUND_CLOSEST(period_ns, scaler); - if (period_ns <= MIN_PERIOD) + if (period < PERIOD_MIN) return -EINVAL; writel(DIV_ROUND_CLOSEST(duty_ns, scaler), pc->base + DUTY(pwm->hwpwm)); - writel(DIV_ROUND_CLOSEST(period_ns, scaler), - pc->base + PERIOD(pwm->hwpwm)); + writel(period, pc->base + PERIOD(pwm->hwpwm)); return 0; } From 9e3ca01f7e58ef4662c18adcb766ee3422ace0fb Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 24 Aug 2019 16:09:48 +0200 Subject: [PATCH 165/345] pwm: bcm2835: Suppress error message during deferred probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This suppresses error messages in case the PWM clock isn't ready yet. Signed-off-by: Stefan Wahren Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bcm2835.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index 2c82386c0ebf..91e24f01b54e 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -153,8 +153,11 @@ static int bcm2835_pwm_probe(struct platform_device *pdev) pc->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pc->clk)) { - dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk)); - return PTR_ERR(pc->clk); + ret = PTR_ERR(pc->clk); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "clock not found: %d\n", ret); + + return ret; } ret = clk_prepare_enable(pc->clk); From ba73deb16ff5b2d70b1ffc025c84c1126aad7fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 2 Sep 2019 16:39:41 +0200 Subject: [PATCH 166/345] pwm: rockchip: Set polarity unconditionally in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't rely on *state being zero initialized and PWM_POLARITY_NORMAL being zero. So always assign .polarity. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-rockchip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 51b96cb7dd25..8eb2db59741d 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -90,10 +90,10 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip, state->enabled = ((val & enable_conf) == enable_conf) ? true : false; - if (pc->data->supports_polarity) { - if (!(val & PWM_DUTY_POSITIVE)) - state->polarity = PWM_POLARITY_INVERSED; - } + if (pc->data->supports_polarity && !(val & PWM_DUTY_POSITIVE)) + state->polarity = PWM_POLARITY_INVERSED; + else + state->polarity = PWM_POLARITY_NORMAL; clk_disable(pc->pclk); } From fc3c5512e337bdb8b019883ea9078bbccc00c4e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:02 +0200 Subject: [PATCH 167/345] pwm: Introduce local struct pwm_chip in pwm_apply_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pwm->chip is dereferenced several times in the pwm_apply_state() function. Introducing a local variable for it helps keeping some lines a bit shorter. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 8edfac17364e..4ab683a30629 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -454,20 +454,23 @@ EXPORT_SYMBOL_GPL(pwm_free); */ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) { + struct pwm_chip *chip; int err; if (!pwm || !state || !state->period || state->duty_cycle > state->period) return -EINVAL; + chip = pwm->chip; + if (state->period == pwm->state.period && state->duty_cycle == pwm->state.duty_cycle && state->polarity == pwm->state.polarity && state->enabled == pwm->state.enabled) return 0; - if (pwm->chip->ops->apply) { - err = pwm->chip->ops->apply(pwm->chip, pwm, state); + if (chip->ops->apply) { + err = chip->ops->apply(chip, pwm, state); if (err) return err; @@ -477,7 +480,7 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) * FIXME: restore the initial state in case of error. */ if (state->polarity != pwm->state.polarity) { - if (!pwm->chip->ops->set_polarity) + if (!chip->ops->set_polarity) return -ENOTSUPP; /* @@ -486,12 +489,12 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) * ->apply(). */ if (pwm->state.enabled) { - pwm->chip->ops->disable(pwm->chip, pwm); + chip->ops->disable(chip, pwm); pwm->state.enabled = false; } - err = pwm->chip->ops->set_polarity(pwm->chip, pwm, - state->polarity); + err = chip->ops->set_polarity(chip, pwm, + state->polarity); if (err) return err; @@ -500,9 +503,9 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) if (state->period != pwm->state.period || state->duty_cycle != pwm->state.duty_cycle) { - err = pwm->chip->ops->config(pwm->chip, pwm, - state->duty_cycle, - state->period); + err = chip->ops->config(pwm->chip, pwm, + state->duty_cycle, + state->period); if (err) return err; @@ -512,11 +515,11 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) if (state->enabled != pwm->state.enabled) { if (state->enabled) { - err = pwm->chip->ops->enable(pwm->chip, pwm); + err = chip->ops->enable(chip, pwm); if (err) return err; } else { - pwm->chip->ops->disable(pwm->chip, pwm); + chip->ops->disable(chip, pwm); } pwm->state.enabled = state->enabled; From 01ccf903edd65f6421612321648fa5a7f4b7cb10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:03 +0200 Subject: [PATCH 168/345] pwm: Let pwm_get_state() return the last implemented state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When pwm_apply_state() is called the lowlevel driver usually has to apply some rounding because the hardware doesn't support nanosecond resolution. So let pwm_get_state() return the actually implemented state instead of the last applied one if possible. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 4ab683a30629..449ba161877d 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -474,7 +474,14 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) if (err) return err; - pwm->state = *state; + /* + * .apply might have to round some values in *state, if possible + * read the actually implemented value back. + */ + if (chip->ops->get_state) + chip->ops->get_state(chip, pwm, &pwm->state); + else + pwm->state = *state; } else { /* * FIXME: restore the initial state in case of error. From 797a5ebc26daee5552e668ee4622bc3f47c1f743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:04 +0200 Subject: [PATCH 169/345] pwm: rockchip: Don't update the state for the caller of pwm_apply_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pwm-rockchip driver is one of only three PWM drivers which updates the state for the caller of pwm_apply_state(). This might have surprising results if the caller reuses the values expecting them to still represent the same state. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-rockchip.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 8eb2db59741d..83c7627868d8 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -212,12 +212,6 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, goto out; } - /* - * Update the state with the real hardware, which can differ a bit - * because of period/duty_cycle approximation. - */ - rockchip_pwm_get_state(chip, pwm, state); - out: clk_disable(pc->pclk); From deb9c462f4e539cc7f8389b9855eb7a507c78e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:05 +0200 Subject: [PATCH 170/345] pwm: sun4i: Don't update the state for the caller of pwm_apply_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pwm-sun4i driver is one of only three PWM drivers which updates the state for the caller of pwm_apply_state(). This might have surprising results if the caller reuses the values expecting them to still represent the same state. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sun4i.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index de78c824bbfd..39007a7c0d83 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -192,12 +192,6 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm, *dty = div; *prsclr = prescaler; - div = (u64)pval * NSEC_PER_SEC * *prd; - state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate); - - div = (u64)pval * NSEC_PER_SEC * *dty; - state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate); - return 0; } From c9675829ba4b0e95c613f6d6d83d2b5cb9c5371c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:06 +0200 Subject: [PATCH 171/345] pwm: fsl-ftm: Don't update the state for the caller of pwm_apply_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pwm-fsl-ftm driver is one of only three PWM drivers which updates the state for the caller of pwm_apply_state(). This might have surprising results if the caller reuses the values expecting them to still represent the same state. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-fsl-ftm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 9d31a217111d..3c9738617ceb 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -292,10 +292,6 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc, regmap_update_bits(fpc->regmap, FTM_POL, BIT(pwm->hwpwm), reg_polarity); - newstate->period = fsl_pwm_ticks_to_ns(fpc, - fpc->period.mod_period + 1); - newstate->duty_cycle = fsl_pwm_ticks_to_ns(fpc, duty); - ftm_set_write_protection(fpc); return 0; From 71523d1812aca61e32e742e87ec064e3d8c615e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 17:37:07 +0200 Subject: [PATCH 172/345] pwm: Ensure pwm_apply_state() doesn't modify the state argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is surprising for a PWM consumer when the variable holding the requested state is modified by pwm_apply_state(). Consider for example a driver doing: #define PERIOD 5000000 #define DUTY_LITTLE 10 ... struct pwm_state state = { .period = PERIOD, .duty_cycle = DUTY_LITTLE, .polarity = PWM_POLARITY_NORMAL, .enabled = true, }; pwm_apply_state(mypwm, &state); ... state.duty_cycle = PERIOD / 2; pwm_apply_state(mypwm, &state); For sure the second call to pwm_apply_state() should still have state.period = PERIOD and not something the hardware driver chose for a reason that doesn't necessarily apply to the second call. So declare the state argument as a pointer to a const type and adapt all drivers' .apply callbacks. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/gpio/gpio-mvebu.c | 2 +- drivers/pwm/core.c | 6 ++---- drivers/pwm/pwm-atmel-hlcdc.c | 2 +- drivers/pwm/pwm-atmel.c | 2 +- drivers/pwm/pwm-bcm-iproc.c | 2 +- drivers/pwm/pwm-cros-ec.c | 2 +- drivers/pwm/pwm-fsl-ftm.c | 4 ++-- drivers/pwm/pwm-hibvt.c | 2 +- drivers/pwm/pwm-imx-tpm.c | 4 ++-- drivers/pwm/pwm-imx27.c | 2 +- drivers/pwm/pwm-jz4740.c | 2 +- drivers/pwm/pwm-lpss.c | 2 +- drivers/pwm/pwm-meson.c | 4 ++-- drivers/pwm/pwm-rcar.c | 2 +- drivers/pwm/pwm-rockchip.c | 4 ++-- drivers/pwm/pwm-sifive.c | 2 +- drivers/pwm/pwm-sprd.c | 2 +- drivers/pwm/pwm-stm32-lp.c | 2 +- drivers/pwm/pwm-stm32.c | 4 ++-- drivers/pwm/pwm-sun4i.c | 4 ++-- drivers/pwm/pwm-zx.c | 2 +- include/linux/pwm.h | 4 ++-- 22 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 869d47f89599..6c0687694341 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -694,7 +694,7 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, } static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct mvebu_pwm *mvpwm = to_mvebu_pwm(chip); struct mvebu_gpio_chip *mvchip = mvpwm->mvchip; diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 449ba161877d..6ad51aa60c03 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -448,11 +448,9 @@ EXPORT_SYMBOL_GPL(pwm_free); /** * pwm_apply_state() - atomically apply a new state to a PWM device * @pwm: PWM device - * @state: new state to apply. This can be adjusted by the PWM driver - * if the requested config is not achievable, for example, - * ->duty_cycle and ->period might be approximated. + * @state: new state to apply */ -int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) +int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state) { struct pwm_chip *chip; int err; diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index d13a83f430ac..dcbc0489dfd4 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -39,7 +39,7 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip) } static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c); struct atmel_hlcdc *hlcdc = chip->hlcdc; diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index e5e1eaf372fa..53bc7b9b3581 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -209,7 +209,7 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm, } static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip); struct pwm_state cstate; diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index d961a8207b1c..56c38cfae92c 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -115,7 +115,7 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm, } static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { unsigned long prescale = IPROC_PWM_PRESCALE_MIN; struct iproc_pwmc *ip = to_iproc_pwmc(chip); diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 98f6ac6cf6ab..db5faa79c33f 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -93,7 +93,7 @@ static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index) } static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); int duty_cycle; diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 3c9738617ceb..59272a920479 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -227,7 +227,7 @@ static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc, static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc, struct pwm_device *pwm, - struct pwm_state *newstate) + const struct pwm_state *newstate) { unsigned int duty; u32 reg_polarity; @@ -298,7 +298,7 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc, } static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *newstate) + const struct pwm_state *newstate) { struct fsl_pwm_chip *fpc = to_fsl_chip(chip); struct pwm_state *oldstate = &pwm->state; diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index 753bd58111e4..ad205fdad372 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -149,7 +149,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, } static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct hibvt_pwm_chip *hi_pwm_chip = to_hibvt_pwm_chip(chip); diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index e8385c1cf342..9145f6160649 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -89,7 +89,7 @@ to_imx_tpm_pwm_chip(struct pwm_chip *chip) static int pwm_imx_tpm_round_state(struct pwm_chip *chip, struct imx_tpm_pwm_param *p, struct pwm_state *real_state, - struct pwm_state *state) + const struct pwm_state *state) { struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip); u32 rate, prescale, period_count, clock_unit; @@ -289,7 +289,7 @@ static int pwm_imx_tpm_apply_hw(struct pwm_chip *chip, static int pwm_imx_tpm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip); struct imx_tpm_pwm_param param; diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 91c23cbbc167..ae11d8577f18 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -209,7 +209,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip, } static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { unsigned long period_cycles, duty_cycles, prescale; struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip); diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 9d444d012f92..9d78cc21cb12 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -88,7 +88,7 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) } static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip); unsigned long long tmp; diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4098a4601691..75bbfe5f3bc2 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -122,7 +122,7 @@ static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) } static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct pwm_lpss_chip *lpwm = to_lpwm(chip); int ret; diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 3cbff5cbb789..6245bbdb6e6c 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -159,7 +159,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) } static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); unsigned int duty, period, pre_div, cnt, duty_cnt; @@ -265,7 +265,7 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm) } static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); struct meson_pwm *meson = to_meson_pwm(chip); diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index c8cd43f91efc..852eb2347954 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -158,7 +158,7 @@ static void rcar_pwm_disable(struct rcar_pwm_chip *rp) } static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip); struct pwm_state cur_state; diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 83c7627868d8..73352e6fbccb 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -99,7 +99,7 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip, } static void rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); unsigned long period, duty; @@ -183,7 +183,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip, } static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); struct pwm_state curstate; diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index bb4f02ce4f94..cc63f9baa481 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -147,7 +147,7 @@ static int pwm_sifive_enable(struct pwm_chip *chip, bool enable) } static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip); struct pwm_state cur_state; diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index 68c2d9f0411b..be2394227423 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -156,7 +156,7 @@ static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm, } static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct sprd_pwm_chip *spc = container_of(chip, struct sprd_pwm_chip, chip); diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 2211a642066d..21cb260dc2c0 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -32,7 +32,7 @@ static inline struct stm32_pwm_lp *to_stm32_pwm_lp(struct pwm_chip *chip) #define STM32_LPTIM_MAX_PRESCALER 128 static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct stm32_pwm_lp *priv = to_stm32_pwm_lp(chip); unsigned long long prd, div, dty; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 740e2dec8313..359b08596d9e 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -440,7 +440,7 @@ static void stm32_pwm_disable(struct stm32_pwm *priv, int ch) } static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { bool enabled; struct stm32_pwm *priv = to_stm32_pwm_dev(chip); @@ -468,7 +468,7 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct stm32_pwm *priv = to_stm32_pwm_dev(chip); int ret; diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index 39007a7c0d83..6f5840a1a82d 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -145,7 +145,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip, } static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm, - struct pwm_state *state, + const struct pwm_state *state, u32 *dty, u32 *prd, unsigned int *prsclr) { u64 clk_rate, div = 0; @@ -196,7 +196,7 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm, } static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip); struct pwm_state cstate; diff --git a/drivers/pwm/pwm-zx.c b/drivers/pwm/pwm-zx.c index e24f4be35316..e2c21cc34a96 100644 --- a/drivers/pwm/pwm-zx.c +++ b/drivers/pwm/pwm-zx.c @@ -148,7 +148,7 @@ static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state) + const struct pwm_state *state) { struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip); struct pwm_state cstate; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 24632a7a7d11..b2c9c460947d 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -262,7 +262,7 @@ struct pwm_ops { int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, - struct pwm_state *state); + const struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); struct module *owner; @@ -316,7 +316,7 @@ struct pwm_capture { /* PWM user APIs */ struct pwm_device *pwm_request(int pwm_id, const char *label); void pwm_free(struct pwm_device *pwm); -int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); +int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** From c91e3234c6035baf5a79763cb4fcd5d23ce75c2b Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 18 Sep 2019 16:54:21 +0200 Subject: [PATCH 173/345] pwm: stm32-lp: Add check in case requested period cannot be achieved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LPTimer can use a 32KHz clock for counting. It depends on clock tree configuration. In such a case, PWM output frequency range is limited. Although unlikely, nothing prevents user from requesting a PWM frequency above counting clock (32KHz for instance): - This causes (prd - 1) = 0xffff to be written in ARR register later in the apply() routine. This results in badly configured PWM period (and also duty_cycle). Add a check to report an error is such a case. Signed-off-by: Fabrice Gasnier Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-stm32-lp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 21cb260dc2c0..67fca62524dc 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -59,6 +59,12 @@ static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm, /* Calculate the period and prescaler value */ div = (unsigned long long)clk_get_rate(priv->clk) * state->period; do_div(div, NSEC_PER_SEC); + if (!div) { + /* Clock is too slow to achieve requested period. */ + dev_dbg(priv->chip.dev, "Can't reach %u ns\n", state->period); + return -EINVAL; + } + prd = div; while (div > STM32_LPTIM_MAX_ARR) { presc++; From 3d4d85741ad3d880cb432d9121f5563e6a57ff2e Mon Sep 17 00:00:00 2001 From: Kamel Bouhara Date: Wed, 18 Sep 2019 16:57:16 +0200 Subject: [PATCH 174/345] pwm: atmel: Remove platform_device_id and use only dt bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 26202873bb51 ("avr32: remove support for AVR32 architecture") there is no more user of platform_device_id and we should only use dt bindings Signed-off-by: Kamel Bouhara Acked-by: Alexandre Belloni Acked-by: Claudiu Beznea Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 2 +- drivers/pwm/pwm-atmel.c | 35 +++-------------------------------- 2 files changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 31dfc88ef362..08f35e2b167e 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -44,7 +44,7 @@ config PWM_AB8500 config PWM_ATMEL tristate "Atmel PWM support" - depends on ARCH_AT91 + depends on ARCH_AT91 && OF help Generic PWM framework driver for Atmel SoC. diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 53bc7b9b3581..5b861da3e325 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -318,19 +318,6 @@ static const struct atmel_pwm_data mchp_sam9x60_pwm_data = { }, }; -static const struct platform_device_id atmel_pwm_devtypes[] = { - { - .name = "at91sam9rl-pwm", - .driver_data = (kernel_ulong_t)&atmel_sam9rl_pwm_data, - }, { - .name = "sama5d3-pwm", - .driver_data = (kernel_ulong_t)&atmel_sama5_pwm_data, - }, { - /* sentinel */ - }, -}; -MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes); - static const struct of_device_id atmel_pwm_dt_ids[] = { { .compatible = "atmel,at91sam9rl-pwm", @@ -350,19 +337,6 @@ static const struct of_device_id atmel_pwm_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); -static inline const struct atmel_pwm_data * -atmel_pwm_get_driver_data(struct platform_device *pdev) -{ - const struct platform_device_id *id; - - if (pdev->dev.of_node) - return of_device_get_match_data(&pdev->dev); - - id = platform_get_device_id(pdev); - - return (struct atmel_pwm_data *)id->driver_data; -} - static int atmel_pwm_probe(struct platform_device *pdev) { const struct atmel_pwm_data *data; @@ -370,7 +344,7 @@ static int atmel_pwm_probe(struct platform_device *pdev) struct resource *res; int ret; - data = atmel_pwm_get_driver_data(pdev); + data = of_device_get_match_data(&pdev->dev); if (!data) return -ENODEV; @@ -396,10 +370,8 @@ static int atmel_pwm_probe(struct platform_device *pdev) atmel_pwm->chip.dev = &pdev->dev; atmel_pwm->chip.ops = &atmel_pwm_ops; - if (pdev->dev.of_node) { - atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; - atmel_pwm->chip.of_pwm_n_cells = 3; - } + atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; + atmel_pwm->chip.of_pwm_n_cells = 3; atmel_pwm->chip.base = -1; atmel_pwm->chip.npwm = 4; @@ -437,7 +409,6 @@ static struct platform_driver atmel_pwm_driver = { .name = "atmel-pwm", .of_match_table = of_match_ptr(atmel_pwm_dt_ids), }, - .id_table = atmel_pwm_devtypes, .probe = atmel_pwm_probe, .remove = atmel_pwm_remove, }; From d85b9ce198e3689141cce965cb840f1c435ac4d5 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Sat, 21 Sep 2019 01:55:48 +0200 Subject: [PATCH 175/345] pwm: atmel: Remove unneeded check for match data Since the driver is now exclusively DT, it only binds if it finds a match in the of_device_id table. But in that case the associated data can never be NULL, so drop the unnecessary check. While at it, drop the extra local variable and store the pointer to this per-SoC data in the driver data directly. Signed-off-by: Thierry Reding --- drivers/pwm/pwm-atmel.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 5b861da3e325..2e0b3d985d24 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -339,19 +339,16 @@ MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); static int atmel_pwm_probe(struct platform_device *pdev) { - const struct atmel_pwm_data *data; struct atmel_pwm_chip *atmel_pwm; struct resource *res; int ret; - data = of_device_get_match_data(&pdev->dev); - if (!data) - return -ENODEV; - atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL); if (!atmel_pwm) return -ENOMEM; + atmel_pwm->data = of_device_get_match_data(&pdev->dev); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(atmel_pwm->base)) @@ -375,7 +372,6 @@ static int atmel_pwm_probe(struct platform_device *pdev) atmel_pwm->chip.base = -1; atmel_pwm->chip.npwm = 4; - atmel_pwm->data = data; atmel_pwm->updated_pwms = 0; mutex_init(&atmel_pwm->isr_lock); From 9193c16e5a9899e742a862a0fec9bb5235008370 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Sat, 21 Sep 2019 01:58:58 +0200 Subject: [PATCH 176/345] pwm: atmel: Consolidate driver data initialization This helps readability by separating the driver-specific bits from the PWM framework bits. Signed-off-by: Thierry Reding --- drivers/pwm/pwm-atmel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 2e0b3d985d24..9ba733467e26 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -347,7 +347,9 @@ static int atmel_pwm_probe(struct platform_device *pdev) if (!atmel_pwm) return -ENOMEM; + mutex_init(&atmel_pwm->isr_lock); atmel_pwm->data = of_device_get_match_data(&pdev->dev); + atmel_pwm->updated_pwms = 0; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res); @@ -366,14 +368,10 @@ static int atmel_pwm_probe(struct platform_device *pdev) atmel_pwm->chip.dev = &pdev->dev; atmel_pwm->chip.ops = &atmel_pwm_ops; - atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; atmel_pwm->chip.of_pwm_n_cells = 3; - atmel_pwm->chip.base = -1; atmel_pwm->chip.npwm = 4; - atmel_pwm->updated_pwms = 0; - mutex_init(&atmel_pwm->isr_lock); ret = pwmchip_add(&atmel_pwm->chip); if (ret < 0) { From e6c7c258f035ffec9d8a808c1bc34b6a5beae0ef Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 20 Sep 2019 06:49:02 +0800 Subject: [PATCH 177/345] pwm: mediatek: Drop the check for of_device_get_match_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch drop the check for of_device_get_match_data. Due to the only way call driver probe is compatible match. The data pointer which points to the SoC specify data is directly set by driver, and it should not be NULL in our case. We can safety remove the check for the result of of_device_get_match_data(). Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 6697e30811e7..459b2ccd0b75 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -226,7 +226,6 @@ static const struct pwm_ops mtk_pwm_ops = { static int mtk_pwm_probe(struct platform_device *pdev) { - const struct mtk_pwm_platform_data *data; struct mtk_pwm_chip *pc; struct resource *res; unsigned int i; @@ -236,17 +235,14 @@ static int mtk_pwm_probe(struct platform_device *pdev) if (!pc) return -ENOMEM; - data = of_device_get_match_data(&pdev->dev); - if (data == NULL) - return -EINVAL; - pc->soc = data; + pc->soc = of_device_get_match_data(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); pc->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(pc->regs)) return PTR_ERR(pc->regs); - for (i = 0; i < data->num_pwms + 2 && pc->soc->has_clks; i++) { + for (i = 0; i < pc->soc->num_pwms + 2 && pc->soc->has_clks; i++) { pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]); if (IS_ERR(pc->clks[i])) { dev_err(&pdev->dev, "clock: %s fail: %ld\n", @@ -260,7 +256,7 @@ static int mtk_pwm_probe(struct platform_device *pdev) pc->chip.dev = &pdev->dev; pc->chip.ops = &mtk_pwm_ops; pc->chip.base = -1; - pc->chip.npwm = data->num_pwms; + pc->chip.npwm = pc->soc->num_pwms; ret = pwmchip_add(&pc->chip); if (ret < 0) { From 4ec8d984895fef43abfc896c8a3346aca7a66649 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 20 Sep 2019 16:03:56 -0700 Subject: [PATCH 178/345] perf record: Fix priv level with branch sampling for paranoid=2 Now that the default perf_events paranoid level is set to 2, a regular user cannot monitor kernel level activity anymore. As such, with the following cmdline: $ perf record -e cycles date The perf tool first tries cycles:uk but then falls back to cycles:u as can be seen in the perf report --header-only output: cmdline : /export/hda3/tmp/perf.tip record -e cycles ls event : name = cycles:u, , id = { 436186, ... } This is okay as long as there is way to learn the priv level was changed internally by the tool. But consider a similar example: $ perf record -b -e cycles date Error: You may not have permission to collect stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid, which controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). ... Why is that treated differently given that the branch sampling inherits the priv level of the first event in this case, i.e., cycles:u? It turns out that the branch sampling code is more picky and also checks exclude_hv. In the fallback path, perf record is setting exclude_kernel = 1, but it does not change exclude_hv. This does not seem to match the restriction imposed by paranoid = 2. This patch fixes the problem by forcing exclude_hv = 1 in the fallback for paranoid=2. With this in place: $ perf record -b -e cycles date cmdline : /export/hda3/tmp/perf.tip record -b -e cycles ls event : name = cycles:u, , id = { 436847, ... } And the command succeeds as expected. V2 fix a white space. Committer testing: After aplying the patch we get: [acme@quaco ~]$ perf record -b -e cycles date WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Mon 23 Sep 2019 11:00:59 AM -03 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.005 MB perf.data (14 samples) ] [acme@quaco ~]$ perf evlist -v cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY [acme@quaco ~]$ That warning about restricted kernel maps will be suppressed in a follow up patch, as perf_event_attr.exclude_kernel is set, i.e. no samples for the kernel will be taken and thus no need for those maps. Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190920230356.41420-1-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8e335d168503..502bc3d50e0d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2535,9 +2535,11 @@ bool perf_evsel__fallback(struct evsel *evsel, int err, if (evsel->name) free(evsel->name); evsel->name = new_name; - scnprintf(msg, msgsize, -"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid); + scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying " + "to fall back to excluding kernel and hypervisor " + " samples", paranoid); evsel->core.attr.exclude_kernel = 1; + evsel->core.attr.exclude_hv = 1; return true; } From c8b567c8a96a35acff746b1eef6f859e2a9fc195 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2019 11:07:29 -0300 Subject: [PATCH 179/345] perf record: Move restricted maps check to after a possible fallback to not collect kernel samples Before: [acme@quaco ~]$ perf record -b -e cycles date WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Mon 23 Sep 2019 11:00:59 AM -03 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.005 MB perf.data (14 samples) ] [acme@quaco ~]$ But we did a fallback and exclude_kernel was set, so no need for resolving kernel symbols: $ perf evlist -v cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY $ After: [acme@quaco ~]$ perf record -b -e cycles date Mon 23 Sep 2019 11:07:18 AM -03 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.007 MB perf.data (16 samples) ] [acme@quaco ~]$ perf evlist -v cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY [acme@quaco ~]$ No needless warning is emitted. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: https://lkml.kernel.org/n/tip-5yqnr8xcqwhr15xktj2097ac@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3f66a49a997f..1e1f97139f16 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -788,6 +788,17 @@ try_again: pos->supported = true; } + if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) { + pr_warning( +"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" +"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" +"Samples in kernel functions may not be resolved if a suitable vmlinux\n" +"file is not found in the buildid cache or in the vmlinux path.\n\n" +"Samples in kernel modules won't be resolved at all.\n\n" +"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" +"even with a suitable vmlinux or kallsyms file.\n\n"); + } + if (perf_evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", pos->filter, perf_evsel__name(pos), errno, @@ -2364,16 +2375,6 @@ int cmd_record(int argc, const char **argv) err = -ENOMEM; - if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) - pr_warning( -"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" -"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" -"Samples in kernel functions may not be resolved if a suitable vmlinux\n" -"file is not found in the buildid cache or in the vmlinux path.\n\n" -"Samples in kernel modules won't be resolved at all.\n\n" -"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" -"even with a suitable vmlinux or kallsyms file.\n\n"); - if (rec->no_buildid_cache || rec->no_buildid) { disable_buildid_cache(); } else if (rec->switch_output.enabled) { From 88282297fff00796e81f5e67734a6afdfb31fbc4 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Mon, 26 Aug 2019 08:43:02 -0600 Subject: [PATCH 180/345] selftests/seccomp: fix build on older kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The seccomp selftest goes to some length to build against older kernel headers, viz. all the #ifdefs at the beginning of the file. Commit 201766a20e30 ("ptrace: add PTRACE_GET_SYSCALL_INFO request") introduces some additional macros, but doesn't do the #ifdef dance. Let's add that dance here to avoid: gcc -Wl,-no-as-needed -Wall seccomp_bpf.c -lpthread -o seccomp_bpf In file included from seccomp_bpf.c:51: seccomp_bpf.c: In function ‘tracer_ptrace’: seccomp_bpf.c:1787:20: error: ‘PTRACE_EVENTMSG_SYSCALL_ENTRY’ undeclared (first use in this function); did you mean ‘PTRACE_EVENT_CLONE’? EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ __typeof__(_expected) __exp = (_expected); \ ^~~~~~~~~ seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY ^~~~~~~~~ seccomp_bpf.c:1787:20: note: each undeclared identifier is reported only once for each function it appears in EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ __typeof__(_expected) __exp = (_expected); \ ^~~~~~~~~ seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY ^~~~~~~~~ seccomp_bpf.c:1788:6: error: ‘PTRACE_EVENTMSG_SYSCALL_EXIT’ undeclared (first use in this function); did you mean ‘PTRACE_EVENT_EXIT’? : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../kselftest_harness.h:608:13: note: in definition of macro ‘__EXPECT’ __typeof__(_expected) __exp = (_expected); \ ^~~~~~~~~ seccomp_bpf.c:1787:2: note: in expansion of macro ‘EXPECT_EQ’ EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY ^~~~~~~~~ make: *** [Makefile:12: seccomp_bpf] Error 1 [skhan@linuxfoundation.org: Fix checkpatch error in commit log] Signed-off-by: Tycho Andersen Fixes: 201766a20e30 ("ptrace: add PTRACE_GET_SYSCALL_INFO request") Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6ef7f16c4cf5..7f8b5c8982e3 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -199,6 +199,11 @@ struct seccomp_notif_sizes { }; #endif +#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 +#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { From a4864a33f56caa6592acc69a6d265f3f3dca5eae Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 30 Aug 2019 15:32:23 -0400 Subject: [PATCH 181/345] selftests: watchdog: Add optional file argument Some systems have multiple watchdog devices where the first device registered is assigned to the /dev/watchdog device file. In order to test other watchdog devices, add an optional file argument for selecting non-default watchdog devices for testing. Tested-by: Eugeniu Rosca Signed-off-by: George G. Davis Signed-off-by: Shuah Khan --- .../selftests/watchdog/watchdog-test.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index c2333c78cf04..6a68b486dd61 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NL"; +static const char sopts[] = "bdehp:t:Tn:NLf:"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -31,6 +31,7 @@ static const struct option lopts[] = { {"pretimeout", required_argument, NULL, 'n'}, {"getpretimeout", no_argument, NULL, 'N'}, {"gettimeleft", no_argument, NULL, 'L'}, + {"file", required_argument, NULL, 'f'}, {NULL, no_argument, NULL, 0x0} }; @@ -69,6 +70,8 @@ static void term(int sig) static void usage(char *progname) { printf("Usage: %s [options]\n", progname); + printf(" -f, --file Open watchdog device file\n"); + printf(" Default is /dev/watchdog\n"); printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); printf(" -d, --disable Turn off the watchdog timer\n"); printf(" -e, --enable Turn on the watchdog timer\n"); @@ -92,14 +95,20 @@ int main(int argc, char *argv[]) int ret; int c; int oneshot = 0; + char *file = "/dev/watchdog"; setbuf(stdout, NULL); - fd = open("/dev/watchdog", O_WRONLY); + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + if (c == 'f') + file = optarg; + } + + fd = open(file, O_WRONLY); if (fd == -1) { if (errno == ENOENT) - printf("Watchdog device not enabled.\n"); + printf("Watchdog device (%s) not found.\n", file); else if (errno == EACCES) printf("Run watchdog as root.\n"); else @@ -108,6 +117,8 @@ int main(int argc, char *argv[]) exit(-1); } + optind = 0; + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { switch (c) { case 'b': @@ -190,6 +201,9 @@ int main(int argc, char *argv[]) else printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); break; + case 'f': + /* Handled above */ + break; default: usage(argv[0]); From 861f47b07b0523f00ffb8159684e7c840390b96e Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sat, 3 Aug 2019 09:01:26 +0900 Subject: [PATCH 182/345] selftest/ftrace: Fix typo in trigger-snapshot.tc This patch fixes a spelling typo in trigger-snapshot.tc [skhan@linuxfoundation.org: Fix typo in commit log] Signed-off-by: Masanari Iida Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- .../testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index 7717c0a09686..ac738500d17f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then exit_unsupported fi -echo "Test snapshot tigger" +echo "Test snapshot trigger" echo 0 > snapshot echo 1 > events/sched/sched_process_fork/enable ( echo "forked") From a54344ace273cd7fa182c287719b109cfd9e8613 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 30 Aug 2019 18:31:33 -0400 Subject: [PATCH 183/345] selftests: watchdog: cleanup whitespace in usage options Convert hard spaces to tabs in usage options. Suggested-by: Shuah Khan Signed-off-by: George G. Davis Signed-off-by: Shuah Khan --- .../selftests/watchdog/watchdog-test.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index 6a68b486dd61..afff120c7be6 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -70,18 +70,19 @@ static void term(int sig) static void usage(char *progname) { printf("Usage: %s [options]\n", progname); - printf(" -f, --file Open watchdog device file\n"); - printf(" Default is /dev/watchdog\n"); - printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); - printf(" -d, --disable Turn off the watchdog timer\n"); - printf(" -e, --enable Turn on the watchdog timer\n"); - printf(" -h, --help Print the help message\n"); - printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); - printf(" -t, --timeout=T Set timeout to T seconds\n"); - printf(" -T, --gettimeout Get the timeout\n"); - printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); - printf(" -N, --getpretimeout Get the pretimeout\n"); - printf(" -L, --gettimeleft Get the time left until timer expires\n"); + printf(" -f, --file\t\tOpen watchdog device file\n"); + printf("\t\t\tDefault is /dev/watchdog\n"); + printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); + printf(" -d, --disable\t\tTurn off the watchdog timer\n"); + printf(" -e, --enable\t\tTurn on the watchdog timer\n"); + printf(" -h, --help\t\tPrint the help message\n"); + printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n", + DEFAULT_PING_RATE); + printf(" -t, --timeout=T\tSet timeout to T seconds\n"); + printf(" -T, --gettimeout\tGet the timeout\n"); + printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n"); + printf(" -N, --getpretimeout\tGet the pretimeout\n"); + printf(" -L, --gettimeleft\tGet the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); From 955a0f3310081731a1756a1d7bd742600ee69ffc Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 14 Aug 2019 13:16:51 +0200 Subject: [PATCH 184/345] selftests: livepatch: add missing fragments to config When generating config with 'make defconfig kselftest-merge' fragment CONFIG_TEST_LIVEPATCH=m isn't set. Rework to enable CONFIG_LIVEPATCH and CONFIG_DYNAMIC_DEBUG as well. Signed-off-by: Anders Roxell Signed-off-by: Shuah Khan --- tools/testing/selftests/livepatch/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config index 0dd7700464a8..ad23100cb27c 100644 --- a/tools/testing/selftests/livepatch/config +++ b/tools/testing/selftests/livepatch/config @@ -1 +1,3 @@ +CONFIG_LIVEPATCH=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_TEST_LIVEPATCH=m From 721cb3c8bc8890e824b7be53bf951960ff7811f9 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 14 Aug 2019 13:14:35 +0200 Subject: [PATCH 185/345] selftests: tpm2: install python files Both test_smoke.sh and test_space.sh require tpm2.py and tpm2_test.py. Rework so that tpm2.py and tpm2_test.py gets installed, added them to the variable TEST_PROGS_EXTENDED. Signed-off-by: Anders Roxell Signed-off-by: Shuah Khan --- tools/testing/selftests/tpm2/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 9dd848427a7b..1a5db1eb8ed5 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -2,3 +2,4 @@ include ../lib.mk TEST_PROGS := test_smoke.sh test_space.sh +TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py From ae89339b08f3fe02457ec9edd512ddc3d246d0f8 Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Fri, 29 Mar 2019 11:32:50 +0000 Subject: [PATCH 186/345] ntb: point to right memory window index second parameter of ntb_peer_mw_get_addr is pointing to wrong memory window index by passing "peer gidx" instead of "local gidx". For ex, "local gidx" value is '0' and "peer gidx" value is '1', then on peer side ntb_mw_set_trans() api is used as below with gidx pointing to local side gidx which is '0', so memroy window '0' is chosen and XLAT '0' will be programmed by peer side. ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, peer->inbuf_xlat, peer->inbuf_size); Now, on local side ntb_peer_mw_get_addr() is been used as below with gidx pointing to "peer gidx" which is '1', so pointing to memory window '1' instead of memory window '0'. ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, &peer->outbuf_size); So this patch pass "local gidx" as parameter to ntb_peer_mw_get_addr(). Signed-off-by: Sanjay R Mehta Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index d028331558ea..e9b7c2dfc730 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -1378,7 +1378,7 @@ static int perf_setup_peer_mw(struct perf_peer *peer) int ret; /* Get outbound MW parameters and map it */ - ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, + ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr, &peer->outbuf_size); if (ret) return ret; From c16c6655605f52cf2107a7c8dc0c798645351976 Mon Sep 17 00:00:00 2001 From: Alexander Fomichev Date: Tue, 16 Jul 2019 20:34:48 +0300 Subject: [PATCH 187/345] ntb_hw_switchtec: make ntb_mw_set_trans() work when addr == 0 On switchtec_ntb_mw_set_trans() call, when (only) address == 0, it acts as ntb_mw_clear_trans(). Fix this, since address == 0 and size != 0 is valid combination for setting translation. Signed-off-by: Alexander Fomichev Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index f4959458d909..86ffa716eaf2 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -306,7 +306,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (rc) return rc; - if (addr == 0 || size == 0) { + if (size == 0) { if (widx < nr_direct_mw) switchtec_ntb_mw_clr_direct(sndev, widx); else From 5e2cbf13d0ec4fe2b6aabbeb6fe44830e8788dd0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 18 Aug 2019 19:53:49 +0100 Subject: [PATCH 188/345] NTB: ntb_transport: remove redundant assignment to rc Variable rc is initialized to a value that is never read and it is re-assigned later. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 40c90ca10729..00a5d5764993 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -292,7 +292,7 @@ static int ntb_transport_bus_match(struct device *dev, static int ntb_transport_bus_probe(struct device *dev) { const struct ntb_transport_client *client; - int rc = -EINVAL; + int rc; get_device(dev); From 5f59f6b182f75e5247104c8e0150c3d41f0a4c18 Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Sun, 15 Sep 2019 17:07:43 +0000 Subject: [PATCH 189/345] ntb_hw_amd: Add a new NTB PCI device ID Signed-off-by: Sanjay R Mehta Signed-off-by: Jon Mason --- drivers/ntb/hw/amd/ntb_hw_amd.c | 3 ++- drivers/ntb/hw/amd/ntb_hw_amd.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 2859cc99b73e..e9286cf241f0 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1124,7 +1124,8 @@ static const struct file_operations amd_ntb_debugfs_info = { }; static const struct pci_device_id amd_ntb_pci_tbl[] = { - {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)}, + {PCI_VDEVICE(AMD, 0x145b)}, + {PCI_VDEVICE(AMD, 0x148b)}, {0} }; MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl); diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h index 8f3617a46292..3aac994f3e77 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.h +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h @@ -52,7 +52,6 @@ #include #include -#define PCI_DEVICE_ID_AMD_NTB 0x145B #define AMD_LINK_HB_TIMEOUT msecs_to_jiffies(1000) #define AMD_LINK_STATUS_OFFSET 0x68 #define NTB_LIN_STA_ACTIVE_BIT 0x00000002 From a1472e73e3d791eb5eddeceb99f7dc5c17ca98ce Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Sun, 15 Sep 2019 17:08:35 +0000 Subject: [PATCH 190/345] ntb_hw_amd: Add memory window support for new AMD hardware The AMD new hardware uses BAR23 and BAR45 as memory windows as compared to previos where BAR1, BAR23 and BAR45 is used for memory windows. This patch add support for both AMD hardwares. Signed-off-by: Sanjay R Mehta Signed-off-by: Jon Mason --- drivers/ntb/hw/amd/ntb_hw_amd.c | 23 ++++++++++++++++++----- drivers/ntb/hw/amd/ntb_hw_amd.h | 7 ++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index e9286cf241f0..156c2a18a239 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -78,7 +78,7 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx) if (idx < 0 || idx > ndev->mw_count) return -EINVAL; - return 1 << idx; + return ndev->dev_data->mw_idx << idx; } static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx) @@ -909,7 +909,7 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev) { void __iomem *mmio = ndev->self_mmio; - ndev->mw_count = AMD_MW_CNT; + ndev->mw_count = ndev->dev_data->mw_count; ndev->spad_count = AMD_SPADS_CNT; ndev->db_count = AMD_DB_CNT; @@ -1069,6 +1069,8 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev, goto err_ndev; } + ndev->dev_data = (struct ntb_dev_data *)id->driver_data; + ndev_init_struct(ndev, pdev); rc = amd_ntb_init_pci(ndev, pdev); @@ -1123,10 +1125,21 @@ static const struct file_operations amd_ntb_debugfs_info = { .read = ndev_debugfs_read, }; +static const struct ntb_dev_data dev_data[] = { + { /* for device 145b */ + .mw_count = 3, + .mw_idx = 1, + }, + { /* for device 148b */ + .mw_count = 2, + .mw_idx = 2, + }, +}; + static const struct pci_device_id amd_ntb_pci_tbl[] = { - {PCI_VDEVICE(AMD, 0x145b)}, - {PCI_VDEVICE(AMD, 0x148b)}, - {0} + { PCI_VDEVICE(AMD, 0x145b), (kernel_ulong_t)&dev_data[0] }, + { PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] }, + { 0, } }; MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl); diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h index 3aac994f3e77..139a307147bc 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.h +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h @@ -92,7 +92,6 @@ static inline void _write64(u64 val, void __iomem *mmio) enum { /* AMD NTB Capability */ - AMD_MW_CNT = 3, AMD_DB_CNT = 16, AMD_MSIX_VECTOR_CNT = 24, AMD_SPADS_CNT = 16, @@ -169,6 +168,11 @@ enum { AMD_PEER_OFFSET = 0x400, }; +struct ntb_dev_data { + const unsigned char mw_count; + const unsigned int mw_idx; +}; + struct amd_ntb_dev; struct amd_ntb_vec { @@ -184,6 +188,7 @@ struct amd_ntb_dev { u32 cntl_sta; u32 peer_sta; + struct ntb_dev_data *dev_data; unsigned char mw_count; unsigned char spad_count; unsigned char db_count; From 4720101fab62d0453babb0287b58a9c5bf78fb80 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 18 Sep 2019 13:58:31 -0700 Subject: [PATCH 191/345] NTB: fix IDT Kconfig typos/spellos Fix typos in drivers/ntb/hw/idt/Kconfig. Use consistent spelling and capitalization. Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support") Signed-off-by: Randy Dunlap Cc: Dave Jiang Cc: Allen Hubbe Cc: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/hw/idt/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig index bfc7cac94102..c79b54c1747d 100644 --- a/drivers/ntb/hw/idt/Kconfig +++ b/drivers/ntb/hw/idt/Kconfig @@ -4,11 +4,11 @@ config NTB_IDT depends on PCI select HWMON help - This driver supports NTB of cappable IDT PCIe-switches. + This driver supports NTB of capable IDT PCIe-switches. Some of the pre-initializations must be made before IDT PCIe-switch - exposes it NT-functions correctly. It should be done by either proper - initialisation of EEPROM connected to master smbus of the switch or + exposes its NT-functions correctly. It should be done by either proper + initialization of EEPROM connected to master SMbus of the switch or by BIOS using slave-SMBus interface changing corresponding registers value. Evidently it must be done before PCI bus enumeration is finished in Linux kernel. From c669675b56b4eb85e8daa3d2ae76db2fd6378b2f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 1 Aug 2019 15:39:37 -0700 Subject: [PATCH 192/345] thermal: int340x: processor_thermal: Add Ice Lake support Add new PCI id for Ice lake processor thermal device. Also enabled the RAPL mmio interface. The MMIO offsets match Skylake. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- .../thermal/intel/int340x_thermal/processor_thermal_device.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 97333fc4be42..89a015387283 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -39,6 +39,9 @@ /* GeminiLake thermal reporting device */ #define PCI_DEVICE_ID_PROC_GLK_THERMAL 0x318C +/* IceLake thermal reporting device */ +#define PCI_DEVICE_ID_PROC_ICL_THERMAL 0x8a03 + #define DRV_NAME "proc_thermal" struct power_config { @@ -719,6 +722,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CNL_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_CFL_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_GLK_THERMAL)}, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_ICL_THERMAL), + .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, }, { 0, }, }; From f639cff55fb414c2ee6d1032bc3244e1d15d6d40 Mon Sep 17 00:00:00 2001 From: Kelsey Skunberg Date: Wed, 17 Jul 2019 13:26:39 -0600 Subject: [PATCH 193/345] thermal: intel: int340x_thermal: Remove unnecessary acpi_has_method() uses acpi_evaluate_object() will already return in error if the method does not exist. Checking if the method is absent before the acpi_evaluate_object() call is not needed. Remove acpi_has_method() calls to avoid additional work. Signed-off-by: Kelsey Skunberg Reviewed-by: Rafael J. Wysocki Signed-off-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index 9716bc3abaf9..7130e90773ed 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -77,9 +77,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, struct acpi_buffer element = { 0, NULL }; struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" }; - if (!acpi_has_method(handle, "_TRT")) - return -ENODEV; - status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer); if (ACPI_FAILURE(status)) return -ENODEV; @@ -158,9 +155,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, struct acpi_buffer art_format = { sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" }; - if (!acpi_has_method(handle, "_ART")) - return -ENODEV; - status = acpi_evaluate_object(handle, "_ART", NULL, &buffer); if (ACPI_FAILURE(status)) return -ENODEV; From 4c8a342c118ae5f09a1729efdfcc42d8f4485bd7 Mon Sep 17 00:00:00 2001 From: Rishi Gupta Date: Fri, 23 Aug 2019 07:01:42 +0530 Subject: [PATCH 194/345] thermal: intel: int3403: replace printk(KERN_WARN...) with pr_warn(...) Direct invocation of printk() is not preferred to emit logs. This commit replaces printk(KERN_WARNING) with corresponding pr_warn() function call. Signed-off-by: Rishi Gupta Signed-off-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int3403_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c index f5749d4418ae..a7bbd8584ae2 100644 --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c @@ -181,7 +181,7 @@ static int int3403_cdev_add(struct int3403_priv *priv) p = buf.pointer; if (!p || (p->type != ACPI_TYPE_PACKAGE)) { - printk(KERN_WARNING "Invalid PPSS data\n"); + pr_warn("Invalid PPSS data\n"); kfree(buf.pointer); return -EFAULT; } From 97e9cafe85a9111fd72cefb18cfc9f2e0b6f85f1 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 24 Jul 2019 20:23:37 +0800 Subject: [PATCH 195/345] thermal: intel: Use dev_get_drvdata Instead of using to_pci_dev + pci_get_drvdata, use dev_get_drvdata to make code simpler. Signed-off-by: Chuhong Yuan Signed-off-by: Zhang Rui --- drivers/thermal/intel/intel_pch_thermal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c index 99f8b2540f18..4f0bb8f502e1 100644 --- a/drivers/thermal/intel/intel_pch_thermal.c +++ b/drivers/thermal/intel/intel_pch_thermal.c @@ -371,16 +371,14 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev) static int intel_pch_thermal_suspend(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + struct pch_thermal_device *ptd = dev_get_drvdata(device); return ptd->ops->suspend(ptd); } static int intel_pch_thermal_resume(struct device *device) { - struct pci_dev *pdev = to_pci_dev(device); - struct pch_thermal_device *ptd = pci_get_drvdata(pdev); + struct pch_thermal_device *ptd = dev_get_drvdata(device); return ptd->ops->resume(ptd); } From 8c7aa184281c01fc26f319059efb94725012921d Mon Sep 17 00:00:00 2001 From: Stefan Mavrodiev Date: Fri, 26 Jul 2019 16:32:36 +0300 Subject: [PATCH 196/345] thermal_hwmon: Sanitize thermal_zone type When calling thermal_add_hwmon_sysfs(), the device type is sanitized by replacing '-' with '_'. However tz->type remains unsanitized. Thus calling thermal_hwmon_lookup_by_type() returns no device. And if there is no device, thermal_remove_hwmon_sysfs() fails with "hwmon device lookup failed!". The result is unregisted hwmon devices in the sysfs. Fixes: 409ef0bacacf ("thermal_hwmon: Sanitize attribute name passed to hwmon") Signed-off-by: Stefan Mavrodiev Signed-off-by: Zhang Rui --- drivers/thermal/thermal_hwmon.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 40c69a533b24..dd5d8ee37928 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -87,13 +87,17 @@ static struct thermal_hwmon_device * thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; + char type[THERMAL_NAME_LENGTH]; mutex_lock(&thermal_hwmon_list_lock); - list_for_each_entry(hwmon, &thermal_hwmon_list, node) - if (!strcmp(hwmon->type, tz->type)) { + list_for_each_entry(hwmon, &thermal_hwmon_list, node) { + strcpy(type, tz->type); + strreplace(type, '-', '_'); + if (!strcmp(hwmon->type, type)) { mutex_unlock(&thermal_hwmon_list_lock); return hwmon; } + } mutex_unlock(&thermal_hwmon_list_lock); return NULL; From adc8749b150c51e857ac248017971371f70de197 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Wed, 7 Aug 2019 11:01:30 +0800 Subject: [PATCH 197/345] thermal/drivers/core: Use put_device() if device_register() fails Never directly free @dev after calling device_register(), even if it returned an error! Always use put_device() to give up the reference initialized. Clean up the rollback block also. Signed-off-by: Yue Hu Signed-off-by: Zhang Rui --- drivers/thermal/thermal_core.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 6bab66e84eb5..bae1e412f57e 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -985,7 +985,7 @@ __thermal_cooling_device_register(struct device_node *np, result = device_register(&cdev->device); if (result) { ida_simple_remove(&thermal_cdev_ida, cdev->id); - kfree(cdev); + put_device(&cdev->device); return ERR_PTR(result); } @@ -1240,6 +1240,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, struct thermal_zone_device *tz; enum thermal_trip_type trip_type; int trip_temp; + int id; int result; int count; struct thermal_governor *governor; @@ -1266,11 +1267,13 @@ thermal_zone_device_register(const char *type, int trips, int mask, INIT_LIST_HEAD(&tz->thermal_instances); ida_init(&tz->ida); mutex_init(&tz->lock); - result = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL); - if (result < 0) + id = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + result = id; goto free_tz; + } - tz->id = result; + tz->id = id; strlcpy(tz->type, type, sizeof(tz->type)); tz->ops = ops; tz->tzp = tzp; @@ -1292,7 +1295,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); if (result) - goto remove_device_groups; + goto release_device; for (count = 0; count < trips; count++) { if (tz->ops->get_trip_type(tz, count, &trip_type)) @@ -1343,14 +1346,12 @@ thermal_zone_device_register(const char *type, int trips, int mask, return tz; unregister: - ida_simple_remove(&thermal_tz_ida, tz->id); - device_unregister(&tz->device); - return ERR_PTR(result); - -remove_device_groups: - thermal_zone_destroy_device_groups(tz); + device_del(&tz->device); +release_device: + put_device(&tz->device); + tz = NULL; remove_id: - ida_simple_remove(&thermal_tz_ida, tz->id); + ida_simple_remove(&thermal_tz_ida, id); free_tz: kfree(tz); return ERR_PTR(result); From 1851799e1d2978f68eea5d9dff322e121dcf59c1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Jul 2019 13:14:52 +0300 Subject: [PATCH 198/345] thermal: Fix use-after-free when unregistering thermal zone device thermal_zone_device_unregister() cancels the delayed work that polls the thermal zone, but it does not wait for it to finish. This is racy with respect to the freeing of the thermal zone device, which can result in a use-after-free [1]. Fix this by waiting for the delayed work to finish before freeing the thermal zone device. Note that thermal_zone_device_set_polling() is never invoked from an atomic context, so it is safe to call cancel_delayed_work_sync() that can block. [1] [ +0.002221] ================================================================== [ +0.000064] BUG: KASAN: use-after-free in __mutex_lock+0x1076/0x11c0 [ +0.000016] Read of size 8 at addr ffff8881e48e0450 by task kworker/1:0/17 [ +0.000023] CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.2.0-rc6-custom-02495-g8e73ca3be4af #1701 [ +0.000010] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ +0.000016] Workqueue: events_freezable_power_ thermal_zone_device_check [ +0.000012] Call Trace: [ +0.000021] dump_stack+0xa9/0x10e [ +0.000020] print_address_description.cold.2+0x9/0x25e [ +0.000018] __kasan_report.cold.3+0x78/0x9d [ +0.000016] kasan_report+0xe/0x20 [ +0.000016] __mutex_lock+0x1076/0x11c0 [ +0.000014] step_wise_throttle+0x72/0x150 [ +0.000018] handle_thermal_trip+0x167/0x760 [ +0.000019] thermal_zone_device_update+0x19e/0x5f0 [ +0.000019] process_one_work+0x969/0x16f0 [ +0.000017] worker_thread+0x91/0xc40 [ +0.000014] kthread+0x33d/0x400 [ +0.000015] ret_from_fork+0x3a/0x50 [ +0.000020] Allocated by task 1: [ +0.000015] save_stack+0x19/0x80 [ +0.000015] __kasan_kmalloc.constprop.4+0xc1/0xd0 [ +0.000014] kmem_cache_alloc_trace+0x152/0x320 [ +0.000015] thermal_zone_device_register+0x1b4/0x13a0 [ +0.000015] mlxsw_thermal_init+0xc92/0x23d0 [ +0.000014] __mlxsw_core_bus_device_register+0x659/0x11b0 [ +0.000013] mlxsw_core_bus_device_register+0x3d/0x90 [ +0.000013] mlxsw_pci_probe+0x355/0x4b0 [ +0.000014] local_pci_probe+0xc3/0x150 [ +0.000013] pci_device_probe+0x280/0x410 [ +0.000013] really_probe+0x26a/0xbb0 [ +0.000013] driver_probe_device+0x208/0x2e0 [ +0.000013] device_driver_attach+0xfe/0x140 [ +0.000013] __driver_attach+0x110/0x310 [ +0.000013] bus_for_each_dev+0x14b/0x1d0 [ +0.000013] driver_register+0x1c0/0x400 [ +0.000015] mlxsw_sp_module_init+0x5d/0xd3 [ +0.000014] do_one_initcall+0x239/0x4dd [ +0.000013] kernel_init_freeable+0x42b/0x4e8 [ +0.000012] kernel_init+0x11/0x18b [ +0.000013] ret_from_fork+0x3a/0x50 [ +0.000015] Freed by task 581: [ +0.000013] save_stack+0x19/0x80 [ +0.000014] __kasan_slab_free+0x125/0x170 [ +0.000013] kfree+0xf3/0x310 [ +0.000013] thermal_release+0xc7/0xf0 [ +0.000014] device_release+0x77/0x200 [ +0.000014] kobject_put+0x1a8/0x4c0 [ +0.000014] device_unregister+0x38/0xc0 [ +0.000014] thermal_zone_device_unregister+0x54e/0x6a0 [ +0.000014] mlxsw_thermal_fini+0x184/0x35a [ +0.000014] mlxsw_core_bus_device_unregister+0x10a/0x640 [ +0.000013] mlxsw_devlink_core_bus_device_reload+0x92/0x210 [ +0.000015] devlink_nl_cmd_reload+0x113/0x1f0 [ +0.000014] genl_family_rcv_msg+0x700/0xee0 [ +0.000013] genl_rcv_msg+0xca/0x170 [ +0.000013] netlink_rcv_skb+0x137/0x3a0 [ +0.000012] genl_rcv+0x29/0x40 [ +0.000013] netlink_unicast+0x49b/0x660 [ +0.000013] netlink_sendmsg+0x755/0xc90 [ +0.000013] __sys_sendto+0x3de/0x430 [ +0.000013] __x64_sys_sendto+0xe2/0x1b0 [ +0.000013] do_syscall_64+0xa4/0x4d0 [ +0.000013] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ +0.000017] The buggy address belongs to the object at ffff8881e48e0008 which belongs to the cache kmalloc-2k of size 2048 [ +0.000012] The buggy address is located 1096 bytes inside of 2048-byte region [ffff8881e48e0008, ffff8881e48e0808) [ +0.000007] The buggy address belongs to the page: [ +0.000012] page:ffffea0007923800 refcount:1 mapcount:0 mapping:ffff88823680d0c0 index:0x0 compound_mapcount: 0 [ +0.000020] flags: 0x200000000010200(slab|head) [ +0.000019] raw: 0200000000010200 ffffea0007682008 ffffea00076ab808 ffff88823680d0c0 [ +0.000016] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000 [ +0.000007] page dumped because: kasan: bad access detected [ +0.000012] Memory state around the buggy address: [ +0.000012] ffff8881e48e0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000012] ffff8881e48e0380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000012] >ffff8881e48e0400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000008] ^ [ +0.000012] ffff8881e48e0480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000012] ffff8881e48e0500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000007] ================================================================== Fixes: b1569e99c795 ("ACPI: move thermal trip handling to generic thermal layer") Reported-by: Jiri Pirko Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: Zhang Rui --- drivers/thermal/thermal_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index bae1e412f57e..892f3548ad6d 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -304,7 +304,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, &tz->poll_queue, msecs_to_jiffies(delay)); else - cancel_delayed_work(&tz->poll_queue); + cancel_delayed_work_sync(&tz->poll_queue); } static void monitor_thermal_zone(struct thermal_zone_device *tz) From 67eed44b8a8ae7ca1a1a77c64d2c4815f00e361b Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Fri, 12 Jul 2019 02:01:58 +0530 Subject: [PATCH 199/345] thermal: Add some error messages When registering a thermal zone device, we currently return -EINVAL in four cases. This makes it a little hard to debug the real cause of the failure. Print some error messages to make it easier for developer to figure out what happened. Signed-off-by: Amit Kucheria Signed-off-by: Zhang Rui --- drivers/thermal/thermal_core.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 892f3548ad6d..d4481cc8958f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1245,17 +1245,26 @@ thermal_zone_device_register(const char *type, int trips, int mask, int count; struct thermal_governor *governor; - if (!type || strlen(type) == 0) + if (!type || strlen(type) == 0) { + pr_err("Error: No thermal zone type defined\n"); return ERR_PTR(-EINVAL); + } - if (type && strlen(type) >= THERMAL_NAME_LENGTH) + if (type && strlen(type) >= THERMAL_NAME_LENGTH) { + pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n", + type, THERMAL_NAME_LENGTH); return ERR_PTR(-EINVAL); + } - if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) + if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) { + pr_err("Error: Incorrect number of thermal trips\n"); return ERR_PTR(-EINVAL); + } - if (!ops) + if (!ops) { + pr_err("Error: Thermal zone device ops not defined\n"); return ERR_PTR(-EINVAL); + } if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) return ERR_PTR(-EINVAL); From bf8ca04d8bfdb64b10a8fc8a08013aeb8ad1fb10 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Wed, 21 Aug 2019 14:38:31 +0530 Subject: [PATCH 200/345] MAINTAINERS: Add Amit Kucheria as reviewer for thermal Add Amit Kucheria as the reviewer for thermal as he would like to participate in the review process effort for the thermal framework. Signed-off-by: Amit Kucheria Signed-off-by: Zhang Rui --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 43604d6ab96c..38f9c2f3b4ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15910,6 +15910,7 @@ THERMAL M: Zhang Rui M: Eduardo Valentin R: Daniel Lezcano +R: Amit Kucheria L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git From e2ada66ec41821e54a503776c1ce42fbf4e99fbe Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 21 Aug 2019 11:20:04 -0700 Subject: [PATCH 201/345] kvm: x86: Add Intel PMU MSRs to msrs_to_save[] These MSRs should be enumerated by KVM_GET_MSR_INDEX_LIST, so that userspace knows that these MSRs may be part of the vCPU state. Signed-off-by: Jim Mattson Reviewed-by: Eric Hankland Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dfd641243568..bb93771e4170 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1145,6 +1145,42 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, + MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, + MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, + MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, + MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, + MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, + MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, + MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19, + MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21, + MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23, + MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25, + MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27, + MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29, + MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, + MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, + MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, + MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, + MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, + MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, + MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19, + MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21, + MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23, + MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25, + MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27, + MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29, + MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31, }; static unsigned num_msrs_to_save; @@ -5051,6 +5087,11 @@ static void kvm_init_msr_list(void) u32 dummy[2]; unsigned i, j; + BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, + "Please update the fixed PMCs in msrs_to_save[]"); + BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32, + "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]"); + for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) continue; From 7a83247e010a2881ee246a88596af0549ef3e6c4 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 22 Aug 2019 22:30:19 +0800 Subject: [PATCH 202/345] x86/Hyper-V: Fix definition of struct hv_vp_assist_page The struct hv_vp_assist_page was defined incorrectly. The "vtl_control" should be u64[3], "nested_enlightenments _control" should be a u64 and there are 7 reserved bytes following "enlighten_vmentry". Fix the definition. Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 7a2705694f5b..3f08327744d1 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -524,14 +524,24 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ } __packed; +struct hv_nested_enlightenments_control { + struct { + __u32 directhypercall:1; + __u32 reserved:31; + } features; + struct { + __u32 reserved; + } hypercallControls; +} __packed; + /* Define virtual processor assist page structure. */ struct hv_vp_assist_page { __u32 apic_assist; - __u32 reserved; - __u64 vtl_control[2]; - __u64 nested_enlightenments_control[2]; - __u32 enlighten_vmentry; - __u32 padding; + __u32 reserved1; + __u64 vtl_control[3]; + struct hv_nested_enlightenments_control nested_control; + __u8 enlighten_vmentry; + __u8 reserved2[7]; __u64 current_nested_vmcs; } __packed; From 344c6c804703841d2bff4d68d7390ba726053874 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Thu, 22 Aug 2019 22:30:20 +0800 Subject: [PATCH 203/345] KVM/Hyper-V: Add new KVM capability KVM_CAP_HYPERV_DIRECT_TLBFLUSH Hyper-V direct tlb flush function should be enabled for guest that only uses Hyper-V hypercall. User space hypervisor(e.g, Qemu) can disable KVM identification in CPUID and just exposes Hyper-V identification to make sure the precondition. Add new KVM capability KVM_CAP_ HYPERV_DIRECT_TLBFLUSH for user space to enable Hyper-V direct tlb function and this function is default to be disabled in KVM. Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.txt | 13 +++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 8 ++++++++ include/uapi/linux/kvm.h | 1 + 4 files changed, 23 insertions(+) diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index 136f1eef3712..4833904d32a5 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -5309,3 +5309,16 @@ Architectures: x86 This capability indicates that KVM supports paravirtualized Hyper-V IPI send hypercalls: HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. +8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH + +Architecture: x86 + +This capability indicates that KVM running on top of Hyper-V hypervisor +enables Direct TLB flush for its guests meaning that TLB flush +hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. +Due to the different ABI for hypercall parameters between Hyper-V and +KVM, enabling this capability effectively disables all hypercall +handling by KVM (as some KVM hypercall may be mistakenly treated as TLB +flush hypercalls by Hyper-V) so userspace should disable KVM identification +in CPUID and only exposes Hyper-V identification. In this case, guest +thinks it's running on Hyper-V and only use Hyper-V hypercalls. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a3a3ec73fa2f..4765ae0fc506 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1213,6 +1213,7 @@ struct kvm_x86_ops { bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); + int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb93771e4170..5becc6753280 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3282,6 +3282,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_x86_ops->get_nested_state ? kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + r = kvm_x86_ops->enable_direct_tlbflush ? 1 : 0; + break; default: break; } @@ -4055,6 +4058,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = -EFAULT; } return r; + case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: + if (!kvm_x86_ops->enable_direct_tlbflush) + return -ENOTTY; + + return kvm_x86_ops->enable_direct_tlbflush(vcpu); default: return -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 233efbb1c81c..c73aead0c0a8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #define KVM_CAP_PMU_EVENT_FILTER 173 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 +#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #ifdef KVM_CAP_IRQ_ROUTING From 6f6a657c99980485c265363447b269083dd1dc3a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Aug 2019 22:30:21 +0800 Subject: [PATCH 204/345] KVM/Hyper-V/VMX: Add direct tlb flush support Hyper-V provides direct tlb flush function which helps L1 Hypervisor to handle Hyper-V tlb flush request from L2 guest. Add the function support for VMX. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 4 +++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx/evmcs.h | 2 ++ arch/x86/kvm/vmx/vmx.c | 39 ++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 3f08327744d1..038581e91a84 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -181,6 +181,7 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ +#define HV_X64_NESTED_DIRECT_FLUSH BIT(17) #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) @@ -892,4 +893,7 @@ struct hv_tlb_flush_ex { u64 gva_list[]; } __packed; +struct hv_partition_assist_pg { + u32 tlb_lock_count; +}; #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4765ae0fc506..13b35f94dec4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -844,6 +844,8 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + + struct hv_partition_assist_pg *hv_pa_pg; }; enum kvm_irqchip_mode { diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 39a24eec8884..07ebf6882a45 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) + vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; vp_ap->enlighten_vmentry = 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4a99be1fae4e..8bab3e53d1e1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -486,6 +486,35 @@ static int hv_remote_flush_tlb(struct kvm *kvm) return hv_remote_flush_tlb_with_range(kvm, NULL); } +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in current code and so All + * evmcs in singe VM shares same assist page. + */ + if (!*p_hv_pa_pg) { + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!*p_hv_pa_pg) + return -ENOMEM; + pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n", + (u64)&vcpu->kvm); + } + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (u64)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n", + (u64)vcpu->kvm); + return 0; +} + #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -6511,6 +6540,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -6578,6 +6610,7 @@ static struct kvm *vmx_vm_alloc(void) static void vmx_vm_free(struct kvm *kvm) { + kfree(kvm->arch.hyperv.hv_pa_pg); vfree(to_kvm_vmx(kvm)); } @@ -7837,6 +7870,7 @@ static void vmx_exit(void) if (!vp_ap) continue; + vp_ap->nested_control.features.directhypercall = 0; vp_ap->current_nested_vmcs = 0; vp_ap->enlighten_vmentry = 0; } @@ -7876,6 +7910,11 @@ static int __init vmx_init(void) pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); static_branch_enable(&enable_evmcs); } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + } else { enlightened_vmcs = false; } From 956e255c59a5b64d1b3f16fdf2db306d74829e33 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 28 Aug 2019 09:59:04 +0200 Subject: [PATCH 205/345] KVM: x86: svm: remove unneeded nested_enable_evmcs() hook Since commit 5158917c7b019 ("KVM: x86: nVMX: Allow nested_enable_evmcs to be NULL") the code in x86.c is prepared to see nested_enable_evmcs being NULL and in VMX case it actually is when nesting is disabled. Remove the unneeded stub from SVM code. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 04fe21849b6e..7e352e81df31 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7099,13 +7099,6 @@ failed: return ret; } -static int nested_enable_evmcs(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version) -{ - /* Intel-only feature */ - return -ENODEV; -} - static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { unsigned long cr4 = kvm_read_cr4(vcpu); @@ -7311,7 +7304,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = nested_enable_evmcs, + .nested_enable_evmcs = NULL, .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, From 5a0165f6dde37fd16e85c86eccb895fd4f93305f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 28 Aug 2019 09:59:05 +0200 Subject: [PATCH 206/345] KVM: x86: announce KVM_CAP_HYPERV_ENLIGHTENED_VMCS support only when it is available It was discovered that after commit 65efa61dc0d5 ("selftests: kvm: provide common function to enable eVMCS") hyperv_cpuid selftest is failing on AMD. The reason is that the commit changed _vcpu_ioctl() to vcpu_ioctl() in the test and this one can't fail. Instead of fixing the test is seems to make more sense to not announce KVM_CAP_HYPERV_ENLIGHTENED_VMCS support if it is definitely missing (on svm and in case kvm_intel.nested=0). Signed-off-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5becc6753280..67af887f0b9b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3205,7 +3205,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: - case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: case KVM_CAP_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: @@ -3283,7 +3282,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: - r = kvm_x86_ops->enable_direct_tlbflush ? 1 : 0; + r = kvm_x86_ops->enable_direct_tlbflush != NULL; + break; + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: + r = kvm_x86_ops->nested_enable_evmcs != NULL; break; default: break; From 12c386b2308344f2ce8819ad11aab466166f276d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Aug 2019 09:36:16 +0800 Subject: [PATCH 207/345] KVM: selftests: Move vm type into _vm_create() internally Rather than passing the vm type from the top level to the end of vm creation, let's simply keep that as an internal of kvm_vm struct and decide the type in _vm_create(). Several reasons for doing this: - The vm type is only decided by physical address width and currently only used in aarch64, so we've got enough information as long as we're passing vm_guest_mode into _vm_create(), - This removes a loop dependency between the vm->type and creation of vms. That's why now we need to parse vm_guest_mode twice sometimes, once in run_test() and then again in _vm_create(). The follow up patches will move on to clean up that as well so we can have a single place to decide guest machine types and so. Note that this patch will slightly change the behavior of aarch64 tests in that previously most vm_create() callers will directly pass in type==0 into _vm_create() but now the type will depend on vm_guest_mode, however it shouldn't affect any user because all vm_create() users of aarch64 will be using VM_MODE_DEFAULT guest mode (which is VM_MODE_P40V48_4K) so at last type will still be zero. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 13 +++--------- .../testing/selftests/kvm/include/kvm_util.h | 3 +-- tools/testing/selftests/kvm/lib/kvm_util.c | 21 ++++++++++++------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index dc3346e090f5..6737b26b975e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -249,14 +249,12 @@ static void vm_dirty_log_verify(unsigned long *bmap) } static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code, - unsigned long type) + uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR, type); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); @@ -273,7 +271,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; uint64_t max_gfn; unsigned long *bmap; - unsigned long type = 0; switch (mode) { case VM_MODE_P52V48_4K: @@ -314,10 +311,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * bits we can change to 39. */ guest_pa_bits = 39; -#endif -#ifdef __aarch64__ - if (guest_pa_bits != 40) - type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); #endif max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); @@ -351,7 +344,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); + vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 5463b7896a0a..b2c15135f596 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -60,8 +60,7 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6e49bb039376..34a8a6572c7c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -84,7 +84,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } -static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) +static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) @@ -95,7 +95,7 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) exit(KSFT_SKIP); } - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } @@ -130,8 +130,7 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type) +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -139,8 +138,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, TEST_ASSERT(vm != NULL, "Insufficient Memory"); vm->mode = mode; - vm->type = type; - vm_open(vm, perm, type); + vm->type = 0; /* Setup mode specific traits. */ switch (vm->mode) { @@ -190,6 +188,13 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } +#ifdef __aarch64__ + if (vm->pa_bits != 40) + vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); +#endif + + vm_open(vm, perm); + /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); sparsebit_set_num(vm->vpages_valid, @@ -212,7 +217,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { - return _vm_create(mode, phy_pages, perm, 0); + return _vm_create(mode, phy_pages, perm); } /* @@ -232,7 +237,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { struct userspace_mem_region *region; - vm_open(vmp, perm, vmp->type); + vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); From 338eb29876b9e571273175f167fcd58d9441ac8e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Aug 2019 09:36:17 +0800 Subject: [PATCH 208/345] KVM: selftests: Create VM earlier for dirty log test Since we've just removed the dependency of vm type in previous patch, now we can create the vm much earlier. Note that to move it earlier we used an approximation of number of extra pages but it should be fine. This prepares for the follow up patches to finally remove the duplication of guest mode parsings. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6737b26b975e..cf2099abb121 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -263,6 +263,9 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -272,6 +275,18 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, uint64_t max_gfn; unsigned long *bmap; + /* + * We reserve page table for 2 times of extra dirty mem which + * will definitely cover the original (1G+) test range. Here + * we do the calculation with 4K page size which is the + * smallest so the page number will be enough for all archs + * (e.g., 64K page size guest will need even less memory for + * page tables). + */ + vm = create_vm(mode, VCPU_ID, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), + guest_code); + switch (mode) { case VM_MODE_P52V48_4K: guest_pa_bits = 52; @@ -318,7 +333,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - guest_page_shift)) + 16; #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; @@ -344,8 +359,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); - #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; From 567a9f1e9deb273a2c02dd18c254208537fcefaa Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Aug 2019 09:36:18 +0800 Subject: [PATCH 209/345] KVM: selftests: Introduce VM_MODE_PXXV48_4K The naming VM_MODE_P52V48_4K is explicit but unclear when used on x86_64 machines, because x86_64 machines are having various physical address width rather than some static values. Here's some examples: - Intel Xeon E3-1220: 36 bits - Intel Core i7-8650: 39 bits - AMD EPYC 7251: 48 bits All of them are using 48 bits linear address width but with totally different physical address width (and most of the old machines should be less than 52 bits). Let's create a new guest mode called VM_MODE_PXXV48_4K for current x86_64 tests and make it as the default to replace the old naming of VM_MODE_P52V48_4K because it shows more clearly that the PA width is not really a constant. Meanwhile we also stop assuming all the x86 machines are having 52 bits PA width but instead we fetch the real vm->pa_bits from CPUID 0x80000008 during runtime. We currently make this exclusively used by x86_64 but no other arch. As a slight touch up, moving DEBUG macro from dirty_log_test.c to kvm_util.h so lib can use it too. Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 5 ++-- .../testing/selftests/kvm/include/kvm_util.h | 11 ++++++- .../selftests/kvm/include/x86_64/processor.h | 3 ++ .../selftests/kvm/lib/aarch64/processor.c | 3 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 29 ++++++++++++++---- .../selftests/kvm/lib/x86_64/processor.c | 30 ++++++++++++++++--- 6 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index cf2099abb121..12c36fa356a8 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -19,8 +19,6 @@ #include "kvm_util.h" #include "processor.h" -#define DEBUG printf - #define VCPU_ID 1 /* The memory slot index to track dirty pages */ @@ -289,6 +287,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, switch (mode) { case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: guest_pa_bits = 52; guest_page_shift = 12; break; @@ -488,7 +487,7 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index b2c15135f596..ef3259c5e856 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,6 +24,12 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +#ifndef NDEBUG +#define DEBUG(...) printf(__VA_ARGS__); +#else +#define DEBUG(...) +#endif + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 @@ -38,11 +44,14 @@ enum vm_guest_mode { VM_MODE_P48V48_64K, VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ NUM_VM_MODES, }; -#ifdef __aarch64__ +#if defined(__aarch64__) #define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#elif defined(__x86_64__) +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K #else #define VM_MODE_DEFAULT VM_MODE_P52V48_4K #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 80d19740d2dc..0c17f2ee685e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, uint64_t msr_value); +uint32_t kvm_get_cpuid_max(void); +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 486400a97374..86036a59a668 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini case VM_MODE_P52V48_4K: TEST_ASSERT(false, "AArch64 does not support 4K sized pages " "with 52-bit physical address ranges"); + case VM_MODE_PXXV48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 34a8a6572c7c..bb8f993b25fb 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" +#include "processor.h" #include #include @@ -101,12 +102,13 @@ static void vm_open(struct kvm_vm *vm, int perm) } const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:48, VA-bits:48, 4K pages", + "PA-bits:48, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:ANY, VA-bits:48, 4K pages", }; _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -184,6 +186,21 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->page_size = 0x10000; vm->page_shift = 16; break; + case VM_MODE_PXXV48_4K: +#ifdef __x86_64__ + kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); + TEST_ASSERT(vm->va_bits == 48, "Linear address width " + "(%d bits) not supported", vm->va_bits); + vm->pgtable_levels = 4; + vm->page_size = 0x1000; + vm->page_shift = 12; + DEBUG("Guest physical address width detected: %d\n", + vm->pa_bits); +#else + TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " + "non-x86 platforms"); +#endif + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 0a5e487dbc50..c53dbc6bc568 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -1157,3 +1157,25 @@ bool is_intel_cpu(void) chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } + +uint32_t kvm_get_cpuid_max(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + struct kvm_cpuid_entry2 *entry; + bool pae; + + /* SDM 4.1.4 */ + if (kvm_get_cpuid_max() < 0x80000008) { + pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); + *pa_bits = pae ? 36 : 32; + *va_bits = 32; + } else { + entry = kvm_get_supported_cpuid_entry(0x80000008); + *pa_bits = entry->eax & 0xff; + *va_bits = (entry->eax >> 8) & 0xff; + } +} From 52200d0d944e473142271773c41f5f490f3a821f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Aug 2019 09:36:19 +0800 Subject: [PATCH 210/345] KVM: selftests: Remove duplicate guest mode handling Remove the duplication code in run_test() of dirty_log_test because after some reordering of functions now we can directly use the outcome of vm_create(). Meanwhile, with the new VM_MODE_PXXV48_4K, we can safely revert b442324b58 too where we stick the x86_64 PA width to 39 bits for dirty_log_test. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_test.c | 52 ++----------------- .../testing/selftests/kvm/include/kvm_util.h | 4 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 17 ++++++ 3 files changed, 26 insertions(+), 47 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 12c36fa356a8..5614222a6628 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -267,10 +267,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t max_gfn; unsigned long *bmap; /* @@ -285,54 +283,13 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code); - switch (mode) { - case VM_MODE_P52V48_4K: - case VM_MODE_PXXV48_4K: - guest_pa_bits = 52; - guest_page_shift = 12; - break; - case VM_MODE_P52V48_64K: - guest_pa_bits = 52; - guest_page_shift = 16; - break; - case VM_MODE_P48V48_4K: - guest_pa_bits = 48; - guest_page_shift = 12; - break; - case VM_MODE_P48V48_64K: - guest_pa_bits = 48; - guest_page_shift = 16; - break; - case VM_MODE_P40V48_4K: - guest_pa_bits = 40; - guest_page_shift = 12; - break; - case VM_MODE_P40V48_64K: - guest_pa_bits = 40; - guest_page_shift = 16; - break; - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); - } - - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - -#ifdef __x86_64__ - /* - * FIXME - * The x86_64 kvm selftests framework currently only supports a - * single PML4 which restricts the number of physical address - * bits we can change to 39. - */ - guest_pa_bits = 39; -#endif - max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; - guest_page_size = (1ul << guest_page_shift); + guest_page_size = vm_get_page_size(vm); /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (DIRTY_MEM_BITS - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - + vm_get_page_shift(vm))) + 16; #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; @@ -342,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, !!((guest_num_pages * guest_page_size) % host_page_size); if (!phys_offset) { - guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm_get_max_gfn(vm) - + guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = phys_offset; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index ef3259c5e856..29cccaf96baf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -154,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned int vm_get_max_gfn(struct kvm_vm *vm); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index bb8f993b25fb..80a338b5403c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -136,6 +136,8 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -1650,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm) return val == 'Y'; } + +unsigned int vm_get_page_size(struct kvm_vm *vm) +{ + return vm->page_size; +} + +unsigned int vm_get_page_shift(struct kvm_vm *vm) +{ + return vm->page_shift; +} + +unsigned int vm_get_max_gfn(struct kvm_vm *vm) +{ + return vm->max_gfn; +} From 319109a2d0dde8672323efcf909740c2c6e98be4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Sep 2019 12:19:52 -0700 Subject: [PATCH 211/345] KVM: x86: Manually flush collapsible SPTEs only when toggling flags Zapping collapsible sptes, a.k.a. 4k sptes that can be promoted into a large page, is only necessary when changing only the dirty logging flag of a memory region. If the memslot is also being moved, then all sptes for the memslot are zapped when it is invalidated. When a memslot is being created, it is impossible for there to be existing dirty mappings, e.g. KVM can have MMIO sptes, but not present, and thus dirty, sptes. Note, the comment and logic are shamelessly borrowed from MIPS's version of kvm_arch_commit_memory_region(). Fixes: 3ea3b7fa9af06 ("kvm: mmu: lazy collapse small sptes into large sptes") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 67af887f0b9b..7e51924c4a1c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9741,8 +9741,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Scan sptes if dirty logging has been stopped, dropping those * which can be collapsed into a single large-page spte. Later * page faults will create the large-page sptes. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() */ - if ((change != KVM_MR_DELETE) && + if (change == KVM_MR_FLAGS_ONLY && (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_zap_collapsible_sptes(kvm, new); From a073d7e3ad687a7ef32b65affe80faa7ce89bf92 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 16 Sep 2019 15:42:32 +0800 Subject: [PATCH 212/345] KVM: hyperv: Fix Direct Synthetic timers assert an interrupt w/o lapic_in_kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:__apic_accept_irq+0x46/0x740 arch/x86/kvm/lapic.c:1029 Call Trace: kvm_apic_set_irq+0xb4/0x140 arch/x86/kvm/lapic.c:558 stimer_notify_direct arch/x86/kvm/hyperv.c:648 [inline] stimer_expiration arch/x86/kvm/hyperv.c:659 [inline] kvm_hv_process_stimers+0x594/0x1650 arch/x86/kvm/hyperv.c:686 vcpu_enter_guest+0x2b2a/0x54b0 arch/x86/kvm/x86.c:7896 vcpu_run+0x393/0xd40 arch/x86/kvm/x86.c:8152 kvm_arch_vcpu_ioctl_run+0x636/0x900 arch/x86/kvm/x86.c:8360 kvm_vcpu_ioctl+0x6cf/0xaf0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2765 The testcase programs HV_X64_MSR_STIMERn_CONFIG/HV_X64_MSR_STIMERn_COUNT, in addition, there is no lapic in the kernel, the counters value are small enough in order that kvm_hv_process_stimers() inject this already-expired timer interrupt into the guest through lapic in the kernel which triggers the NULL deferencing. This patch fixes it by don't advertise direct mode synthetic timers and discarding the inject when lapic is not in kernel. syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=1752fe0a600000 Reported-by: syzbot+dff25ee91f0c7d5c1695@syzkaller.appspotmail.com Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index fff790a3f4ee..c0867b0aae3e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -645,7 +645,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer) .vector = stimer->config.apic_vector }; - return !kvm_apic_set_irq(vcpu, &irq, NULL); + if (lapic_in_kernel(vcpu)) + return !kvm_apic_set_irq(vcpu, &irq, NULL); + return 0; } static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) @@ -1852,7 +1854,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; - ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + + /* + * Direct Synthetic timers only make sense with in-kernel + * LAPIC + */ + if (lapic_in_kernel(vcpu)) + ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; break; From e1572f1d08be57a5412a464cff0712a23cd0b73e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 16 Sep 2019 18:22:56 +0200 Subject: [PATCH 213/345] cpu/SMT: create and export cpu_smt_possible() KVM needs to know if SMT is theoretically possible, this means it is supported and not forcefully disabled ('nosmt=force'). Create and export cpu_smt_possible() answering this question. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- include/linux/cpu.h | 2 ++ kernel/cpu.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 88dc0c653925..d0633ebdaa9c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -201,12 +201,14 @@ enum cpuhp_smt_control { extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); +extern bool cpu_smt_possible(void); extern int cpuhp_smt_enable(void); extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval); #else # define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } +static inline bool cpu_smt_possible(void) { return false; } static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index e1967e9eddc2..fc28e17940e0 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; void __init cpu_smt_disable(bool force) { - if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || - cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + if (!cpu_smt_possible()) return; if (force) { @@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu) */ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); } + +/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */ +bool cpu_smt_possible(void) +{ + return cpu_smt_control != CPU_SMT_FORCE_DISABLED && + cpu_smt_control != CPU_SMT_NOT_SUPPORTED; +} +EXPORT_SYMBOL_GPL(cpu_smt_possible); #else static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } #endif From b2d8b167e15bb5ec2691d1119c025630a247f649 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 16 Sep 2019 18:22:57 +0200 Subject: [PATCH 214/345] KVM: x86: hyper-v: set NoNonArchitecturalCoreSharing CPUID bit when SMT is impossible Hyper-V 2019 doesn't expose MD_CLEAR CPUID bit to guests when it cannot guarantee that two virtual processors won't end up running on sibling SMT threads without knowing about it. This is done as an optimization as in this case there is nothing the guest can do to protect itself against MDS and issuing additional flush requests is just pointless. On bare metal the topology is known, however, when Hyper-V is running nested (e.g. on top of KVM) it needs an additional piece of information: a confirmation that the exposed topology (wrt vCPU placement on different SMT threads) is trustworthy. NoNonArchitecturalCoreSharing (CPUID 0x40000004 EAX bit 18) is described in TLFS as follows: "Indicates that a virtual processor will never share a physical core with another virtual processor, except for virtual processors that are reported as sibling SMT threads." From KVM we can give such guarantee in two cases: - SMT is unsupported or forcefully disabled (just 'disabled' doesn't work as it can become re-enabled during the lifetime of the guest). - vCPUs are properly pinned so the scheduler won't put them on sibling SMT threads (when they're not reported as such). This patch reports NoNonArchitecturalCoreSharing bit in to userspace in the first case. The second case is outside of KVM's domain of responsibility (as vCPU pinning is actually done by someone who manages KVM's userspace - e.g. libvirt pinning QEMU threads). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/hyperv-tlfs.h | 7 +++++++ arch/x86/kvm/hyperv.c | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 038581e91a84..7741e211f7f5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -180,6 +180,13 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* + * Virtual processor will never share a physical core with another virtual + * processor, except for virtual processors that are reported as sibling SMT + * threads. + */ +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) + /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ #define HV_X64_NESTED_DIRECT_FLUSH BIT(17) #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c0867b0aae3e..23ff65504d7e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,6 +23,7 @@ #include "ioapic.h" #include "hyperv.h" +#include #include #include #include @@ -1872,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - + if (!cpu_smt_possible()) + ent->eax |= HV_X64_NO_NONARCH_CORESHARING; /* * Default number of spinlock retry attempts, matches * HyperV 2016. From e738772e29214019f506372dd1162723b7a4d507 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 16 Sep 2019 18:22:58 +0200 Subject: [PATCH 215/345] KVM: selftests: hyperv_cpuid: add check for NoNonArchitecturalCoreSharing bit The bit is supposed to be '1' when SMT is not supported or forcefully disabled and '0' otherwise. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/hyperv_cpuid.c | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index ee59831fbc98..443a2b54645b 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -26,6 +26,25 @@ static void guest_code(void) { } +static int smt_possible(void) +{ + char buf[16]; + FILE *f; + bool res = 1; + + f = fopen("/sys/devices/system/cpu/smt/control", "r"); + if (f) { + if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { + if (!strncmp(buf, "forceoff", 8) || + !strncmp(buf, "notsupported", 12)) + res = 0; + } + fclose(f); + } + + return res; +} + static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, int evmcs_enabled) { @@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && !entry->padding[2], "padding should be zero"); + if (entry->function == 0x40000004) { + int nononarchcs = !!(entry->eax & (1UL << 18)); + + TEST_ASSERT(nononarchcs == !smt_possible(), + "NoNonArchitecturalCoreSharing bit" + " doesn't reflect SMT setting"); + } + /* * If needed for debug: * fprintf(stdout, From 5845038c111db27902bc220a4f70070fe945871c Mon Sep 17 00:00:00 2001 From: Krish Sadhukhan Date: Fri, 9 Aug 2019 12:26:19 -0700 Subject: [PATCH 216/345] KVM: nVMX: Check Host Address Space Size on vmentry of nested guests According to section "Checks Related to Address-Space Size" in Intel SDM vol 3C, the following checks are performed on vmentry of nested guests: If the logical processor is outside IA-32e mode (if IA32_EFER.LMA = 0) at the time of VM entry, the following must hold: - The "IA-32e mode guest" VM-entry control is 0. - The "host address-space size" VM-exit control is 0. If the logical processor is in IA-32e mode (if IA32_EFER.LMA = 1) at the time of VM entry, the "host address-space size" VM-exit control must be 1. If the "host address-space size" VM-exit control is 0, the following must hold: - The "IA-32e mode guest" VM-entry control is 0. - Bit 17 of the CR4 field (corresponding to CR4.PCIDE) is 0. - Bits 63:32 in the RIP field are 0. If the "host address-space size" VM-exit control is 1, the following must hold: - Bit 5 of the CR4 field (corresponding to CR4.PAE) is 1. - The RIP field contains a canonical address. On processors that do not support Intel 64 architecture, checks are performed to ensure that the "IA-32e mode guest" VM-entry control and the "host address-space size" VM-exit control are both 0. Signed-off-by: Krish Sadhukhan Reviewed-by: Karl Heubaum Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a10cd351940..75ed0a63abbe 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2664,6 +2664,34 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu))) return -EINVAL; + + if (!(vmcs12->host_ia32_efer & EFER_LMA) && + ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE))) { + return -EINVAL; + } + + if ((vmcs12->host_ia32_efer & EFER_LMA) && + !(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) { + return -EINVAL; + } + + if (!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) && + ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + (vmcs12->host_cr4 & X86_CR4_PCIDE) || + (((vmcs12->host_rip) >> 32) & 0xffffffff))) { + return -EINVAL; + } + + if ((vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) && + ((!(vmcs12->host_cr4 & X86_CR4_PAE)) || + (is_noncanonical_address(vmcs12->host_rip, vcpu)))) { + return -EINVAL; + } +#else + if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE || + vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) + return -EINVAL; #endif /* From bc8a0aafcbb82e5c7dde486c03e6c51e3896d797 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:27 -0700 Subject: [PATCH 217/345] KVM: x86: Relocate MMIO exit stats counting Move the stat.mmio_exits update into x86_emulate_instruction(). This is both a bug fix, e.g. the current update flows will incorrectly increment mmio_exits on emulation failure, and a preparatory change to set the stage for eliminating EMULATE_DONE and company. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 -- arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/x86.c | 2 ++ 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a10af9c87f8a..138dcdef8a06 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5456,8 +5456,6 @@ emulate: case EMULATE_DONE: return 1; case EMULATE_USER_EXIT: - ++vcpu->stat.mmio_exits; - /* fall through */ case EMULATE_FAIL: return 0; default: diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8bab3e53d1e1..8f61250fc1c0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5201,7 +5201,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = kvm_emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; ret = 0; goto out; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e51924c4a1c..2b92c2ca1d79 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6703,6 +6703,8 @@ restart: } r = EMULATE_USER_EXIT; } else if (vcpu->mmio_needed) { + ++vcpu->stat.mmio_exits; + if (!vcpu->mmio_is_write) writeback = false; r = EMULATE_USER_EXIT; From 22da61c9123e574f607c92d8f69165b1ffff6d5f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:28 -0700 Subject: [PATCH 218/345] KVM: x86: Clean up handle_emulation_failure() When handling emulation failure, return the emulation result directly instead of capturing it in a local variable. Future patches will move additional cases into handle_emulation_failure(), clean up the cruft before so there isn't an ugly mix of setting a local variable and returning directly. Reviewed-by: Vitaly Kuznetsov Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2b92c2ca1d79..786d2f88fdf5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6304,24 +6304,22 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { - int r = EMULATE_DONE; - ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) return EMULATE_FAIL; + kvm_queue_exception(vcpu, UD_VECTOR); + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_USER_EXIT; + return EMULATE_USER_EXIT; } - kvm_queue_exception(vcpu, UD_VECTOR); - - return r; + return EMULATE_DONE; } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, From 120c2c4f99a80ef8682867b41e82d4e5d6b49635 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:29 -0700 Subject: [PATCH 219/345] KVM: x86: Refactor kvm_vcpu_do_singlestep() to remove out param Return the single-step emulation result directly instead of via an out param. Presumably at some point in the past kvm_vcpu_do_singlestep() could be called with *r==EMULATE_USER_EXIT, but that is no longer the case, i.e. all callers are happy to overwrite their own return variable. Reviewed-by: Vitaly Kuznetsov Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 786d2f88fdf5..e6063f20259e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6474,7 +6474,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) +static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -6483,10 +6483,10 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return EMULATE_USER_EXIT; } + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); + return EMULATE_DONE; } int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -6507,7 +6507,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) * that sets the TF flag". */ if (unlikely(rflags & X86_EFLAGS_TF)) - kvm_vcpu_do_singlestep(vcpu, &r); + r = kvm_vcpu_do_singlestep(vcpu); return r == EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -6720,7 +6720,7 @@ restart: exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE && ctxt->tf) - kvm_vcpu_do_singlestep(vcpu, &r); + r = kvm_vcpu_do_singlestep(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } From a6c6ed1e81060e110258dbc7118fda07e9971e48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:30 -0700 Subject: [PATCH 220/345] KVM: x86: Don't attempt VMWare emulation on #GP with non-zero error code The VMware backdoor hooks #GP faults on IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero error code for their #GP. Re-injecting #GP instead of attempting emulation on a non-zero error code will allow a future patch to move #GP injection (for emulation failure) into kvm_emulate_instruction() without having to plumb in the error code. Reviewed-and-tested-by: Vitaly Kuznetsov Reviewed-by: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++++++- arch/x86/kvm/vmx/vmx.c | 12 +++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7e352e81df31..00ec27bc2946 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2772,12 +2772,20 @@ static int gp_interception(struct vcpu_svm *svm) WARN_ON_ONCE(!enable_vmware_backdoor); + /* + * VMware backdoor emulation on #GP interception only handles IN{S}, + * OUT{S}, and RDPMC, none of which generate a non-zero error code. + */ + if (error_code) { + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8f61250fc1c0..d1bac3cb2440 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4539,12 +4539,22 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. + */ + if (error_code) { + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1; } From 42cbf06872cc44fb8d2d6665fa6494b5925eaf1c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:31 -0700 Subject: [PATCH 221/345] KVM: x86: Move #GP injection for VMware into x86_emulate_instruction() Immediately inject a #GP when VMware emulation fails and return EMULATE_DONE instead of propagating EMULATE_FAIL up the stack. This helps pave the way for removing EMULATE_FAIL altogether. Rename EMULTYPE_VMWARE to EMULTYPE_VMWARE_GP to document that the x86 emulator is called to handle VMware #GP interception, e.g. why a #GP is injected on emulation failure for EMULTYPE_VMWARE_GP. Drop EMULTYPE_NO_UD_ON_FAIL as a standalone type. The "no #UD on fail" is used only in the VMWare case and is obsoleted by having the emulator itself reinject #GP. Signed-off-by: Sean Christopherson Reviewed-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/svm.c | 10 ++-------- arch/x86/kvm/vmx/vmx.c | 10 ++-------- arch/x86/kvm/x86.c | 14 +++++++++----- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 13b35f94dec4..80071d02b87d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1325,8 +1325,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) -#define EMULTYPE_VMWARE (1 << 5) +#define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 00ec27bc2946..c7aa55e192c0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2768,7 +2768,6 @@ static int gp_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 error_code = svm->vmcb->control.exit_info_1; - int er; WARN_ON_ONCE(!enable_vmware_backdoor); @@ -2780,13 +2779,8 @@ static int gp_interception(struct vcpu_svm *svm) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != + EMULATE_USER_EXIT; } static bool is_erratum_383(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d1bac3cb2440..d4371599abe3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4522,7 +4522,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; - enum emulation_result er; vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; @@ -4549,13 +4548,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != + EMULATE_USER_EXIT; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e6063f20259e..09753b699d81 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6307,8 +6307,10 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) - return EMULATE_FAIL; + if (emulation_type & EMULTYPE_VMWARE_GP) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return EMULATE_DONE; + } kvm_queue_exception(vcpu, UD_VECTOR); @@ -6648,9 +6650,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, } } - if ((emulation_type & EMULTYPE_VMWARE) && - !is_vmware_backdoor_opcode(ctxt)) - return EMULATE_FAIL; + if ((emulation_type & EMULTYPE_VMWARE_GP) && + !is_vmware_backdoor_opcode(ctxt)) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return EMULATE_DONE; + } if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); From b4000606205959e6cfe1fd3a71c490267ff23506 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:32 -0700 Subject: [PATCH 222/345] KVM: x86: Add explicit flag for forced emulation on #UD Add an explicit emulation type for forced #UD emulation and use it to detect that KVM should unconditionally inject a #UD instead of falling into its standard emulation failure handling. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 80071d02b87d..f3c623dbd5ab 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1325,6 +1325,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_TRAP_UD_FORCED (1 << 4) #define EMULTYPE_VMWARE_GP (1 << 5) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 09753b699d81..eb2ed5c9a584 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5449,7 +5449,7 @@ int handle_ud(struct kvm_vcpu *vcpu) sig, sizeof(sig), &e) == 0 && memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); - emul_type = 0; + emul_type = EMULTYPE_TRAP_UD_FORCED; } er = kvm_emulate_instruction(vcpu, emul_type); @@ -6629,7 +6629,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, trace_kvm_emulate_insn_start(vcpu); ++vcpu->stat.insn_emulation; if (r != EMULATION_OK) { - if (emulation_type & EMULTYPE_TRAP_UD) + if ((emulation_type & EMULTYPE_TRAP_UD) || + (emulation_type & EMULTYPE_TRAP_UD_FORCED)) return EMULATE_FAIL; if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) From c83fad65e2cae1aa570a519b33e8ebf00f6e7227 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:33 -0700 Subject: [PATCH 223/345] KVM: x86: Move #UD injection for failed emulation into emulation code Immediately inject a #UD and return EMULATE done if emulation fails when handling an intercepted #UD. This helps pave the way for removing EMULATE_FAIL altogether. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb2ed5c9a584..64d584d48c60 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5440,7 +5440,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); int handle_ud(struct kvm_vcpu *vcpu) { int emul_type = EMULTYPE_TRAP_UD; - enum emulation_result er; char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; @@ -5452,12 +5451,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = EMULTYPE_TRAP_UD_FORCED; } - er = kvm_emulate_instruction(vcpu, emul_type); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + return kvm_emulate_instruction(vcpu, emul_type) != EMULATE_USER_EXIT; } EXPORT_SYMBOL_GPL(handle_ud); @@ -6630,8 +6624,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ++vcpu->stat.insn_emulation; if (r != EMULATION_OK) { if ((emulation_type & EMULTYPE_TRAP_UD) || - (emulation_type & EMULTYPE_TRAP_UD_FORCED)) - return EMULATE_FAIL; + (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return EMULATE_DONE; + } if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; From 738fece46dc5e1bb1309a827a2b69056143b3d13 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:34 -0700 Subject: [PATCH 224/345] KVM: x86: Exit to userspace on emulation skip failure Kill a few birds with one stone by forcing an exit to userspace on skip emulation failure. This removes a reference to EMULATE_FAIL, fixes a bug in handle_ept_misconfig() where it would exit to userspace without setting run->exit_reason, and fixes a theoretical bug in SVM's task_switch_interception() where it would overwrite run->exit_reason on a return of EMULATE_USER_EXIT. Note, this technically doesn't fully fix task_switch_interception() as it now incorrectly handles EMULATE_FAIL, but in practice there is no bug as EMULATE_FAIL will never be returned for EMULTYPE_SKIP. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/x86.c | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7aa55e192c0..fc40052fa334 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3885,8 +3885,8 @@ static int task_switch_interception(struct vcpu_svm *svm) int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE) - goto fail; + if (skip_emulated_instruction(&svm->vcpu) == EMULATE_USER_EXIT) + return 0; } if (int_type != SVM_EXITINTINFO_TYPE_SOFT) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64d584d48c60..802dfb926ca7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6306,6 +6306,13 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return EMULATE_DONE; } + if (emulation_type & EMULTYPE_SKIP) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return EMULATE_USER_EXIT; + } + kvm_queue_exception(vcpu, UD_VECTOR); if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { @@ -6641,8 +6648,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, inject_emulated_exception(vcpu); return EMULATE_DONE; } - if (emulation_type & EMULTYPE_SKIP) - return EMULATE_FAIL; return handle_emulation_failure(vcpu, emulation_type); } } From 1051778f6e1e25c1203869586f4431785a35c13c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:35 -0700 Subject: [PATCH 225/345] KVM: x86: Handle emulation failure directly in kvm_task_switch() Consolidate the reporting of emulation failure into kvm_task_switch() so that it can return EMULATE_USER_EXIT. This helps pave the way for removing EMULATE_FAIL altogether. This also fixes a theoretical bug where task switch interception could suppress an EMULATE_USER_EXIT return. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 11 ++--------- arch/x86/kvm/vmx/vmx.c | 14 +++----------- arch/x86/kvm/x86.c | 9 ++++++--- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fc40052fa334..69e53aa3969d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3892,17 +3892,10 @@ static int task_switch_interception(struct vcpu_svm *svm) if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) == EMULATE_FAIL) - goto fail; - return 1; -fail: - svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - svm->vcpu.run->internal.ndata = 0; - return 0; + return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, + has_error_code, error_code) != EMULATE_USER_EXIT; } static int cpuid_interception(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d4371599abe3..498270cfc71d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5074,21 +5074,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) type != INTR_TYPE_NMI_INTR)) skip_emulated_instruction(vcpu); - if (kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason, - has_error_code, error_code) == EMULATE_FAIL) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } - /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? */ - - return 1; + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? idt_index : -1, + reason, has_error_code, error_code) != EMULATE_USER_EXIT; } static int handle_ept_violation(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 802dfb926ca7..bdee7e39accb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8693,9 +8693,12 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - - if (ret) - return EMULATE_FAIL; + if (ret) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return EMULATE_USER_EXIT; + } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); From 9497e1f2ec93f0c8a7832c3c1eb3af8159800cd8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:36 -0700 Subject: [PATCH 226/345] KVM: x86: Move triple fault request into RM int injection Request triple fault in kvm_inject_realmode_interrupt() instead of returning EMULATE_FAIL and deferring to the caller. All existing callers request triple fault and it's highly unlikely Real Mode is going to acquire new features. While this consolidates a small amount of code, the real goal is to remove the last reference to EMULATE_FAIL. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 9 +++------ arch/x86/kvm/x86.c | 17 ++++++++--------- arch/x86/kvm/x86.h | 2 +- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 498270cfc71d..67733eef184c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1556,8 +1556,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) int inc_eip = 0; if (kvm_exception_is_soft(nr)) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); return; } @@ -4306,8 +4305,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) int inc_eip = 0; if (vcpu->arch.interrupt.soft) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -4343,8 +4341,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bdee7e39accb..92b6690d0512 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6273,7 +6273,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; @@ -6285,14 +6285,13 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ctxt->_eip = ctxt->eip + inc_eip; ret = emulate_int_real(ctxt, irq); - if (ret != X86EMUL_CONTINUE) - return EMULATE_FAIL; - - ctxt->eip = ctxt->_eip; - kvm_rip_write(vcpu, ctxt->eip); - kvm_set_rflags(vcpu, ctxt->eflags); - - return EMULATE_DONE; + if (ret != X86EMUL_CONTINUE) { + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } else { + ctxt->eip = ctxt->_eip; + kvm_rip_write(vcpu, ctxt->eip); + kvm_set_rflags(vcpu, ctxt->eflags); + } } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b5274e2a53cf..dbf7442a822b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) } void kvm_set_pending_timer(struct kvm_vcpu *vcpu); -int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); +void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 get_kvmclock_ns(struct kvm *kvm); From 8fff2710eaf50aff1e6a7aa0607fe5288b619404 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:37 -0700 Subject: [PATCH 227/345] KVM: VMX: Remove EMULATE_FAIL handling in handle_invalid_guest_state() Now that EMULATE_FAIL is completely unused, remove the last remaning usage where KVM does something functional in response to EMULATE_FAIL. Leave the check in place as a WARN_ON_ONCE to provide a better paper trail when EMULATE_{DONE,FAIL,USER_EXIT} are completely removed. Opportunistically remove the gotos in handle_invalid_guest_state(). With the EMULATE_FAIL handling gone there is no need to have a common handler for emulation failure and the gotos only complicate things, e.g. the signal_pending() check always returns '1', but this is far from obvious when glancing through the code. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 67733eef184c..9c92b2993f5a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5169,8 +5169,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err = EMULATE_DONE; - int ret = 1; + enum emulation_result err; bool intr_window_requested; unsigned count = 130; @@ -5193,38 +5192,38 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) err = kvm_emulate_instruction(vcpu, 0); - if (err == EMULATE_USER_EXIT) { - ret = 0; - goto out; - } + if (err == EMULATE_USER_EXIT) + return 0; - if (err != EMULATE_DONE) - goto emulation_error; + if (WARN_ON_ONCE(err == EMULATE_FAIL)) + return 1; if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) - goto emulation_error; + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_vcpu_halt(vcpu); - goto out; + return kvm_vcpu_halt(vcpu); } + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code. + */ if (signal_pending(current)) - goto out; + return 1; + if (need_resched()) schedule(); } -out: - return ret; - -emulation_error: - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; + return 1; } static void grow_ple_window(struct kvm_vcpu *vcpu) From 60fc3d02d5b8829b91b7b443ef6c7e8f0bbae868 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:38 -0700 Subject: [PATCH 228/345] KVM: x86: Remove emulation_result enums, EMULATE_{DONE,FAIL,USER_EXIT} Deferring emulation failure handling (in some cases) to the caller of x86_emulate_instruction() has proven fragile, e.g. multiple instances of KVM not setting run->exit_reason on EMULATE_FAIL, largely due to it being difficult to discern what emulation types can return what result, and which combination of types and results are handled where. Now that x86_emulate_instruction() always handles emulation failure, i.e. EMULATION_FAIL is only referenced in callers, remove the emulation_result enums entirely. Per KVM's existing exit handling conventions, return '0' and '1' for "exit to userspace" and "resume guest" respectively. Doing so cleans up many callers, e.g. they can return kvm_emulate_instruction() directly instead of having to interpret its result. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ---- arch/x86/kvm/mmu.c | 14 ++------ arch/x86/kvm/svm.c | 22 ++++++------- arch/x86/kvm/vmx/vmx.c | 28 ++++++---------- arch/x86/kvm/x86.c | 57 ++++++++++++++++----------------- 5 files changed, 49 insertions(+), 78 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f3c623dbd5ab..26f85a24b5e7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1315,12 +1315,6 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; -enum emulation_result { - EMULATE_DONE, /* no further processing */ - EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ - EMULATE_FAIL, /* can't emulate this instruction */ -}; - #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 138dcdef8a06..84e1a9c661dd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5383,7 +5383,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; - enum emulation_result er; bool direct = vcpu->arch.mmu->direct_map; /* With shadow page tables, fault_address contains a GVA or nGPA. */ @@ -5450,17 +5449,8 @@ emulate: return 1; } - er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); - - switch (er) { - case EMULATE_DONE: - return 1; - case EMULATE_USER_EXIT: - case EMULATE_FAIL: - return 0; - default: - BUG(); - } + return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 69e53aa3969d..cb7ad362e9b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -787,7 +787,7 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, svm->next_rip); svm_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } static void svm_queue_exception(struct kvm_vcpu *vcpu) @@ -2779,8 +2779,7 @@ static int gp_interception(struct vcpu_svm *svm) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != - EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } static bool is_erratum_383(void) @@ -2878,7 +2877,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3885,17 +3884,15 @@ static int task_switch_interception(struct vcpu_svm *svm) int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (skip_emulated_instruction(&svm->vcpu) == EMULATE_USER_EXIT) + if (!skip_emulated_instruction(&svm->vcpu)) return 0; } if (int_type != SVM_EXITINTINFO_TYPE_SOFT) int_vec = -1; - - return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, - has_error_code, error_code) != EMULATE_USER_EXIT; + has_error_code, error_code); } static int cpuid_interception(struct vcpu_svm *svm) @@ -3916,7 +3913,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3924,13 +3921,12 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0); } static int rsm_interception(struct vcpu_svm *svm) { - return kvm_emulate_instruction_from_buffer(&svm->vcpu, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2); } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4719,7 +4715,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = kvm_emulate_instruction(&svm->vcpu, 0); } return ret; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9c92b2993f5a..e71dc36850cb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1516,7 +1516,7 @@ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -4468,7 +4468,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0)) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -4545,8 +4545,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); return 1; } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP) != - EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } /* @@ -4643,7 +4642,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -4717,7 +4716,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_cr(struct kvm_vcpu *vcpu) @@ -4933,7 +4932,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -5000,7 +4999,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -5077,7 +5076,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) */ return kvm_task_switch(vcpu, tss_selector, type == INTR_TYPE_SOFT_INTR ? idt_index : -1, - reason, has_error_code, error_code) != EMULATE_USER_EXIT; + reason, has_error_code, error_code); } static int handle_ept_violation(struct kvm_vcpu *vcpu) @@ -5149,8 +5148,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == - EMULATE_DONE; + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -5169,7 +5167,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err; bool intr_window_requested; unsigned count = 130; @@ -5190,14 +5187,9 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = kvm_emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) + if (!kvm_emulate_instruction(vcpu, 0)) return 0; - if (WARN_ON_ONCE(err == EMULATE_FAIL)) - return 1; - if (vmx->emulation_required && !vmx->rmode.vm86_active && vcpu->arch.exception.pending) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92b6690d0512..a83b269126a0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5451,7 +5451,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = EMULTYPE_TRAP_UD_FORCED; } - return kvm_emulate_instruction(vcpu, emul_type) != EMULATE_USER_EXIT; + return kvm_emulate_instruction(vcpu, emul_type); } EXPORT_SYMBOL_GPL(handle_ud); @@ -6302,14 +6302,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) if (emulation_type & EMULTYPE_VMWARE_GP) { kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return EMULATE_DONE; + return 1; } if (emulation_type & EMULTYPE_SKIP) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } kvm_queue_exception(vcpu, UD_VECTOR); @@ -6318,10 +6318,10 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } - return EMULATE_DONE; + return 1; } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, @@ -6485,10 +6485,10 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - return EMULATE_USER_EXIT; + return 0; } kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); - return EMULATE_DONE; + return 1; } int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -6497,7 +6497,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r; r = kvm_x86_ops->skip_emulated_instruction(vcpu); - if (unlikely(r != EMULATE_DONE)) + if (unlikely(!r)) return 0; /* @@ -6510,7 +6510,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) */ if (unlikely(rflags & X86_EFLAGS_TF)) r = kvm_vcpu_do_singlestep(vcpu); - return r == EMULATE_DONE; + return r; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -6529,7 +6529,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) kvm_run->debug.arch.pc = eip; kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; + *r = 0; return true; } } @@ -6545,7 +6545,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); - *r = EMULATE_DONE; + *r = 1; return true; } } @@ -6632,11 +6632,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if ((emulation_type & EMULTYPE_TRAP_UD) || (emulation_type & EMULTYPE_TRAP_UD_FORCED)) { kvm_queue_exception(vcpu, UD_VECTOR); - return EMULATE_DONE; + return 1; } if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; if (ctxt->have_exception) { /* * #UD should result in just EMULATION_FAILED, and trap-like @@ -6645,7 +6645,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || exception_type(ctxt->exception.vector) == EXCPT_TRAP); inject_emulated_exception(vcpu); - return EMULATE_DONE; + return 1; } return handle_emulation_failure(vcpu, emulation_type); } @@ -6654,7 +6654,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if ((emulation_type & EMULTYPE_VMWARE_GP) && !is_vmware_backdoor_opcode(ctxt)) { kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return EMULATE_DONE; + return 1; } if (emulation_type & EMULTYPE_SKIP) { @@ -6662,11 +6662,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); kvm_x86_ops->set_interrupt_shadow(vcpu, 0); - return EMULATE_DONE; + return 1; } if (retry_instruction(ctxt, cr2, emulation_type)) - return EMULATE_DONE; + return 1; /* this is needed for vmware backdoor interface to work since it changes registers values during IO operation */ @@ -6682,18 +6682,18 @@ restart: r = x86_emulate_insn(ctxt); if (r == EMULATION_INTERCEPTED) - return EMULATE_DONE; + return 1; if (r == EMULATION_FAILED) { if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) - return EMULATE_DONE; + return 1; return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { - r = EMULATE_DONE; + r = 1; if (inject_emulated_exception(vcpu)) return r; } else if (vcpu->arch.pio.count) { @@ -6704,18 +6704,18 @@ restart: writeback = false; vcpu->arch.complete_userspace_io = complete_emulated_pio; } - r = EMULATE_USER_EXIT; + r = 0; } else if (vcpu->mmio_needed) { ++vcpu->stat.mmio_exits; if (!vcpu->mmio_is_write) writeback = false; - r = EMULATE_USER_EXIT; + r = 0; vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else - r = EMULATE_DONE; + r = 1; if (writeback) { unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); @@ -6724,7 +6724,7 @@ restart: if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) { kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) + if (r && ctxt->tf) r = kvm_vcpu_do_singlestep(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -8319,12 +8319,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return r; } static int complete_emulated_pio(struct kvm_vcpu *vcpu) @@ -8696,13 +8695,13 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - return EMULATE_USER_EXIT; + return 0; } kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - return EMULATE_DONE; + return 1; } EXPORT_SYMBOL_GPL(kvm_task_switch); From 1957aa63be5395307f8b80458f011871b34617ad Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:39 -0700 Subject: [PATCH 229/345] KVM: VMX: Handle single-step #DB for EMULTYPE_SKIP on EPT misconfig VMX's EPT misconfig flow to handle fast-MMIO path falls back to decoding the instruction to determine the instruction length when running as a guest (Hyper-V doesn't fill VMCS.VM_EXIT_INSTRUCTION_LEN because it's technically not defined for EPT misconfigs). Rather than implement the slow skip in VMX's generic skip_emulated_instruction(), handle_ept_misconfig() directly calls kvm_emulate_instruction() with EMULTYPE_SKIP, which intentionally doesn't do single-step detection, and so handle_ept_misconfig() misses a single-step #DB. Rework the EPT misconfig fallback case to route it through kvm_skip_emulated_instruction() so that single-step #DBs and interrupt shadow updates are handled automatically. I.e. make VMX's slow skip logic match SVM's and have the SVM flow not intentionally avoid the shadow update. Alternatively, the handle_ept_misconfig() could manually handle single- step detection, but that results in EMULTYPE_SKIP having split logic for the interrupt shadow vs. single-step #DBs, and split emulator logic is largely what led to this mess in the first place. Modifying SVM to mirror VMX flow isn't really an option as SVM's case isn't limited to a specific exit reason, i.e. handling the slow skip in skip_emulated_instruction() is mandatory for all intents and purposes. Drop VMX's skip_emulated_instruction() wrapper since it can now fail, and instead WARN if it fails unexpectedly, e.g. if exit_reason somehow becomes corrupted. Cc: Vitaly Kuznetsov Fixes: d391f12070672 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 17 +++++++------- arch/x86/kvm/vmx/vmx.c | 52 ++++++++++++++++++------------------------ arch/x86/kvm/x86.c | 6 ++++- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cb7ad362e9b9..ce75296b1b10 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -777,14 +777,15 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) svm->next_rip = svm->vmcb->control.next_rip; } - if (!svm->next_rip) - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); - - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); - - kvm_rip_write(vcpu, svm->next_rip); + if (!svm->next_rip) { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } else { + if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) + pr_err("%s: ip 0x%lx next 0x%llx\n", + __func__, kvm_rip_read(vcpu), svm->next_rip); + kvm_rip_write(vcpu, svm->next_rip); + } svm_set_interrupt_shadow(vcpu, 0); return 1; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e71dc36850cb..ef98311ad153 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1501,17 +1501,27 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -/* - * Returns an int to be compatible with SVM implementation (which can fail). - * Do not use directly, use skip_emulated_instruction() instead. - */ -static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. + */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); @@ -1519,11 +1529,6 @@ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu) return 1; } -static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu) -{ - (void)__skip_emulated_instruction(vcpu); -} - static void vmx_clear_hlt(struct kvm_vcpu *vcpu) { /* @@ -4587,7 +4592,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -5068,7 +5073,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && type != INTR_TYPE_EXT_INTR && type != INTR_TYPE_NMI_INTR)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); /* * TODO: What about debug traps on tss switch? @@ -5135,20 +5140,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - /* - * Doing kvm_skip_emulated_instruction() depends on undefined - * behavior: Intel's manual doesn't mandate - * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG - * occurs and while on real hardware it was observed to be set, - * other hypervisors (namely Hyper-V) don't set it, we end up - * advancing IP with some random value. Disable fast mmio when - * running nested and keep it for real hardware in hope that - * VM_EXIT_INSTRUCTION_LEN will always be set correctly. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return kvm_skip_emulated_instruction(vcpu); - else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP); + return kvm_skip_emulated_instruction(vcpu); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -7722,7 +7714,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = __skip_emulated_instruction, + .skip_emulated_instruction = skip_emulated_instruction, .set_interrupt_shadow = vmx_set_interrupt_shadow, .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a83b269126a0..c38d247dbffb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6657,11 +6657,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } + /* + * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks + * for kvm_skip_emulated_instruction(). The caller is responsible for + * updating interruptibility state and injecting single-step #DBs. + */ if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); - kvm_x86_ops->set_interrupt_shadow(vcpu, 0); return 1; } From 41577ab8bd72f8813cfd74cd01da3ae6a643878e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 27 Aug 2019 14:40:40 -0700 Subject: [PATCH 230/345] KVM: x86: Add comments to document various emulation types Document the intended usage of each emulation type as each exists to handle an edge case of one kind or another and can be easily misinterpreted at first glance. Cc: Liran Alon Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26f85a24b5e7..78f0a919173e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1315,6 +1315,36 @@ extern u64 kvm_default_tsc_scaling_ratio; extern u64 kvm_mce_cap_supported; +/* + * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing + * userspace I/O) to indicate that the emulation context + * should be resued as is, i.e. skip initialization of + * emulation context, instruction fetch and decode. + * + * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. + * Indicates that only select instructions (tagged with + * EmulateOnUD) should be emulated (to minimize the emulator + * attack surface). See also EMULTYPE_TRAP_UD_FORCED. + * + * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to + * decode the instruction length. For use *only* by + * kvm_x86_ops->skip_emulated_instruction() implementations. + * + * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to + * retry native execution under certain conditions. + * + * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was + * triggered by KVM's magic "force emulation" prefix, + * which is opt in via module param (off by default). + * Bypasses EmulateOnUD restriction despite emulating + * due to an intercepted #UD (see EMULTYPE_TRAP_UD). + * Used to test the full emulator from userspace. + * + * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware + * backdoor emulation, which is opt in via module param. + * VMware backoor emulation handles select instructions + * and reinjects the #GP for all other cases. + */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) From e69e72faa3a0709dd23df6a4ca060a15e99168a1 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:49 +0800 Subject: [PATCH 231/345] KVM: x86: Add support for user wait instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions. This patch adds support for user wait instructions in KVM. Availability of the user wait instructions is indicated by the presence of the CPUID feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may be executed at any privilege level, and use 32bit IA32_UMWAIT_CONTROL MSR to set the maximum time. The behavior of user wait instructions in VMX non-root operation is determined first by the setting of the "enable user wait and pause" secondary processor-based VM-execution control bit 26. If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause an invalid-opcode exception (#UD). If the VM-execution control is 1, treatment is based on the setting of the “RDTSC exiting†VM-execution control. Because KVM never enables RDTSC exiting, if the instruction causes a delay, the amount of time delayed is called here the physical delay. The physical delay is first computed by determining the virtual delay. If IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in EDX:EAX minus the value that RDTSC would return; if IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH). Because umwait and tpause can put a (psysical) CPU into a power saving state, by default we dont't expose it to kvm and enable it only when guest CPUID has it. Detailed information about user wait instructions can be found in the latest Intel 64 and IA-32 Architectures Software Developer's Manual. Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/capabilities.h | 6 ++++++ arch/x86/kvm/vmx/nested.c | 1 + arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++++++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b15e6465870f..fdad81626829 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,6 +69,7 @@ #define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index dd5985eb61b4..4e5a835989d1 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,7 +360,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/; /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7aa69716d516 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static inline bool vmx_waitpkg_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + static inline bool cpu_has_vmx_tsc_scaling(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 75ed0a63abbe..c5b7ba795f95 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2089,6 +2089,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ef98311ad153..bb55f54e29b1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2323,6 +2323,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -4059,6 +4060,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } } + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + vmx->secondary_exec_control = exec_control; } From 6e3ba4abcea5681eebbfc10f1b56c9fbe80b6685 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:50 +0800 Subject: [PATCH 232/345] KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL UMWAIT and TPAUSE instructions use 32bit IA32_UMWAIT_CONTROL at MSR index E1H to determines the maximum time in TSC-quanta that the processor can reside in either C0.1 or C0.2. This patch emulates MSR IA32_UMWAIT_CONTROL in guest and differentiate IA32_UMWAIT_CONTROL between host and guest. The variable mwait_control_cached in arch/x86/kernel/cpu/umwait.c caches the MSR value, so this patch uses it to avoid frequently rdmsr of IA32_UMWAIT_CONTROL. Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/kernel/cpu/umwait.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.h | 9 +++++++++ arch/x86/kvm/x86.c | 2 ++ 4 files changed, 53 insertions(+) diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 32b4dc9030aa..c222f283b456 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,6 +17,12 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +u32 get_umwait_control_msr(void) +{ + return umwait_control_cached; +} +EXPORT_SYMBOL_GPL(get_umwait_control_msr); + /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bb55f54e29b1..52dd2241b749 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1733,6 +1733,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1906,6 +1912,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -4211,6 +4227,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); vmx->hv_deadline_tsc = -1; @@ -6384,6 +6402,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6478,6 +6513,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 64d5a4890aa9..bee16687dc0b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 get_umwait_control_msr(void); + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -211,6 +213,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u32 msr_ia32_umwait_control; u32 secondary_exec_control; @@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c38d247dbffb..977b36348bed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1145,6 +1145,8 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, + MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, From dd9212a885ca4a95443905c7c3781122a4d664e8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 20 Sep 2019 15:13:24 -0500 Subject: [PATCH 233/345] drm/amdgpu/display: fix 64 bit divide Use proper helper for 32 bit. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 36277bca0326..b1e657e137a9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -197,7 +197,9 @@ void dce11_pplib_apply_display_requirements( */ if (ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, - (uint32_t) (dc->bw_vbios->high_yclk.value / memory_type_multiplier / 10000)); + (uint32_t) div64_s64( + div64_s64(dc->bw_vbios->high_yclk.value, + memory_type_multiplier), 10000)); } else { pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz / memory_type_multiplier; From bf653b78f9608d66db174eabcbda7454c8fde6d5 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 16 Jul 2019 14:55:51 +0800 Subject: [PATCH 234/345] KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit As the latest Intel 64 and IA-32 Architectures Software Developer's Manual, UMWAIT and TPAUSE instructions cause a VM exit if the RDTSC exiting and enable user wait and pause VM-execution controls are both 1. Because KVM never enable RDTSC exiting, the vm-exit for UMWAIT and TPAUSE should never happen. Considering EXIT_REASON_XSAVES and EXIT_REASON_XRSTORS is also unexpected VM-exit for KVM. Introduce a common exit helper handle_unexpected_vmexit() to handle these unexpected VM-exit. Suggested-by: Sean Christopherson Co-developed-by: Jingqi Liu Signed-off-by: Jingqi Liu Signed-off-by: Tao Xu Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/vmx.h | 6 +++++- arch/x86/kvm/vmx/nested.c | 4 ++++ arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++---------------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f01950aa7fae..3eb8411ab60e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -86,6 +86,8 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -144,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index c5b7ba795f95..77548ef1ad3b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5470,6 +5470,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_ENCLS: /* SGX is never exposed to L1 */ return false; + case EXIT_REASON_UMWAIT: + case EXIT_REASON_TPAUSE: + return nested_cpu_has2(vmcs12, + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 52dd2241b749..a7c9922e3905 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5007,20 +5007,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } -static int handle_xsaves(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - -static int handle_xrstors(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -5514,6 +5500,14 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } +static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) +{ + kvm_skip_emulated_instruction(vcpu); + WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", + vmcs_read32(VM_EXIT_REASON)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5565,13 +5559,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_xsaves, - [EXIT_REASON_XRSTORS] = handle_xrstors, + [EXIT_REASON_XSAVES] = handle_unexpected_vmexit, + [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, + [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit, + [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit, }; static const int kvm_vmx_max_exit_handlers = From d0f5a86a34072c46b7fe1b8f8768e666d9d6f87f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 17 Sep 2019 16:16:26 +0800 Subject: [PATCH 235/345] KVM: LAPIC: Tune lapic_timer_advance_ns smoothly Filter out drastic fluctuation and random fluctuation, remove timer_advance_adjust_done altogether, the adjustment would be continuous. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 28 ++++++++++++++-------------- arch/x86/kvm/lapic.h | 1 - 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8675458c2205..3a3a6854dcca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -65,7 +65,9 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul -#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 +static bool lapic_timer_advance_dynamic __read_mostly; +#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 +#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000 #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1485,26 +1487,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 ns; + /* Do not adjust for tiny fluctuations or large random spikes. */ + if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || + abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) + return; + /* too early */ if (advance_expire_delta < 0) { ns = -advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns -= min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } else { /* too late */ ns = advance_expire_delta * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns += min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; } - if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - apic->lapic_timer.timer_advance_adjust_done = true; - if (unlikely(timer_advance_ns > 5000)) { + if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX)) timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; - } apic->lapic_timer.timer_advance_ns = timer_advance_ns; } @@ -1524,7 +1525,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) + if (lapic_timer_advance_dynamic) adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } @@ -2302,13 +2303,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; - apic->lapic_timer.timer_advance_adjust_done = false; + lapic_timer_advance_dynamic = true; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; - apic->lapic_timer.timer_advance_adjust_done = true; + lapic_timer_advance_dynamic = false; } - /* * APIC is created enabled. This will prevent kvm_lapic_set_base from * thinking that APIC state has changed. diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 50053d2b8b7b..2aad7e226fc0 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -35,7 +35,6 @@ struct kvm_timer { s64 advance_expire_delta; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; - bool timer_advance_adjust_done; }; struct kvm_lapic { From fac026dac0bc4dc3bb2e72309fa47ff3c4dd7c16 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:03 -0700 Subject: [PATCH 236/345] KVM: x86/mmu: Treat invalid shadow pages as obsolete Treat invalid shadow pages as obsolete to fix a bug where an obsolete and invalid page with a non-zero root count could become non-obsolete due to mmu_valid_gen wrapping. The bug is largely theoretical with the current code base, as an unsigned long will effectively never wrap on 64-bit KVM, and userspace would have to deliberately stall a vCPU in order to keep an obsolete invalid page on the active list while simultaneously modifying memslots billions of times to trigger a wrap. The obvious alternative is to use a 64-bit value for mmu_valid_gen, but it's actually desirable to go in the opposite direction, i.e. using a smaller 8-bit value to reduce KVM's memory footprint by 8 bytes per shadow page, and relying on proper treatment of invalid pages instead of preventing the generation from wrapping. Note, "Fixes" points at a commit that was at one point reverted, but has since been restored. Fixes: 5304b8d37c2a5 ("KVM: MMU: fast invalidate all pages") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 84e1a9c661dd..151c4b94696f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2252,7 +2252,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ + if (is_obsolete_sp((_kvm), (_sp))) { \ } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ @@ -2311,7 +2311,8 @@ static void mmu_audit_disable(void) { } static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { - return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); + return sp->role.invalid || + unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, From 92f58b5c0181596d9f1e317b49ada2e728fb76eb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:04 -0700 Subject: [PATCH 237/345] KVM: x86/mmu: Use fast invalidate mechanism to zap MMIO sptes Use the fast invalidate mechasim to zap MMIO sptes on a MMIO generation wrap. The fast invalidate flow was reintroduced to fix a livelock bug in kvm_mmu_zap_all() that can occur if kvm_mmu_zap_all() is invoked when the guest has live vCPUs. I.e. using kvm_mmu_zap_all() to handle the MMIO generation wrap is theoretically susceptible to the livelock bug. This effectively reverts commit 4771450c345dc ("Revert "KVM: MMU: drop kvm_mmu_zap_mmio_sptes""), i.e. restores the behavior of commit a8eca9dcc656a ("KVM: MMU: drop kvm_mmu_zap_mmio_sptes"). Note, this actually fixes commit 571c5af06e303 ("KVM: x86/mmu: Voluntarily reschedule as needed when zapping MMIO sptes"), but there is no need to incrementally revert back to using fast invalidate, e.g. doing so doesn't provide any bisection or stability benefits. Fixes: 571c5af06e303 ("KVM: x86/mmu: Voluntarily reschedule as needed when zapping MMIO sptes") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 151c4b94696f..6319f1e208f6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -403,8 +403,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, mask |= (gpa & shadow_nonpresent_or_rsvd_mask) << shadow_nonpresent_or_rsvd_mask_len; - page_header(__pa(sptep))->mmio_cached = true; - trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); } @@ -5948,7 +5946,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); -static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) +void kvm_mmu_zap_all(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); @@ -5957,14 +5955,10 @@ static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) spin_lock(&kvm->mmu_lock); restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { - if (mmio_only && !sp->mmio_cached) - continue; if (sp->role.invalid && sp->root_count) continue; - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { - WARN_ON_ONCE(mmio_only); + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) goto restart; - } if (cond_resched_lock(&kvm->mmu_lock)) goto restart; } @@ -5973,11 +5967,6 @@ restart: spin_unlock(&kvm->mmu_lock); } -void kvm_mmu_zap_all(struct kvm *kvm) -{ - return __kvm_mmu_zap_all(kvm, false); -} - void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); @@ -5999,7 +5988,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) */ if (unlikely(gen == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); - __kvm_mmu_zap_all(kvm, true); + kvm_mmu_zap_all_fast(kvm); } } From dd6223c76205c5eec16d12d8ea7cd9090e537357 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:05 -0700 Subject: [PATCH 238/345] KVM: x86/mmu: Revert "Revert "KVM: MMU: show mmu_valid_gen in shadow page related tracepoints"" Now that the fast invalidate mechanism has been reintroduced, restore tracing of the generation number in shadow page tracepoints. This reverts commit b59c4830ca185ba0e9f9e046fb1cd10a4a92627a. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmutrace.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index d8001b4bca05..e9832b5ec53c 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -8,16 +8,18 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(unsigned long, mmu_valid_gen) \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) -#define KVM_MMU_PAGE_ASSIGN(sp) \ - __entry->gfn = sp->gfn; \ - __entry->role = sp->role.word; \ - __entry->root_count = sp->root_count; \ +#define KVM_MMU_PAGE_ASSIGN(sp) \ + __entry->mmu_valid_gen = sp->mmu_valid_gen; \ + __entry->gfn = sp->gfn; \ + __entry->role = sp->role.word; \ + __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ @@ -29,8 +31,9 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \ + trace_seq_printf(p, "sp gen %lx gfn %llx l%u %u-byte q%u%s %s%s"\ " %snxe %sad root %u %s%c", \ + __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.gpte_is_8_bytes ? 8 : 4, \ role.quadrant, \ From 14a3c4f498edac65ce2ff1f05d849cd72e886d78 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:06 -0700 Subject: [PATCH 239/345] KVM: x86/mmu: Revert "Revert "KVM: MMU: add tracepoint for kvm_mmu_invalidate_all_pages"" Now that the fast invalidate mechanism has been reintroduced, restore the tracepoint associated with said mechanism. Note, the name of the tracepoint deviates from the original tracepoint so as to match KVM's current nomenclature. This reverts commit 42560fb1f3c6c7f730897b7fa7a478bc37e0be50. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/mmutrace.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6319f1e208f6..2ab0ac88d86e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5743,6 +5743,7 @@ restart: static void kvm_mmu_zap_all_fast(struct kvm *kvm) { spin_lock(&kvm->mmu_lock); + trace_kvm_mmu_zap_all_fast(kvm); kvm->arch.mmu_valid_gen++; kvm_zap_obsolete_pages(kvm); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index e9832b5ec53c..1a063ba76281 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -282,6 +282,27 @@ TRACE_EVENT( ) ); +TRACE_EVENT( + kvm_mmu_zap_all_fast, + TP_PROTO(struct kvm *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(unsigned long, mmu_valid_gen) + __field(unsigned int, mmu_used_pages) + ), + + TP_fast_assign( + __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; + __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + ), + + TP_printk("kvm-mmu-valid-gen %lx used_pages %x", + __entry->mmu_valid_gen, __entry->mmu_used_pages + ) +); + + TRACE_EVENT( check_mmio_spte, TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), From fbb158cb88b6e4f8279707a1842ee0332f64b1a3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:07 -0700 Subject: [PATCH 240/345] KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch"" Now that the fast invalidate mechanism has been reintroduced, restore the performance tweaks for fast invalidation that existed prior to its removal. Paraphrashing the original changelog: Zap at least 10 shadow pages before releasing mmu_lock to reduce the overhead associated with re-acquiring the lock. Note: "10" is an arbitrary number, speculated to be high enough so that a vCPU isn't stuck zapping obsolete pages for an extended period, but small enough so that other vCPUs aren't starved waiting for mmu_lock. This reverts commit 43d2b14b105fb00b8864c7b0ee7043cc1cc4a969. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2ab0ac88d86e..3acc4b046d47 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5671,12 +5671,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) return ret; } - +#define BATCH_ZAP_PAGES 10 static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; LIST_HEAD(invalid_list); - int ign; + int nr_zapped, batch = 0; restart: list_for_each_entry_safe_reverse(sp, node, @@ -5689,28 +5689,6 @@ restart: break; /* - * Do not repeatedly zap a root page to avoid unnecessary - * KVM_REQ_MMU_RELOAD, otherwise we may not be able to - * progress: - * vcpu 0 vcpu 1 - * call vcpu_enter_guest(): - * 1): handle KVM_REQ_MMU_RELOAD - * and require mmu-lock to - * load mmu - * repeat: - * 1): zap root page and - * send KVM_REQ_MMU_RELOAD - * - * 2): if (cond_resched_lock(mmu-lock)) - * - * 2): hold mmu-lock and load mmu - * - * 3): see KVM_REQ_MMU_RELOAD bit - * on vcpu->requests is set - * then return 1 to call - * vcpu_enter_guest() again. - * goto repeat; - * * Since we are reversely walking the list and the invalid * list will be moved to the head, skip the invalid page * can help us to avoid the infinity list walking. @@ -5718,14 +5696,19 @@ restart: if (sp->role.invalid) continue; - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + if (batch >= BATCH_ZAP_PAGES && + (need_resched() || spin_needbreak(&kvm->mmu_lock))) { + batch = 0; kvm_mmu_commit_zap_page(kvm, &invalid_list); cond_resched_lock(&kvm->mmu_lock); goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, + &nr_zapped)) { + batch += nr_zapped; goto restart; + } } kvm_mmu_commit_zap_page(kvm, &invalid_list); From 4506ecf4855e53c3e5a2b40312a240696863582c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:08 -0700 Subject: [PATCH 241/345] KVM: x86/mmu: Revert "Revert "KVM: MMU: collapse TLB flushes when zap all pages"" Now that the fast invalidate mechanism has been reintroduced, restore the performance tweaks for fast invalidation that existed prior to its removal. Paraphrashing the original changelog: Reload the mmu on all vCPUs after updating the generation number so that obsolete pages are not used by any vCPUs. This allows collapsing all TLB flushes during obsolete page zapping into a single flush, as there is no need to flush when dropping mmu_lock (to reschedule). Note: a remote TLB flush is still needed before freeing the pages as other vCPUs may be doing a lockless shadow page walk. Opportunstically improve the comments restored by the revert (the code itself is a true revert). This reverts commit f34d251d66ba263c077ed9d2bbd1874339a4c887. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3acc4b046d47..652f2ce14583 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5696,11 +5696,15 @@ restart: if (sp->role.invalid) continue; + /* + * No need to flush the TLB since we're only zapping shadow + * pages with an obsolete generation number and all vCPUS have + * loaded a new root, i.e. the shadow pages being zapped cannot + * be in active use by the guest. + */ if (batch >= BATCH_ZAP_PAGES && - (need_resched() || spin_needbreak(&kvm->mmu_lock))) { + cond_resched_lock(&kvm->mmu_lock)) { batch = 0; - kvm_mmu_commit_zap_page(kvm, &invalid_list); - cond_resched_lock(&kvm->mmu_lock); goto restart; } @@ -5711,6 +5715,11 @@ restart: } } + /* + * Trigger a remote TLB flush before freeing the page tables to ensure + * KVM is not in the middle of a lockless shadow page table walk, which + * may reference the pages. + */ kvm_mmu_commit_zap_page(kvm, &invalid_list); } @@ -5729,6 +5738,16 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) trace_kvm_mmu_zap_all_fast(kvm); kvm->arch.mmu_valid_gen++; + /* + * Notify all vcpus to reload its shadow page table and flush TLB. + * Then all vcpus will switch to new shadow page table with the new + * mmu_valid_gen. + * + * Note: we need to do this under the protection of mmu_lock, + * otherwise, vcpu would purge shadow page but miss tlb flush. + */ + kvm_reload_remote_mmus(kvm); + kvm_zap_obsolete_pages(kvm); spin_unlock(&kvm->mmu_lock); } From 31741eb11a43066a3da92996bcfccfb42e248d44 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:09 -0700 Subject: [PATCH 242/345] KVM: x86/mmu: Revert "Revert "KVM: MMU: reclaim the zapped-obsolete page first"" Now that the fast invalidate mechanism has been reintroduced, restore the performance tweaks for fast invalidation that existed prior to its removal. Paraphrashing the original changelog: Introduce a per-VM list to track obsolete shadow pages, i.e. pages which have been deleted from the mmu cache but haven't yet been freed. When page reclaiming is needed, zap/free the deleted pages first. This reverts commit 52d5dedc79bdcbac2976159a172069618cf31be5. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 78f0a919173e..53e0b956ed3c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -865,6 +865,7 @@ struct kvm_arch { * Hash table of struct kvm_mmu_page. */ struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; From 10605204e91f46bdec98bef81923263d245515a0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:10 -0700 Subject: [PATCH 243/345] KVM: x86/mmu: Revert "KVM: x86/mmu: Remove is_obsolete() call" Now that the fast invalidate mechanism has been reintroduced, restore the performance tweaks for fast invalidation that existed prior to its removal. Paraphrasing the original changelog (commit 5ff0568374ed2 was itself a partial revert): Don't force reloading the remote mmu when zapping an obsolete page, as a MMU_RELOAD request has already been issued by kvm_mmu_zap_all_fast() immediately after incrementing mmu_valid_gen, i.e. after marking pages obsolete. This reverts commit 5ff0568374ed2e585376a3832857ade5daccd381. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 29 +++++++++++++++++++++++------ arch/x86/kvm/x86.c | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 652f2ce14583..e552fd5cd33d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2752,7 +2752,12 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); - if (!sp->role.invalid) + /* + * Obsolete pages cannot be used on any vCPUs, see the comment + * in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also + * treats invalid shadow pages as being obsolete. + */ + if (!is_obsolete_sp(kvm, sp)) kvm_reload_remote_mmus(kvm); } @@ -5675,7 +5680,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; - LIST_HEAD(invalid_list); int nr_zapped, batch = 0; restart: @@ -5708,8 +5712,8 @@ restart: goto restart; } - if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, - &nr_zapped)) { + if (__kvm_mmu_prepare_zap_page(kvm, sp, + &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { batch += nr_zapped; goto restart; } @@ -5720,7 +5724,7 @@ restart: * KVM is not in the middle of a lockless shadow page table walk, which * may reference the pages. */ - kvm_mmu_commit_zap_page(kvm, &invalid_list); + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); } /* @@ -5752,6 +5756,11 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) spin_unlock(&kvm->mmu_lock); } +static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +{ + return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); +} + static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) @@ -6022,16 +6031,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) * want to shrink a VM that only started to populate its MMU * anyway. */ - if (!kvm->arch.n_used_mmu_pages) + if (!kvm->arch.n_used_mmu_pages && + !kvm_has_zapped_obsolete_pages(kvm)) continue; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + if (kvm_has_zapped_obsolete_pages(kvm)) { + kvm_mmu_commit_zap_page(kvm, + &kvm->arch.zapped_obsolete_pages); + goto unlock; + } + if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); +unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 977b36348bed..c9a3d8efe1c2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9425,6 +9425,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); From ca333add6933ad9732ba2c6671f133d7367ad96c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:11 -0700 Subject: [PATCH 244/345] KVM: x86/mmu: Explicitly track only a single invalid mmu generation Toggle mmu_valid_gen between '0' and '1' instead of blindly incrementing the generation. Because slots_lock is held for the entire duration of zapping obsolete pages, it's impossible for there to be multiple invalid generations associated with shadow pages at any given time. Toggling between the two generations (valid vs. invalid) allows changing mmu_valid_gen from an unsigned long to a u8, which reduces the size of struct kvm_mmu_page from 160 to 152 bytes on 64-bit KVM, i.e. reduces KVM's memory footprint by 8 bytes per shadow page. Set sp->mmu_valid_gen before it is added to active_mmu_pages. Functionally this has no effect as kvm_mmu_alloc_page() has a single caller that sets sp->mmu_valid_gen soon thereafter, but visually it is jarring to see a shadow page being added to the list without its mmu_valid_gen first being set. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/mmu.c | 14 ++++++++++++-- arch/x86/kvm/mmutrace.h | 16 ++++++++-------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53e0b956ed3c..c5ed92482e2c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -320,6 +320,7 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; bool unsync; + u8 mmu_valid_gen; bool mmio_cached; /* @@ -335,7 +336,6 @@ struct kvm_mmu_page { int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ - unsigned long mmu_valid_gen; DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 @@ -859,7 +859,7 @@ struct kvm_arch { unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; - unsigned long mmu_valid_gen; + u8 mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e552fd5cd33d..5f0864000360 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2101,6 +2101,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct * depends on valid pages being added to the head of the list. See * comments in kvm_zap_obsolete_pages(). */ + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2537,7 +2538,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (level > PT_PAGE_TABLE_LEVEL && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } - sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); @@ -5738,9 +5738,19 @@ restart: */ static void kvm_mmu_zap_all_fast(struct kvm *kvm) { + lockdep_assert_held(&kvm->slots_lock); + spin_lock(&kvm->mmu_lock); trace_kvm_mmu_zap_all_fast(kvm); - kvm->arch.mmu_valid_gen++; + + /* + * Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is + * held for the entire duration of zapping obsolete pages, it's + * impossible for there to be multiple invalid generations associated + * with *valid* shadow pages at any given time, i.e. there is exactly + * one valid generation and (at most) one invalid generation. + */ + kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; /* * Notify all vcpus to reload its shadow page table and flush TLB. diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 1a063ba76281..7ca8831c7d1a 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -8,11 +8,11 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(unsigned long, mmu_valid_gen) \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(__u8, mmu_valid_gen) \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) #define KVM_MMU_PAGE_ASSIGN(sp) \ @@ -31,7 +31,7 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gen %lx gfn %llx l%u %u-byte q%u%s %s%s"\ + trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ @@ -288,7 +288,7 @@ TRACE_EVENT( TP_ARGS(kvm), TP_STRUCT__entry( - __field(unsigned long, mmu_valid_gen) + __field(__u8, mmu_valid_gen) __field(unsigned int, mmu_used_pages) ), @@ -297,7 +297,7 @@ TRACE_EVENT( __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; ), - TP_printk("kvm-mmu-valid-gen %lx used_pages %x", + TP_printk("kvm-mmu-valid-gen %u used_pages %x", __entry->mmu_valid_gen, __entry->mmu_used_pages ) ); From 9a5c034c9abaef81ad9df0221638785a088942b5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:12 -0700 Subject: [PATCH 245/345] KVM: x86/mmu: Skip invalid pages during zapping iff root_count is zero Do not skip invalid shadow pages when zapping obsolete pages if the pages' root_count has reached zero, in which case the page can be immediately zapped and freed. Update the comment accordingly. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5f0864000360..5269aa057dfa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5693,11 +5693,12 @@ restart: break; /* - * Since we are reversely walking the list and the invalid - * list will be moved to the head, skip the invalid page - * can help us to avoid the infinity list walking. + * Skip invalid pages with a non-zero root count, zapping pages + * with a non-zero root count will never succeed, i.e. the page + * will get thrown back on active_mmu_pages and we'll get stuck + * in an infinite loop. */ - if (sp->role.invalid) + if (sp->role.invalid && sp->root_count) continue; /* From daa5de5415849b9a53056ec1e1e88fe4c5c9aa2b Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 24 Sep 2019 20:53:34 +0800 Subject: [PATCH 246/345] io_uring: compare cached_cq_tail with cq.head in_io_uring_poll After 75b28af("io_uring: allocate the two rings together"), we compare sq.head with cached_cq_tail to determine does there any cq invalid. Actually, we should use cq.head. Fixes: 75b28affdd6a ("io_uring: allocate the two rings together") Signed-off-by: yangerkun Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca7570aca430..9b84232e5cc4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3455,7 +3455,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head != ctx->rings->sq_ring_entries) mask |= EPOLLOUT | EPOLLWRNORM; - if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail) + if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail) mask |= EPOLLIN | EPOLLRDNORM; return mask; From a06dcd625d6181747fac7f4c140b5a4c397a778c Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 12 Sep 2019 09:55:03 -0700 Subject: [PATCH 247/345] kvm: x86: Add "significant index" flag to a few CPUID leaves According to the Intel SDM, volume 2, "CPUID," the index is significant (or partially significant) for CPUID leaves 0FH, 10H, 12H, 17H, 18H, and 1FH. Add the corresponding flag to these CPUID leaves in do_host_cpuid(). Signed-off-by: Jim Mattson Reviewed-by: Peter Shier Reviewed-by: Steve Rutherford Fixes: a87f2d3a6eadab ("KVM: x86: Add Intel CPUID.1F cpuid emulation support") Reviewed-by: Krish Sadhukhan Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4e5a835989d1..63316036f85a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function, case 7: case 0xb: case 0xd: + case 0xf: + case 0x10: + case 0x12: case 0x14: + case 0x17: + case 0x18: + case 0x1f: case 0x8000001d: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; break; From 0cb8410b90e78948984f35f2c4d50c2c0b7ee675 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 19 Sep 2019 15:59:17 -0700 Subject: [PATCH 248/345] kvm: svm: Intercept RDPRU The RDPRU instruction gives the guest read access to the IA32_APERF MSR and the IA32_MPERF MSR. According to volume 3 of the APM, "When virtualization is enabled, this instruction can be intercepted by the Hypervisor. The intercept bit is at VMCB byte offset 10h, bit 14." Since we don't enumerate the instruction in KVM_SUPPORTED_CPUID, intercept it and synthesize #UD. Signed-off-by: Jim Mattson Reviewed-by: Drew Schmitt Reviewed-by: Jacob Xu Reviewed-by: Peter Shier Reviewed-by: Krish Sadhukhan Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 1 + arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index dec9c1e84c78..6ece8561ba66 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -52,6 +52,7 @@ enum { INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, + INTERCEPT_RDPRU, }; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..2e8a30f06c74 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -75,6 +75,7 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce75296b1b10..f8ecb6df5106 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1540,6 +1540,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RDPRU); set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { @@ -3832,6 +3833,12 @@ static int xsetbv_interception(struct vcpu_svm *svm) return 1; } +static int rdpru_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; @@ -4783,6 +4790,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, + [SVM_EXIT_RDPRU] = rdpru_interception, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, From f0b5105af6e0ca781fcc9aaf9277635ea7df2d36 Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Tue, 17 Sep 2019 11:50:57 -0700 Subject: [PATCH 249/345] kvm: nvmx: limit atomic switch MSRs Allowing an unlimited number of MSRs to be specified via the VMX load/store MSR lists (e.g., vm-entry MSR load list) is bad for two reasons. First, a guest can specify an unreasonable number of MSRs, forcing KVM to process all of them in software. Second, the SDM bounds the number of MSRs allowed to be packed into the atomic switch MSR lists. Quoting the "Miscellaneous Data" section in the "VMX Capability Reporting Facility" appendix: "Bits 27:25 is used to compute the recommended maximum number of MSRs that should appear in the VM-exit MSR-store list, the VM-exit MSR-load list, or the VM-entry MSR-load list. Specifically, if the value bits 27:25 of IA32_VMX_MISC is N, then 512 * (N + 1) is the recommended maximum number of MSRs to be included in each list. If the limit is exceeded, undefined processor behavior may result (including a machine check during the VMX transition)." Because KVM needs to protect itself and can't model "undefined processor behavior", arbitrarily force a VM-entry to fail due to MSR loading when the MSR load list is too large. Similarly, trigger an abort during a VM exit that encounters an MSR load list or MSR store list that is too large. The MSR list size is intentionally not pre-checked so as to maintain compatibility with hardware inasmuch as possible. Test these new checks with the kvm-unit-test "x86: nvmx: test max atomic switch MSRs". Suggested-by: Jim Mattson Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Marc Orr Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 43 ++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fdad81626829..1835767aa335 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -111,6 +111,7 @@ #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 #define VMX_MISC_ZERO_LEN_INS 0x40000000 +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 77548ef1ad3b..70d59d9304f2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -198,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); } +static inline bool vmx_control_verify(u32 control, u32 low, u32 high) +{ + return fixed_bits_valid(control, low, high); +} + +static inline u64 vmx_control_msr(u32 low, u32 high) +{ + return low | ((u64)high << 32); +} + static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); @@ -866,16 +876,34 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, return 0; } +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); + + return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; +} + /* * Load guest's/host's msr at nested entry/exit. * return 0 for success, entry index for failure. + * + * One of the failure modes for MSR load/store is when a list exceeds the + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch + * as possible, process all valid entries before failing rather than precheck + * for a capacity violation. */ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + goto fail; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, sizeof(e))) { pr_debug_ratelimited( @@ -906,8 +934,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) u64 data; u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + return -EINVAL; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, 2 * sizeof(u32))) { @@ -1013,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; } - -static inline bool vmx_control_verify(u32 control, u32 low, u32 high) -{ - return fixed_bits_valid(control, low, high); -} - -static inline u64 vmx_control_msr(u32 low, u32 high) -{ - return low | ((u64)high << 32); -} - static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) { superset &= mask; From 697d7150502e3b4f6eb536b42ef189e897d94914 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 23 Sep 2019 15:56:25 -0500 Subject: [PATCH 250/345] drm/amdgpu/display: include slab.h in dcn21_resource.c It's apparently needed in some configurations. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 3ca5139f1273..de182185fe1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -23,6 +23,8 @@ * */ +#include + #include "dm_services.h" #include "dc.h" From 1e94b43813a2757f6ee471584ca07cdbc1a51db9 Mon Sep 17 00:00:00 2001 From: "Tianci.Yin" Date: Thu, 12 Sep 2019 17:40:22 +0800 Subject: [PATCH 251/345] drm/amdgpu/gfx10: add support for wks firmware loading load different cp firmware according to the DID and RID Reviewed-by: Feifei Xu Signed-off-by: Tianci.Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index db28823891ac..638c821611ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -70,6 +70,11 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin"); MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi14_me.bin"); @@ -594,7 +599,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; + char wks[10]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -607,12 +613,16 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); + memset(wks, 0, sizeof(wks)); switch (adev->asic_type) { case CHIP_NAVI10: chip_name = "navi10"; break; case CHIP_NAVI14: chip_name = "navi14"; + if (!(adev->pdev->device == 0x7340 && + adev->pdev->revision != 0x00)) + snprintf(wks, sizeof(wks), "_wks"); break; case CHIP_NAVI12: chip_name = "navi12"; @@ -621,7 +631,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); if (err) goto out; @@ -632,7 +642,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); if (err) goto out; @@ -643,7 +653,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); if (err) goto out; @@ -708,7 +718,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (adev->gfx.rlc.is_rlc_v2_1) gfx_v10_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -719,7 +729,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); From 61aa258ab1a50b834267f5df6cabef0d10f8955a Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 20 Sep 2019 06:49:03 +0800 Subject: [PATCH 252/345] pwm: mediatek: Remove the has_clks field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use fixed clocks to repair mt7628 PWM during configure from userspace. The SoC is legacy MIPS and has no complex clock tree. Because we can get the clock frequency for period calculation from fixed clocks specified in DT, we can remove the has_clock field, and directly use devm_clk_get() and clk_get_rate(). Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 459b2ccd0b75..9394735b0762 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -57,7 +57,6 @@ static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = { struct mtk_pwm_platform_data { unsigned int num_pwms; bool pwm45_fixup; - bool has_clks; }; /** @@ -87,9 +86,6 @@ static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm) struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); int ret; - if (!pc->soc->has_clks) - return 0; - ret = clk_prepare_enable(pc->clks[MTK_CLK_TOP]); if (ret < 0) return ret; @@ -116,9 +112,6 @@ static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); - if (!pc->soc->has_clks) - return; - clk_disable_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]); clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]); clk_disable_unprepare(pc->clks[MTK_CLK_TOP]); @@ -242,7 +235,7 @@ static int mtk_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->regs)) return PTR_ERR(pc->regs); - for (i = 0; i < pc->soc->num_pwms + 2 && pc->soc->has_clks; i++) { + for (i = 0; i < pc->soc->num_pwms + 2; i++) { pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]); if (IS_ERR(pc->clks[i])) { dev_err(&pdev->dev, "clock: %s fail: %ld\n", @@ -277,31 +270,26 @@ static int mtk_pwm_remove(struct platform_device *pdev) static const struct mtk_pwm_platform_data mt2712_pwm_data = { .num_pwms = 8, .pwm45_fixup = false, - .has_clks = true, }; static const struct mtk_pwm_platform_data mt7622_pwm_data = { .num_pwms = 6, .pwm45_fixup = false, - .has_clks = true, }; static const struct mtk_pwm_platform_data mt7623_pwm_data = { .num_pwms = 5, .pwm45_fixup = true, - .has_clks = true, }; static const struct mtk_pwm_platform_data mt7628_pwm_data = { .num_pwms = 4, .pwm45_fixup = true, - .has_clks = false, }; static const struct mtk_pwm_platform_data mt8516_pwm_data = { .num_pwms = 5, .pwm45_fixup = false, - .has_clks = true, }; static const struct of_device_id mtk_pwm_of_match[] = { From efecdeb82f21d4100566ce85bda2d7ffb9c9edff Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 20 Sep 2019 06:49:04 +0800 Subject: [PATCH 253/345] pwm: mediatek: Allocate the clks array dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using fixed size of arrays, allocate the memory for them based on the number of PWMs specified for each SoC generation. Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 79 +++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 9394735b0762..db986b77e556 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -35,25 +35,6 @@ #define PWM_CLK_DIV_MAX 7 -enum { - MTK_CLK_MAIN = 0, - MTK_CLK_TOP, - MTK_CLK_PWM1, - MTK_CLK_PWM2, - MTK_CLK_PWM3, - MTK_CLK_PWM4, - MTK_CLK_PWM5, - MTK_CLK_PWM6, - MTK_CLK_PWM7, - MTK_CLK_PWM8, - MTK_CLK_MAX, -}; - -static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = { - "main", "top", "pwm1", "pwm2", "pwm3", "pwm4", "pwm5", "pwm6", "pwm7", - "pwm8" -}; - struct mtk_pwm_platform_data { unsigned int num_pwms; bool pwm45_fixup; @@ -63,12 +44,17 @@ struct mtk_pwm_platform_data { * struct mtk_pwm_chip - struct representing PWM chip * @chip: linux PWM chip representation * @regs: base address of PWM chip - * @clks: list of clocks + * @clk_top: the top clock generator + * @clk_main: the clock used by PWM core + * @clk_pwms: the clock used by each PWM channel + * @clk_freq: the fix clock frequency of legacy MIPS SoC */ struct mtk_pwm_chip { struct pwm_chip chip; void __iomem *regs; - struct clk *clks[MTK_CLK_MAX]; + struct clk *clk_top; + struct clk *clk_main; + struct clk **clk_pwms; const struct mtk_pwm_platform_data *soc; }; @@ -86,24 +72,24 @@ static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm) struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); int ret; - ret = clk_prepare_enable(pc->clks[MTK_CLK_TOP]); + ret = clk_prepare_enable(pc->clk_top); if (ret < 0) return ret; - ret = clk_prepare_enable(pc->clks[MTK_CLK_MAIN]); + ret = clk_prepare_enable(pc->clk_main); if (ret < 0) goto disable_clk_top; - ret = clk_prepare_enable(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]); + ret = clk_prepare_enable(pc->clk_pwms[pwm->hwpwm]); if (ret < 0) goto disable_clk_main; return 0; disable_clk_main: - clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]); + clk_disable_unprepare(pc->clk_main); disable_clk_top: - clk_disable_unprepare(pc->clks[MTK_CLK_TOP]); + clk_disable_unprepare(pc->clk_top); return ret; } @@ -112,9 +98,9 @@ static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); - clk_disable_unprepare(pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]); - clk_disable_unprepare(pc->clks[MTK_CLK_MAIN]); - clk_disable_unprepare(pc->clks[MTK_CLK_TOP]); + clk_disable_unprepare(pc->clk_pwms[pwm->hwpwm]); + clk_disable_unprepare(pc->clk_main); + clk_disable_unprepare(pc->clk_top); } static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num, @@ -134,7 +120,7 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); - struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm]; + struct clk *clk = pc->clk_pwms[pwm->hwpwm]; u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH, reg_thres = PWMTHRES; u64 resolution; @@ -235,12 +221,35 @@ static int mtk_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->regs)) return PTR_ERR(pc->regs); - for (i = 0; i < pc->soc->num_pwms + 2; i++) { - pc->clks[i] = devm_clk_get(&pdev->dev, mtk_pwm_clk_name[i]); - if (IS_ERR(pc->clks[i])) { + pc->clk_pwms = devm_kcalloc(&pdev->dev, pc->soc->num_pwms, + sizeof(*pc->clk_pwms), GFP_KERNEL); + if (!pc->clk_pwms) + return -ENOMEM; + + pc->clk_top = devm_clk_get(&pdev->dev, "top"); + if (IS_ERR(pc->clk_top)) { + dev_err(&pdev->dev, "clock: top fail: %ld\n", + PTR_ERR(pc->clk_top)); + return PTR_ERR(pc->clk_top); + } + + pc->clk_main = devm_clk_get(&pdev->dev, "main"); + if (IS_ERR(pc->clk_main)) { + dev_err(&pdev->dev, "clock: main fail: %ld\n", + PTR_ERR(pc->clk_main)); + return PTR_ERR(pc->clk_main); + } + + for (i = 0; i < pc->soc->num_pwms; i++) { + char name[8]; + + snprintf(name, sizeof(name), "pwm%d", i + 1); + + pc->clk_pwms[i] = devm_clk_get(&pdev->dev, name); + if (IS_ERR(pc->clk_pwms[i])) { dev_err(&pdev->dev, "clock: %s fail: %ld\n", - mtk_pwm_clk_name[i], PTR_ERR(pc->clks[i])); - return PTR_ERR(pc->clks[i]); + name, PTR_ERR(pc->clk_pwms[i])); + return PTR_ERR(pc->clk_pwms[i]); } } From 2503781c97fa66f1ee7ffac21f8c5c330b82b5eb Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 20 Sep 2019 06:49:05 +0800 Subject: [PATCH 254/345] pwm: mediatek: Use pwm_mediatek as common prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use pwm_mediatek as common prefix to match the filename. No functional change intended. Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 117 +++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index db986b77e556..6b9a5857b5b6 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -35,13 +35,13 @@ #define PWM_CLK_DIV_MAX 7 -struct mtk_pwm_platform_data { +struct pwm_mediatek_of_data { unsigned int num_pwms; bool pwm45_fixup; }; /** - * struct mtk_pwm_chip - struct representing PWM chip + * struct pwm_mediatek_chip - struct representing PWM chip * @chip: linux PWM chip representation * @regs: base address of PWM chip * @clk_top: the top clock generator @@ -49,27 +49,29 @@ struct mtk_pwm_platform_data { * @clk_pwms: the clock used by each PWM channel * @clk_freq: the fix clock frequency of legacy MIPS SoC */ -struct mtk_pwm_chip { +struct pwm_mediatek_chip { struct pwm_chip chip; void __iomem *regs; struct clk *clk_top; struct clk *clk_main; struct clk **clk_pwms; - const struct mtk_pwm_platform_data *soc; + const struct pwm_mediatek_of_data *soc; }; -static const unsigned int mtk_pwm_reg_offset[] = { +static const unsigned int pwm_mediatek_reg_offset[] = { 0x0010, 0x0050, 0x0090, 0x00d0, 0x0110, 0x0150, 0x0190, 0x0220 }; -static inline struct mtk_pwm_chip *to_mtk_pwm_chip(struct pwm_chip *chip) +static inline struct pwm_mediatek_chip * +to_pwm_mediatek_chip(struct pwm_chip *chip) { - return container_of(chip, struct mtk_pwm_chip, chip); + return container_of(chip, struct pwm_mediatek_chip, chip); } -static int mtk_pwm_clk_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int pwm_mediatek_clk_enable(struct pwm_chip *chip, + struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); int ret; ret = clk_prepare_enable(pc->clk_top); @@ -94,45 +96,46 @@ disable_clk_top: return ret; } -static void mtk_pwm_clk_disable(struct pwm_chip *chip, struct pwm_device *pwm) +static void pwm_mediatek_clk_disable(struct pwm_chip *chip, + struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); clk_disable_unprepare(pc->clk_pwms[pwm->hwpwm]); clk_disable_unprepare(pc->clk_main); clk_disable_unprepare(pc->clk_top); } -static inline u32 mtk_pwm_readl(struct mtk_pwm_chip *chip, unsigned int num, - unsigned int offset) +static inline u32 pwm_mediatek_readl(struct pwm_mediatek_chip *chip, + unsigned int num, unsigned int offset) { - return readl(chip->regs + mtk_pwm_reg_offset[num] + offset); + return readl(chip->regs + pwm_mediatek_reg_offset[num] + offset); } -static inline void mtk_pwm_writel(struct mtk_pwm_chip *chip, - unsigned int num, unsigned int offset, - u32 value) +static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip, + unsigned int num, unsigned int offset, + u32 value) { - writel(value, chip->regs + mtk_pwm_reg_offset[num] + offset); + writel(value, chip->regs + pwm_mediatek_reg_offset[num] + offset); } -static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) +static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); - struct clk *clk = pc->clk_pwms[pwm->hwpwm]; + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH, reg_thres = PWMTHRES; u64 resolution; int ret; - ret = mtk_pwm_clk_enable(chip, pwm); + ret = pwm_mediatek_clk_enable(chip, pwm); + if (ret < 0) return ret; /* Using resolution in picosecond gets accuracy higher */ resolution = (u64)NSEC_PER_SEC * 1000; - do_div(resolution, clk_get_rate(clk)); + do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm])); cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution); while (cnt_period > 8191) { @@ -143,7 +146,7 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); dev_err(chip->dev, "period %d not supported\n", period_ns); return -EINVAL; } @@ -158,22 +161,22 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, } cnt_duty = DIV_ROUND_CLOSEST_ULL((u64)duty_ns * 1000, resolution); - mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv); - mtk_pwm_writel(pc, pwm->hwpwm, reg_width, cnt_period); - mtk_pwm_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); + pwm_mediatek_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv); + pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); + pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); return 0; } -static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 value; int ret; - ret = mtk_pwm_clk_enable(chip, pwm); + ret = pwm_mediatek_clk_enable(chip, pwm); if (ret < 0) return ret; @@ -184,28 +187,28 @@ static int mtk_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) return 0; } -static void mtk_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) +static void pwm_mediatek_disable(struct pwm_chip *chip, struct pwm_device *pwm) { - struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip); + struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 value; value = readl(pc->regs); value &= ~BIT(pwm->hwpwm); writel(value, pc->regs); - mtk_pwm_clk_disable(chip, pwm); + pwm_mediatek_clk_disable(chip, pwm); } -static const struct pwm_ops mtk_pwm_ops = { - .config = mtk_pwm_config, - .enable = mtk_pwm_enable, - .disable = mtk_pwm_disable, +static const struct pwm_ops pwm_mediatek_ops = { + .config = pwm_mediatek_config, + .enable = pwm_mediatek_enable, + .disable = pwm_mediatek_disable, .owner = THIS_MODULE, }; -static int mtk_pwm_probe(struct platform_device *pdev) +static int pwm_mediatek_probe(struct platform_device *pdev) { - struct mtk_pwm_chip *pc; + struct pwm_mediatek_chip *pc; struct resource *res; unsigned int i; int ret; @@ -256,7 +259,7 @@ static int mtk_pwm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pc); pc->chip.dev = &pdev->dev; - pc->chip.ops = &mtk_pwm_ops; + pc->chip.ops = &pwm_mediatek_ops; pc->chip.base = -1; pc->chip.npwm = pc->soc->num_pwms; @@ -269,39 +272,39 @@ static int mtk_pwm_probe(struct platform_device *pdev) return 0; } -static int mtk_pwm_remove(struct platform_device *pdev) +static int pwm_mediatek_remove(struct platform_device *pdev) { - struct mtk_pwm_chip *pc = platform_get_drvdata(pdev); + struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev); return pwmchip_remove(&pc->chip); } -static const struct mtk_pwm_platform_data mt2712_pwm_data = { +static const struct pwm_mediatek_of_data mt2712_pwm_data = { .num_pwms = 8, .pwm45_fixup = false, }; -static const struct mtk_pwm_platform_data mt7622_pwm_data = { +static const struct pwm_mediatek_of_data mt7622_pwm_data = { .num_pwms = 6, .pwm45_fixup = false, }; -static const struct mtk_pwm_platform_data mt7623_pwm_data = { +static const struct pwm_mediatek_of_data mt7623_pwm_data = { .num_pwms = 5, .pwm45_fixup = true, }; -static const struct mtk_pwm_platform_data mt7628_pwm_data = { +static const struct pwm_mediatek_of_data mt7628_pwm_data = { .num_pwms = 4, .pwm45_fixup = true, }; -static const struct mtk_pwm_platform_data mt8516_pwm_data = { +static const struct pwm_mediatek_of_data mt8516_pwm_data = { .num_pwms = 5, .pwm45_fixup = false, }; -static const struct of_device_id mtk_pwm_of_match[] = { +static const struct of_device_id pwm_mediatek_of_match[] = { { .compatible = "mediatek,mt2712-pwm", .data = &mt2712_pwm_data }, { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data }, { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data }, @@ -309,17 +312,17 @@ static const struct of_device_id mtk_pwm_of_match[] = { { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data }, { }, }; -MODULE_DEVICE_TABLE(of, mtk_pwm_of_match); +MODULE_DEVICE_TABLE(of, pwm_mediatek_of_match); -static struct platform_driver mtk_pwm_driver = { +static struct platform_driver pwm_mediatek_driver = { .driver = { - .name = "mtk-pwm", - .of_match_table = mtk_pwm_of_match, + .name = "pwm-mediatek", + .of_match_table = pwm_mediatek_of_match, }, - .probe = mtk_pwm_probe, - .remove = mtk_pwm_remove, + .probe = pwm_mediatek_probe, + .remove = pwm_mediatek_remove, }; -module_platform_driver(mtk_pwm_driver); +module_platform_driver(pwm_mediatek_driver); MODULE_AUTHOR("John Crispin "); MODULE_LICENSE("GPL"); From 4bea6dd5be7e2ab8c2368ee9724ef88793b2421b Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Fri, 20 Sep 2019 06:49:06 +0800 Subject: [PATCH 255/345] pwm: mediatek: Update license and switch to SPDX tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SPDX identifiers to pwm-mediatek.c. Update MODULE_LICENSE to correctly reflect the GNU General Public License v2.0. Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Reviewed-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 6b9a5857b5b6..7782fadbc116 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Mediatek Pulse Width Modulator driver + * MediaTek Pulse Width Modulator driver * * Copyright (C) 2015 John Crispin * Copyright (C) 2017 Zhi Mao * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #include @@ -325,4 +323,4 @@ static struct platform_driver pwm_mediatek_driver = { module_platform_driver(pwm_mediatek_driver); MODULE_AUTHOR("John Crispin "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); From 1c00ad6ebf36aa3b0fa598a48b8ae59782be4121 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 20 Sep 2019 06:49:10 +0800 Subject: [PATCH 256/345] dt-bindings: pwm: Update bindings for MT7629 SoC This updates bindings for MT7629 PWM controller. Signed-off-by: Ryder Lee Signed-off-by: Sam Shih Reviewed-by: Rob Herring Reviewed-by: Matthias Brugger Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/pwm/pwm-mediatek.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt index 9152bf5afe56..c8501530173c 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt @@ -6,6 +6,7 @@ Required properties: - "mediatek,mt7622-pwm": found on mt7622 SoC. - "mediatek,mt7623-pwm": found on mt7623 SoC. - "mediatek,mt7628-pwm": found on mt7628 SoC. + - "mediatek,mt7629-pwm", "mediatek,mt7622-pwm": found on mt7629 SoC. - "mediatek,mt8516-pwm": found on mt8516 SoC. - reg: physical base address and length of the controller's registers. - #pwm-cells: must be 2. See pwm.txt in this directory for a description of From 89340d0935c9296c7b8222b6eab30e67cb57ab82 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 9 Sep 2019 09:40:28 +0800 Subject: [PATCH 257/345] Revert "locking/pvqspinlock: Don't wait if vCPU is preempted" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch reverts commit 75437bb304b20 (locking/pvqspinlock: Don't wait if vCPU is preempted). A large performance regression was caused by this commit. on over-subscription scenarios. The test was run on a Xeon Skylake box, 2 sockets, 40 cores, 80 threads, with three VMs of 80 vCPUs each. The score of ebizzy -M is reduced from 13000-14000 records/s to 1700-1800 records/s: Host Guest score vanilla w/o kvm optimizations upstream 1700-1800 records/s vanilla w/o kvm optimizations revert 13000-14000 records/s vanilla w/ kvm optimizations upstream 4500-5000 records/s vanilla w/ kvm optimizations revert 14000-15500 records/s Exit from aggressive wait-early mechanism can result in premature yield and extra scheduling latency. Actually, only 6% of wait_early events are caused by vcpu_is_preempted() being true. However, when one vCPU voluntarily releases its vCPU, all the subsequently waiters in the queue will do the same and the cascading effect leads to bad performance. kvm optimizations: [1] commit d73eb57b80b (KVM: Boost vCPUs that are delivering interrupts) [2] commit 266e85a5ec9 (KVM: X86: Boost queue head vCPU to mitigate lock waiter preemption) Tested-by: loobinliu@tencent.com Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Waiman Long Cc: Paolo Bonzini Cc: Radim Krčmář Cc: loobinliu@tencent.com Cc: stable@vger.kernel.org Fixes: 75437bb304b20 (locking/pvqspinlock: Don't wait if vCPU is preempted) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 89bab079e7a4..e84d21aa0722 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu); + return READ_ONCE(prev->state) != vcpu_running; } /* From f8d7ab2bded897607bff6324d5c6ea6b4aecca0c Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 24 Sep 2019 17:19:06 +0530 Subject: [PATCH 258/345] tracing/probe: Fix same probe event argument matching Commit fe60b0ce8e73 ("tracing/probe: Reject exactly same probe event") tries to reject a event which matches an already existing probe. However it currently continues to match arguments and rejects adding a probe even when the arguments don't match. Fix this by only rejecting a probe if and only if all the arguments match. Link: http://lkml.kernel.org/r/20190924114906.14038-1-srikar@linux.vnet.ibm.com Fixes: fe60b0ce8e73 ("tracing/probe: Reject exactly same probe event") Acked-by: Masami Hiramatsu Signed-off-by: Srikar Dronamraju Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 5 +++-- kernel/trace/trace_uprobe.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a6697e28ddda..402dc3ce88d3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -549,10 +549,11 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig, for (i = 0; i < orig->tp.nr_args; i++) { if (strcmp(orig->tp.args[i].comm, comp->tp.args[i].comm)) - continue; + break; } - return true; + if (i == orig->tp.nr_args) + return true; } return false; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 34dd6d0016a3..dd884341f5c5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -431,10 +431,11 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, for (i = 0; i < orig->tp.nr_args; i++) { if (strcmp(orig->tp.args[i].comm, comp->tp.args[i].comm)) - continue; + break; } - return true; + if (i == orig->tp.nr_args) + return true; } return false; From faef87494139cf2cc4d188d5730251ade9b2022d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 19 Sep 2019 15:43:02 -0500 Subject: [PATCH 259/345] perf vendor events amd: Add L3 cache events for Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow users to symbolically specify L3 events for Family 17h processors using the existing AMD Uncore driver. Source of events descriptions are from section 2.1.15.4.1 "L3 Cache PMC Events" of the latest Family 17h PPR, available here: https://www.amd.com/system/files/TechDocs/55570-B1_PUB.zip Opnly BriefDescriptions added, since they show with and without the -v and --details flags. Tested with: # perf stat -e l3_request_g1.caching_l3_cache_accesses,amd_l3/event=0x01,umask=0x80/,l3_comb_clstr_state.request_miss,amd_l3/event=0x06,umask=0x01/ perf bench mem memcpy -s 4mb -l 100 -f default ... 7,006,831 l3_request_g1.caching_l3_cache_accesses 7,006,830 amd_l3/event=0x01,umask=0x80/ 366,530 l3_comb_clstr_state.request_miss 366,568 amd_l3/event=0x06,umask=0x01/ Signed-off-by: Kim Phillips Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Janakarajan Natarajan Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Luke Mujica Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190919204306.12598-1-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/amdfam17h/cache.json | 42 +++++++++++++++++++ tools/perf/pmu-events/jevents.c | 1 + 2 files changed, 43 insertions(+) diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json index fad4af9142cb..6221a840fcea 100644 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json @@ -283,5 +283,47 @@ "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "UMask": "0x00", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", + "EventCode": "0x9a", + "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", + "UMask": "0x3f", + "Unit": "L3PMC" } ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d413761621b0..9e37287da924 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -239,6 +239,7 @@ static struct map { { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, + { "L3PMC", "amd_l3" }, {} }; From 0c03d3aa255b5d3a7b64051a79f6e9f487194a9f Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 19 Sep 2019 15:43:03 -0500 Subject: [PATCH 260/345] perf vendor events amd: Remove redundant '[' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the redundant '['. 'perf list' output before: ex_ret_brn [[Retired Branch Instructions] 'perf list' output after: ex_ret_brn [Retired Branch Instructions] Fixes: 98c07a8f74f8 ("perf vendor events amd: perf PMU events for AMD Family 17h") Signed-off-by: Kim Phillips Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Janakarajan Natarajan Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Luke Mujica Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190919204306.12598-2-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdfam17h/core.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json index 7b285b0a7f35..1079544eeed5 100644 --- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json @@ -13,7 +13,7 @@ { "EventName": "ex_ret_brn", "EventCode": "0xc2", - "BriefDescription": "[Retired Branch Instructions.", + "BriefDescription": "Retired Branch Instructions.", "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." }, { From 93125562ce385fc0322e010003767d656677bdef Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 19 Sep 2019 15:43:04 -0500 Subject: [PATCH 261/345] perf vendor events: Minor fixes to the README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some grammatical fixes, and updates to some path references that have since changed. Signed-off-by: Kim Phillips Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Janakarajan Natarajan Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Luke Mujica Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190919204306.12598-3-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/README | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index e62b09b6a844..de7efa2cebd1 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -30,9 +30,9 @@ the topic. Eg: "Floating-point.json". All the topic JSON files for a CPU model/family should be in a separate sub directory. Thus for the Silvermont X86 CPU: - $ ls tools/perf/pmu-events/arch/x86/Silvermont_core - Cache.json Memory.json Virtual-Memory.json - Frontend.json Pipeline.json + $ ls tools/perf/pmu-events/arch/x86/silvermont + cache.json memory.json virtual-memory.json + frontend.json pipeline.json The JSONs folder for a CPU model/family may be placed in the root arch folder, or may be placed in a vendor sub-folder under the arch folder @@ -94,7 +94,7 @@ users to specify events by their name: where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. -However some errors in processing may cause the perf build to fail. +However some errors in processing may cause the alias build to fail. Mapfile format =============== @@ -119,7 +119,7 @@ where: Header line The header line is the first line in the file, which is - always _IGNORED_. It can empty. + always _IGNORED_. It can be empty. CPUID: CPUID is an arch-specific char string, that can be used @@ -138,15 +138,15 @@ where: files, relative to the directory containing the mapfile.csv Type: - indicates whether the events or "core" or "uncore" events. + indicates whether the events are "core" or "uncore" events. Eg: - $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv - GenuineIntel-6-37,V13,Silvermont_core,core - GenuineIntel-6-4D,V13,Silvermont_core,core - GenuineIntel-6-4C,V13,Silvermont_core,core + $ grep silvermont tools/perf/pmu-events/arch/x86/mapfile.csv + GenuineIntel-6-37,v13,silvermont,core + GenuineIntel-6-4D,v13,silvermont,core + GenuineIntel-6-4C,v13,silvermont,core i.e the three CPU models use the JSON files (i.e PMU events) listed - in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'. + in the directory 'tools/perf/pmu-events/arch/x86/silvermont'. From 8788d36950dae26074dc63b46eab3debb4b210b4 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 19 Sep 2019 15:43:05 -0500 Subject: [PATCH 262/345] perf list: Allow plurals for metric, metricgroup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance usability by allowing the same plurality used in the output title, for the command line parameter. BEFORE, perf deceitfully acts as if there are no metrics to be had: $ perf list metrics List of pre-defined events (to be used in -e): Metric Groups: $ But singular 'metric' shows a list of metrics: $ perf list metric List of pre-defined events (to be used in -e): Metrics: IPC [Instructions Per Cycle (per logical thread)] UPI [Uops Per Instruction] AFTER, when asking for 'metrics', we actually see the metrics get listed: $ perf list metrics List of pre-defined events (to be used in -e): Metrics: IPC [Instructions Per Cycle (per logical thread)] UPI [Uops Per Instruction] Fixes: 71b0acce78d1 ("perf list: Add metric groups to perf list") Signed-off-by: Kim Phillips Reviewed-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Janakarajan Natarajan Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Luke Mujica Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190919204306.12598-4-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index e290f6b348d8..08e62ae9d37e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -81,9 +81,9 @@ int cmd_list(int argc, const char **argv) long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); - else if (strcmp(argv[i], "metric") == 0) + else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) metricgroup__print(true, false, NULL, raw_dump, details_flag); - else if (strcmp(argv[i], "metricgroup") == 0) + else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; From 5c8da72dc21eff32deb00638e547e2c247ebbfb0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 19 Sep 2019 16:51:19 -0400 Subject: [PATCH 263/345] libtraceevent: Round up in tep_print_event() time precision When testing the output of the old trace-cmd compared to the one that uses the updated tep_print_event() logic, it was different in that the time stamp precision in the old format would round up to the nearest precision, where as the new logic truncates. Bring back the old method of rounding up. Signed-off-by: Steven Rostedt (VMware) Cc: Jiri Olsa Cc: Tzvetomir Stoyanov Cc: linux trace devel Link: http://lore.kernel.org/lkml/20190919165119.5efa5de6@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 6f842af4550b..d948475585ce 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5527,8 +5527,10 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s, if (divstr && isdigit(*(divstr + 1))) div = atoi(divstr + 1); time = record->ts; - if (div) + if (div) { + time += div / 2; time /= div; + } pr = prec; while (pr--) p10 *= 10; From 4ab91deacc9b8611fdae9055f926c8f99dde8411 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Thu, 19 Sep 2019 17:23:36 -0400 Subject: [PATCH 264/345] libtraceevent: Man pages for libtraceevent event print related API Added new man page, describing tep_print_event() libtraceevent API. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/linux-trace-devel/20190801075012.22098-1-tz.stoyanov@gmail.com Link: http://lore.kernel.org/lkml/20190919212541.553160178@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- .../libtraceevent-event_print.txt | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_print.txt diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt new file mode 100644 index 000000000000..2c6a61811118 --- /dev/null +++ b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt @@ -0,0 +1,130 @@ +libtraceevent(3) +================ + +NAME +---- +tep_print_event - Writes event information into a trace sequence. + +SYNOPSIS +-------- +[verse] +-- +*#include * +*#include * + +void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._) +-- + +DESCRIPTION +----------- + +The _tep_print_event()_ function parses the event information of the given +_record_ and writes it into the trace sequence _s_, according to the format +string _fmt_. The desired information is specified after the format string. +The _fmt_ is printf-like format string, following arguments are supported: +[verse] +-- + TEP_PRINT_PID, "%d" - PID of the event. + TEP_PRINT_CPU, "%d" - Event CPU. + TEP_PRINT_COMM, "%s" - Event command string. + TEP_PRINT_NAME, "%s" - Event name. + TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more + fields - interrupt state, scheduling state, + current context, and preemption count. + Field 1 is the interrupt enabled state: + d : Interrupts are disabled + . : Interrupts are enabled + X : The architecture does not support this + information + Field 2 is the "need resched" state. + N : The task is set to call the scheduler when + possible, as another higher priority task + may need to be scheduled in. + . : The task is not set to call the scheduler. + Field 3 is the context state. + . : Normal context + s : Soft interrupt context + h : Hard interrupt context + H : Hard interrupt context which triggered + during soft interrupt context. + z : NMI context + Z : NMI context which triggered during hard + interrupt context + Field 4 is the preemption count. + . : The preempt count is zero. + On preemptible kernels (where the task can be scheduled + out in arbitrary locations while in kernel context), the + preempt count, when non zero, will prevent the kernel + from scheduling out the current task. The preempt count + number is displayed when it is not zero. + Depending on the kernel, it may show other fields + (lock depth, or migration disabled, which are unique to + specialized kernels). + TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be + specified as part of this format string: + "%precision.divisord". Example: + "%3.1000d" - divide the time by 1000 and print the first + 3 digits before the dot. Thus, the time stamp + "123456000" will be printed as "123.456" + TEP_PRINT_INFO, "%s" - event information. + TEP_PRINT_INFO_RAW, "%s" - event information, in raw format. + +-- +EXAMPLE +------- +[source,c] +-- +#include +#include +... +struct trace_seq seq; +trace_seq_init(&seq); +struct tep_handle *tep = tep_alloc(); +... +void print_my_event(struct tep_record *record) +{ + trace_seq_reset(&seq); + tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s", + TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU, + TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME, + TEP_PRINT_INFO); +} +... +-- + +FILES +----- +[verse] +-- +*event-parse.h* + Header file to include in order to have access to the library APIs. +*trace-seq.h* + Header file to include in order to have access to trace sequences related APIs. + Trace sequences are used to allow a function to call several other functions + to create a string of data to use. +*-ltraceevent* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtraceevent(3)_, _trace-cmd(1)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* , author of *libtraceevent*. +*Tzvetomir Stoyanov* , author of this man page. +-- +REPORTING BUGS +-------------- +Report bugs to + +LICENSE +------- +libtraceevent is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git From 38f76c31368b7a8b88659c178f9e620b02660a70 Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Thu, 19 Sep 2019 17:23:37 -0400 Subject: [PATCH 265/345] libtraceevent: Man pages fix, rename tep_ref_get() to tep_get_ref() The tep_ref_get() was renamed to tep_get_ref(), to be more consistent with the other tep_ref_* APIs. However, in the man pages the API is still with the old name. The documentation is fixed to reflect the actual name of the API. Signed-off-by: Tzvetomir Stoyanov (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/linux-trace-devel/20190808113636.13299-2-tz.stoyanov@gmail.com Link: http://lore.kernel.org/lkml/20190919212541.697034573@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- .../lib/traceevent/Documentation/libtraceevent-handle.txt | 8 ++++---- tools/lib/traceevent/Documentation/libtraceevent.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt index 8d568316847d..45b20172e262 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt @@ -3,7 +3,7 @@ libtraceevent(3) NAME ---- -tep_alloc, tep_free,tep_ref, tep_unref,tep_ref_get - Create, destroy, manage +tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage references of trace event parser context. SYNOPSIS @@ -16,7 +16,7 @@ struct tep_handle pass:[*]*tep_alloc*(void); void *tep_free*(struct tep_handle pass:[*]_tep_); void *tep_ref*(struct tep_handle pass:[*]_tep_); void *tep_unref*(struct tep_handle pass:[*]_tep_); -int *tep_ref_get*(struct tep_handle pass:[*]_tep_); +int *tep_get_ref*(struct tep_handle pass:[*]_tep_); -- DESCRIPTION @@ -57,9 +57,9 @@ EXAMPLE ... struct tep_handle *tep = tep_alloc(); ... -int ref = tep_ref_get(tep); +int ref = tep_get_ref(tep); tep_ref(tep); -if ( (ref+1) != tep_ref_get(tep)) { +if ( (ref+1) != tep_get_ref(tep)) { /* Something wrong happened, the counter is not incremented by 1 */ } tep_unref(tep); diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt index fbd977b47de1..00519503c8de 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent.txt @@ -16,7 +16,7 @@ Management of tep handler data structure and access of its members: void *tep_free*(struct tep_handle pass:[*]_tep_); void *tep_ref*(struct tep_handle pass:[*]_tep_); void *tep_unref*(struct tep_handle pass:[*]_tep_); - int *tep_ref_get*(struct tep_handle pass:[*]_tep_); + int *tep_get_ref*(struct tep_handle pass:[*]_tep_); void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_); bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_); From f8d16387fffba1c7c88b5570b09891ba8a26cfd5 Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Thu, 19 Sep 2019 17:23:38 -0400 Subject: [PATCH 266/345] libtraceevent: Man pages fix, changes in event printing APIs APIs for printing various trace event information were redesigned to be more simple. However, the main libtraceevent man page was not updated with those changes. The documentation is updated to describe the new event print API. Signed-off-by: Tzvetomir Stoyanov (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/linux-trace-devel/20190808113636.13299-3-tz.stoyanov@gmail.com Link: http://lore.kernel.org/lkml/20190919212541.869643036@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- .../lib/traceevent/Documentation/libtraceevent.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt index 00519503c8de..d530a7ce8fb2 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent.txt @@ -26,15 +26,12 @@ Management of tep handler data structure and access of its members: void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_); int *tep_get_page_size*(struct tep_handle pass:[*]_tep_); void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_); - bool *tep_is_latency_format*(struct tep_handle pass:[*]_tep_); - void *tep_set_latency_format*(struct tep_handle pass:[*]_tep_, int _lat_); int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_); int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_); bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_); int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_); Register / unregister APIs: - int *tep_register_trace_clock*(struct tep_handle pass:[*]_tep_, const char pass:[*]_trace_clock_); int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_); int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_); int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_); @@ -57,14 +54,7 @@ Event related APIs: int *tep_get_events_count*(struct tep_handle pass:[*]_tep_); struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_); - -Event printing: - void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, bool _use_trace_clock_); - void *tep_print_event_data*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_event_info*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_print_event_task*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_); - void *tep_print_event_time*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]record, bool _use_trace_clock_); - void *tep_set_print_raw*(struct tep_handle pass:[*]_tep_, int _print_raw_); + void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._); Event finding: struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_); @@ -116,7 +106,6 @@ Filter management: int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_); Parsing various data from the records: - void *tep_data_latency_format*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_); int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_); From d69094f364d012f6d0be712969e6a6a355b69e84 Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Thu, 19 Sep 2019 17:23:39 -0400 Subject: [PATCH 267/345] libtraceevent: Add tep_get_event() in event-parse.h The tep_get_event() function is an official libtracevent API, described in the library man pages. However, it cannot be used by the library users because it is not declared in the event-parse.h file, where all libtracevent APIs are. The function declaration is added in event-parse.h file. Signed-off-by: Tzvetomir Stoyanov (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/linux-trace-devel/20190808113721.13539-1-tz.stoyanov@gmail.com Link: http://lore.kernel.org/lkml/20190919212542.058025937@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index d438ee44289f..b77837f75a0d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -441,6 +441,8 @@ int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr); bool tep_is_pid_registered(struct tep_handle *tep, int pid); +struct tep_event *tep_get_event(struct tep_handle *tep, int index); + #define TEP_PRINT_INFO "INFO" #define TEP_PRINT_INFO_RAW "INFO_RAW" #define TEP_PRINT_COMM "COMM" From 077faf3dc7cc13c3bd784613304bf38696b591da Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Thu, 19 Sep 2019 17:23:41 -0400 Subject: [PATCH 268/345] libtraceevent: Move traceevent plugins in its own subdirectory All traceevent plugins code is moved to tools/lib/traceevent/plugins subdirectory. It makes traceevent implementation in trace-cmd and in kernel tree consistent. There is no changes in the way libtraceevent and plugins are compiled and installed. Committer notes: Applied fixup provided by Steven, fixing the tools/perf/Makefile.perf target for the plugin dynamic list file. Problem noticed when cross building to aarch64 from a Ubuntu 19.04 container. Suggested-by: Steven Rostedt (VMware) Signed-off-by: Tzvetomir Stoyanov (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20190923115929.453b68f1@oasis.local.home Link: http://lore.kernel.org/lkml/20190919212542.377333393@goodmis.org Link: http://lore.kernel.org/linux-trace-devel/20190917105055.18983-1-tz.stoyanov@gmail.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Build | 11 - tools/lib/traceevent/Makefile | 94 ++------ tools/lib/traceevent/plugins/Build | 10 + tools/lib/traceevent/plugins/Makefile | 222 ++++++++++++++++++ .../{ => plugins}/plugin_cfg80211.c | 0 .../{ => plugins}/plugin_function.c | 0 .../traceevent/{ => plugins}/plugin_hrtimer.c | 0 .../traceevent/{ => plugins}/plugin_jbd2.c | 0 .../traceevent/{ => plugins}/plugin_kmem.c | 0 .../lib/traceevent/{ => plugins}/plugin_kvm.c | 0 .../{ => plugins}/plugin_mac80211.c | 0 .../{ => plugins}/plugin_sched_switch.c | 0 .../traceevent/{ => plugins}/plugin_scsi.c | 0 .../lib/traceevent/{ => plugins}/plugin_xen.c | 0 tools/perf/Makefile.perf | 4 +- 15 files changed, 250 insertions(+), 91 deletions(-) create mode 100644 tools/lib/traceevent/plugins/Build create mode 100644 tools/lib/traceevent/plugins/Makefile rename tools/lib/traceevent/{ => plugins}/plugin_cfg80211.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_function.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_hrtimer.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_jbd2.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_kmem.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_kvm.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_mac80211.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_sched_switch.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_scsi.c (100%) rename tools/lib/traceevent/{ => plugins}/plugin_xen.c (100%) diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build index ba54bfce0b0b..f9a5d79578f5 100644 --- a/tools/lib/traceevent/Build +++ b/tools/lib/traceevent/Build @@ -6,14 +6,3 @@ libtraceevent-y += parse-utils.o libtraceevent-y += kbuffer-parse.o libtraceevent-y += tep_strerror.o libtraceevent-y += event-parse-api.o - -plugin_jbd2-y += plugin_jbd2.o -plugin_hrtimer-y += plugin_hrtimer.o -plugin_kmem-y += plugin_kmem.o -plugin_kvm-y += plugin_kvm.o -plugin_mac80211-y += plugin_mac80211.o -plugin_sched_switch-y += plugin_sched_switch.o -plugin_function-y += plugin_function.o -plugin_xen-y += plugin_xen.o -plugin_scsi-y += plugin_scsi.o -plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index a39cdd0d890d..5315f3787f8d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -58,30 +58,6 @@ export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ export EVENT_PARSE_VERSION -set_plugin_dir := 1 - -# Set plugin_dir to preffered global plugin location -# If we install under $HOME directory we go under -# $(HOME)/.local/lib/traceevent/plugins -# -# We dont set PLUGIN_DIR in case we install under $HOME -# directory, because by default the code looks under: -# $(HOME)/.local/lib/traceevent/plugins by default. -# -ifeq ($(plugin_dir),) -ifeq ($(prefix),$(HOME)) -override plugin_dir = $(HOME)/.local/lib/traceevent/plugins -set_plugin_dir := 0 -else -override plugin_dir = $(libdir)/traceevent/plugins -endif -endif - -ifeq ($(set_plugin_dir),1) -PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" -PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' -endif - include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -105,7 +81,6 @@ export prefix libdir src obj # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) CONFIG_INCLUDES = CONFIG_LIBS = @@ -151,29 +126,14 @@ MAKEOVERRIDES= export srctree OUTPUT CC LD CFLAGS V build := -f $(srctree)/tools/build/Makefile.build dir=. obj -PLUGINS = plugin_jbd2.so -PLUGINS += plugin_hrtimer.so -PLUGINS += plugin_kmem.so -PLUGINS += plugin_kvm.so -PLUGINS += plugin_mac80211.so -PLUGINS += plugin_sched_switch.so -PLUGINS += plugin_function.so -PLUGINS += plugin_xen.so -PLUGINS += plugin_scsi.so -PLUGINS += plugin_cfg80211.so - -PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) -PLUGINS_IN := $(PLUGINS:.so=-in.o) - TE_IN := $(OUTPUT)libtraceevent-in.o LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) -DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list -CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE) +CMD_TARGETS = $(LIB_TARGET) TARGETS = $(CMD_TARGETS) -all: all_cmd +all: all_cmd plugins all_cmd: $(CMD_TARGETS) @@ -188,17 +148,6 @@ $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) $(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) - $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) - -plugins: $(PLUGINS) - -__plugin_obj = $(notdir $@) - plugin_obj = $(__plugin_obj:-in.o=) - -$(PLUGINS_IN): force - $(Q)$(MAKE) $(build)=$(plugin_obj) - $(OUTPUT)%.so: $(OUTPUT)%-in.o $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ @@ -258,25 +207,6 @@ define do_install $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef -define do_install_plugins - for plugin in $1; do \ - $(call do_install,$$plugin,$(plugin_dir_SQ)); \ - done -endef - -define do_generate_dynamic_list_file - symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ - xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ - if [ "$$symbol_type" = "U W" ];then \ - (echo '{'; \ - $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ - echo '};'; \ - ) > $2; \ - else \ - (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ - fi -endef - PKG_CONFIG_FILE = libtraceevent.pc define do_install_pkgconfig_file if [ -n "${pkgconfig_dir}" ]; then \ @@ -296,10 +226,6 @@ install_lib: all_cmd install_plugins install_headers install_pkgconfig $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) -install_plugins: $(PLUGINS) - $(call QUIET_INSTALL, trace_plugins) \ - $(call do_install_plugins, $(PLUGINS)) - install_pkgconfig: $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ $(call do_install_pkgconfig_file,$(prefix)) @@ -313,7 +239,7 @@ install_headers: install: install_lib -clean: +clean: clean_plugins $(call QUIET_CLEAN, libtraceevent) \ $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ $(RM) TRACEEVENT-CFLAGS tags TAGS; \ @@ -351,7 +277,19 @@ help: @echo ' doc-install - install the man pages' @echo ' doc-uninstall - uninstall the man pages' @echo'' -PHONY += force plugins + +PHONY += plugins +plugins: + $(call descend,plugins) + +PHONY += install_plugins +install_plugins: + $(call descend,plugins,install) + +PHONY += clean_plugins +clean_plugins: + $(call descend,plugins,clean) + force: # Declare the contents of the .PHONY variable as phony. We keep that diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build new file mode 100644 index 000000000000..210d26910613 --- /dev/null +++ b/tools/lib/traceevent/plugins/Build @@ -0,0 +1,10 @@ +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile new file mode 100644 index 000000000000..f440989fa55e --- /dev/null +++ b/tools/lib/traceevent/plugins/Makefile @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: GPL-2.0 + +#MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,NM,$(CROSS_COMPILE)nm) +$(call allow-override,PKG_CONFIG,pkg-config) + +EXT = -std=gnu99 +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= /usr/local +libdir = $(prefix)/$(libdir_relative) + +set_plugin_dir := 1 + +# Set plugin_dir to preffered global plugin location +# If we install under $HOME directory we go under +# $(HOME)/.local/lib/traceevent/plugins +# +# We dont set PLUGIN_DIR in case we install under $HOME +# directory, because by default the code looks under: +# $(HOME)/.local/lib/traceevent/plugins by default. +# +ifeq ($(plugin_dir),) +ifeq ($(prefix),$(HOME)) +override plugin_dir = $(HOME)/.local/lib/traceevent/plugins +set_plugin_dir := 0 +else +override plugin_dir = $(libdir)/traceevent/plugins +endif +endif + +ifeq ($(set_plugin_dir),1) +PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" +PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' +endif + +include ../../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +export prefix libdir src obj + +# Shell quotes +plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +OBJ = $@ +N = + +INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +# Append required CFLAGS +override CFLAGS += -fPIC +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) +override CFLAGS += $(udis86-flags) -D_GNU_SOURCE + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Disable command line variables (CFLAGS) override from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= + +export srctree OUTPUT CC LD CFLAGS V + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list + +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so + +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) + +plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE) + +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) + +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) + +$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) + $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) + +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + $(call do_install_mkdir,$2); \ + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' +endef + +define do_install_plugins + for plugin in $1; do \ + $(call do_install,$$plugin,$(plugin_dir_SQ)); \ + done +endef + +define do_generate_dynamic_list_file + symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ + xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ + if [ "$$symbol_type" = "U W" ];then \ + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ + echo '};'; \ + ) > $2; \ + else \ + (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ + fi +endef + +install: $(PLUGINS) + $(call QUIET_INSTALL, trace_plugins) \ + $(call do_install_plugins, $(PLUGINS)) + +clean: + $(call QUIET_CLEAN, trace_plugins) \ + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ + $(RM) $(OUTPUT)libtraceevent-dynamic-list \ + $(RM) TRACEEVENT-CFLAGS tags TAGS; + +PHONY += force plugins +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c similarity index 100% rename from tools/lib/traceevent/plugin_cfg80211.c rename to tools/lib/traceevent/plugins/plugin_cfg80211.c diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c similarity index 100% rename from tools/lib/traceevent/plugin_function.c rename to tools/lib/traceevent/plugins/plugin_function.c diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c similarity index 100% rename from tools/lib/traceevent/plugin_hrtimer.c rename to tools/lib/traceevent/plugins/plugin_hrtimer.c diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c similarity index 100% rename from tools/lib/traceevent/plugin_jbd2.c rename to tools/lib/traceevent/plugins/plugin_jbd2.c diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c similarity index 100% rename from tools/lib/traceevent/plugin_kmem.c rename to tools/lib/traceevent/plugins/plugin_kmem.c diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c similarity index 100% rename from tools/lib/traceevent/plugin_kvm.c rename to tools/lib/traceevent/plugins/plugin_kvm.c diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c similarity index 100% rename from tools/lib/traceevent/plugin_mac80211.c rename to tools/lib/traceevent/plugins/plugin_mac80211.c diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c similarity index 100% rename from tools/lib/traceevent/plugin_sched_switch.c rename to tools/lib/traceevent/plugins/plugin_sched_switch.c diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c similarity index 100% rename from tools/lib/traceevent/plugin_scsi.c rename to tools/lib/traceevent/plugins/plugin_scsi.c diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c similarity index 100% rename from tools/lib/traceevent/plugin_xen.c rename to tools/lib/traceevent/plugins/plugin_xen.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2ccc12f3730b..902c792f326a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -292,7 +292,7 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list # # The static build has no dynsym table, so this does not work for @@ -737,7 +737,7 @@ libtraceevent_plugins: FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) From 33c96400dcd34d572299b8106e1b0460a1b8bae9 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Thu, 19 Sep 2019 17:23:40 -0400 Subject: [PATCH 269/345] libtraceevent: Man pages for tep plugins APIs Create man pages for libtraceevent APIs: tep_load_plugins(), tep_unload_plugin() Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/linux-trace-devel/20190903133434.30417-1-tz.stoyanov@gmail.com Link: http://lore.kernel.org/lkml/20190919212542.216189588@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/libtraceevent-plugins.txt | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tools/lib/traceevent/Documentation/libtraceevent-plugins.txt diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt new file mode 100644 index 000000000000..596032ade31f --- /dev/null +++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt @@ -0,0 +1,99 @@ +libtraceevent(3) +================ + +NAME +---- +tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins. + +SYNOPSIS +-------- +[verse] +-- +*#include * + +struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); +void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); +-- + +DESCRIPTION +----------- +The _tep_load_plugins()_ function loads all plugins, located in the plugin +directories. The _tep_ argument is trace event parser context. +The plugin directories are : +[verse] +-- + - System's plugin directory, defined at the library compile time. It + depends on the library installation prefix and usually is + _(install_preffix)/lib/traceevent/plugins_ + - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ + - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ +-- +Loading of plugins can be controlled by the _tep_flags_, using the +_tep_set_flag()_ API: +[verse] +-- + _TEP_DISABLE_SYS_PLUGINS_ - do not load plugins, located in + the system's plugin directory. + _TEP_DISABLE_PLUGINS_ - do not load any plugins. +-- +The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if +loading of all plugins is not the desired case. + +The _tep_unload_plugins()_ function unloads the plugins, previously loaded by +_tep_load_plugins()_. The _tep_ argument is trace event parser context. The +_plugin_list_ is the list of loaded plugins, returned by +the _tep_load_plugins()_ function. + +RETURN VALUE +------------ +The _tep_load_plugins()_ function returns a list of successfully loaded plugins, +or NULL in case no plugins are loaded. + +EXAMPLE +------- +[source,c] +-- +#include +... +struct tep_handle *tep = tep_alloc(); +... +struct tep_plugin_list *plugins = tep_load_plugins(tep); +if (plugins == NULL) { + /* no plugins are loaded */ +} +... +tep_unload_plugins(plugins, tep); +-- + +FILES +----- +[verse] +-- +*event-parse.h* + Header file to include in order to have access to the library APIs. +*-ltraceevent* + Linker switch to add when building a program that uses the library. +-- + +SEE ALSO +-------- +_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_ + +AUTHOR +------ +[verse] +-- +*Steven Rostedt* , author of *libtraceevent*. +*Tzvetomir Stoyanov* , author of this man page. +-- +REPORTING BUGS +-------------- +Report bugs to + +LICENSE +------- +libtraceevent is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git From 058bd857845a9675cfaf4bc2ca831ec7953b58f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 12 Sep 2019 10:57:18 +0200 Subject: [PATCH 270/345] tools: Add missing stdio.h include to asm/bug.h header We have a direct fprintf() call in the header, so we need stdio.h include, otherwise it could fail compilation if there's no prior stdio.h include directive. Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-8hvjgh24olfsa4non0a3ohnq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm/bug.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h index bbd75ac8b202..550223f0a6e6 100644 --- a/tools/include/asm/bug.h +++ b/tools/include/asm/bug.h @@ -3,6 +3,7 @@ #define _TOOLS_ASM_BUG_H #include +#include #define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0) From a583053299c1e66e6202b494cbc3acd93cedc4cc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 20:30:53 +0200 Subject: [PATCH 271/345] perf tools: Rename 'struct perf_mmap' to 'struct mmap' Rename 'struct perf_evlist' to 'struct evlist', so we don't have a name clash when we add 'struct perf_mmap' to libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 34 ++++++------ tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/auxtrace.c | 6 +-- tools/perf/util/auxtrace.h | 8 +-- tools/perf/util/evlist.c | 14 ++--- tools/perf/util/evlist.h | 4 +- tools/perf/util/mmap.c | 54 ++++++++++---------- tools/perf/util/mmap.h | 32 ++++++------ tools/perf/util/python.c | 6 +-- 22 files changed, 93 insertions(+), 93 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 0a4570b340fa..c83abb1ed0ff 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -65,7 +65,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe union perf_event *event; u64 test_tsc, comm1_tsc, comm2_tsc; u64 test_time, comm1_time = 0, comm2_time = 0; - struct perf_mmap *md; + struct mmap *md; threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index c4b22e1b0a40..7354b77e9137 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -750,7 +750,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, { struct evlist *evlist = kvm->evlist; union perf_event *event; - struct perf_mmap *md; + struct mmap *md; u64 timestamp; s64 n = 0; int err; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1e1f97139f16..1528fb686f96 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -119,7 +119,7 @@ static bool switch_output_time(struct record *rec) trigger_is_ready(&switch_output_trigger); } -static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, +static int record__write(struct record *rec, struct mmap *map __maybe_unused, void *bf, size_t size) { struct perf_data_file *file = &rec->session->data->file; @@ -168,7 +168,7 @@ static int record__aio_write(struct aiocb *cblock, int trace_fd, return rc; } -static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) +static int record__aio_complete(struct mmap *md, struct aiocb *cblock) { void *rem_buf; off_t rem_off; @@ -214,7 +214,7 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) return rc; } -static int record__aio_sync(struct perf_mmap *md, bool sync_all) +static int record__aio_sync(struct mmap *md, bool sync_all) { struct aiocb **aiocb = md->aio.aiocb; struct aiocb *cblocks = md->aio.cblocks; @@ -255,7 +255,7 @@ struct record_aio { size_t size; }; -static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) +static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) { struct record_aio *aio = to; @@ -300,7 +300,7 @@ static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t return size; } -static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) +static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) { int ret, idx; int trace_fd = rec->session->data->file.fd; @@ -351,13 +351,13 @@ static void record__aio_mmap_read_sync(struct record *rec) { int i; struct evlist *evlist = rec->evlist; - struct perf_mmap *maps = evlist->mmap; + struct mmap *maps = evlist->mmap; if (!record__aio_enabled(rec)) return; for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &maps[i]; + struct mmap *map = &maps[i]; if (map->base) record__aio_sync(map, true); @@ -387,7 +387,7 @@ static int record__aio_parse(const struct option *opt, #else /* HAVE_AIO_SUPPORT */ static int nr_cblocks_max = 0; -static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, +static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, off_t *off __maybe_unused) { return -1; @@ -482,7 +482,7 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, NULL, event, event->header.size); } -static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) +static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; @@ -527,7 +527,7 @@ static void record__sig_exit(void) #ifdef HAVE_AUXTRACE_SUPPORT static int record__process_auxtrace(struct perf_tool *tool, - struct perf_mmap *map, + struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2) { @@ -565,7 +565,7 @@ static int record__process_auxtrace(struct perf_tool *tool, } static int record__auxtrace_mmap_read(struct record *rec, - struct perf_mmap *map) + struct mmap *map) { int ret; @@ -581,7 +581,7 @@ static int record__auxtrace_mmap_read(struct record *rec, } static int record__auxtrace_mmap_read_snapshot(struct record *rec, - struct perf_mmap *map) + struct mmap *map) { int ret; @@ -603,7 +603,7 @@ static int record__auxtrace_read_snapshot_all(struct record *rec) int rc = 0; for (i = 0; i < rec->evlist->nr_mmaps; i++) { - struct perf_mmap *map = &rec->evlist->mmap[i]; + struct mmap *map = &rec->evlist->mmap[i]; if (!map->auxtrace_mmap.base) continue; @@ -668,7 +668,7 @@ static int record__auxtrace_init(struct record *rec) static inline int record__auxtrace_mmap_read(struct record *rec __maybe_unused, - struct perf_mmap *map __maybe_unused) + struct mmap *map __maybe_unused) { return 0; } @@ -901,7 +901,7 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) +static void record__adjust_affinity(struct record *rec, struct mmap *map) { if (rec->opts.affinity != PERF_AFFINITY_SYS && !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { @@ -948,7 +948,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, u64 bytes_written = rec->bytes_written; int i; int rc = 0; - struct perf_mmap *maps; + struct mmap *maps; int trace_fd = rec->data.file.fd; off_t off = 0; @@ -967,7 +967,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, for (i = 0; i < evlist->nr_mmaps; i++) { u64 flush = 0; - struct perf_mmap *map = &maps[i]; + struct mmap *map = &maps[i]; if (map->base) { record__adjust_affinity(rec, map); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8da3c939e6b0..834a927107c4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -863,7 +863,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) { struct record_opts *opts = &top->record_opts; struct evlist *evlist = top->evlist; - struct perf_mmap *md; + struct mmap *md; union perf_event *event; md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a292658b4232..2f853870d68d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3444,7 +3444,7 @@ again: for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index a637a4a90760..0a046096e6f4 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -33,7 +33,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, int i; for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &evlist->overwrite_mmap[i]; + struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; perf_mmap__read_init(map); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fc102e4f403e..cf9776aceb82 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -180,7 +180,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fd02c1f1d976..df96cbb9ffe5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -419,7 +419,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, struct state *state) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; int i, ret; for (i = 0; i < evlist->nr_mmaps; i++) { diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index df0fd5a44e04..67a87f507bbd 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -31,7 +31,7 @@ static int find_comm(struct evlist *evlist, const char *comm) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; int i, found; found = 0; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 042757629e90..accca3d78fa3 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -42,7 +42,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[nsyscalls], i, j; struct evsel *evsels[nsyscalls], *evsel; char sbuf[STRERR_BUFSIZE]; - struct perf_mmap *md; + struct mmap *md; threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index b71167b43dda..e5ef74d4e925 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -88,7 +88,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index e1b42292cf7f..49d2d4c5956d 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -166,7 +166,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - struct perf_mmap *md; + struct mmap *md; md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 97694a040986..7abe0b6aabb7 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -42,7 +42,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) }; struct perf_cpu_map *cpus; struct perf_thread_map *threads; - struct perf_mmap *md; + struct mmap *md; attr.sample_freq = 500; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 3fb1ff7b8a2f..e7888ddd69a3 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -263,7 +263,7 @@ static int process_events(struct evlist *evlist, unsigned pos, cnt = 0; LIST_HEAD(events); struct event_node *events_array, *node; - struct perf_mmap *md; + struct mmap *md; int i, ret; for (i = 0; i < evlist->nr_mmaps; i++) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 088c7708b03a..cff99707cc9d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -51,7 +51,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused char sbuf[STRERR_BUFSIZE]; struct perf_cpu_map *cpus; struct perf_thread_map *threads; - struct perf_mmap *md; + struct mmap *md; signal(SIGCHLD, sig_handler); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 0e8c89cf7cad..1d22ffd972ac 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1228,7 +1228,7 @@ int perf_event__process_auxtrace_error(struct perf_session *session, return 0; } -static int __auxtrace_mmap__read(struct perf_mmap *map, +static int __auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) @@ -1339,13 +1339,13 @@ static int __auxtrace_mmap__read(struct perf_mmap *map, return 1; } -int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn) { return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); } -int auxtrace_mmap__read_snapshot(struct perf_mmap *map, +int auxtrace_mmap__read_snapshot(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b110aec1da4d..f201f36bc35f 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -22,7 +22,7 @@ union perf_event; struct perf_session; struct evlist; struct perf_tool; -struct perf_mmap; +struct mmap; struct perf_sample; struct option; struct record_opts; @@ -445,14 +445,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, bool per_cpu); typedef int (*process_auxtrace_t)(struct perf_tool *tool, - struct perf_mmap *map, + struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); -int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn); -int auxtrace_mmap__read_snapshot(struct perf_mmap *map, +int auxtrace_mmap__read_snapshot(struct mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ea9517d506d8..ceab9fb7f7f9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -423,7 +423,7 @@ int perf_evlist__alloc_pollfd(struct evlist *evlist) } static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd, - struct perf_mmap *map, short revent) + struct mmap *map, short revent) { int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); /* @@ -447,7 +447,7 @@ int perf_evlist__add_pollfd(struct evlist *evlist, int fd) static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, void *arg __maybe_unused) { - struct perf_mmap *map = fda->priv[fd].ptr; + struct mmap *map = fda->priv[fd].ptr; if (map) perf_mmap__put(map); @@ -693,16 +693,16 @@ void perf_evlist__munmap(struct evlist *evlist) zfree(&evlist->overwrite_mmap); } -static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist, +static struct mmap *perf_evlist__alloc_mmap(struct evlist *evlist, bool overwrite) { int i; - struct perf_mmap *map; + struct mmap *map; evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); if (perf_cpu_map__empty(evlist->core.cpus)) evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads); - map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + map = zalloc(evlist->nr_mmaps * sizeof(struct mmap)); if (!map) return NULL; @@ -741,7 +741,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx); evlist__for_each_entry(evlist, evsel) { - struct perf_mmap *maps = evlist->mmap; + struct mmap *maps = evlist->mmap; int *output = _output; int fd; int cpu; @@ -1847,7 +1847,7 @@ static void *perf_evlist__poll_thread(void *arg) perf_evlist__poll(evlist, 1000); for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *map = &evlist->mmap[i]; + struct mmap *map = &evlist->mmap[i]; union perf_event *event; if (perf_mmap__read_init(map)) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a55f0f2546e5..129786361572 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -39,8 +39,8 @@ struct evlist { pid_t pid; } workload; struct fdarray pollfd; - struct perf_mmap *mmap; - struct perf_mmap *overwrite_mmap; + struct mmap *mmap; + struct mmap *overwrite_mmap; struct evsel *selected; struct events_stats stats; struct perf_env *env; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 33c5b5495482..f3b7c8b0fa90 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -22,13 +22,13 @@ #include "../perf.h" #include "util.h" /* page_size */ -size_t perf_mmap__mmap_len(struct perf_mmap *map) +size_t perf_mmap__mmap_len(struct mmap *map) { return map->mask + 1 + page_size; } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, +static union perf_event *perf_mmap__read(struct mmap *map, u64 *startp, u64 end) { unsigned char *data = map->base + page_size; @@ -82,7 +82,7 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, * } * perf_mmap__read_done() */ -union perf_event *perf_mmap__read_event(struct perf_mmap *map) +union perf_event *perf_mmap__read_event(struct mmap *map) { union perf_event *event; @@ -104,17 +104,17 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map) return event; } -static bool perf_mmap__empty(struct perf_mmap *map) +static bool perf_mmap__empty(struct mmap *map) { return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; } -void perf_mmap__get(struct perf_mmap *map) +void perf_mmap__get(struct mmap *map) { refcount_inc(&map->refcnt); } -void perf_mmap__put(struct perf_mmap *map) +void perf_mmap__put(struct mmap *map) { BUG_ON(map->base && refcount_read(&map->refcnt) == 0); @@ -122,7 +122,7 @@ void perf_mmap__put(struct perf_mmap *map) perf_mmap__munmap(map); } -void perf_mmap__consume(struct perf_mmap *map) +void perf_mmap__consume(struct mmap *map) { if (!map->overwrite) { u64 old = map->prev; @@ -161,13 +161,13 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT -static int perf_mmap__aio_enabled(struct perf_mmap *map) +static int perf_mmap__aio_enabled(struct mmap *map) { return map->aio.nr_cblocks > 0; } #ifdef HAVE_LIBNUMA_SUPPORT -static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +static int perf_mmap__aio_alloc(struct mmap *map, int idx) { map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); @@ -179,7 +179,7 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) return 0; } -static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +static void perf_mmap__aio_free(struct mmap *map, int idx) { if (map->aio.data[idx]) { munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); @@ -187,7 +187,7 @@ static void perf_mmap__aio_free(struct perf_mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; @@ -207,7 +207,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi return 0; } #else /* !HAVE_LIBNUMA_SUPPORT */ -static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +static int perf_mmap__aio_alloc(struct mmap *map, int idx) { map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); if (map->aio.data[idx] == NULL) @@ -216,19 +216,19 @@ static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) return 0; } -static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +static void perf_mmap__aio_free(struct mmap *map, int idx) { zfree(&(map->aio.data[idx])); } -static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, +static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, int cpu __maybe_unused, int affinity __maybe_unused) { return 0; } #endif -static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) +static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp) { int delta_max, i, prio, ret; @@ -282,7 +282,7 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) return 0; } -static void perf_mmap__aio_munmap(struct perf_mmap *map) +static void perf_mmap__aio_munmap(struct mmap *map) { int i; @@ -294,23 +294,23 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) zfree(&map->aio.aiocb); } #else /* !HAVE_AIO_SUPPORT */ -static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused) +static int perf_mmap__aio_enabled(struct mmap *map __maybe_unused) { return 0; } -static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, +static int perf_mmap__aio_mmap(struct mmap *map __maybe_unused, struct mmap_params *mp __maybe_unused) { return 0; } -static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) +static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) { } #endif -void perf_mmap__munmap(struct perf_mmap *map) +void perf_mmap__munmap(struct mmap *map) { perf_mmap__aio_munmap(map); if (map->data != NULL) { @@ -343,7 +343,7 @@ static void build_node_mask(int node, cpu_set_t *mask) } } -static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) +static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { CPU_ZERO(&map->affinity_mask); if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) @@ -352,7 +352,7 @@ static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_pa CPU_SET(map->cpu, &map->affinity_mask); } -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) +int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { /* * The last one will be done at perf_mmap__consume(), so that we @@ -440,7 +440,7 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) /* * Report the start and end of the available data in ringbuffer */ -static int __perf_mmap__read_init(struct perf_mmap *md) +static int __perf_mmap__read_init(struct mmap *md) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; @@ -474,7 +474,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md) return 0; } -int perf_mmap__read_init(struct perf_mmap *map) +int perf_mmap__read_init(struct mmap *map) { /* * Check if event was unmapped due to a POLLHUP/POLLERR. @@ -485,8 +485,8 @@ int perf_mmap__read_init(struct perf_mmap *map) return __perf_mmap__read_init(map); } -int perf_mmap__push(struct perf_mmap *md, void *to, - int push(struct perf_mmap *map, void *to, void *buf, size_t size)) +int perf_mmap__push(struct mmap *md, void *to, + int push(struct mmap *map, void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); unsigned char *data = md->base + page_size; @@ -532,7 +532,7 @@ out: * The last perf_mmap__read() will set tail to map->prev. * Need to correct the map->prev to head which is the end of next read. */ -void perf_mmap__read_done(struct perf_mmap *map) +void perf_mmap__read_done(struct mmap *map) { /* * Check if event was unmapped due to a POLLHUP/POLLERR. diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 3857a49e8f96..01524608a984 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -15,11 +15,11 @@ struct aiocb; /** - * struct perf_mmap - perf's ring buffer mmap details + * struct mmap - perf's ring buffer mmap details * * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ -struct perf_mmap { +struct mmap { void *base; int mask; int fd; @@ -78,33 +78,33 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu); -void perf_mmap__munmap(struct perf_mmap *map); +int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +void perf_mmap__munmap(struct mmap *map); -void perf_mmap__get(struct perf_mmap *map); -void perf_mmap__put(struct perf_mmap *map); +void perf_mmap__get(struct mmap *map); +void perf_mmap__put(struct mmap *map); -void perf_mmap__consume(struct perf_mmap *map); +void perf_mmap__consume(struct mmap *map); -static inline u64 perf_mmap__read_head(struct perf_mmap *mm) +static inline u64 perf_mmap__read_head(struct mmap *mm) { return ring_buffer_read_head(mm->base); } -static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) { ring_buffer_write_tail(md->base, tail); } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map); +union perf_event *perf_mmap__read_forward(struct mmap *map); -union perf_event *perf_mmap__read_event(struct perf_mmap *map); +union perf_event *perf_mmap__read_event(struct mmap *map); -int perf_mmap__push(struct perf_mmap *md, void *to, - int push(struct perf_mmap *map, void *to, void *buf, size_t size)); +int perf_mmap__push(struct mmap *md, void *to, + int push(struct mmap *map, void *to, void *buf, size_t size)); -size_t perf_mmap__mmap_len(struct perf_mmap *map); +size_t perf_mmap__mmap_len(struct mmap *map); -int perf_mmap__read_init(struct perf_mmap *md); -void perf_mmap__read_done(struct perf_mmap *map); +int perf_mmap__read_init(struct mmap *md); +void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0ba19dd75510..1e1247cffea8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -984,12 +984,12 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, return Py_BuildValue("i", evlist->core.nr_entries); } -static struct perf_mmap *get_md(struct evlist *evlist, int cpu) +static struct mmap *get_md(struct evlist *evlist, int cpu) { int i; for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *md = &evlist->mmap[i]; + struct mmap *md = &evlist->mmap[i]; if (md->cpu == cpu) return md; @@ -1005,7 +1005,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, union perf_event *event; int sample_id_all = 1, cpu; static char *kwlist[] = { "cpu", "sample_id_all", NULL }; - struct perf_mmap *md; + struct mmap *md; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, From 9521b5f2d9d3e3fd6092fb9d7b00c914e7fa7d33 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 28 Jul 2019 12:45:35 +0200 Subject: [PATCH 272/345] perf tools: Rename perf_evlist__mmap() to evlist__mmap() Rename perf_evlist__mmap() to evlist__mmap(), so we don't have a name clash when we add perf_evlist__mmap() in libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 6 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 4 +-- tools/perf/tests/bpf.c | 4 +-- tools/perf/tests/code-reading.c | 4 +-- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 4 +-- tools/perf/tests/perf-record.c | 4 +-- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 4 +-- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 30 ++++++++++---------- tools/perf/util/evlist.h | 8 +++--- tools/perf/util/python.c | 2 +- 18 files changed, 43 insertions(+), 43 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index c83abb1ed0ff..42ae47525a76 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -90,7 +90,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(evlist__open(evlist)); - CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); + CHECK__(evlist__mmap(evlist, UINT_MAX)); pc = evlist->mmap[0].base; ret = perf_read_tsc_conversion(pc, &tc); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 7354b77e9137..72debb7bd20d 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1060,7 +1060,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { + if (evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { ui__error("Failed to mmap the events: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); evlist__close(evlist); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1528fb686f96..093d9ab5633e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -439,7 +439,7 @@ static int record__mmap_flush_parse(const struct option *opt, if (!opts->mmap_flush) opts->mmap_flush = MMAP_FLUSH_DEFAULT; - flush_max = perf_evlist__mmap_size(opts->mmap_pages); + flush_max = evlist__mmap_size(opts->mmap_pages); flush_max /= 4; if (opts->mmap_flush > flush_max) opts->mmap_flush = flush_max; @@ -707,7 +707,7 @@ static int record__mmap_evlist(struct record *rec, if (opts->affinity != PERF_AFFINITY_SYS) cpu__setup_cpunode_map(); - if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, + if (evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, opts->nr_cblocks, opts->affinity, @@ -1980,7 +1980,7 @@ out_free: static void switch_output_size_warn(struct record *rec) { - u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); + u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); struct switch_output *s = &rec->switch_output; wakeup_size /= 2; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 834a927107c4..771b3ff47dc3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1042,7 +1042,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) { + if (evlist__mmap(evlist, opts->mmap_pages) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, str_error_r(errno, msg, sizeof(msg))); goto out_err; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2f853870d68d..ff989351567d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3409,7 +3409,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->dump.map) bpf_map__fprintf(trace->dump.map, trace->output); - err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); + err = evlist__mmap(evlist, trace->opts.mmap_pages); if (err < 0) goto out_error_mmap; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 0a046096e6f4..f1eb7e9c1d7d 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -63,9 +63,9 @@ static int do_test(struct evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages); + err = evlist__mmap(evlist, mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); return TEST_FAIL; } diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index cf9776aceb82..964731915498 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -167,9 +167,9 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, opts.mmap_pages); + err = evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index df96cbb9ffe5..413783b69006 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -685,9 +685,9 @@ static int do_test_code_reading(bool try_kcore) break; } - ret = perf_evlist__mmap(evlist, UINT_MAX); + ret = evlist__mmap(evlist, UINT_MAX); if (ret < 0) { - pr_debug("perf_evlist__mmap failed\n"); + pr_debug("evlist__mmap failed\n"); goto out_put; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 67a87f507bbd..8e2ec5f72042 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -104,7 +104,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un goto out_err; } - CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); + CHECK__(evlist__mmap(evlist, UINT_MAX)); /* * First, test that a 'comm' event can be found when the event is diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index accca3d78fa3..4d42e455feb7 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -99,7 +99,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128) < 0) { + if (evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index e5ef74d4e925..5c2576174ae9 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -69,9 +69,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, UINT_MAX); + err = evlist__mmap(evlist, UINT_MAX); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 49d2d4c5956d..669fd88e7f30 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -143,9 +143,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * fds in the same CPU to be injected in the same mmap ring buffer * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). */ - err = perf_evlist__mmap(evlist, opts.mmap_pages); + err = evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { - pr_debug("perf_evlist__mmap: %s\n", + pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 7abe0b6aabb7..fbff60815be8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -82,7 +82,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128); + err = evlist__mmap(evlist, 128); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e7888ddd69a3..c92e287e0f53 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -460,9 +460,9 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out; } - err = perf_evlist__mmap(evlist, UINT_MAX); + err = evlist__mmap(evlist, UINT_MAX); if (err) { - pr_debug("perf_evlist__mmap failed!\n"); + pr_debug("evlist__mmap failed!\n"); goto out_err; } diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index cff99707cc9d..718838b5e724 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -106,7 +106,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128) < 0) { + if (evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ceab9fb7f7f9..e8afe4fdc1b2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -732,7 +732,7 @@ perf_evlist__should_poll(struct evlist *evlist __maybe_unused, return true; } -static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, +static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { @@ -810,7 +810,7 @@ static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx, return 0; } -static int perf_evlist__mmap_per_cpu(struct evlist *evlist, +static int evlist__mmap_per_cpu(struct evlist *evlist, struct mmap_params *mp) { int cpu, thread; @@ -826,7 +826,7 @@ static int perf_evlist__mmap_per_cpu(struct evlist *evlist, true); for (thread = 0; thread < nr_threads; thread++) { - if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, + if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output, &output_overwrite)) goto out_unmap; } @@ -839,7 +839,7 @@ out_unmap: return -1; } -static int perf_evlist__mmap_per_thread(struct evlist *evlist, +static int evlist__mmap_per_thread(struct evlist *evlist, struct mmap_params *mp) { int thread; @@ -853,7 +853,7 @@ static int perf_evlist__mmap_per_thread(struct evlist *evlist, auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); - if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, + if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, &output, &output_overwrite)) goto out_unmap; } @@ -888,7 +888,7 @@ unsigned long perf_event_mlock_kb_in_pages(void) return pages; } -size_t perf_evlist__mmap_size(unsigned long pages) +size_t evlist__mmap_size(unsigned long pages) { if (pages == UINT_MAX) pages = perf_event_mlock_kb_in_pages(); @@ -971,7 +971,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, } /** - * perf_evlist__mmap_ex - Create mmaps to receive events. + * evlist__mmap_ex - Create mmaps to receive events. * @evlist: list of events * @pages: map length in pages * @overwrite: overwrite older events? @@ -979,7 +979,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * @auxtrace_overwrite - overwrite older auxtrace data? * * If @overwrite is %false the user needs to signal event consumption using - * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this + * perf_mmap__write_tail(). Using evlist__mmap_read() does this * automatically. * * Similarly, if @auxtrace_overwrite is %false the user needs to signal data @@ -987,7 +987,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * * Return: %0 on success, negative error code otherwise. */ -int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, +int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level) @@ -1011,7 +1011,7 @@ int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->mmap_len = perf_evlist__mmap_size(pages); + evlist->mmap_len = evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1026,14 +1026,14 @@ int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, } if (perf_cpu_map__empty(cpus)) - return perf_evlist__mmap_per_thread(evlist, &mp); + return evlist__mmap_per_thread(evlist, &mp); - return perf_evlist__mmap_per_cpu(evlist, &mp); + return evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct evlist *evlist, unsigned int pages) +int evlist__mmap(struct evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); + return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); } int perf_evlist__create_maps(struct evlist *evlist, struct target *target) @@ -1889,7 +1889,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, UINT_MAX)) + if (evlist__mmap(evlist, UINT_MAX)) goto out_delete_evlist; evlist__for_each_entry(evlist, counter) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 129786361572..aaf06182c1b8 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -139,7 +139,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state); -void perf_evlist__mmap_consume(struct evlist *evlist, int idx); +void evlist__mmap_consume(struct evlist *evlist, int idx); int evlist__open(struct evlist *evlist); void evlist__close(struct evlist *evlist); @@ -170,14 +170,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); -int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages, +int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level); -int perf_evlist__mmap(struct evlist *evlist, unsigned int pages); +int evlist__mmap(struct evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct evlist *evlist); -size_t perf_evlist__mmap_size(unsigned long pages); +size_t evlist__mmap_size(unsigned long pages); void evlist__disable(struct evlist *evlist); void evlist__enable(struct evlist *evlist); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 1e1247cffea8..b102d174b868 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -899,7 +899,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages) < 0) { + if (evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } From db6b7b138506ad537edd12d98f674722a643c150 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Aug 2019 16:19:55 +0200 Subject: [PATCH 273/345] perf tools: Rename perf_evlist__munmap() to evlist__munmap() Rename perf_evlist__munmap() to evlist__munmap(), so we don't have a name clash when we add perf_evlist__munmap() in libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/util/evlist.c | 12 ++++++------ tools/perf/util/evlist.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index f1eb7e9c1d7d..3073a68d17b9 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -75,7 +75,7 @@ static int do_test(struct evlist *evlist, int mmap_pages, evlist__disable(evlist); err = count_samples(evlist, sample_count, comm_count); - perf_evlist__munmap(evlist); + evlist__munmap(evlist); return err; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e8afe4fdc1b2..888472c7bf9c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -149,7 +149,7 @@ void evlist__delete(struct evlist *evlist) if (evlist == NULL) return; - perf_evlist__munmap(evlist); + evlist__munmap(evlist); evlist__close(evlist); perf_cpu_map__put(evlist->core.cpus); perf_thread_map__put(evlist->core.threads); @@ -673,7 +673,7 @@ static int perf_evlist__resume(struct evlist *evlist) return perf_evlist__set_paused(evlist, false); } -static void perf_evlist__munmap_nofree(struct evlist *evlist) +static void evlist__munmap_nofree(struct evlist *evlist) { int i; @@ -686,9 +686,9 @@ static void perf_evlist__munmap_nofree(struct evlist *evlist) perf_mmap__munmap(&evlist->overwrite_mmap[i]); } -void perf_evlist__munmap(struct evlist *evlist) +void evlist__munmap(struct evlist *evlist) { - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); } @@ -835,7 +835,7 @@ static int evlist__mmap_per_cpu(struct evlist *evlist, return 0; out_unmap: - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); return -1; } @@ -861,7 +861,7 @@ static int evlist__mmap_per_thread(struct evlist *evlist, return 0; out_unmap: - perf_evlist__munmap_nofree(evlist); + evlist__munmap_nofree(evlist); return -1; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index aaf06182c1b8..f07501602353 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -175,7 +175,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level); int evlist__mmap(struct evlist *evlist, unsigned int pages); -void perf_evlist__munmap(struct evlist *evlist); +void evlist__munmap(struct evlist *evlist); size_t evlist__mmap_size(unsigned long pages); From d50cf36115a08e668b4a0919a6807c3f9a9e8db8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Aug 2019 16:21:46 +0200 Subject: [PATCH 274/345] perf tools: Rename perf_evlist__alloc_mmap() to evlist__alloc_mmap() Rename perf_evlist__alloc_mmap() to evlist__alloc_mmap(), so we don't have a name clash when we add perf_evlist__alloc_mmap() to libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 888472c7bf9c..5bde2e5bf0e3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -693,8 +693,8 @@ void evlist__munmap(struct evlist *evlist) zfree(&evlist->overwrite_mmap); } -static struct mmap *perf_evlist__alloc_mmap(struct evlist *evlist, - bool overwrite) +static struct mmap *evlist__alloc_mmap(struct evlist *evlist, + bool overwrite) { int i; struct mmap *map; @@ -752,7 +752,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, maps = evlist->overwrite_mmap; if (!maps) { - maps = perf_evlist__alloc_mmap(evlist, true); + maps = evlist__alloc_mmap(evlist, true); if (!maps) return -1; evlist->overwrite_mmap = maps; @@ -1004,7 +1004,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, .comp_level = comp_level }; if (!evlist->mmap) - evlist->mmap = perf_evlist__alloc_mmap(evlist, false); + evlist->mmap = evlist__alloc_mmap(evlist, false); if (!evlist->mmap) return -ENOMEM; From 470579b0211d370fd3bd7c2f2b2b098f1ca1ae65 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 14:34:52 +0200 Subject: [PATCH 275/345] perf tools: Rename perf_evlist__exit() to evlist__exit() Rename perf_evlist__exit() to evlist__exit(), so we don't have a name clash when we add perf_evlist__exit() to libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 2 +- tools/perf/util/python.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5bde2e5bf0e3..4c5b7040c02b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -137,7 +137,7 @@ static void perf_evlist__purge(struct evlist *evlist) evlist->core.nr_entries = 0; } -void perf_evlist__exit(struct evlist *evlist) +void evlist__exit(struct evlist *evlist) { zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); @@ -156,7 +156,7 @@ void evlist__delete(struct evlist *evlist) evlist->core.cpus = NULL; evlist->core.threads = NULL; perf_evlist__purge(evlist); - perf_evlist__exit(evlist); + evlist__exit(evlist); free(evlist); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f07501602353..b33c5d67410a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -65,7 +65,7 @@ struct evlist *perf_evlist__new_default(void); struct evlist *perf_evlist__new_dummy(void); void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -void perf_evlist__exit(struct evlist *evlist); +void evlist__exit(struct evlist *evlist); void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b102d174b868..60a6b6e8e176 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -884,7 +884,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist, static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) { - perf_evlist__exit(&pevlist->evlist); + evlist__exit(&pevlist->evlist); Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } From e6b1878d4eea85ab453e4b198cbb26a34614fdc0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 5 Sep 2019 10:11:37 +0200 Subject: [PATCH 276/345] perf tools: Rename perf_evlist__purge() to evlist__purge() Rename (perf_evlist__purge) to evlist__purge(), so we don't have a name clash when we add (perf_evlist__purge) in libperf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4c5b7040c02b..c96c743cc1d2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -124,7 +124,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist) perf_evlist__set_id_pos(evlist); } -static void perf_evlist__purge(struct evlist *evlist) +static void evlist__purge(struct evlist *evlist) { struct evsel *pos, *n; @@ -155,7 +155,7 @@ void evlist__delete(struct evlist *evlist) perf_thread_map__put(evlist->core.threads); evlist->core.cpus = NULL; evlist->core.threads = NULL; - perf_evlist__purge(evlist); + evlist__purge(evlist); evlist__exit(evlist); free(evlist); } From d80a5540bccb4a9e7bd49d249056ce4c7d385ff3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 18 Aug 2019 23:02:58 +0200 Subject: [PATCH 277/345] libperf: Link libapi.a in libperf.so Linking libapi.a in libperf.so, because we are about to use some of the API functions in it. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/Makefile | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index e325c0503dc6..54466cc84544 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -59,7 +59,13 @@ else CFLAGS := -g -Wall endif -INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = \ +-I$(srctree)/tools/perf/lib/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS) @@ -88,13 +94,34 @@ LIBPERF_PC := $(OUTPUT)libperf.pc LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so* +LIB_DIR := $(srctree)/tools/lib/api/ + +ifneq ($(OUTPUT),) +ifneq ($(subdir),) + API_PATH=$(OUTPUT)/../lib/api/ +else + API_PATH=$(OUTPUT) +endif +else + API_PATH=$(LIB_DIR) +endif + +LIBAPI = $(API_PATH)libapi.a + +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a + +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(LIBPERF_IN): FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) -$(LIBPERF_SO): $(LIBPERF_IN) +$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ @ln -sf $(@F) $(OUTPUT)libperf.so @@ -106,7 +133,7 @@ libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC) all: fixdep $(Q)$(MAKE) libs -clean: +clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC) $(Q)$(MAKE) -C tests clean From e0fcfb086fbbb6233de1062d4b2f05e9afedab3b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2019 12:20:38 -0300 Subject: [PATCH 278/345] perf evlist: Adopt backwards ring buffer state enum As this isn't used at all in mmap.h but in evlist.h, so to cut down the header dependency tree, move it to where it is used. Also add mmap.h to the places using it but previously getting it indirectly via evlist.h. Add missing pthread.h to evlist.h, as it has a pthread_t struct member and was getting the header via mmap.h. Noticed while processing a Jiri's libperf batch touching mmap.h, where almost everything gets rebuilt because evlist.h is so popular, so cut down't this rebuild the world party. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Link: https://lkml.kernel.org/n/tip-he0uljeftl0xfveh3d6vtode@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/auxtrace.c | 1 + tools/perf/arch/x86/tests/perf-time-to-tsc.c | 1 + tools/perf/arch/x86/util/intel-bts.c | 1 + tools/perf/arch/x86/util/intel-pt.c | 1 + tools/perf/builtin-kvm.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/backward-ring-buffer.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/code-reading.c | 1 + tools/perf/tests/hists_link.c | 1 + tools/perf/tests/keep-tracking.c | 1 + tools/perf/tests/mmap-basic.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 1 + tools/perf/tests/perf-record.c | 1 + tools/perf/tests/sw-clock.c | 1 + tools/perf/tests/switch-tracking.c | 1 + tools/perf/tests/task-exit.c | 1 + tools/perf/ui/gtk/hists.c | 1 + tools/perf/util/annotate.c | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 30 +++++++++++++++++++- tools/perf/util/mmap.h | 28 ------------------ tools/perf/util/parse-events.c | 1 + 26 files changed, 53 insertions(+), 29 deletions(-) diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index b0fb70e38960..0db5c58c98e8 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 42ae47525a76..e1dd5f8894ba 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -17,6 +17,7 @@ #include "thread_map.h" #include "record.h" #include "tsc.h" +#include "util/mmap.h" #include "tests/tests.h" #include "arch-tests.h" diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 090d90e093df..64b409dad6e2 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -15,6 +15,7 @@ #include "../../util/event.h" #include "../../util/evsel.h" #include "../../util/evlist.h" +#include "../../util/mmap.h" #include "../../util/session.h" #include "../../util/pmu.h" #include "../../util/debug.h" diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 3d041b89f018..6c139611d231 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -18,6 +18,7 @@ #include "../../util/evlist.h" #include "../../util/evsel.h" #include "../../util/cpumap.h" +#include "../../util/mmap.h" #include #include "../../util/parse-events.h" #include "../../util/pmu.h" diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 72debb7bd20d..30852848ed9c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -5,6 +5,7 @@ #include "util/build-id.h" #include "util/evsel.h" #include "util/evlist.h" +#include "util/mmap.h" #include "util/term.h" #include "util/symbol.h" #include "util/thread.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 093d9ab5633e..1bb3d91c6599 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -20,6 +20,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" +#include "util/mmap.h" #include "util/target.h" #include "util/session.h" #include "util/tool.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 771b3ff47dc3..e637a08655db 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -30,6 +30,7 @@ #include "util/event.h" #include "util/machine.h" #include "util/map.h" +#include "util/mmap.h" #include "util/session.h" #include "util/symbol.h" #include "util/synthetic-events.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ff989351567d..c44280358e58 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -31,6 +31,7 @@ #include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" +#include "util/mmap.h" #include #include #include "util/machine.h" diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 3073a68d17b9..c59d3752d48d 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -10,6 +10,7 @@ #include "tests.h" #include "debug.h" #include "parse-events.h" +#include "util/mmap.h" #include #include diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 964731915498..3c8533fdbce5 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -19,6 +19,7 @@ #include "llvm.h" #include "debug.h" #include "parse-events.h" +#include "util/mmap.h" #define NR_ITERS 111 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 413783b69006..bc6db3e7a1c5 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -24,6 +24,7 @@ #include "symbol.h" #include "event.h" #include "record.h" +#include "util/mmap.h" #include "util/synthetic-events.h" #include "thread.h" diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 8be4d0b61e3a..1a3bdc0a2d14 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -8,6 +8,7 @@ #include "machine.h" #include "parse-events.h" #include "hists_common.h" +#include "util/mmap.h" #include #include diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 8e2ec5f72042..c6030fdf7d4c 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -13,6 +13,7 @@ #include "record.h" #include "thread_map.h" #include "tests.h" +#include "util/mmap.h" #define CHECK__(x) { \ while ((x) < 0) { \ diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 4d42e455feb7..3a22dce991ba 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -11,6 +11,7 @@ #include "evsel.h" #include "thread_map.h" #include "tests.h" +#include "util/mmap.h" #include #include #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 5c2576174ae9..e20eaadb1a35 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -11,6 +11,7 @@ #include "record.h" #include "tests.h" #include "debug.h" +#include "util/mmap.h" #include #ifndef O_DIRECTORY diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 669fd88e7f30..ea8bcaa13ea5 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -11,6 +11,7 @@ #include "debug.h" #include "record.h" #include "tests.h" +#include "util/mmap.h" static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) { diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index fbff60815be8..84519df87f30 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -12,6 +12,7 @@ #include "util/evsel.h" #include "util/evlist.h" #include "util/cpumap.h" +#include "util/mmap.h" #include "util/thread_map.h" #include diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index c92e287e0f53..e5c3f2ee223a 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -16,6 +16,7 @@ #include "thread_map.h" #include "record.h" #include "tests.h" +#include "util/mmap.h" static int spin_sleep(void) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 718838b5e724..7fc39af48a76 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -5,6 +5,7 @@ #include "target.h" #include "thread_map.h" #include "tests.h" +#include "util/mmap.h" #include #include diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 6c2efc10bf5c..ed1a97b2c4b0 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -8,6 +8,7 @@ #include "../string2.h" #include "gtk.h" #include +#include #include #define MAX_COLUMNS 32 diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index d441cca6a517..69d0a1991b29 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -34,6 +34,7 @@ #include "bpf-event.h" #include "block-range.h" #include "string2.h" +#include "util/mmap.h" #include "arch/common.h" #include #include diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 1d22ffd972ac..4bd92f5bfb5f 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -51,6 +51,7 @@ #include "arm-spe.h" #include "s390-cpumsf.h" #include "util.h" // page_size +#include "util/mmap.h" #include #include diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c96c743cc1d2..f700dbe043b7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -10,6 +10,7 @@ #include #include #include "cpumap.h" +#include "util/mmap.h" #include "thread_map.h" #include "target.h" #include "evlist.h" diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b33c5d67410a..8b9c35efea67 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -11,7 +11,7 @@ #include #include "events_stats.h" #include "evsel.h" -#include "mmap.h" +#include #include #include @@ -20,6 +20,34 @@ struct thread_map; struct perf_cpu_map; struct record_opts; +/* + * State machine of bkw_mmap_state: + * + * .________________(forbid)_____________. + * | V + * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY + * ^ ^ | ^ | + * | |__(forbid)____/ |___(forbid)___/| + * | | + * \_________________(3)_______________/ + * + * NOTREADY : Backward ring buffers are not ready + * RUNNING : Backward ring buffers are recording + * DATA_PENDING : We are required to collect data from backward ring buffers + * EMPTY : We have collected data from backward ring buffers. + * + * (0): Setup backward ring buffer + * (1): Pause ring buffers for reading + * (2): Read from ring buffers + * (3): Resume ring buffers for recording + */ +enum bkw_mmap_state { + BKW_MMAP_NOTREADY, + BKW_MMAP_RUNNING, + BKW_MMAP_DATA_PENDING, + BKW_MMAP_EMPTY, +}; + #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 01524608a984..ab086ede044a 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,34 +45,6 @@ struct mmap { int comp_level; }; -/* - * State machine of bkw_mmap_state: - * - * .________________(forbid)_____________. - * | V - * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY - * ^ ^ | ^ | - * | |__(forbid)____/ |___(forbid)___/| - * | | - * \_________________(3)_______________/ - * - * NOTREADY : Backward ring buffers are not ready - * RUNNING : Backward ring buffers are recording - * DATA_PENDING : We are required to collect data from backward ring buffers - * EMPTY : We have collected data from backward ring buffers. - * - * (0): Setup backward ring buffer - * (1): Pause ring buffers for reading - * (2): Read from ring buffers - * (3): Resume ring buffers for recording - */ -enum bkw_mmap_state { - BKW_MMAP_NOTREADY, - BKW_MMAP_RUNNING, - BKW_MMAP_DATA_PENDING, - BKW_MMAP_EMPTY, -}; - struct mmap_params { int prot, mask, nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a33a1d5059a2..9cf1371c90a3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -34,6 +34,7 @@ #include "asm/bug.h" #include "util/parse-branch-options.h" #include "metricgroup.h" +#include "util/mmap.h" #define MAX_NAME_LEN 100 From 547740f7b357cd91cca1fab5d7bf3a37469f7587 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:07:44 +0200 Subject: [PATCH 279/345] libperf: Add perf_mmap struct Add the perf_mmap struct to libperf. The definition is added into: include/internal/mmap.h which is not to be included by users, but shared within perf and libperf. Committer notes: Remove unnecessary includes from tools/perf/lib/include/internal/mmap.h, those will be readded as they become necessary, later in the series. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/builtin-record.c | 14 ++++++------- tools/perf/lib/include/internal/mmap.h | 14 +++++++++++++ tools/perf/util/mmap.c | 22 ++++++++++---------- tools/perf/util/mmap.h | 7 ++++--- 7 files changed, 39 insertions(+), 24 deletions(-) create mode 100644 tools/perf/lib/include/internal/mmap.h diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index e1dd5f8894ba..bd404db94b3a 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -93,7 +93,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(evlist__mmap(evlist, UINT_MAX)); - pc = evlist->mmap[0].base; + pc = evlist->mmap[0].core.base; ret = perf_read_tsc_conversion(pc, &tc); if (ret) { if (ret == -EOPNOTSUPP) { diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 64b409dad6e2..130925141369 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -78,7 +78,7 @@ static int intel_bts_info_fill(struct auxtrace_record *itr, if (!session->evlist->nr_mmaps) return -EINVAL; - pc = session->evlist->mmap[0].base; + pc = session->evlist->mmap[0].core.base; if (pc) { err = perf_read_tsc_conversion(pc, &tc); if (err) { diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6c139611d231..34d7118bf390 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -355,7 +355,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, if (!session->evlist->nr_mmaps) return -EINVAL; - pc = session->evlist->mmap[0].base; + pc = session->evlist->mmap[0].core.base; if (pc) { err = perf_read_tsc_conversion(pc, &tc); if (err) { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1bb3d91c6599..2520c0212275 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -261,7 +261,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size struct record_aio *aio = to; /* - * map->base data pointed by buf is copied into free map->aio.data[] buffer + * map->core.base data pointed by buf is copied into free map->aio.data[] buffer * to release space in the kernel buffer as fast as possible, calling * perf_mmap__consume() from perf_mmap__push() function. * @@ -360,7 +360,7 @@ static void record__aio_mmap_read_sync(struct record *rec) for (i = 0; i < evlist->nr_mmaps; i++) { struct mmap *map = &maps[i]; - if (map->base) + if (map->core.base) record__aio_sync(map, true); } } @@ -970,7 +970,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, u64 flush = 0; struct mmap *map = &maps[i]; - if (map->base) { + if (map->core.base) { record__adjust_affinity(rec, map); if (synch) { flush = map->flush; @@ -1198,10 +1198,10 @@ static const struct perf_event_mmap_page * perf_evlist__pick_pc(struct evlist *evlist) { if (evlist) { - if (evlist->mmap && evlist->mmap[0].base) - return evlist->mmap[0].base; - if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) - return evlist->overwrite_mmap[0].base; + if (evlist->mmap && evlist->mmap[0].core.base) + return evlist->mmap[0].core.base; + if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) + return evlist->overwrite_mmap[0].core.base; } return NULL; } diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h new file mode 100644 index 000000000000..2ef051901f48 --- /dev/null +++ b/tools/perf/lib/include/internal/mmap.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LIBPERF_INTERNAL_MMAP_H +#define __LIBPERF_INTERNAL_MMAP_H + +/** + * struct perf_mmap - perf's ring buffer mmap details + * + * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this + */ +struct perf_mmap { + void *base; +}; + +#endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index f3b7c8b0fa90..76190b2edd78 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -31,7 +31,7 @@ size_t perf_mmap__mmap_len(struct mmap *map) static union perf_event *perf_mmap__read(struct mmap *map, u64 *startp, u64 end) { - unsigned char *data = map->base + page_size; + unsigned char *data = map->core.base + page_size; union perf_event *event = NULL; int diff = end - *startp; @@ -116,7 +116,7 @@ void perf_mmap__get(struct mmap *map) void perf_mmap__put(struct mmap *map) { - BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + BUG_ON(map->core.base && refcount_read(&map->refcnt) == 0); if (refcount_dec_and_test(&map->refcnt)) perf_mmap__munmap(map); @@ -317,9 +317,9 @@ void perf_mmap__munmap(struct mmap *map) munmap(map->data, perf_mmap__mmap_len(map)); map->data = NULL; } - if (map->base != NULL) { - munmap(map->base, perf_mmap__mmap_len(map)); - map->base = NULL; + if (map->core.base != NULL) { + munmap(map->core.base, perf_mmap__mmap_len(map)); + map->core.base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } @@ -370,12 +370,12 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) refcount_set(&map->refcnt, 2); map->prev = 0; map->mask = mp->mask; - map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, MAP_SHARED, fd, 0); - if (map->base == MAP_FAILED) { + if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - map->base = NULL; + map->core.base = NULL; return -1; } map->fd = fd; @@ -399,7 +399,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) } if (auxtrace_mmap__mmap(&map->auxtrace_mmap, - &mp->auxtrace_mp, map->base, fd)) + &mp->auxtrace_mp, map->core.base, fd)) return -1; return perf_mmap__aio_mmap(map, mp); @@ -444,7 +444,7 @@ static int __perf_mmap__read_init(struct mmap *md) { u64 head = perf_mmap__read_head(md); u64 old = md->prev; - unsigned char *data = md->base + page_size; + unsigned char *data = md->core.base + page_size; unsigned long size; md->start = md->overwrite ? head : old; @@ -489,7 +489,7 @@ int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); - unsigned char *data = md->base + page_size; + unsigned char *data = md->core.base + page_size; unsigned long size; void *buf; int rc = 0; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index ab086ede044a..d2f0ce581e2c 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -1,6 +1,7 @@ #ifndef __PERF_MMAP_H #define __PERF_MMAP_H 1 +#include #include #include #include @@ -20,7 +21,7 @@ struct aiocb; * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ struct mmap { - void *base; + struct perf_mmap core; int mask; int fd; int cpu; @@ -60,12 +61,12 @@ void perf_mmap__consume(struct mmap *map); static inline u64 perf_mmap__read_head(struct mmap *mm) { - return ring_buffer_read_head(mm->base); + return ring_buffer_read_head(mm->core.base); } static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) { - ring_buffer_write_tail(md->base, tail); + ring_buffer_write_tail(md->core.base, tail); } union perf_event *perf_mmap__read_forward(struct mmap *map); From 4fd0cef2c7b6469abfeef1f9bd056265ce369b13 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:27:55 +0200 Subject: [PATCH 280/345] libperf: Add 'mask' to struct perf_mmap Move 'mask' from tools/perf's mmap to libperf's perf_mmap struct. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/util/mmap.c | 24 ++++++++++++------------ tools/perf/util/mmap.h | 1 - 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 2ef051901f48..1caa1e8ee5c6 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -9,6 +9,7 @@ */ struct perf_mmap { void *base; + int mask; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 76190b2edd78..702e8e0b90ea 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -24,7 +24,7 @@ size_t perf_mmap__mmap_len(struct mmap *map) { - return map->mask + 1 + page_size; + return map->core.mask + 1 + page_size; } /* When check_messup is true, 'end' must points to a good entry */ @@ -38,7 +38,7 @@ static union perf_event *perf_mmap__read(struct mmap *map, if (diff >= (int)sizeof(event->header)) { size_t size; - event = (union perf_event *)&data[*startp & map->mask]; + event = (union perf_event *)&data[*startp & map->core.mask]; size = event->header.size; if (size < sizeof(event->header) || diff < (int)size) @@ -48,14 +48,14 @@ static union perf_event *perf_mmap__read(struct mmap *map, * Event straddles the mmap boundary -- header should always * be inside due to u64 alignment of output. */ - if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { unsigned int offset = *startp; unsigned int len = min(sizeof(*event), size), cpy; void *dst = map->event_copy; do { - cpy = min(map->mask + 1 - (offset & map->mask), len); - memcpy(dst, &data[offset & map->mask], cpy); + cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); + memcpy(dst, &data[offset & map->core.mask], cpy); offset += cpy; dst += cpy; len -= cpy; @@ -369,7 +369,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) */ refcount_set(&map->refcnt, 2); map->prev = 0; - map->mask = mp->mask; + map->core.mask = mp->mask; map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, MAP_SHARED, fd, 0); if (map->core.base == MAP_FAILED) { @@ -454,7 +454,7 @@ static int __perf_mmap__read_init(struct mmap *md) return -EAGAIN; size = md->end - md->start; - if (size > (unsigned long)(md->mask) + 1) { + if (size > (unsigned long)(md->core.mask) + 1) { if (!md->overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); @@ -467,7 +467,7 @@ static int __perf_mmap__read_init(struct mmap *md) * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) + if (overwrite_rb_find_range(data, md->core.mask, &md->start, &md->end)) return -EINVAL; } @@ -500,9 +500,9 @@ int perf_mmap__push(struct mmap *md, void *to, size = md->end - md->start; - if ((md->start & md->mask) + size != (md->end & md->mask)) { - buf = &data[md->start & md->mask]; - size = md->mask + 1 - (md->start & md->mask); + if ((md->start & md->core.mask) + size != (md->end & md->core.mask)) { + buf = &data[md->start & md->core.mask]; + size = md->core.mask + 1 - (md->start & md->core.mask); md->start += size; if (push(md, to, buf, size) < 0) { @@ -511,7 +511,7 @@ int perf_mmap__push(struct mmap *md, void *to, } } - buf = &data[md->start & md->mask]; + buf = &data[md->start & md->core.mask]; size = md->end - md->start; md->start += size; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index d2f0ce581e2c..370138e395fc 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,7 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - int mask; int fd; int cpu; refcount_t refcnt; From 2cf07b294a604aecd6b583e60724eaa1607f0fbc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:31:17 +0200 Subject: [PATCH 281/345] libperf: Add 'fd' to struct perf_mmap Move 'fd' from tools/perf's mmap to libperf's perf_mmap struct. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/util/evlist.c | 4 ++-- tools/perf/util/mmap.c | 4 ++-- tools/perf/util/mmap.h | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 1caa1e8ee5c6..892cbd401d8d 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -10,6 +10,7 @@ struct perf_mmap { void *base; int mask; + int fd; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f700dbe043b7..a4e1c19c969f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -652,7 +652,7 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->overwrite_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].core.fd; int err; if (fd < 0) @@ -708,7 +708,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, return NULL; for (i = 0; i < evlist->nr_mmaps; i++) { - map[i].fd = -1; + map[i].core.fd = -1; map[i].overwrite = overwrite; /* * When the perf_mmap() call is made we grab one refcount, plus diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 702e8e0b90ea..40bf124cb658 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -320,7 +320,7 @@ void perf_mmap__munmap(struct mmap *map) if (map->core.base != NULL) { munmap(map->core.base, perf_mmap__mmap_len(map)); map->core.base = NULL; - map->fd = -1; + map->core.fd = -1; refcount_set(&map->refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); @@ -378,7 +378,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) map->core.base = NULL; return -1; } - map->fd = fd; + map->core.fd = fd; map->cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 370138e395fc..de991194af8d 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,7 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - int fd; int cpu; refcount_t refcnt; u64 prev; From 56a94706cd7233b158ab13e5ac93f5a97ca88941 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:33:20 +0200 Subject: [PATCH 282/345] libperf: Add 'cpu' to struct perf_mmap Move 'cpu' from tools/perf's mmap to libperf's perf_mmap struct. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/util/mmap.c | 8 ++++---- tools/perf/util/mmap.h | 1 - tools/perf/util/python.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 892cbd401d8d..f7d1809fa34c 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -11,6 +11,7 @@ struct perf_mmap { void *base; int mask; int fd; + int cpu; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 40bf124cb658..dc8320891344 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -256,7 +256,7 @@ static int perf_mmap__aio_mmap(struct mmap *map, struct mmap_params *mp) pr_debug2("failed to allocate data buffer area, error %m"); return -1; } - ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); + ret = perf_mmap__aio_bind(map, i, map->core.cpu, mp->affinity); if (ret == -1) return -1; /* @@ -347,9 +347,9 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params { CPU_ZERO(&map->affinity_mask); if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) - build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); + build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - CPU_SET(map->cpu, &map->affinity_mask); + CPU_SET(map->core.cpu, &map->affinity_mask); } int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) @@ -379,7 +379,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) return -1; } map->core.fd = fd; - map->cpu = cpu; + map->core.cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index de991194af8d..8ab779c98f4d 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,7 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - int cpu; refcount_t refcnt; u64 prev; u64 start; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 60a6b6e8e176..ba4085d7ae9f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -991,7 +991,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) for (i = 0; i < evlist->nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; - if (md->cpu == cpu) + if (md->core.cpu == cpu) return md; } From e03edfeac0330eaa2b19b82fc942611c1abf2120 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:35:35 +0200 Subject: [PATCH 283/345] libperf: Add 'refcnt' to struct perf_mmap Move 'refcnt' from tools/perf's mmap to libperf's perf_mmap struct. Committer notes: Add the refcount.h include directive here, now it is needed. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 3 +++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 18 +++++++++--------- tools/perf/util/mmap.h | 1 - 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index f7d1809fa34c..e6fb850ba47a 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -2,6 +2,8 @@ #ifndef __LIBPERF_INTERNAL_MMAP_H #define __LIBPERF_INTERNAL_MMAP_H +#include + /** * struct perf_mmap - perf's ring buffer mmap details * @@ -12,6 +14,7 @@ struct perf_mmap { int mask; int fd; int cpu; + refcount_t refcnt; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a4e1c19c969f..d987e0e5d62b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -719,7 +719,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - refcount_set(&map[i].refcnt, 0); + refcount_set(&map[i].core.refcnt, 0); } return map; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index dc8320891344..d6406d216cfe 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -89,7 +89,7 @@ union perf_event *perf_mmap__read_event(struct mmap *map) /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return NULL; /* non-overwirte doesn't pause the ringbuffer */ @@ -111,14 +111,14 @@ static bool perf_mmap__empty(struct mmap *map) void perf_mmap__get(struct mmap *map) { - refcount_inc(&map->refcnt); + refcount_inc(&map->core.refcnt); } void perf_mmap__put(struct mmap *map) { - BUG_ON(map->core.base && refcount_read(&map->refcnt) == 0); + BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); - if (refcount_dec_and_test(&map->refcnt)) + if (refcount_dec_and_test(&map->core.refcnt)) perf_mmap__munmap(map); } @@ -130,7 +130,7 @@ void perf_mmap__consume(struct mmap *map) perf_mmap__write_tail(map, old); } - if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) perf_mmap__put(map); } @@ -321,7 +321,7 @@ void perf_mmap__munmap(struct mmap *map) munmap(map->core.base, perf_mmap__mmap_len(map)); map->core.base = NULL; map->core.fd = -1; - refcount_set(&map->refcnt, 0); + refcount_set(&map->core.refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -367,7 +367,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). */ - refcount_set(&map->refcnt, 2); + refcount_set(&map->core.refcnt, 2); map->prev = 0; map->core.mask = mp->mask; map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, @@ -479,7 +479,7 @@ int perf_mmap__read_init(struct mmap *map) /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return -ENOENT; return __perf_mmap__read_init(map); @@ -537,7 +537,7 @@ void perf_mmap__read_done(struct mmap *map) /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!refcount_read(&map->refcnt)) + if (!refcount_read(&map->core.refcnt)) return; map->prev = perf_mmap__read_head(map); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 8ab779c98f4d..5febd22fbe2e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,7 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - refcount_t refcnt; u64 prev; u64 start; u64 end; From ebe4d72bba86a499ea0935c58ba1c2aea5aafb43 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:39:53 +0200 Subject: [PATCH 284/345] libperf: Add prev/start/end to struct perf_mmap Move prev/start/end from tools/perf's mmap to libperf's perf_mmap struct. Committer notes: Add linux/types.h as we use u64. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 4 +++ tools/perf/util/mmap.c | 50 +++++++++++++------------- tools/perf/util/mmap.h | 3 -- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index e6fb850ba47a..ebf5b93754fd 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -3,6 +3,7 @@ #define __LIBPERF_INTERNAL_MMAP_H #include +#include /** * struct perf_mmap - perf's ring buffer mmap details @@ -15,6 +16,9 @@ struct perf_mmap { int fd; int cpu; refcount_t refcnt; + u64 prev; + u64 start; + u64 end; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index d6406d216cfe..6ce70ff005cb 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -94,19 +94,19 @@ union perf_event *perf_mmap__read_event(struct mmap *map) /* non-overwirte doesn't pause the ringbuffer */ if (!map->overwrite) - map->end = perf_mmap__read_head(map); + map->core.end = perf_mmap__read_head(map); - event = perf_mmap__read(map, &map->start, map->end); + event = perf_mmap__read(map, &map->core.start, map->core.end); if (!map->overwrite) - map->prev = map->start; + map->core.prev = map->core.start; return event; } static bool perf_mmap__empty(struct mmap *map) { - return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base; + return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } void perf_mmap__get(struct mmap *map) @@ -125,7 +125,7 @@ void perf_mmap__put(struct mmap *map) void perf_mmap__consume(struct mmap *map) { if (!map->overwrite) { - u64 old = map->prev; + u64 old = map->core.prev; perf_mmap__write_tail(map, old); } @@ -368,7 +368,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) * perf_evlist__filter_pollfd(). */ refcount_set(&map->core.refcnt, 2); - map->prev = 0; + map->core.prev = 0; map->core.mask = mp->mask; map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, MAP_SHARED, fd, 0); @@ -443,22 +443,22 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) static int __perf_mmap__read_init(struct mmap *md) { u64 head = perf_mmap__read_head(md); - u64 old = md->prev; + u64 old = md->core.prev; unsigned char *data = md->core.base + page_size; unsigned long size; - md->start = md->overwrite ? head : old; - md->end = md->overwrite ? old : head; + md->core.start = md->overwrite ? head : old; + md->core.end = md->overwrite ? old : head; - if ((md->end - md->start) < md->flush) + if ((md->core.end - md->core.start) < md->flush) return -EAGAIN; - size = md->end - md->start; + size = md->core.end - md->core.start; if (size > (unsigned long)(md->core.mask) + 1) { if (!md->overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - md->prev = head; + md->core.prev = head; perf_mmap__consume(md); return -EAGAIN; } @@ -467,7 +467,7 @@ static int __perf_mmap__read_init(struct mmap *md) * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->core.mask, &md->start, &md->end)) + if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) return -EINVAL; } @@ -498,12 +498,12 @@ int perf_mmap__push(struct mmap *md, void *to, if (rc < 0) return (rc == -EAGAIN) ? 1 : -1; - size = md->end - md->start; + size = md->core.end - md->core.start; - if ((md->start & md->core.mask) + size != (md->end & md->core.mask)) { - buf = &data[md->start & md->core.mask]; - size = md->core.mask + 1 - (md->start & md->core.mask); - md->start += size; + if ((md->core.start & md->core.mask) + size != (md->core.end & md->core.mask)) { + buf = &data[md->core.start & md->core.mask]; + size = md->core.mask + 1 - (md->core.start & md->core.mask); + md->core.start += size; if (push(md, to, buf, size) < 0) { rc = -1; @@ -511,16 +511,16 @@ int perf_mmap__push(struct mmap *md, void *to, } } - buf = &data[md->start & md->core.mask]; - size = md->end - md->start; - md->start += size; + buf = &data[md->core.start & md->core.mask]; + size = md->core.end - md->core.start; + md->core.start += size; if (push(md, to, buf, size) < 0) { rc = -1; goto out; } - md->prev = head; + md->core.prev = head; perf_mmap__consume(md); out: return rc; @@ -529,8 +529,8 @@ out: /* * Mandatory for overwrite mode * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->prev. - * Need to correct the map->prev to head which is the end of next read. + * The last perf_mmap__read() will set tail to map->core.prev. + * Need to correct the map->core.prev to head which is the end of next read. */ void perf_mmap__read_done(struct mmap *map) { @@ -540,5 +540,5 @@ void perf_mmap__read_done(struct mmap *map) if (!refcount_read(&map->core.refcnt)) return; - map->prev = perf_mmap__read_head(map); + map->core.prev = perf_mmap__read_head(map); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 5febd22fbe2e..a3dd53f2bfb8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,9 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - u64 prev; - u64 start; - u64 end; bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); From 8df7a869818ec278969d34e4792985f12b24f23d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:42:56 +0200 Subject: [PATCH 285/345] libperf: Add 'overwrite' to 'struct perf_mmap' Move 'overwrite' from tools/perf's mmap to libperf's perf_mmap struct. Committer notes: Add stdbool.h as we start using 'bool'. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 12 ++++++------ tools/perf/util/mmap.h | 1 - 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index ebf5b93754fd..47c09f375fb6 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -4,6 +4,7 @@ #include #include +#include /** * struct perf_mmap - perf's ring buffer mmap details @@ -19,6 +20,7 @@ struct perf_mmap { u64 prev; u64 start; u64 end; + bool overwrite; }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d987e0e5d62b..16d47a420bc2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -709,7 +709,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, for (i = 0; i < evlist->nr_mmaps; i++) { map[i].core.fd = -1; - map[i].overwrite = overwrite; + map[i].core.overwrite = overwrite; /* * When the perf_mmap() call is made we grab one refcount, plus * one extra to let perf_mmap__consume() get the last diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 6ce70ff005cb..a8850ce2c2ff 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -93,12 +93,12 @@ union perf_event *perf_mmap__read_event(struct mmap *map) return NULL; /* non-overwirte doesn't pause the ringbuffer */ - if (!map->overwrite) + if (!map->core.overwrite) map->core.end = perf_mmap__read_head(map); event = perf_mmap__read(map, &map->core.start, map->core.end); - if (!map->overwrite) + if (!map->core.overwrite) map->core.prev = map->core.start; return event; @@ -124,7 +124,7 @@ void perf_mmap__put(struct mmap *map) void perf_mmap__consume(struct mmap *map) { - if (!map->overwrite) { + if (!map->core.overwrite) { u64 old = map->core.prev; perf_mmap__write_tail(map, old); @@ -447,15 +447,15 @@ static int __perf_mmap__read_init(struct mmap *md) unsigned char *data = md->core.base + page_size; unsigned long size; - md->core.start = md->overwrite ? head : old; - md->core.end = md->overwrite ? old : head; + md->core.start = md->core.overwrite ? head : old; + md->core.end = md->core.overwrite ? old : head; if ((md->core.end - md->core.start) < md->flush) return -EAGAIN; size = md->core.end - md->core.start; if (size > (unsigned long)(md->core.mask) + 1) { - if (!md->overwrite) { + if (!md->core.overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); md->core.prev = head; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index a3dd53f2bfb8..d3e74c8da51a 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -22,7 +22,6 @@ struct aiocb; */ struct mmap { struct perf_mmap core; - bool overwrite; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); #ifdef HAVE_AIO_SUPPORT From 4443e6d7704ee85412e5cb0a0181d7ceee7e984f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Jul 2019 22:47:58 +0200 Subject: [PATCH 286/345] libperf: Add 'event_copy' to 'struct perf_mmap' Move 'event_copy' from tools/perf's mmap to libperf's perf_mmap struct. Committer notes: Add linux/compiler.h as we need it for '__aligned'. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/mmap.h | 5 +++++ tools/perf/util/mmap.c | 4 ++-- tools/perf/util/mmap.h | 1 - 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 47c09f375fb6..10653b6e864e 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -2,10 +2,14 @@ #ifndef __LIBPERF_INTERNAL_MMAP_H #define __LIBPERF_INTERNAL_MMAP_H +#include #include #include #include +/* perf sample has 16 bits size limit */ +#define PERF_SAMPLE_MAX_SIZE (1 << 16) + /** * struct perf_mmap - perf's ring buffer mmap details * @@ -21,6 +25,7 @@ struct perf_mmap { u64 start; u64 end; bool overwrite; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a8850ce2c2ff..4b8ec8dd79c5 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -51,7 +51,7 @@ static union perf_event *perf_mmap__read(struct mmap *map, if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { unsigned int offset = *startp; unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->event_copy; + void *dst = map->core.event_copy; do { cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); @@ -61,7 +61,7 @@ static union perf_event *perf_mmap__read(struct mmap *map, len -= cpy; } while (len); - event = (union perf_event *)map->event_copy; + event = (union perf_event *)map->core.event_copy; } *startp += size; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index d3e74c8da51a..75c77fa57121 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -23,7 +23,6 @@ struct aiocb; struct mmap { struct perf_mmap core; struct auxtrace_mmap auxtrace_mmap; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); #ifdef HAVE_AIO_SUPPORT struct { void **data; From 65aa2e6bae3658cbc84c2e628a5c0ca163686204 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 27 Aug 2019 16:05:18 +0200 Subject: [PATCH 287/345] libperf: Add 'flush' to 'struct perf_mmap' Move 'flush' from tools/perf's mmap to libperf's perf_mmap struct. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 10 +++++----- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/util/mmap.c | 4 ++-- tools/perf/util/mmap.h | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2520c0212275..06738efd9820 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -973,13 +973,13 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, if (map->core.base) { record__adjust_affinity(rec, map); if (synch) { - flush = map->flush; - map->flush = 1; + flush = map->core.flush; + map->core.flush = 1; } if (!record__aio_enabled(rec)) { if (perf_mmap__push(map, rec, record__pushfn) < 0) { if (synch) - map->flush = flush; + map->core.flush = flush; rc = -1; goto out; } @@ -987,13 +987,13 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, if (record__aio_push(rec, map, &off) < 0) { record__aio_set_pos(trace_fd, off); if (synch) - map->flush = flush; + map->core.flush = flush; rc = -1; goto out; } } if (synch) - map->flush = flush; + map->core.flush = flush; } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 10653b6e864e..ba1e519c15b9 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -25,6 +25,7 @@ struct perf_mmap { u64 start; u64 end; bool overwrite; + u64 flush; char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 4b8ec8dd79c5..4cc3b54b2f73 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -383,7 +383,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) perf_mmap__setup_affinity_mask(map, mp); - map->flush = mp->flush; + map->core.flush = mp->flush; map->comp_level = mp->comp_level; @@ -450,7 +450,7 @@ static int __perf_mmap__read_init(struct mmap *md) md->core.start = md->core.overwrite ? head : old; md->core.end = md->core.overwrite ? old : head; - if ((md->core.end - md->core.start) < md->flush) + if ((md->core.end - md->core.start) < md->core.flush) return -EAGAIN; size = md->core.end - md->core.start; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 75c77fa57121..e567c1c875bd 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -32,7 +32,6 @@ struct mmap { } aio; #endif cpu_set_t affinity_mask; - u64 flush; void *data; int comp_level; }; From 648b5af3f3ae7f4fad7395c8dc84cb79eafe2ba9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Aug 2019 11:35:19 +0200 Subject: [PATCH 288/345] libperf: Move 'system_wide' from 'struct evsel' to 'struct perf_evsel' Move the 'system_wide 'member from perf's evsel to libperf's perf_evsel. Committer notes: Added stdbool.h as we now use bool here. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 4 ++-- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-stat.c | 4 ++-- tools/perf/lib/include/internal/evsel.h | 2 ++ tools/perf/tests/switch-tracking.c | 6 +++--- tools/perf/util/evlist.c | 10 +++++----- tools/perf/util/evsel.c | 8 ++++---- tools/perf/util/evsel.h | 1 - tools/perf/util/parse-events.c | 2 +- tools/perf/util/stat.c | 2 +- 10 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 34d7118bf390..1aa86a88884a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -422,7 +422,7 @@ static int intel_pt_track_switches(struct evlist *evlist) perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TIME); - evsel->system_wide = true; + evsel->core.system_wide = true; evsel->no_aux_samples = true; evsel->immediate = true; @@ -723,7 +723,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, switch_evsel->core.attr.sample_period = 1; switch_evsel->core.attr.context_switch = 1; - switch_evsel->system_wide = true; + switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a17a9306bdf6..e7a49e2d7575 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1916,7 +1916,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) int cpu, thread; static int header_printed; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; if (!header_printed) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f7d13326b830..0d55eb6bd6e2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -278,7 +278,7 @@ static int read_counter(struct evsel *counter, struct timespec *rs) if (!counter->supported) return -ENOENT; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { @@ -1671,7 +1671,7 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->system_wide) + if (!counter->core.system_wide) return; } diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 8b854d1c9b45..1bff789b0923 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -4,6 +4,7 @@ #include #include +#include struct perf_cpu_map; struct perf_thread_map; @@ -18,6 +19,7 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + bool system_wide; }; int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e5c3f2ee223a..de700aad1fed 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -144,7 +144,7 @@ static int process_sample_event(struct evlist *evlist, return err; /* * Check for no missing sched_switch events i.e. that the - * evsel->system_wide flag has worked. + * evsel->core.system_wide flag has worked. */ if (switch_tracking->tids[cpu] != -1 && switch_tracking->tids[cpu] != prev_tid) { @@ -316,7 +316,7 @@ out_free_nodes: * * This function implements a test that checks that sched_switch events and * tracking events can be recorded for a workload (current process) using the - * evsel->system_wide and evsel->tracking flags (respectively) with other events + * evsel->core.system_wide and evsel->tracking flags (respectively) with other events * sometimes enabled or disabled. */ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) @@ -396,7 +396,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ perf_evsel__set_sample_bit(switch_evsel, CPU); perf_evsel__set_sample_bit(switch_evsel, TIME); - switch_evsel->system_wide = true; + switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 16d47a420bc2..16866533745c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -319,7 +319,7 @@ int perf_evlist__add_newtp(struct evlist *evlist, static int perf_evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) { - if (evsel->system_wide) + if (evsel->core.system_wide) return 1; else return perf_thread_map__nr(evlist->core.threads); @@ -410,7 +410,7 @@ int perf_evlist__alloc_pollfd(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->system_wide) + if (evsel->core.system_wide) nfds += nr_cpus; else nfds += nr_cpus * nr_threads; @@ -536,7 +536,7 @@ static void perf_evlist__set_sid_idx(struct evlist *evlist, sid->cpu = evlist->core.cpus->map[cpu]; else sid->cpu = -1; - if (!evsel->system_wide && evlist->core.threads && thread >= 0) + if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) sid->tid = perf_thread_map__pid(evlist->core.threads, thread); else sid->tid = -1; @@ -763,7 +763,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, mp->prot &= ~PROT_WRITE; } - if (evsel->system_wide && thread) + if (evsel->core.system_wide && thread) continue; cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu); @@ -793,7 +793,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, * other events, so it should not need to be polled anyway. * Therefore don't add it for polling. */ - if (!evsel->system_wide && + if (!evsel->core.system_wide && __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { perf_mmap__put(&maps[idx]); return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 502bc3d50e0d..566c9413246c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1232,7 +1232,7 @@ int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) if (ncpus == 0 || nthreads == 0) return 0; - if (evsel->system_wide) + if (evsel->core.system_wide) nthreads = 1; evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); @@ -1663,7 +1663,7 @@ static bool ignore_missing_thread(struct evsel *evsel, return false; /* The system wide setup does not work with threads. */ - if (evsel->system_wide) + if (evsel->core.system_wide) return false; /* The -ESRCH is perf event syscall errno for pid's not found. */ @@ -1772,7 +1772,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, threads = empty_thread_map; } - if (evsel->system_wide) + if (evsel->core.system_wide) nthreads = 1; else nthreads = threads->nr; @@ -1819,7 +1819,7 @@ retry_sample_id: for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; - if (!evsel->cgrp && !evsel->system_wide) + if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); group_fd = get_group_fd(evsel, cpu, thread); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 74df298acb31..f757ff449a17 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -146,7 +146,6 @@ struct evsel { bool disabled; bool no_aux_samples; bool immediate; - bool system_wide; bool tracking; bool per_pkg; bool precise_max; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9cf1371c90a3..d7aebe9b005d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -335,7 +335,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = perf_cpu_map__get(cpus); evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->system_wide = pmu ? pmu->is_uncore : false; + evsel->core.system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index fcd54342c04c..ebdd130557fb 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -336,7 +336,7 @@ static int process_counter_maps(struct perf_stat_config *config, int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; - if (counter->system_wide) + if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { From c976ee11a0e1b3ba5e63e734dbf4b19154e39fab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Jul 2019 13:04:59 +0200 Subject: [PATCH 289/345] libperf: Move 'nr_mmaps' from 'struct evlist' to 'struct perf_evlist' Moving 'nr_mmaps' from 'struct evlist' to 'struct perf_evlist', it will be used in following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 6 +++--- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/internal/evlist.h | 1 + tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/util/evlist.c | 16 ++++++++-------- tools/perf/util/evlist.h | 1 - tools/perf/util/python.c | 2 +- 20 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 5d120b1e35ed..051e9066fb38 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -648,7 +648,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, if (priv_size != cs_etm_info_priv_size(itr, session->evlist)) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; /* If the cpu_map is empty all online CPUs are involved */ diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index eebbf31f995c..9302c6566f53 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -51,7 +51,7 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index bd404db94b3a..10b7acebc0eb 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -115,7 +115,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe evlist__disable(evlist); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 130925141369..e81535c8e9c5 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -75,7 +75,7 @@ static int intel_bts_info_fill(struct auxtrace_record *itr, if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) return -EINVAL; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; pc = session->evlist->mmap[0].core.base; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 1aa86a88884a..886b3ac60f23 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -352,7 +352,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); filter_str_len = filter ? strlen(filter) : 0; - if (!session->evlist->nr_mmaps) + if (!session->evlist->core.nr_mmaps) return -EINVAL; pc = session->evlist->mmap[0].core.base; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 30852848ed9c..e2b42efc86fa 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -802,7 +802,7 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) s64 n, ntotal = 0; u64 flush_time = ULLONG_MAX, mmap_time; - for (i = 0; i < kvm->evlist->nr_mmaps; i++) { + for (i = 0; i < kvm->evlist->core.nr_mmaps; i++) { n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time); if (n < 0) return -1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 06738efd9820..8577bf33a556 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -357,7 +357,7 @@ static void record__aio_mmap_read_sync(struct record *rec) if (!record__aio_enabled(rec)) return; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *map = &maps[i]; if (map->core.base) @@ -603,7 +603,7 @@ static int record__auxtrace_read_snapshot_all(struct record *rec) int i; int rc = 0; - for (i = 0; i < rec->evlist->nr_mmaps; i++) { + for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { struct mmap *map = &rec->evlist->mmap[i]; if (!map->auxtrace_mmap.base) @@ -966,7 +966,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, if (record__aio_enabled(rec)) off = record__aio_get_pos(trace_fd); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { u64 flush = 0; struct mmap *map = &maps[i]; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e637a08655db..474b9860cfd4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -904,7 +904,7 @@ static void perf_top__mmap_read(struct perf_top *top) if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING); - for (i = 0; i < top->evlist->nr_mmaps; i++) + for (i = 0; i < top->evlist->core.nr_mmaps; i++) perf_top__mmap_read_idx(top, i); if (overwrite) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c44280358e58..91c73c7472ba 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3443,7 +3443,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) again: before = trace->nr_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; struct mmap *md; diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 448891f06e3e..035c1e1cc324 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -13,6 +13,7 @@ struct perf_evlist { bool has_user_cpus; struct perf_cpu_map *cpus; struct perf_thread_map *threads; + int nr_mmaps; }; /** diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index c59d3752d48d..338cd9faa835 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -33,7 +33,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, { int i; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 3c8533fdbce5..1eb0bffaed6c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -179,7 +179,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), (*func)(); evlist__disable(evlist); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; struct mmap *md; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index bc6db3e7a1c5..7dac69a375f9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -423,7 +423,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, struct mmap *md; int i, ret; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index c6030fdf7d4c..bd4ae8e5cd5d 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -36,7 +36,7 @@ static int find_comm(struct evlist *evlist, const char *comm) int i, found; found = 0; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index e20eaadb1a35..4629fa33c8ad 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -87,7 +87,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest while (1) { int before = nr_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; struct mmap *md; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index ea8bcaa13ea5..199a66444e60 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -165,7 +165,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus while (1) { int before = total_events; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { union perf_event *event; struct mmap *md; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index de700aad1fed..30a70db6473d 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -267,7 +267,7 @@ static int process_events(struct evlist *evlist, struct mmap *md; int i, ret; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; if (perf_mmap__read_init(md) < 0) continue; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 16866533745c..d147834fbe60 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -651,7 +651,7 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value) if (!evlist->overwrite_mmap) return 0; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { int fd = evlist->overwrite_mmap[i].core.fd; int err; @@ -679,11 +679,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) int i; if (evlist->mmap) - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->core.nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); if (evlist->overwrite_mmap) - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->core.nr_mmaps; i++) perf_mmap__munmap(&evlist->overwrite_mmap[i]); } @@ -700,14 +700,14 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, int i; struct mmap *map; - evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); + evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); if (perf_cpu_map__empty(evlist->core.cpus)) - evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads); - map = zalloc(evlist->nr_mmaps * sizeof(struct mmap)); + evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads); + map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap)); if (!map) return NULL; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { map[i].core.fd = -1; map[i].core.overwrite = overwrite; /* @@ -1847,7 +1847,7 @@ static void *perf_evlist__poll_thread(void *arg) if (!draining) perf_evlist__poll(evlist, 1000); - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *map = &evlist->mmap[i]; union perf_event *event; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8b9c35efea67..816b72a2b1e5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -55,7 +55,6 @@ struct evlist { struct perf_evlist core; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_groups; - int nr_mmaps; bool enabled; size_t mmap_len; int id_pos; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ba4085d7ae9f..62144b97e17b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -988,7 +988,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) { int i; - for (i = 0; i < evlist->nr_mmaps; i++) { + for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; if (md->core.cpu == cpu) From f6fa437577937265dacd4637608f4cf0a1a928dc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Aug 2019 15:14:05 +0200 Subject: [PATCH 290/345] libperf: Move 'mmap_len' from 'struct evlist' to 'struct perf_evlist' Moving 'mmap_len' from 'struct evlist' to 'struct perf_evlist' it will be used in following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/lib/include/internal/evlist.h | 1 + tools/perf/util/evlist.c | 10 +++++----- tools/perf/util/evlist.h | 1 - 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8577bf33a556..94997144547d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1412,7 +1412,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = -1; goto out_child; } - session->header.env.comp_mmap_len = session->evlist->mmap_len; + session->header.env.comp_mmap_len = session->evlist->core.mmap_len; err = bpf__apply_obj_config(); if (err) { diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 035c1e1cc324..01b813616440 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -14,6 +14,7 @@ struct perf_evlist { struct perf_cpu_map *cpus; struct perf_thread_map *threads; int nr_mmaps; + size_t mmap_len; }; /** diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d147834fbe60..4d8cde099e10 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1012,11 +1012,11 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->mmap_len = evlist__mmap_size(pages); - pr_debug("mmap size %zuB\n", evlist->mmap_len); - mp.mask = evlist->mmap_len - page_size - 1; + evlist->core.mmap_len = evlist__mmap_size(pages); + pr_debug("mmap size %zuB\n", evlist->core.mmap_len); + mp.mask = evlist->core.mmap_len - page_size - 1; - auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, + auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); evlist__for_each_entry(evlist, evsel) { @@ -1600,7 +1600,7 @@ out_default: int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) { char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); - int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; + int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0; switch (err) { case EPERM: diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 816b72a2b1e5..765cee8bced1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -56,7 +56,6 @@ struct evlist { struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_groups; bool enabled; - size_t mmap_len; int id_pos; int is_pos; u64 combined_sample_type; From 40cb2d5141bdd52b3c00bb78799118c4606947ac Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Aug 2019 11:28:02 +0200 Subject: [PATCH 291/345] libperf: Move 'pollfd' from 'struct evlist' to 'struct perf_evlist' Moving 'pollfd' from 'struct evlist' to 'struct perf_evlist' it will be used in following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/lib/include/internal/evlist.h | 2 ++ tools/perf/util/evlist.c | 18 +++++++++--------- tools/perf/util/evlist.h | 1 - tools/perf/util/python.c | 6 +++--- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index e2b42efc86fa..710a2898ed6c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -981,7 +981,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) evlist__enable(kvm->evlist); while (!done) { - struct fdarray *fda = &kvm->evlist->pollfd; + struct fdarray *fda = &kvm->evlist->core.pollfd; int rc; rc = perf_kvm__mmap_read(kvm); diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 01b813616440..8a4eb66fbf3a 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -3,6 +3,7 @@ #define __LIBPERF_INTERNAL_EVLIST_H #include +#include struct perf_cpu_map; struct perf_thread_map; @@ -15,6 +16,7 @@ struct perf_evlist { struct perf_thread_map *threads; int nr_mmaps; size_t mmap_len; + struct fdarray pollfd; }; /** diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4d8cde099e10..13595e8e6b4b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -61,7 +61,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, INIT_HLIST_HEAD(&evlist->heads[i]); perf_evlist__init(&evlist->core); perf_evlist__set_maps(&evlist->core, cpus, threads); - fdarray__init(&evlist->pollfd, 64); + fdarray__init(&evlist->core.pollfd, 64); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } @@ -142,7 +142,7 @@ void evlist__exit(struct evlist *evlist) { zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); - fdarray__exit(&evlist->pollfd); + fdarray__exit(&evlist->core.pollfd); } void evlist__delete(struct evlist *evlist) @@ -416,8 +416,8 @@ int perf_evlist__alloc_pollfd(struct evlist *evlist) nfds += nr_cpus * nr_threads; } - if (fdarray__available_entries(&evlist->pollfd) < nfds && - fdarray__grow(&evlist->pollfd, nfds) < 0) + if (fdarray__available_entries(&evlist->core.pollfd) < nfds && + fdarray__grow(&evlist->core.pollfd, nfds) < 0) return -ENOMEM; return 0; @@ -426,13 +426,13 @@ int perf_evlist__alloc_pollfd(struct evlist *evlist) static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd, struct mmap *map, short revent) { - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->core.pollfd, fd, revent | POLLERR | POLLHUP); /* * Save the idx so that when we filter out fds POLLHUP'ed we can * close the associated evlist->mmap[] entry. */ if (pos >= 0) { - evlist->pollfd.priv[pos].ptr = map; + evlist->core.pollfd.priv[pos].ptr = map; fcntl(fd, F_SETFL, O_NONBLOCK); } @@ -456,13 +456,13 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->pollfd, revents_and_mask, + return fdarray__filter(&evlist->core.pollfd, revents_and_mask, perf_evlist__munmap_filtered, NULL); } int perf_evlist__poll(struct evlist *evlist, int timeout) { - return fdarray__poll(&evlist->pollfd, timeout); + return fdarray__poll(&evlist->core.pollfd, timeout); } static void perf_evlist__id_hash(struct evlist *evlist, @@ -1009,7 +1009,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, if (!evlist->mmap) return -ENOMEM; - if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; evlist->core.mmap_len = evlist__mmap_size(pages); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 765cee8bced1..4fcdf93eb19c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -64,7 +64,6 @@ struct evlist { int cork_fd; pid_t pid; } workload; - struct fdarray pollfd; struct mmap *mmap; struct mmap *overwrite_mmap; struct evsel *selected; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 62144b97e17b..fcbafb1e8d9d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -935,17 +935,17 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, PyObject *list = PyList_New(0); int i; - for (i = 0; i < evlist->pollfd.nr; ++i) { + for (i = 0; i < evlist->core.pollfd.nr; ++i) { PyObject *file; #if PY_MAJOR_VERSION < 3 - FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); + FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; file = PyFile_FromFile(fp, "perf", "r", NULL); #else - file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, + file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 0); #endif if (file == NULL) From fee92b4442f135495d4fc59a0b9418490d4ac0ba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2019 15:10:35 -0300 Subject: [PATCH 292/345] libperf: Add missing 'struct xyarray' forward declaration We were getting it by luck, from files included before internal/evsel.h where it is being included. Fixes: 9dfcb7599084 ("libperf: Move fd array from perf's evsel to lobperf's perf_evsel class") Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-r8ukhxprpkflbd2k9vcc42v1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/evsel.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 1bff789b0923..60daee6db914 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -8,6 +8,7 @@ struct perf_cpu_map; struct perf_thread_map; +struct xyarray; struct perf_evsel { struct list_head node; From 8cd36f3ef4926165bc5e5af6f7d7b45f0e14a1f4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 22:04:12 +0200 Subject: [PATCH 293/345] libperf: Move 'sample_id' from 'struct evsel' to 'struct perf_evsel' Move 'sample_id' array from 'struct evsel' to libperf's 'struct perf_evsel'. Committer notes: Removed the 'struct xyarray' from util/evsel.h, not needed anymore there. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/lib/include/internal/evsel.h | 1 + tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evsel.c | 12 ++++++------ tools/perf/util/evsel.h | 2 -- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0d55eb6bd6e2..468fc49420ce 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -235,7 +235,7 @@ static int write_stat_round_event(u64 tm, u64 type) #define WRITE_STAT_ROUND_EVENT(time, interval) \ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) -#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) static int perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 60daee6db914..1ddb969f7807 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -17,6 +17,7 @@ struct perf_evsel { struct perf_cpu_map *own_cpus; struct perf_thread_map *threads; struct xyarray *fd; + struct xyarray *sample_id; /* parse modifier helper */ int nr_members; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 13595e8e6b4b..84b409802298 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -50,7 +50,7 @@ int sigqueue(pid_t pid, int sig, const union sigval value); #endif #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) +#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads) @@ -1021,7 +1021,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, evlist__for_each_entry(evlist, evsel) { if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && - evsel->sample_id == NULL && + evsel->core.sample_id == NULL && perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 566c9413246c..cb1ada8cf4a4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1235,14 +1235,14 @@ int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) if (evsel->core.system_wide) nthreads = 1; - evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); - if (evsel->sample_id == NULL) + evsel->core.sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); + if (evsel->core.sample_id == NULL) return -ENOMEM; evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); if (evsel->id == NULL) { - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; + xyarray__delete(evsel->core.sample_id); + evsel->core.sample_id = NULL; return -ENOMEM; } @@ -1251,8 +1251,8 @@ int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) static void perf_evsel__free_id(struct evsel *evsel) { - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; + xyarray__delete(evsel->core.sample_id); + evsel->core.sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f757ff449a17..2d2c6cad81f8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -96,7 +96,6 @@ enum perf_tool_event { struct bpf_object; struct perf_counts; -struct xyarray; /** struct evsel - event selector * @@ -117,7 +116,6 @@ struct evsel { struct perf_evsel core; struct evlist *evlist; char *filter; - struct xyarray *sample_id; u64 *id; struct perf_counts *counts; struct perf_counts *prev_raw_counts; From deaf321913a7b1d440c5cd5c7766d47381c9b21b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 22:12:26 +0200 Subject: [PATCH 294/345] libperf: Move 'id' from 'struct evsel' to 'struct perf_evsel' Move the 'id' array from 'struct evsel' to libperf's 'struct perf_evsel'. Committer note: Fix the tools/perf/util/cs-etm.c build, i.e. aarch64's CoreSight. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/evsel.h | 1 + tools/perf/util/cs-etm.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 1 - tools/perf/util/header.c | 10 +++++----- tools/perf/util/intel-bts.c | 2 +- tools/perf/util/intel-pt.c | 8 ++++---- tools/perf/util/synthetic-events.c | 12 ++++++------ 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 1ddb969f7807..e7171948c347 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -18,6 +18,7 @@ struct perf_evsel { struct perf_thread_map *threads; struct xyarray *fd; struct xyarray *sample_id; + u64 *id; /* parse modifier helper */ int nr_members; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 6021974577d5..4ba0f871f086 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1298,7 +1298,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.read_format = evsel->core.attr.read_format; /* create new id val to be a fixed offset from evsel id */ - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 84b409802298..24f03f245525 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -482,7 +482,7 @@ void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, int cpu, int thread, u64 id) { perf_evlist__id_hash(evlist, evsel, cpu, thread, id); - evsel->id[evsel->ids++] = id; + evsel->core.id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct evlist *evlist, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cb1ada8cf4a4..9c1b4f4a5fa3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1239,8 +1239,8 @@ int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) if (evsel->core.sample_id == NULL) return -ENOMEM; - evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); - if (evsel->id == NULL) { + evsel->core.id = zalloc(ncpus * nthreads * sizeof(u64)); + if (evsel->core.id == NULL) { xyarray__delete(evsel->core.sample_id); evsel->core.sample_id = NULL; return -ENOMEM; @@ -1253,7 +1253,7 @@ static void perf_evsel__free_id(struct evsel *evsel) { xyarray__delete(evsel->core.sample_id); evsel->core.sample_id = NULL; - zfree(&evsel->id); + zfree(&evsel->core.id); evsel->ids = 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 2d2c6cad81f8..0d2aa933ceb3 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -116,7 +116,6 @@ struct evsel { struct perf_evsel core; struct evlist *evlist; char *filter; - u64 *id; struct perf_counts *counts; struct perf_counts *prev_raw_counts; int idx; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0167f9697172..2b0681ab08aa 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -530,7 +530,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write unique ids for this event */ - ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64)); + ret = do_write(ff, evsel->core.id, evsel->ids * sizeof(u64)); if (ret < 0) return ret; } @@ -1590,7 +1590,7 @@ static void free_event_desc(struct evsel *events) for (evsel = events; evsel->core.attr.size; evsel++) { zfree(&evsel->name); - zfree(&evsel->id); + zfree(&evsel->core.id); } free(events); @@ -1657,7 +1657,7 @@ static struct evsel *read_event_desc(struct feat_fd *ff) if (!id) goto error; evsel->ids = nr; - evsel->id = id; + evsel->core.id = id; for (j = 0 ; j < nr; j++) { if (do_read_u64(ff, id)) @@ -1701,7 +1701,7 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp) if (evsel->ids) { fprintf(fp, ", id = {"); - for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { + for (j = 0, id = evsel->core.id; j < evsel->ids; j++, id++) { if (j) fputc(',', fp); fprintf(fp, " %"PRIu64, *id); @@ -3068,7 +3068,7 @@ int perf_session__write_header(struct perf_session *session, evlist__for_each_entry(session->evlist, evsel) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64)); + err = do_write(&ff, evsel->core.id, evsel->ids * sizeof(u64)); if (err < 0) { pr_debug("failed to write perf header\n"); return err; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 3888d4cd3ed1..c94360cd9c00 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -795,7 +795,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bcdc0359f7cf..24ca5d5908ca 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1705,7 +1705,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->id[0]; + u64 id = evsel->core.id[0]; u8 cpumode; if (intel_pt_skip_event(pt)) @@ -2720,7 +2720,7 @@ static void intel_pt_set_event_name(struct evlist *evlist, u64 id, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->id && evsel->id[0] == id) { + if (evsel->core.id && evsel->core.id[0] == id) { if (evsel->name) zfree(&evsel->name); evsel->name = strdup(name); @@ -2776,7 +2776,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->id[0] + 1000000000; + id = evsel->core.id[0] + 1000000000; if (!id) id = 1; @@ -2903,7 +2903,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) return; evlist__for_each_entry(pt->session->evlist, evsel) { - if (evsel->core.attr.aux_output && evsel->id) { + if (evsel->core.attr.aux_output && evsel->core.id) { pt->sample_pebs = true; pt->pebs_evsel = evsel; return; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 8322028a9a97..907ac3971959 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1442,7 +1442,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ e = &ev->id_index.entries[i++]; - e->id = evsel->id[j]; + e->id = evsel->core.id[j]; sid = perf_evlist__id2sid(evlist, e->id); if (!sid) { @@ -1515,7 +1515,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse struct perf_record_event_update *ev; int err; - ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]); + ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->core.id[0]); if (ev == NULL) return -ENOMEM; @@ -1532,7 +1532,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs struct perf_record_event_update_scale *ev_data; int err; - ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]); + ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->core.id[0]); if (ev == NULL) return -ENOMEM; @@ -1550,7 +1550,7 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse size_t len = strlen(evsel->name); int err; - ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]); + ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->core.id[0]); if (ev == NULL) return -ENOMEM; @@ -1578,7 +1578,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evse ev->header.type = PERF_RECORD_EVENT_UPDATE; ev->header.size = (u16)size; ev->type = PERF_EVENT_UPDATE__CPUS; - ev->id = evsel->id[0]; + ev->id = evsel->core.id[0]; cpu_map_data__synthesize((struct perf_record_cpu_map_data *)ev->data, evsel->core.own_cpus, type, max); @@ -1596,7 +1596,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, evlist__for_each_entry(evlist, evsel) { err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids, - evsel->id, process); + evsel->core.id, process); if (err) { pr_debug("failed to create perf header attribute\n"); return err; From e7eb9002d4513ac4a26c756b72e6c25bf063baf2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 22:15:47 +0200 Subject: [PATCH 295/345] libperf: Move 'ids' from 'struct evsel' to 'struct perf_evsel' Move 'ids' from 'struct evsel' to libperf's 'struct perf_evsel'. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/evsel.h | 1 + tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 2 +- tools/perf/util/evsel.h | 1 - tools/perf/util/header.c | 14 +++++++------- tools/perf/util/intel-bts.c | 2 +- tools/perf/util/intel-pt.c | 2 +- tools/perf/util/synthetic-events.c | 6 +++--- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index e7171948c347..385766f06591 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -19,6 +19,7 @@ struct perf_evsel { struct xyarray *fd; struct xyarray *sample_id; u64 *id; + u32 ids; /* parse modifier helper */ int nr_members; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 24f03f245525..5e43fb9da359 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -482,7 +482,7 @@ void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, int cpu, int thread, u64 id) { perf_evlist__id_hash(evlist, evsel, cpu, thread, id); - evsel->core.id[evsel->ids++] = id; + evsel->core.id[evsel->core.ids++] = id; } int perf_evlist__id_add_fd(struct evlist *evlist, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9c1b4f4a5fa3..55638eb9299c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1254,7 +1254,7 @@ static void perf_evsel__free_id(struct evsel *evsel) xyarray__delete(evsel->core.sample_id); evsel->core.sample_id = NULL; zfree(&evsel->core.id); - evsel->ids = 0; + evsel->core.ids = 0; } static void perf_evsel__free_config_terms(struct evsel *evsel) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0d2aa933ceb3..ed64395ec340 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -119,7 +119,6 @@ struct evsel { struct perf_counts *counts; struct perf_counts *prev_raw_counts; int idx; - u32 ids; unsigned long max_events; unsigned long nr_events_printed; char *name; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2b0681ab08aa..498f6a825656 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -516,7 +516,7 @@ static int write_event_desc(struct feat_fd *ff, * copy into an nri to be independent of the * type of ids, */ - nri = evsel->ids; + nri = evsel->core.ids; ret = do_write(ff, &nri, sizeof(nri)); if (ret < 0) return ret; @@ -530,7 +530,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write unique ids for this event */ - ret = do_write(ff, evsel->core.id, evsel->ids * sizeof(u64)); + ret = do_write(ff, evsel->core.id, evsel->core.ids * sizeof(u64)); if (ret < 0) return ret; } @@ -1656,7 +1656,7 @@ static struct evsel *read_event_desc(struct feat_fd *ff) id = calloc(nr, sizeof(*id)); if (!id) goto error; - evsel->ids = nr; + evsel->core.ids = nr; evsel->core.id = id; for (j = 0 ; j < nr; j++) { @@ -1699,9 +1699,9 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp) for (evsel = events; evsel->core.attr.size; evsel++) { fprintf(fp, "# event : name = %s, ", evsel->name); - if (evsel->ids) { + if (evsel->core.ids) { fprintf(fp, ", id = {"); - for (j = 0, id = evsel->core.id; j < evsel->ids; j++, id++) { + for (j = 0, id = evsel->core.id; j < evsel->core.ids; j++, id++) { if (j) fputc(',', fp); fprintf(fp, " %"PRIu64, *id); @@ -3068,7 +3068,7 @@ int perf_session__write_header(struct perf_session *session, evlist__for_each_entry(session->evlist, evsel) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(&ff, evsel->core.id, evsel->ids * sizeof(u64)); + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); if (err < 0) { pr_debug("failed to write perf header\n"); return err; @@ -3082,7 +3082,7 @@ int perf_session__write_header(struct perf_session *session, .attr = evsel->core.attr, .ids = { .offset = evsel->id_offset, - .size = evsel->ids * sizeof(u64), + .size = evsel->core.ids * sizeof(u64), } }; err = do_write(&ff, &f_attr, sizeof(f_attr)); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index c94360cd9c00..34cb380d19a3 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -768,7 +768,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, int err; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == bts->pmu_type && evsel->ids) { + if (evsel->core.attr.type == bts->pmu_type && evsel->core.ids) { found = true; break; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 24ca5d5908ca..a1c9eb6d4f40 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2735,7 +2735,7 @@ static struct evsel *intel_pt_evsel(struct intel_pt *pt, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == pt->pmu_type && evsel->ids) + if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) return evsel; } diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 907ac3971959..96ed008c2775 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1413,7 +1413,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ sizeof(struct id_index_entry); evlist__for_each_entry(evlist, evsel) - nr += evsel->ids; + nr += evsel->core.ids; n = nr > max_nr ? max_nr : nr; sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); @@ -1428,7 +1428,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ evlist__for_each_entry(evlist, evsel) { u32 j; - for (j = 0; j < evsel->ids; j++) { + for (j = 0; j < evsel->core.ids; j++) { struct id_index_entry *e; struct perf_sample_id *sid; @@ -1595,7 +1595,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, int err = 0; evlist__for_each_entry(evlist, evsel) { - err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->ids, + err = perf_event__synthesize_attr(tool, &evsel->core.attr, evsel->core.ids, evsel->core.id, process); if (err) { pr_debug("failed to create perf header attribute\n"); From 1d5af02d7a92acaa877ab0fbec0756114852720a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 22:20:12 +0200 Subject: [PATCH 296/345] libperf: Move 'heads' from 'struct evlist' to 'struct perf_evlist' Move 'heads' hash table from 'struct evlist' to 'struct perf_evlist'. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 4 ++++ tools/perf/lib/include/internal/evlist.h | 4 ++++ tools/perf/util/evlist.c | 10 +++------- tools/perf/util/evlist.h | 4 ---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index f4dc9a208332..2bdd9d2c79d9 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -11,6 +11,10 @@ void perf_evlist__init(struct perf_evlist *evlist) { + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; } diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 8a4eb66fbf3a..c5a06890fd6a 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -5,6 +5,9 @@ #include #include +#define PERF_EVLIST__HLIST_BITS 8 +#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) + struct perf_cpu_map; struct perf_thread_map; @@ -17,6 +20,7 @@ struct perf_evlist { int nr_mmaps; size_t mmap_len; struct fdarray pollfd; + struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; }; /** diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5e43fb9da359..c6af7c622612 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -55,10 +55,6 @@ int sigqueue(pid_t pid, int sig, const union sigval value); void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); perf_evlist__init(&evlist->core); perf_evlist__set_maps(&evlist->core, cpus, threads); fdarray__init(&evlist->core.pollfd, 64); @@ -475,7 +471,7 @@ static void perf_evlist__id_hash(struct evlist *evlist, sid->id = id; sid->evsel = evsel; hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->heads[hash]); + hlist_add_head(&sid->node, &evlist->core.heads[hash]); } void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, @@ -549,7 +545,7 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) int hash; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); - head = &evlist->heads[hash]; + head = &evlist->core.heads[hash]; hlist_for_each_entry(sid, head, node) if (sid->id == id) @@ -635,7 +631,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist, return first; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); - head = &evlist->heads[hash]; + head = &evlist->core.heads[hash]; hlist_for_each_entry(sid, head, node) { if (sid->id == id) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4fcdf93eb19c..ad9c0ba57a91 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -48,12 +48,8 @@ enum bkw_mmap_state { BKW_MMAP_EMPTY, }; -#define PERF_EVLIST__HLIST_BITS 8 -#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) - struct evlist { struct perf_evlist core; - struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_groups; bool enabled; int id_pos; From 70c20369ee95ef8b6887944194cfb74a5a8d1fe3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Sep 2019 10:34:29 +0200 Subject: [PATCH 297/345] libperf: Add perf_evsel__alloc_id/perf_evsel__free_id functions Add perf_evsel__alloc_id()/perf_evsel__free_id() functions to libperf as internal functions. Move 'struct perf_sample_id' to internal/evsel.h header and change 'struct perf_sample_id::evsel' to 'struct perf_evsel' and the related code that touches it. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-28-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evsel.c | 30 +++++++++++++++++++++ tools/perf/lib/include/internal/evsel.h | 27 +++++++++++++++++++ tools/perf/tests/event_update.c | 2 +- tools/perf/util/evlist.c | 10 +++---- tools/perf/util/evsel.c | 36 +++---------------------- tools/perf/util/evsel.h | 25 ----------------- tools/perf/util/header.c | 4 +-- tools/perf/util/session.c | 4 ++- 8 files changed, 71 insertions(+), 67 deletions(-) diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c index 24abc80dd767..a8cb582e2721 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/perf/lib/evsel.c @@ -230,3 +230,33 @@ struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel) { return &evsel->attr; } + +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + if (ncpus == 0 || nthreads == 0) + return 0; + + if (evsel->system_wide) + nthreads = 1; + + evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); + if (evsel->sample_id == NULL) + return -ENOMEM; + + evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); + if (evsel->id == NULL) { + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + return -ENOMEM; + } + + return 0; +} + +void perf_evsel__free_id(struct perf_evsel *evsel) +{ + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + zfree(&evsel->id); + evsel->ids = 0; +} diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index 385766f06591..a69b8299c36f 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -5,11 +5,35 @@ #include #include #include +#include struct perf_cpu_map; struct perf_thread_map; struct xyarray; +/* + * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are + * more than one entry in the evlist. + */ +struct perf_sample_id { + struct hlist_node node; + u64 id; + struct perf_evsel *evsel; + /* + * 'idx' will be used for AUX area sampling. A sample will have AUX area + * data that will be queued for decoding, where there are separate + * queues for each CPU (per-cpu tracing) or task (per-thread tracing). + * The sample ID can be used to lookup 'idx' which is effectively the + * queue number. + */ + int idx; + int cpu; + pid_t tid; + + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; +}; + struct perf_evsel { struct list_head node; struct perf_event_attr attr; @@ -32,4 +56,7 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); int perf_evsel__read_size(struct perf_evsel *evsel); int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__free_id(struct perf_evsel *evsel); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 0497d900ced2..cf4f90170f90 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -95,7 +95,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("failed to allocate ids", - !perf_evsel__alloc_id(evsel, 1, 1)); + !perf_evsel__alloc_id(&evsel->core, 1, 1)); perf_evlist__id_add(evlist, evsel, 0, 0, 123); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6af7c622612..559db38594a8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -469,7 +469,7 @@ static void perf_evlist__id_hash(struct evlist *evlist, struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->id = id; - sid->evsel = evsel; + sid->evsel = &evsel->core; hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); hlist_add_head(&sid->node, &evlist->core.heads[hash]); } @@ -563,7 +563,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) sid = perf_evlist__id2sid(evlist, id); if (sid) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); if (!perf_evlist__sample_id_all(evlist)) return perf_evlist__first(evlist); @@ -581,7 +581,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, sid = perf_evlist__id2sid(evlist, id); if (sid) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); return NULL; } @@ -635,7 +635,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist, hlist_for_each_entry(sid, head, node) { if (sid->id == id) - return sid->evsel; + return container_of(sid->evsel, struct evsel, core); } return NULL; } @@ -1018,7 +1018,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, evlist__for_each_entry(evlist, evsel) { if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && evsel->core.sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 55638eb9299c..a4a492f11849 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1227,36 +1227,6 @@ int evsel__disable(struct evsel *evsel) return err; } -int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads) -{ - if (ncpus == 0 || nthreads == 0) - return 0; - - if (evsel->core.system_wide) - nthreads = 1; - - evsel->core.sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); - if (evsel->core.sample_id == NULL) - return -ENOMEM; - - evsel->core.id = zalloc(ncpus * nthreads * sizeof(u64)); - if (evsel->core.id == NULL) { - xyarray__delete(evsel->core.sample_id); - evsel->core.sample_id = NULL; - return -ENOMEM; - } - - return 0; -} - -static void perf_evsel__free_id(struct evsel *evsel) -{ - xyarray__delete(evsel->core.sample_id); - evsel->core.sample_id = NULL; - zfree(&evsel->core.id); - evsel->core.ids = 0; -} - static void perf_evsel__free_config_terms(struct evsel *evsel) { struct perf_evsel_config_term *term, *h; @@ -1273,7 +1243,7 @@ void perf_evsel__exit(struct evsel *evsel) assert(evsel->evlist == NULL); perf_evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); - perf_evsel__free_id(evsel); + perf_evsel__free_id(&evsel->core); perf_evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); perf_cpu_map__put(evsel->core.cpus); @@ -1992,7 +1962,7 @@ out_close: void evsel__close(struct evsel *evsel) { perf_evsel__close(&evsel->core); - perf_evsel__free_id(evsel); + perf_evsel__free_id(&evsel->core); } int perf_evsel__open_per_cpu(struct evsel *evsel, @@ -2706,7 +2676,7 @@ int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist) struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; - if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr)) + if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr)) return -ENOMEM; return store_evsel_ids(evsel, evlist); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ed64395ec340..a5b29ac10da0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -17,29 +17,6 @@ struct addr_location; struct evsel; union perf_event; -/* - * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are - * more than one entry in the evlist. - */ -struct perf_sample_id { - struct hlist_node node; - u64 id; - struct evsel *evsel; - /* - * 'idx' will be used for AUX area sampling. A sample will have AUX area - * data that will be queued for decoding, where there are separate - * queues for each CPU (per-cpu tracing) or task (per-thread tracing). - * The sample ID can be used to lookup 'idx' which is effectively the - * queue number. - */ - int idx; - int cpu; - pid_t tid; - - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; -}; - struct cgroup; /* @@ -272,8 +249,6 @@ const char *perf_evsel__name(struct evsel *evsel); const char *perf_evsel__group_name(struct evsel *evsel); int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); -int perf_evsel__alloc_id(struct evsel *evsel, int ncpus, int nthreads); - void __perf_evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); void __perf_evsel__reset_sample_bit(struct evsel *evsel, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 498f6a825656..fc5eac41e102 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3609,7 +3609,7 @@ int perf_session__read_header(struct perf_session *session) * for allocating the perf_sample_id table we fake 1 cpu and * hattr->ids threads. */ - if (perf_evsel__alloc_id(evsel, 1, nr_ids)) + if (perf_evsel__alloc_id(&evsel->core, 1, nr_ids)) goto out_delete_evlist; lseek(fd, f_attr.ids.offset, SEEK_SET); @@ -3750,7 +3750,7 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, * for allocating the perf_sample_id table we fake 1 cpu and * hattr->ids threads. */ - if (perf_evsel__alloc_id(evsel, 1, n_ids)) + if (perf_evsel__alloc_id(&evsel->core, 1, n_ids)) return -ENOMEM; for (i = 0; i < n_ids; i++) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a621c73bad42..84a30ff3968a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1324,6 +1324,7 @@ static int deliver_sample_value(struct evlist *evlist, struct machine *machine) { struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); + struct evsel *evsel; if (sid) { sample->id = v->id; @@ -1343,7 +1344,8 @@ static int deliver_sample_value(struct evlist *evlist, if (!sample->period) return 0; - return tool->sample(tool, event, sample, sid->evsel, machine); + evsel = container_of(sid->evsel, struct evsel, core); + return tool->sample(tool, event, sample, evsel, machine); } static int deliver_sample_group(struct evlist *evlist, From 515dbe48f6202147fb7c88aac48c43f49db1c793 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Sep 2019 10:39:52 +0200 Subject: [PATCH 298/345] libperf: Add perf_evlist__first()/last() functions Add perf_evlist__first()/last() functions to libperf, as internal functions and rename perf's origins to evlist__first/last. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-29-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/x86/tests/intel-cqm.c | 4 +- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 6 +- tools/perf/builtin-record.c | 4 +- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-top.c | 10 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/internal/evlist.h | 11 ++ tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/event-times.c | 14 +-- tools/perf/tests/event_update.c | 2 +- tools/perf/tests/evsel-roundtrip-name.c | 2 +- tools/perf/tests/hists_cumulate.c | 2 +- tools/perf/tests/hists_link.c | 4 +- tools/perf/tests/hists_output.c | 2 +- tools/perf/tests/keep-tracking.c | 4 +- tools/perf/tests/parse-events.c | 116 +++++++++---------- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/switch-tracking.c | 14 +-- tools/perf/tests/task-exit.c | 2 +- tools/perf/ui/browsers/hists.c | 6 +- tools/perf/util/bpf-loader.c | 2 +- tools/perf/util/evlist.c | 24 ++-- tools/perf/util/evlist.h | 13 ++- tools/perf/util/jitdump.c | 2 +- tools/perf/util/parse-events.c | 4 +- tools/perf/util/record.c | 6 +- tools/perf/util/sort.c | 2 +- tools/perf/util/top.c | 2 +- 32 files changed, 145 insertions(+), 129 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 051e9066fb38..6654bcfc1224 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -416,7 +416,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, if (err) goto out; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 9302c6566f53..745f2d50ee82 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -129,7 +129,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (err) return err; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); tracking_evsel->core.attr.freq = 0; diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 111c0ab2e7b5..0329b9168fae 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -62,9 +62,9 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt goto out; } - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); if (!evsel) { - pr_debug("perf_evlist__first failed\n"); + pr_debug("evlist__first failed\n"); goto out; } diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 10b7acebc0eb..fa947952c16a 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -83,7 +83,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index e81535c8e9c5..e2c7095327db 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -231,7 +231,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, if (err) return err; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 886b3ac60f23..84a65524c418 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -417,7 +417,7 @@ static int intel_pt_track_switches(struct evlist *evlist) return err; } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TIME); @@ -717,7 +717,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (err) return err; - switch_evsel = perf_evlist__last(evlist); + switch_evsel = evlist__last(evlist); switch_evsel->core.attr.freq = 0; switch_evsel->core.attr.sample_period = 1; @@ -775,7 +775,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (err) return err; - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 94997144547d..48600c90cc7e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -756,9 +756,9 @@ static int record__open(struct record *rec) if (perf_evlist__add_dummy(evlist)) return -ENOMEM; - pos = perf_evlist__first(evlist); + pos = evlist__first(evlist); pos->tracking = 0; - pos = perf_evlist__last(evlist); + pos = evlist__last(evlist); pos->tracking = 1; pos->core.attr.enable_on_exec = 1; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e7a49e2d7575..22c1d114014c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2043,7 +2043,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, return err; evlist = *pevlist; - evsel = perf_evlist__last(*pevlist); + evsel = evlist__last(*pevlist); if (!evsel->priv) { if (scr->per_event_dump) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 474b9860cfd4..73bf79053ae3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -531,7 +531,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->core.nr_entries) { - top->sym_evsel = perf_evlist__first(top->evlist); + top->sym_evsel = evlist__first(top->evlist); fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; @@ -540,7 +540,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) if (top->sym_evsel->idx == counter) break; } else - top->sym_evsel = perf_evlist__first(top->evlist); + top->sym_evsel = evlist__first(top->evlist); break; case 'f': prompt_integer(&top->count_filter, "Enter display event count filter"); @@ -962,7 +962,7 @@ static int perf_top__overwrite_check(struct perf_top *top) /* has term for current event */ if ((overwrite < 0) && (set >= 0)) { /* if it's first event, set overwrite */ - if (evsel == perf_evlist__first(evlist)) + if (evsel == evlist__first(evlist)) overwrite = set; else return -1; @@ -986,7 +986,7 @@ static int perf_top_overwrite_fallback(struct perf_top *top, return 0; /* only fall back when first event fails */ - if (evsel != perf_evlist__first(evlist)) + if (evsel != evlist__first(evlist)) return 0; evlist__for_each_entry(evlist, counter) @@ -1644,7 +1644,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - top.sym_evsel = perf_evlist__first(top.evlist); + top.sym_evsel = evlist__first(top.evlist); if (!callchain_param.enabled) { symbol_conf.cumulate_callchain = false; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 91c73c7472ba..97667287f573 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3427,7 +3427,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || evlist->core.threads->nr > 1 || - perf_evlist__first(evlist)->core.attr.inherit; + evlist__first(evlist)->core.attr.inherit; /* * Now that we already used evsel->core.attr to ask the kernel to setup the diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index c5a06890fd6a..16ae6d6cfb39 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -4,6 +4,7 @@ #include #include +#include #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) @@ -55,4 +56,14 @@ struct perf_evlist { #define perf_evlist__for_each_entry_reverse(evlist, evsel) \ __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel) +static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.next, struct perf_evsel, node); +} + +static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.prev, struct perf_evsel, node); +} + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 7dac69a375f9..f5764a3890b9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -652,7 +652,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 0228ba435a2a..1ee8704e2284 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -16,7 +16,7 @@ static int attach__enable_on_exec(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct target target = { .uid = UINT_MAX, }; @@ -58,7 +58,7 @@ static int detach__enable_on_exec(struct evlist *evlist) static int attach__current_disabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_thread_map *threads; int err; @@ -84,7 +84,7 @@ static int attach__current_disabled(struct evlist *evlist) static int attach__current_enabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_thread_map *threads; int err; @@ -104,14 +104,14 @@ static int attach__current_enabled(struct evlist *evlist) static int detach__disable(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); return evsel__enable(evsel); } static int attach__cpu_disabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_cpu_map *cpus; int err; @@ -140,7 +140,7 @@ static int attach__cpu_disabled(struct evlist *evlist) static int attach__cpu_enabled(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__last(evlist); + struct evsel *evsel = evlist__last(evlist); struct perf_cpu_map *cpus; int err; @@ -180,7 +180,7 @@ static int test_times(int (attach)(struct evlist *), goto out_err; } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); evsel->core.attr.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index cf4f90170f90..cd6cae8e5137 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -92,7 +92,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu evlist = perf_evlist__new_default(); TEST_ASSERT_VAL("failed to get evlist", evlist); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); TEST_ASSERT_VAL("failed to allocate ids", !perf_evsel__alloc_id(&evsel->core, 1, 1)); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 5330f106a6ee..956205bf9326 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -34,7 +34,7 @@ static int perf_evsel__roundtrip_cache_name_test(void) } idx = 0; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index fa55b7bad3af..6367c8f6ca22 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -721,7 +721,7 @@ int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_u if (verbose > 1) machine__fprintf(machine, stderr); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (i = 0; i < ARRAY_SIZE(testcases); i++) { err = testcases[i](evsel, machine); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 1a3bdc0a2d14..a024d3f3a412 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -311,8 +311,8 @@ int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unuse print_hists_in(hists); } - first = perf_evlist__first(evlist); - evsel = perf_evlist__last(evlist); + first = evlist__first(evlist); + evsel = evlist__last(evlist); first_hists = evsel__hists(first); hists = evsel__hists(evsel); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 3f6dfa212260..38f804ff6452 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -608,7 +608,7 @@ int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unu if (verbose > 1) machine__fprintf(machine, stderr); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); for (i = 0; i < ARRAY_SIZE(testcases); i++) { err = testcases[i](evsel, machine); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index bd4ae8e5cd5d..92c7d591bcac 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -93,7 +93,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un perf_evlist__config(evlist, &opts, NULL); - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; @@ -132,7 +132,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un evlist__enable(evlist); - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); CHECK__(evsel__disable(evsel)); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index c25c8e7b41e5..25e0ed2eedfc 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -46,7 +46,7 @@ static bool kvm_s390_create_vm_valid(void) static int test__checkevent_tracepoint(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); @@ -77,7 +77,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist) static int test__checkevent_raw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -87,7 +87,7 @@ static int test__checkevent_raw(struct evlist *evlist) static int test__checkevent_numeric(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); @@ -97,7 +97,7 @@ static int test__checkevent_numeric(struct evlist *evlist) static int test__checkevent_symbolic_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -108,7 +108,7 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) static int test__checkevent_symbolic_name_config(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); @@ -129,7 +129,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) static int test__checkevent_symbolic_alias(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); @@ -140,7 +140,7 @@ static int test__checkevent_symbolic_alias(struct evlist *evlist) static int test__checkevent_genhw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); @@ -150,7 +150,7 @@ static int test__checkevent_genhw(struct evlist *evlist) static int test__checkevent_breakpoint(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -164,7 +164,7 @@ static int test__checkevent_breakpoint(struct evlist *evlist) static int test__checkevent_breakpoint_x(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -177,7 +177,7 @@ static int test__checkevent_breakpoint_x(struct evlist *evlist) static int test__checkevent_breakpoint_r(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -192,7 +192,7 @@ static int test__checkevent_breakpoint_r(struct evlist *evlist) static int test__checkevent_breakpoint_w(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -207,7 +207,7 @@ static int test__checkevent_breakpoint_w(struct evlist *evlist) static int test__checkevent_breakpoint_rw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", @@ -222,7 +222,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist) static int test__checkevent_tracepoint_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -253,7 +253,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist) static int test__checkevent_raw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -265,7 +265,7 @@ static int test__checkevent_raw_modifier(struct evlist *evlist) static int test__checkevent_numeric_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -277,7 +277,7 @@ static int test__checkevent_numeric_modifier(struct evlist *evlist) static int test__checkevent_symbolic_name_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -289,7 +289,7 @@ static int test__checkevent_symbolic_name_modifier(struct evlist *evlist) static int test__checkevent_exclude_host_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); @@ -299,7 +299,7 @@ static int test__checkevent_exclude_host_modifier(struct evlist *evlist) static int test__checkevent_exclude_guest_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); @@ -309,7 +309,7 @@ static int test__checkevent_exclude_guest_modifier(struct evlist *evlist) static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -321,7 +321,7 @@ static int test__checkevent_symbolic_alias_modifier(struct evlist *evlist) static int test__checkevent_genhw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -333,7 +333,7 @@ static int test__checkevent_genhw_modifier(struct evlist *evlist) static int test__checkevent_exclude_idle_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle); TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); @@ -348,7 +348,7 @@ static int test__checkevent_exclude_idle_modifier(struct evlist *evlist) static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude idle", evsel->core.attr.exclude_idle); TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); @@ -363,7 +363,7 @@ static int test__checkevent_exclude_idle_modifier_1(struct evlist *evlist) static int test__checkevent_breakpoint_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); @@ -378,7 +378,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -392,7 +392,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -406,7 +406,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -420,7 +420,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); @@ -435,7 +435,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) static int test__checkevent_pmu(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -453,7 +453,7 @@ static int test__checkevent_pmu(struct evlist *evlist) static int test__checkevent_list(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); @@ -492,7 +492,7 @@ static int test__checkevent_list(struct evlist *evlist) static int test__checkevent_pmu_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* cpu/config=1,name=krava/u */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -513,7 +513,7 @@ static int test__checkevent_pmu_name(struct evlist *evlist) static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* cpu/config=1,call-graph=fp,time,period=100000/ */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -546,7 +546,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) static int test__checkevent_pmu_events(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); @@ -564,7 +564,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) static int test__checkevent_pmu_events_mix(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); /* pmu-event:u */ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); @@ -642,7 +642,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* instructions:k */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -684,7 +684,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* faults + :ku modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_SW_PAGE_FAULTS == evsel->core.attr.config); @@ -739,7 +739,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* group1 syscalls:sys_enter_openat:H */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -831,7 +831,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:u + p */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -875,7 +875,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* cycles + G */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -961,7 +961,7 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles + :H group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1001,7 +1001,7 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles + :G group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1041,7 +1041,7 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:G + :u group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1081,7 +1081,7 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:G + :uG group modifier */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1120,7 +1120,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); /* cycles - sampling group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1173,7 +1173,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); /* instructions - sampling group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -1207,7 +1207,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) static int test__checkevent_pinned_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -1225,7 +1225,7 @@ static int test__pinned_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); /* cycles - group leader */ - evsel = leader = perf_evlist__first(evlist); + evsel = leader = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -1251,7 +1251,7 @@ static int test__pinned_group(struct evlist *evlist) static int test__checkevent_breakpoint_len(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -1266,7 +1266,7 @@ static int test__checkevent_breakpoint_len(struct evlist *evlist) static int test__checkevent_breakpoint_len_w(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type); @@ -1282,7 +1282,7 @@ static int test__checkevent_breakpoint_len_w(struct evlist *evlist) static int test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -1294,7 +1294,7 @@ test__checkevent_breakpoint_len_rw_modifier(struct evlist *evlist) static int test__checkevent_precise_max_modifier(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); @@ -1305,7 +1305,7 @@ static int test__checkevent_precise_max_modifier(struct evlist *evlist) static int test__checkevent_config_symbol(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0); return 0; @@ -1313,7 +1313,7 @@ static int test__checkevent_config_symbol(struct evlist *evlist) static int test__checkevent_config_raw(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0); return 0; @@ -1321,7 +1321,7 @@ static int test__checkevent_config_raw(struct evlist *evlist) static int test__checkevent_config_num(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0); return 0; @@ -1329,7 +1329,7 @@ static int test__checkevent_config_num(struct evlist *evlist) static int test__checkevent_config_cache(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0); return 0; @@ -1342,7 +1342,7 @@ static bool test__intel_pt_valid(void) static int test__intel_pt(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0); return 0; @@ -1350,7 +1350,7 @@ static int test__intel_pt(struct evlist *evlist) static int test__checkevent_complex_name(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0); return 0; @@ -1358,7 +1358,7 @@ static int test__checkevent_complex_name(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); @@ -1368,7 +1368,7 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { - struct evsel *evsel = perf_evlist__first(evlist); + struct evsel *evsel = evlist__first(evlist); TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 199a66444e60..401e8d11427b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -104,7 +104,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus /* * Config the evsels, setting attr->comm on the first one, etc. */ - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); perf_evsel__set_sample_bit(evsel, CPU); perf_evsel__set_sample_bit(evsel, TID); perf_evsel__set_sample_bit(evsel, TIME); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 30a70db6473d..ffa592e0020e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -367,7 +367,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - cpu_clocks_evsel = perf_evlist__last(evlist); + cpu_clocks_evsel = evlist__last(evlist); /* Second event */ err = parse_events(evlist, "cycles:u", NULL); @@ -376,7 +376,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - cycles_evsel = perf_evlist__last(evlist); + cycles_evsel = evlist__last(evlist); /* Third event */ if (!perf_evlist__can_select_event(evlist, sched_switch)) { @@ -391,7 +391,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - switch_evsel = perf_evlist__last(evlist); + switch_evsel = evlist__last(evlist); perf_evsel__set_sample_bit(switch_evsel, CPU); perf_evsel__set_sample_bit(switch_evsel, TIME); @@ -401,12 +401,12 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ switch_evsel->immediate = true; /* Test moving an event to the front */ - if (cycles_evsel == perf_evlist__first(evlist)) { + if (cycles_evsel == evlist__first(evlist)) { pr_debug("cycles event already at front"); goto out_err; } perf_evlist__to_front(evlist, cycles_evsel); - if (cycles_evsel != perf_evlist__first(evlist)) { + if (cycles_evsel != evlist__first(evlist)) { pr_debug("Failed to move cycles event to front"); goto out_err; } @@ -421,7 +421,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - tracking_evsel = perf_evlist__last(evlist); + tracking_evsel = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, tracking_evsel); @@ -434,7 +434,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ perf_evlist__config(evlist, &opts, NULL); /* Check moved event is still at the front */ - if (cycles_evsel != perf_evlist__first(evlist)) { + if (cycles_evsel != evlist__first(evlist)) { pr_debug("Front event no longer at front"); goto out_err; } diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 7fc39af48a76..24565f83e07d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -88,7 +88,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); evsel->core.attr.task = 1; #ifdef __s390x__ evsel->core.attr.sample_freq = 1000000; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 589168ca9f62..7a7187e069b4 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3319,13 +3319,13 @@ browse_hists: switch (key) { case K_TAB: if (pos->core.node.next == &evlist->core.entries) - pos = perf_evlist__first(evlist); + pos = evlist__first(evlist); else pos = perf_evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->core.node.prev == &evlist->core.entries) - pos = perf_evlist__last(evlist); + pos = evlist__last(evlist); else pos = perf_evsel__prev(pos); goto browse_hists; @@ -3417,7 +3417,7 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help, single_entry: if (nr_entries == 1) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 37283e865352..10c187b8b8ea 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1568,7 +1568,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name) return ERR_PTR(-err); } - evsel = perf_evlist__last(evlist); + evsel = evlist__last(evlist); } bpf__for_each_map_named(map, obj, tmp, name) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 559db38594a8..e8f0357b1532 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -105,7 +105,7 @@ struct evlist *perf_evlist__new_dummy(void) */ void perf_evlist__set_id_pos(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); evlist->id_pos = first->id_pos; evlist->is_pos = first->is_pos; @@ -559,14 +559,14 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) struct perf_sample_id *sid; if (evlist->core.nr_entries == 1 || !id) - return perf_evlist__first(evlist); + return evlist__first(evlist); sid = perf_evlist__id2sid(evlist, id); if (sid) return container_of(sid->evsel, struct evsel, core); if (!perf_evlist__sample_id_all(evlist)) - return perf_evlist__first(evlist); + return evlist__first(evlist); return NULL; } @@ -610,7 +610,7 @@ static int perf_evlist__event2id(struct evlist *evlist, struct evsel *perf_evlist__event2evsel(struct evlist *evlist, union perf_event *event) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); struct hlist_head *head; struct perf_sample_id *sid; int hash; @@ -1222,7 +1222,7 @@ u64 perf_evlist__combined_branch_type(struct evlist *evlist) bool perf_evlist__valid_read_format(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist), *pos = first; + struct evsel *first = evlist__first(evlist), *pos = first; u64 read_format = first->core.attr.read_format; u64 sample_type = first->core.attr.sample_type; @@ -1242,13 +1242,13 @@ bool perf_evlist__valid_read_format(struct evlist *evlist) u64 perf_evlist__read_format(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); return first->core.attr.read_format; } u16 perf_evlist__id_hdr_size(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); struct perf_sample *data; u64 sample_type; u16 size = 0; @@ -1281,7 +1281,7 @@ out: bool perf_evlist__valid_sample_id_all(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist), *pos = first; + struct evsel *first = evlist__first(evlist), *pos = first; evlist__for_each_entry_continue(evlist, pos) { if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all) @@ -1293,7 +1293,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist) bool perf_evlist__sample_id_all(struct evlist *evlist) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); return first->core.attr.sample_id_all; } @@ -1568,7 +1568,7 @@ int perf_evlist__strerror_open(struct evlist *evlist, "Hint:\tThe current value is %d.", value); break; case EINVAL: { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); int max_freq; if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) @@ -1630,7 +1630,7 @@ void perf_evlist__to_front(struct evlist *evlist, struct evsel *evsel, *n; LIST_HEAD(move); - if (move_evsel == perf_evlist__first(evlist)) + if (move_evsel == evlist__first(evlist)) return; evlist__for_each_entry_safe(evlist, n, evsel) { @@ -1751,7 +1751,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist) void perf_evlist__force_leader(struct evlist *evlist) { if (!evlist->nr_groups) { - struct evsel *leader = perf_evlist__first(evlist); + struct evsel *leader = evlist__first(evlist); perf_evlist__set_leader(evlist); leader->forced_leader = true; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index ad9c0ba57a91..6529ad2a9d97 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "events_stats.h" #include "evsel.h" #include @@ -242,14 +243,18 @@ static inline bool perf_evlist__empty(struct evlist *evlist) return list_empty(&evlist->core.entries); } -static inline struct evsel *perf_evlist__first(struct evlist *evlist) +static inline struct evsel *evlist__first(struct evlist *evlist) { - return list_entry(evlist->core.entries.next, struct evsel, core.node); + struct perf_evsel *evsel = perf_evlist__first(&evlist->core); + + return container_of(evsel, struct evsel, core); } -static inline struct evsel *perf_evlist__last(struct evlist *evlist) +static inline struct evsel *evlist__last(struct evlist *evlist) { - return list_entry(evlist->core.entries.prev, struct evsel, core.node); + struct perf_evsel *evsel = perf_evlist__last(&evlist->core); + + return container_of(evsel, struct evsel, core); } size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9f9c6c6a2fd3..1bdf4c6ea3e5 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -777,7 +777,7 @@ jit_process(struct perf_session *session, * track sample_type to compute id_all layout * perf sets the same sample type to all events as of now */ - first = perf_evlist__first(session->evlist); + first = evlist__first(session->evlist); jd.sample_type = first->core.attr.sample_type; *nbytes = 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d7aebe9b005d..d69ff746cda5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1936,7 +1936,7 @@ int parse_events(struct evlist *evlist, const char *str, perf_evlist__splice_list_tail(evlist, &parse_state.list); evlist->nr_groups += parse_state.nr_groups; - last = perf_evlist__last(evlist); + last = evlist__last(evlist); last->cmdline_group_boundary = true; return 0; @@ -2050,7 +2050,7 @@ foreach_evsel_in_last_glob(struct evlist *evlist, * So no need to WARN here, let *func do this. */ if (evlist->core.nr_entries > 0) - last = perf_evlist__last(evlist); + last = evlist__last(evlist); do { err = (*func)(last, arg); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 8a015fc0aba0..8579505c29a4 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -30,7 +30,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) if (parse_events(evlist, str, NULL)) goto out_delete; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); while (1) { fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); @@ -171,7 +171,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_sample_identifier = perf_can_sample_identifier(); sample_id = true; } else if (evlist->core.nr_entries > 1) { - struct evsel *first = perf_evlist__first(evlist); + struct evsel *first = evlist__first(evlist); evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.sample_type == first->core.attr.sample_type) @@ -276,7 +276,7 @@ bool perf_evlist__can_select_event(struct evlist *evlist, const char *str) if (err) goto out_delete; - evsel = perf_evlist__last(temp_evlist); + evsel = evlist__last(temp_evlist); if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a2308eb77681..43d1d410854a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2329,7 +2329,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) if (nr > evlist->core.nr_entries) return NULL; - evsel = perf_evlist__first(evlist); + evsel = evlist__first(evlist); while (--nr > 0) evsel = perf_evsel__next(evsel); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index ef96e3dd6902..3dce2de9d005 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -71,7 +71,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) } if (top->evlist->core.nr_entries == 1) { - struct evsel *first = perf_evlist__first(top->evlist); + struct evsel *first = evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->core.attr.sample_period, opts->freq ? "Hz" : ""); From ff47d86a0d9bf618b185b49cb4bb9c6f957bb445 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Sep 2019 10:54:48 +0200 Subject: [PATCH 299/345] libperf: Add perf_evlist__read_format() function Add the perf_evlist__read_format() function to libperf as internal function. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-30-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 7 +++++++ tools/perf/lib/include/internal/evlist.h | 2 ++ tools/perf/util/evlist.c | 8 +------- tools/perf/util/evlist.h | 1 - 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 2bdd9d2c79d9..ae861bf38976 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -161,3 +161,10 @@ void perf_evlist__disable(struct perf_evlist *evlist) perf_evlist__for_each_entry(evlist, evsel) perf_evsel__disable(evsel); } + +u64 perf_evlist__read_format(struct perf_evlist *evlist) +{ + struct perf_evsel *first = perf_evlist__first(evlist); + + return first->attr.read_format; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 16ae6d6cfb39..63516fe14f4c 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -66,4 +66,6 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) return list_entry(evlist->entries.prev, struct perf_evsel, node); } +u64 perf_evlist__read_format(struct perf_evlist *evlist); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e8f0357b1532..bc788192493f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -503,7 +503,7 @@ int perf_evlist__id_add_fd(struct evlist *evlist, * This way does not work with group format read, so bail * out in that case. */ - if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) + if (perf_evlist__read_format(&evlist->core) & PERF_FORMAT_GROUP) return -1; if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) || @@ -1240,12 +1240,6 @@ bool perf_evlist__valid_read_format(struct evlist *evlist) return true; } -u64 perf_evlist__read_format(struct evlist *evlist) -{ - struct evsel *first = evlist__first(evlist); - return first->core.attr.read_format; -} - u16 perf_evlist__id_hdr_size(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6529ad2a9d97..3f89d9913a30 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -217,7 +217,6 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct evlist *evlist); -u64 perf_evlist__read_format(struct evlist *evlist); u64 __perf_evlist__combined_sample_type(struct evlist *evlist); u64 perf_evlist__combined_sample_type(struct evlist *evlist); u64 perf_evlist__combined_branch_type(struct evlist *evlist); From b0031c22819ab606a0cb648c0f0a7d80db3c3a89 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Sep 2019 11:01:04 +0200 Subject: [PATCH 300/345] libperf: Add perf_evlist__id_add() function Add the perf_evlist__id_add() function to libperf as an internal function. We already have the 'heads' member in 'struct perf_evlist'. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-31-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 26 ++++++++++++++++++++++++ tools/perf/lib/include/internal/evlist.h | 4 ++++ tools/perf/tests/event_update.c | 2 +- tools/perf/util/evlist.c | 22 +------------------- tools/perf/util/evlist.h | 2 -- tools/perf/util/header.c | 4 ++-- 6 files changed, 34 insertions(+), 26 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index ae861bf38976..a29ee8a746d9 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include +#include #include #include +#include #include #include #include @@ -168,3 +171,26 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) return first->attr.read_format; } + +#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) + +static void perf_evlist__id_hash(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + int hash; + struct perf_sample_id *sid = SID(evsel, cpu, thread); + + sid->id = id; + sid->evsel = evsel; + hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); + hlist_add_head(&sid->node, &evlist->heads[hash]); +} + +void perf_evlist__id_add(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + evsel->id[evsel->ids++] = id; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 63516fe14f4c..649406f717bc 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -68,4 +68,8 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) u64 perf_evlist__read_format(struct perf_evlist *evlist); +void perf_evlist__id_add(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index cd6cae8e5137..c727379cf20e 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -97,7 +97,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to allocate ids", !perf_evsel__alloc_id(&evsel->core, 1, 1)); - perf_evlist__id_add(evlist, evsel, 0, 0, 123); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); evsel->unit = strdup("KRAVA"); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bc788192493f..f2863b4c61d7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -461,26 +461,6 @@ int perf_evlist__poll(struct evlist *evlist, int timeout) return fdarray__poll(&evlist->core.pollfd, timeout); } -static void perf_evlist__id_hash(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, u64 id) -{ - int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); - - sid->id = id; - sid->evsel = &evsel->core; - hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->core.heads[hash]); -} - -void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, - int cpu, int thread, u64 id) -{ - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); - evsel->core.id[evsel->core.ids++] = id; -} - int perf_evlist__id_add_fd(struct evlist *evlist, struct evsel *evsel, int cpu, int thread, int fd) @@ -518,7 +498,7 @@ int perf_evlist__id_add_fd(struct evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(&evlist->core, &evsel->core, cpu, thread, id); return 0; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 3f89d9913a30..eb35b4b1d86f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -141,8 +141,6 @@ struct evsel * perf_evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); -void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel, - int cpu, int thread, u64 id); int perf_evlist__id_add_fd(struct evlist *evlist, struct evsel *evsel, int cpu, int thread, int fd); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fc5eac41e102..02602bc8cabf 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3618,7 +3618,7 @@ int perf_session__read_header(struct perf_session *session) if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id))) goto out_errno; - perf_evlist__id_add(session->evlist, evsel, 0, j, f_id); + perf_evlist__id_add(&session->evlist->core, &evsel->core, 0, j, f_id); } lseek(fd, tmp, SEEK_SET); @@ -3754,7 +3754,7 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, return -ENOMEM; for (i = 0; i < n_ids; i++) { - perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]); } return 0; From d5a99483dece17dbde01968a7ffc03b7f575dc11 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Sep 2019 11:19:56 +0200 Subject: [PATCH 301/345] libperf: Add perf_evlist__id_add_fd() function Add the perf_evlist__id_add_fd() function to libperf as an internal function. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-32-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 44 ++++++++++++++++++++++++ tools/perf/lib/include/internal/evlist.h | 4 +++ tools/perf/util/evlist.c | 43 +---------------------- tools/perf/util/evlist.h | 4 --- tools/perf/util/evsel.c | 2 +- 5 files changed, 50 insertions(+), 47 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index a29ee8a746d9..3a16dd0c044f 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -4,11 +4,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include @@ -194,3 +197,44 @@ void perf_evlist__id_add(struct perf_evlist *evlist, perf_evlist__id_hash(evlist, evsel, cpu, thread, id); evsel->id[evsel->ids++] = id; } + +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd) +{ + u64 read_data[4] = { 0, }; + int id_idx = 1; /* The first entry is the counter value */ + u64 id; + int ret; + + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); + if (!ret) + goto add; + + if (errno != ENOTTY) + return -1; + + /* Legacy way to get event id.. All hail to old kernels! */ + + /* + * This way does not work with group format read, so bail + * out in that case. + */ + if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) + return -1; + + if (!(evsel->attr.read_format & PERF_FORMAT_ID) || + read(fd, &read_data, sizeof(read_data)) == -1) + return -1; + + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + ++id_idx; + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + ++id_idx; + + id = read_data[id_idx]; + +add: + perf_evlist__id_add(evlist, evsel, cpu, thread, id); + return 0; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 649406f717bc..7d64185cfabd 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -72,4 +72,8 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id); +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f2863b4c61d7..a37eccf65eae 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -461,47 +461,6 @@ int perf_evlist__poll(struct evlist *evlist, int timeout) return fdarray__poll(&evlist->core.pollfd, timeout); } -int perf_evlist__id_add_fd(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, int fd) -{ - u64 read_data[4] = { 0, }; - int id_idx = 1; /* The first entry is the counter value */ - u64 id; - int ret; - - ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); - if (!ret) - goto add; - - if (errno != ENOTTY) - return -1; - - /* Legacy way to get event id.. All hail to old kernels! */ - - /* - * This way does not work with group format read, so bail - * out in that case. - */ - if (perf_evlist__read_format(&evlist->core) & PERF_FORMAT_GROUP) - return -1; - - if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) || - read(fd, &read_data, sizeof(read_data)) == -1) - return -1; - - if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - ++id_idx; - if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - ++id_idx; - - id = read_data[id_idx]; - - add: - perf_evlist__id_add(&evlist->core, &evsel->core, cpu, thread, id); - return 0; -} - static void perf_evlist__set_sid_idx(struct evlist *evlist, struct evsel *evsel, int idx, int cpu, int thread) @@ -776,7 +735,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, } if (evsel->core.attr.read_format & PERF_FORMAT_ID) { - if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, fd) < 0) return -1; perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index eb35b4b1d86f..80b3361613e5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -141,10 +141,6 @@ struct evsel * perf_evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); -int perf_evlist__id_add_fd(struct evlist *evlist, - struct evsel *evsel, - int cpu, int thread, int fd); - int perf_evlist__add_pollfd(struct evlist *evlist, int fd); int perf_evlist__alloc_pollfd(struct evlist *evlist); int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a4a492f11849..9c284d2adcea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2662,7 +2662,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) thread++) { int fd = FD(evsel, cpu, thread); - if (perf_evlist__id_add_fd(evlist, evsel, + if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, fd) < 0) return -1; } From 20f2be1d48ec293b5a935595bd0c2e2915ffa77c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Aug 2019 15:25:25 +0200 Subject: [PATCH 302/345] libperf: Move 'page_size' global variable to libperf We need the 'page_size' variable in libperf, so move it there. Add a libperf_init() as a global libperf init function to obtain this value via sysconf() at tool start. Committer notes: Add internal/lib.h to tools/perf/ files using 'page_size', sometimes replacing util.h with it if that was the only reason for having util.h included. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-33-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 2 +- tools/perf/arch/s390/util/machine.c | 2 +- tools/perf/arch/x86/tests/intel-cqm.c | 1 + tools/perf/arch/x86/tests/rdpmc.c | 2 +- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 2 +- tools/perf/arch/x86/util/machine.c | 2 +- tools/perf/lib/core.c | 7 +++++++ tools/perf/lib/include/internal/lib.h | 2 ++ tools/perf/lib/include/perf/core.h | 1 + tools/perf/lib/lib.c | 2 ++ tools/perf/lib/libperf.map | 1 + tools/perf/perf.c | 7 ++++--- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/tests/vmlinux-kallsyms.c | 2 +- tools/perf/util/auxtrace.c | 1 - tools/perf/util/evlist.c | 2 +- tools/perf/util/header.c | 2 +- tools/perf/util/machine.c | 1 + tools/perf/util/mmap.c | 2 +- tools/perf/util/python.c | 2 +- tools/perf/util/session.c | 1 - tools/perf/util/srccode.c | 2 +- tools/perf/util/synthetic-events.c | 2 +- tools/perf/util/trace-event-info.c | 2 +- tools/perf/util/util.c | 3 +-- tools/perf/util/util.h | 2 -- 28 files changed, 36 insertions(+), 25 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 6654bcfc1224..2e4de211d881 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -25,7 +25,7 @@ #include "../../util/evsel.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" -#include "../../util/util.h" // page_size +#include // page_size #include "../../util/session.h" #include diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 745f2d50ee82..eba6541ec0f1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -16,7 +16,7 @@ #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/session.h" -#include "../../util/util.h" // page_size +#include // page_size #include "../../util/pmu.h" #include "../../util/debug.h" #include "../../util/auxtrace.h" diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index df099d6cc3f5..724efb2d842d 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -2,7 +2,7 @@ #include #include #include -#include "util.h" // page_size +#include // page_size #include "machine.h" #include "api/fs/fs.h" #include "debug.h" diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 0329b9168fae..3ec562a2aaba 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -5,6 +5,7 @@ #include "evlist.h" #include "evsel.h" #include "arch-tests.h" +#include // page_size #include #include diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index e7640fb047de..1ea916656a2d 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -13,7 +13,7 @@ #include "tests/tests.h" #include "cloexec.h" #include "event.h" -#include "util.h" // page_size +#include // page_size #include "arch-tests.h" static u64 rdpmc(unsigned int counter) diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index e2c7095327db..f7f68a50a5cd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -23,7 +23,7 @@ #include "../../util/tsc.h" #include "../../util/auxtrace.h" #include "../../util/intel-bts.h" -#include "../../util/util.h" // page_size +#include // page_size #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 84a65524c418..d6d26256915f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -27,7 +27,7 @@ #include "../../util/record.h" #include "../../util/target.h" #include "../../util/tsc.h" -#include "../../util/util.h" // page_size +#include // page_size #include "../../util/intel-pt.h" #define KiB(x) ((x) * 1024) diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index f0c289862f9f..e17e080e76f4 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -4,7 +4,7 @@ #include #include -#include "../../util/util.h" // page_size +#include // page_size #include "../../util/machine.h" #include "../../util/map.h" #include "../../util/symbol.h" diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c index 29d5e3348718..6689d593c2d1 100644 --- a/tools/perf/lib/core.c +++ b/tools/perf/lib/core.c @@ -4,7 +4,9 @@ #include #include +#include #include +#include #include "internal.h" static int __base_pr(enum libperf_print_level level, const char *format, @@ -32,3 +34,8 @@ void libperf_print(enum libperf_print_level level, const char *format, ...) __libperf_pr(level, format, args); va_end(args); } + +void libperf_init(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); +} diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h index 0b56f1201dc9..9168b7d2a7e1 100644 --- a/tools/perf/lib/include/internal/lib.h +++ b/tools/perf/lib/include/internal/lib.h @@ -4,6 +4,8 @@ #include +extern unsigned int page_size; + ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index c341a7b2c874..ba2f4e76a3e2 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -18,5 +18,6 @@ typedef int (*libperf_print_fn_t)(enum libperf_print_level level, const char *, va_list ap); LIBPERF_API void libperf_set_print(libperf_print_fn_t fn); +LIBPERF_API void libperf_init(void); #endif /* __LIBPERF_CORE_H */ diff --git a/tools/perf/lib/lib.c b/tools/perf/lib/lib.c index 2a81819c3b8c..18658931fc71 100644 --- a/tools/perf/lib/lib.c +++ b/tools/perf/lib/lib.c @@ -5,6 +5,8 @@ #include #include +unsigned int page_size; + static ssize_t ion(bool is_read, int fd, void *buf, size_t n) { void *buf_start = buf; diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index cd0d17b996c8..724da66776ef 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -1,5 +1,6 @@ LIBPERF_0.0.1 { global: + libperf_init; libperf_set_print; perf_cpu_map__dummy_new; perf_cpu_map__get; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index bb40a108b8f7..c012ceb64ff9 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -12,6 +12,7 @@ #include "util/build-id.h" #include "util/cache.h" #include "util/env.h" +#include // page_size #include #include "util/config.h" #include @@ -20,11 +21,12 @@ #include "util/bpf-loader.h" #include "util/debug.h" #include "util/event.h" -#include "util/util.h" // page_size, usage() +#include "util/util.h" // usage() #include "ui/ui.h" #include "perf-sys.h" #include #include +#include #include #include #include @@ -438,8 +440,7 @@ int main(int argc, const char **argv) exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); - /* The page_size is placed in util object. */ - page_size = sysconf(_SC_PAGE_SIZE); + libperf_init(); cmd = extract_argv0_path(argv[0]); if (!cmd) diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 33b496d194f4..8d9d4cbff76d 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -16,7 +16,7 @@ #include "symbol.h" #include "util/synthetic-events.h" #include "thread.h" -#include "util.h" // page_size +#include // page_size #define THREADS 4 diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 7f28775875c2..aa296ffea6d1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -7,7 +7,7 @@ #include "dso.h" #include "map.h" #include "symbol.h" -#include "util.h" // page_size +#include // page_size #include "tests.h" #include "debug.h" #include "machine.h" diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 4bd92f5bfb5f..8470dfe9fe97 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -50,7 +50,6 @@ #include "intel-bts.h" #include "arm-spe.h" #include "s390-cpumsf.h" -#include "util.h" // page_size #include "util/mmap.h" #include diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a37eccf65eae..6069fb52661f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -17,7 +17,7 @@ #include "evsel.h" #include "debug.h" #include "units.h" -#include "util.h" // page_size +#include // page_size #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 02602bc8cabf..3b24b4974c5f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -42,7 +42,7 @@ #include "tool.h" #include "time-utils.h" #include "units.h" -#include "util.h" // page_size, perf_exe() +#include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0535338f2d7a..70a9f8716a4b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ #include "linux/hash.h" #include "asm/bug.h" #include "bpf-event.h" +#include // page_size #include #include diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 4cc3b54b2f73..12671d089748 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -20,7 +20,7 @@ #include "event.h" #include "mmap.h" #include "../perf.h" -#include "util.h" /* page_size */ +#include /* page_size */ size_t perf_mmap__mmap_len(struct mmap *map) { diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index fcbafb1e8d9d..6c46d7dbd263 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -14,7 +14,7 @@ #include "thread_map.h" #include "trace-event.h" #include "mmap.h" -#include "util.h" +#include #include "../perf-sys.h" #if PY_MAJOR_VERSION < 3 diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 84a30ff3968a..061bb4d6a3f5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,7 +29,6 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" -#include "util.h" #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index b402f9ca89ab..d84ed8b6caaa 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -15,7 +15,7 @@ #include #include "srccode.h" #include "debug.h" -#include "util.h" // page_size +#include // page_size #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 96ed008c2775..807cbca403a7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -16,7 +16,6 @@ #include "util/synthetic-events.h" #include "util/target.h" #include "util/time-utils.h" -#include "util/util.h" #include #include #include @@ -26,6 +25,7 @@ #include #include #include +#include // page_size #include #include #include diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index fe07e7550930..086e98ff42a3 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2008,2009, Steven Rostedt */ -#include "util.h" // page_size #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include // page_size #include "trace-event.h" #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 32322a20a68b..cb6fa4c98470 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -3,6 +3,7 @@ #include "debug.h" #include "event.h" #include "namespaces.h" +#include #include #include #include @@ -41,8 +42,6 @@ void perf_set_multithreaded(void) perf_singlethreaded = false; } -unsigned int page_size; - int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 45a5c6f20197..d6ae394e67c4 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -33,8 +33,6 @@ int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); size_t hex_width(u64 v); -extern unsigned int page_size; - int sysctl__max_stack(void); int fetch_kernel_version(unsigned int *puint, From 26049111c333fda4194e895f4cb719766b602256 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2019 16:22:22 -0300 Subject: [PATCH 303/345] perf tools: No need to include internal/lib.h from util/util.h That was done just to have users of writen() and readn(), that before had their prototypes in util/util.h to get it without having to add an include for internal/lib.h, but the right way is to add it and by now all places already do it. Fix a fallout were readlink() was used but unistd.h was being obtained by luck thru util.h -> internal/lib.h, now to check why unistd.h is being included there... Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-lcnytgrtafey3kwlfog2rzzj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sdt.c | 1 + tools/perf/util/util.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index cf1bd57d3023..60f0e9ee04fb 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d6ae394e67c4..b78b73e5bb32 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,7 +11,6 @@ #include #include #include -#include /* General helper functions */ void usage(const char *err) __noreturn; From 7634d5336a6e66cba5befd8190b3994916424e08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Sep 2019 18:06:52 -0300 Subject: [PATCH 304/345] libperf: Use sys/types.h to get ssize_t, not unistd.h The sys/types.h header looks more sensible, from its name we can gather it should be there because of some needed typedef, and it is much smaller than unistd.h, so use it and fix up the fallout in places where it was being used for something else entirely but being obtained by sheer luck, indirectly. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-49bn251httu22ymwgipeavmy@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/include/internal/lib.h | 2 +- tools/perf/util/mmap.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h index 9168b7d2a7e1..5175d491b2d4 100644 --- a/tools/perf/lib/include/internal/lib.h +++ b/tools/perf/lib/include/internal/lib.h @@ -2,7 +2,7 @@ #ifndef __LIBPERF_INTERNAL_LIB_H #define __LIBPERF_INTERNAL_LIB_H -#include +#include extern unsigned int page_size; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 12671d089748..a35dc57d5995 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -12,6 +12,7 @@ #include #include #include +#include // sysconf() #ifdef HAVE_LIBNUMA_SUPPORT #include #endif From fb4bf51fcc153f74c86e08ce1d17e9f1a1a71ba0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 10:35:47 +0200 Subject: [PATCH 305/345] libperf: Add libperf dependency for tests targets Add libperf dependency for tests targets. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-36-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index 54466cc84544..85ccb8c439a4 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -138,7 +138,7 @@ clean: $(LIBAPI)-clean *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC) $(Q)$(MAKE) -C tests clean -tests: +tests: libs $(Q)$(MAKE) -C tests $(Q)$(MAKE) -C tests run From 428dab813a56ce9402f359dc35a3ccdfcaea9429 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 31 Aug 2019 20:44:13 +0200 Subject: [PATCH 306/345] libperf: Merge libperf_set_print() into libperf_init() The libperf_set_print() function needs to be called in any case so let's merge it with libperf_init(), so we have just one init function. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/core.c | 8 ++------ tools/perf/lib/include/perf/core.h | 3 +-- tools/perf/lib/libperf.map | 1 - tools/perf/perf.c | 8 +++++++- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c index 6689d593c2d1..d0b9ae422b9f 100644 --- a/tools/perf/lib/core.c +++ b/tools/perf/lib/core.c @@ -17,11 +17,6 @@ static int __base_pr(enum libperf_print_level level, const char *format, static libperf_print_fn_t __libperf_pr = __base_pr; -void libperf_set_print(libperf_print_fn_t fn) -{ - __libperf_pr = fn; -} - __printf(2, 3) void libperf_print(enum libperf_print_level level, const char *format, ...) { @@ -35,7 +30,8 @@ void libperf_print(enum libperf_print_level level, const char *format, ...) va_end(args); } -void libperf_init(void) +void libperf_init(libperf_print_fn_t fn) { page_size = sysconf(_SC_PAGE_SIZE); + __libperf_pr = fn; } diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index ba2f4e76a3e2..cfd70e720c1c 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -17,7 +17,6 @@ enum libperf_print_level { typedef int (*libperf_print_fn_t)(enum libperf_print_level level, const char *, va_list ap); -LIBPERF_API void libperf_set_print(libperf_print_fn_t fn); -LIBPERF_API void libperf_init(void); +LIBPERF_API void libperf_init(libperf_print_fn_t fn); #endif /* __LIBPERF_CORE_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 724da66776ef..5eb0150ccdc6 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -1,7 +1,6 @@ LIBPERF_0.0.1 { global: libperf_init; - libperf_set_print; perf_cpu_map__dummy_new; perf_cpu_map__get; perf_cpu_map__put; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index c012ceb64ff9..27f94b0bb874 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -430,6 +430,12 @@ void pthread__unblock_sigwinch(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return eprintf(level, verbose, fmt, ap); +} + int main(int argc, const char **argv) { int err; @@ -440,7 +446,7 @@ int main(int argc, const char **argv) exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); - libperf_init(); + libperf_init(libperf_print); cmd = extract_argv0_path(argv[0]); if (!cmd) From 379dd98c3d77de920f09f1933592982d200e076a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Sep 2019 10:33:09 +0200 Subject: [PATCH 307/345] libperf: Add libperf_init() call to the tests Add libperf_init() call to the automated tests. Committer notes: Added missing stdarg.h and/or stdio.h to places using vfprintf. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/tests/test-cpumap.c | 10 ++++++++++ tools/perf/lib/tests/test-evlist.c | 10 ++++++++++ tools/perf/lib/tests/test-evsel.c | 10 ++++++++++ tools/perf/lib/tests/test-threadmap.c | 10 ++++++++++ 4 files changed, 40 insertions(+) diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/perf/lib/tests/test-cpumap.c index 76a43cfb83a1..aa34c20df07e 100644 --- a/tools/perf/lib/tests/test-cpumap.c +++ b/tools/perf/lib/tests/test-cpumap.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + int main(int argc, char **argv) { struct perf_cpu_map *cpus; __T_START; + libperf_init(libperf_print); + cpus = perf_cpu_map__dummy_new(); if (!cpus) return -1; diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/perf/lib/tests/test-evlist.c index 4e1407f20ffd..e6b2ab2e2bde 100644 --- a/tools/perf/lib/tests/test-evlist.c +++ b/tools/perf/lib/tests/test-evlist.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include @@ -6,6 +8,12 @@ #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + static int test_stat_cpu(void) { struct perf_cpu_map *cpus; @@ -177,6 +185,8 @@ int main(int argc, char **argv) { __T_START; + libperf_init(libperf_print); + test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/perf/lib/tests/test-evsel.c index 2c648fe5617e..1b6c4285ac2b 100644 --- a/tools/perf/lib/tests/test-evsel.c +++ b/tools/perf/lib/tests/test-evsel.c @@ -1,10 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include #include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + static int test_stat_cpu(void) { struct perf_cpu_map *cpus; @@ -116,6 +124,8 @@ int main(int argc, char **argv) { __T_START; + libperf_init(libperf_print); + test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/perf/lib/tests/test-threadmap.c index 10a4f4cbbdd5..8c5f47247d9e 100644 --- a/tools/perf/lib/tests/test-threadmap.c +++ b/tools/perf/lib/tests/test-threadmap.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +#include +#include #include #include +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + int main(int argc, char **argv) { struct perf_thread_map *threads; __T_START; + libperf_init(libperf_print); + threads = perf_thread_map__new_dummy(); if (!threads) return -1; From 31f67fc462a9e5df3b900924c2bf649c7bc63af8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Aug 2019 13:21:53 +0200 Subject: [PATCH 308/345] libperf: Add perf_evlist__alloc_pollfd() function Move perf_evlist__alloc_pollfd() from tools/perf to libperf, it will be used in the following patches. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-37-jolsa@kernel.org [ Added api/fd/array.h include to the lib/evlist.c file ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/evlist.c | 22 ++++++++++++++++++++++ tools/perf/lib/include/internal/evlist.h | 2 ++ tools/perf/util/evlist.c | 23 +---------------------- tools/perf/util/evlist.h | 1 - 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 3a16dd0c044f..3c3b97f40be0 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -14,6 +14,7 @@ #include #include #include +#include void perf_evlist__init(struct perf_evlist *evlist) { @@ -238,3 +239,24 @@ add: perf_evlist__id_add(evlist, evsel, cpu, thread, id); return 0; } + +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) +{ + int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_threads = perf_thread_map__nr(evlist->threads); + int nfds = 0; + struct perf_evsel *evsel; + + perf_evlist__for_each_entry(evlist, evsel) { + if (evsel->system_wide) + nfds += nr_cpus; + else + nfds += nr_cpus * nr_threads; + } + + if (fdarray__available_entries(&evlist->pollfd) < nfds && + fdarray__grow(&evlist->pollfd, nfds) < 0) + return -ENOMEM; + + return 0; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 7d64185cfabd..88c0dfaf0ddc 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -24,6 +24,8 @@ struct perf_evlist { struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; }; +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); + /** * __perf_evlist__for_each_entry - iterate thru all the evsels * @list: list_head instance to iterate diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6069fb52661f..c47f23e7f3c8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -398,27 +398,6 @@ int perf_evlist__enable_event_idx(struct evlist *evlist, return perf_evlist__enable_event_thread(evlist, evsel, idx); } -int perf_evlist__alloc_pollfd(struct evlist *evlist) -{ - int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); - int nr_threads = perf_thread_map__nr(evlist->core.threads); - int nfds = 0; - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.system_wide) - nfds += nr_cpus; - else - nfds += nr_cpus * nr_threads; - } - - if (fdarray__available_entries(&evlist->core.pollfd) < nfds && - fdarray__grow(&evlist->core.pollfd, nfds) < 0) - return -ENOMEM; - - return 0; -} - static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd, struct mmap *map, short revent) { @@ -944,7 +923,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, if (!evlist->mmap) return -ENOMEM; - if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0) return -ENOMEM; evlist->core.mmap_len = evlist__mmap_size(pages); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 80b3361613e5..bebbaa9b6325 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -142,7 +142,6 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int perf_evlist__add_pollfd(struct evlist *evlist, int fd); -int perf_evlist__alloc_pollfd(struct evlist *evlist); int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); int perf_evlist__poll(struct evlist *evlist, int timeout); From f4009e7bf7ba3375cb00e33ca901d61a5acd6c2b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Aug 2019 16:00:45 +0200 Subject: [PATCH 309/345] libperf: Add perf_evlist__add_pollfd() function Move perf_evlist__add_pollfd() from tools/perf to libperf, it will be used in the following patches. Also rename perf's perf_evlist__add_pollfd()/perf_evlist__filter_pollfd() to evlist__add_pollfd()/evlist__filter_pollfd(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-38-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 4 ++-- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/evlist.c | 16 +++++++++++++++ tools/perf/lib/include/internal/evlist.h | 2 ++ tools/perf/util/evlist.c | 25 ++++-------------------- tools/perf/util/evlist.h | 4 ++-- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 710a2898ed6c..2227e2f42c09 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -967,10 +967,10 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) + if (evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0) goto out; - nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin)); + nr_stdin = evlist__add_pollfd(kvm->evlist, fileno(stdin)); if (nr_stdin < 0) goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 48600c90cc7e..d6daaad348b5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1624,7 +1624,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = 0; waking++; - if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) + if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) draining = true; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 97667287f573..8d9784b5d028 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3475,7 +3475,7 @@ again: int timeout = done ? 100 : -1; if (!draining && perf_evlist__poll(evlist, timeout) > 0) { - if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) + if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; goto again; diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index 3c3b97f40be0..e4d8b3b7b8fc 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -260,3 +263,16 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } + +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + void *ptr, short revent) +{ + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + + if (pos >= 0) { + evlist->pollfd.priv[pos].ptr = ptr; + fcntl(fd, F_SETFL, O_NONBLOCK); + } + + return pos; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 88c0dfaf0ddc..9f440ab12b76 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -25,6 +25,8 @@ struct perf_evlist { }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); +int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, + void *ptr, short revent); /** * __perf_evlist__for_each_entry - iterate thru all the evsels diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c47f23e7f3c8..051be9a31db9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -398,26 +398,9 @@ int perf_evlist__enable_event_idx(struct evlist *evlist, return perf_evlist__enable_event_thread(evlist, evsel, idx); } -static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd, - struct mmap *map, short revent) +int evlist__add_pollfd(struct evlist *evlist, int fd) { - int pos = fdarray__add(&evlist->core.pollfd, fd, revent | POLLERR | POLLHUP); - /* - * Save the idx so that when we filter out fds POLLHUP'ed we can - * close the associated evlist->mmap[] entry. - */ - if (pos >= 0) { - evlist->core.pollfd.priv[pos].ptr = map; - - fcntl(fd, F_SETFL, O_NONBLOCK); - } - - return pos; -} - -int perf_evlist__add_pollfd(struct evlist *evlist, int fd) -{ - return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); } static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, @@ -429,7 +412,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, perf_mmap__put(map); } -int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) +int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) { return fdarray__filter(&evlist->core.pollfd, revents_and_mask, perf_evlist__munmap_filtered, NULL); @@ -708,7 +691,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, * Therefore don't add it for polling. */ if (!evsel->core.system_wide && - __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { + perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { perf_mmap__put(&maps[idx]); return -1; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bebbaa9b6325..2eac8aab24a3 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -141,8 +141,8 @@ struct evsel * perf_evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); -int perf_evlist__add_pollfd(struct evlist *evlist, int fd); -int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); +int evlist__add_pollfd(struct evlist *evlist, int fd); +int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); int perf_evlist__poll(struct evlist *evlist, int timeout); From 80ab2987a016f774201d4f3509118047f9d58175 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 31 Aug 2019 22:48:33 +0200 Subject: [PATCH 310/345] libperf: Add perf_evlist__poll() function Move perf_evlist__poll() from tools/perf to libperf, it will be used in the following patches. And rename the existing perf's function to evlist__poll(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190913132355.21634-39-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-top.c | 4 ++-- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/evlist.c | 5 +++++ tools/perf/lib/include/perf/evlist.h | 1 + tools/perf/lib/libperf.map | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 6 +++--- tools/perf/util/evlist.h | 2 +- tools/perf/util/python.c | 2 +- 12 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d6daaad348b5..23332861de6e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1615,7 +1615,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (hits == rec->samples) { if (done || draining) break; - err = perf_evlist__poll(rec->evlist, -1); + err = evlist__poll(rec->evlist, -1); /* * Propagate error, only if there's any. Ignore positive * number of returned events and interrupt error. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 73bf79053ae3..30d8eb614377 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1307,7 +1307,7 @@ static int __cmd_top(struct perf_top *top) } /* Wait for a minimal set of events before starting the snapshot */ - perf_evlist__poll(top->evlist, 100); + evlist__poll(top->evlist, 100); perf_top__mmap_read(top); @@ -1317,7 +1317,7 @@ static int __cmd_top(struct perf_top *top) perf_top__mmap_read(top); if (opts->overwrite || (hits == top->samples)) - ret = perf_evlist__poll(top->evlist, 100); + ret = evlist__poll(top->evlist, 100); if (resize) { perf_top__resize(top); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8d9784b5d028..c52c3120a811 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3474,7 +3474,7 @@ again: if (trace->nr_events == before) { int timeout = done ? 100 : -1; - if (!draining && perf_evlist__poll(evlist, timeout) > 0) { + if (!draining && evlist__poll(evlist, timeout) > 0) { if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index e4d8b3b7b8fc..d1496fee810c 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -276,3 +276,8 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, return pos; } + +int perf_evlist__poll(struct perf_evlist *evlist, int timeout) +{ + return fdarray__poll(&evlist->pollfd, timeout); +} diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h index 38365f8f3fba..8a2ce0757ab2 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/perf/lib/include/perf/evlist.h @@ -31,5 +31,6 @@ LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist); LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); +LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 5eb0150ccdc6..ab8dbde1136c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -39,6 +39,7 @@ LIBPERF_0.0.1 { perf_evlist__remove; perf_evlist__next; perf_evlist__set_maps; + perf_evlist__poll; local: *; }; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 4629fa33c8ad..2b5c46813053 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -127,7 +127,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest } if (nr_events == before) - perf_evlist__poll(evlist, 10); + evlist__poll(evlist, 10); if (++nr_polls > 5) { pr_debug("%s: no events!\n", __func__); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 401e8d11427b..437426be29e9 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -287,7 +287,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. */ if (total_events == before && false) - perf_evlist__poll(evlist, -1); + evlist__poll(evlist, -1); sleep(1); if (++wakeups > 5) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 24565f83e07d..bce3a4cb4c89 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -130,7 +130,7 @@ retry: out_init: if (!exited || !nr_exit) { - perf_evlist__poll(evlist, -1); + evlist__poll(evlist, -1); goto retry; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 051be9a31db9..d4c7fd125ce9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -418,9 +418,9 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) perf_evlist__munmap_filtered, NULL); } -int perf_evlist__poll(struct evlist *evlist, int timeout) +int evlist__poll(struct evlist *evlist, int timeout) { - return fdarray__poll(&evlist->core.pollfd, timeout); + return perf_evlist__poll(&evlist->core, timeout); } static void perf_evlist__set_sid_idx(struct evlist *evlist, @@ -1736,7 +1736,7 @@ static void *perf_evlist__poll_thread(void *arg) draining = true; if (!draining) - perf_evlist__poll(evlist, 1000); + evlist__poll(evlist, 1000); for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *map = &evlist->mmap[i]; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 2eac8aab24a3..130d44d691b8 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -144,7 +144,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, int evlist__add_pollfd(struct evlist *evlist, int fd); int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); -int perf_evlist__poll(struct evlist *evlist, int timeout); +int evlist__poll(struct evlist *evlist, int timeout); struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id); struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist, diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 6c46d7dbd263..53f31053a27a 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -918,7 +918,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) return NULL; - n = perf_evlist__poll(evlist, timeout); + n = evlist__poll(evlist, timeout); if (n < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; From 32ff3fec07b6d8e6c5cc2342f6cbbdcb224d484c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2019 15:14:12 -0300 Subject: [PATCH 311/345] perf copyfile: Move copyfile routines to separate files Further reducing the util.c hodgepodge files. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-0i62zh7ok25znibyebgq0qs4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/build-id.c | 3 +- tools/perf/util/copyfile.c | 144 +++++++++++++++++++++++++++++++++++ tools/perf/util/copyfile.h | 16 ++++ tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/util.c | 135 -------------------------------- tools/perf/util/util.h | 5 -- 7 files changed, 164 insertions(+), 142 deletions(-) create mode 100644 tools/perf/util/copyfile.c create mode 100644 tools/perf/util/copyfile.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index fd89d6a8cd65..4d1894e38a81 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -3,6 +3,7 @@ perf-y += block-range.o perf-y += build-id.o perf-y += cacheline.o perf-y += config.o +perf-y += copyfile.o perf-y += ctype.o perf-y += db-export.o perf-y += env.o diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7928c398a063..c076fc7fe025 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,12 +7,13 @@ * Copyright (C) 2009, 2010 Red Hat Inc. * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ -#include "util.h" // copyfile_ns(), lsdir(), mkdir_p(), rm_rf() +#include "util.h" // lsdir(), mkdir_p(), rm_rf() #include #include #include #include #include +#include "util/copyfile.h" #include "dso.h" #include "build-id.h" #include "event.h" diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c new file mode 100644 index 000000000000..3fa0db136667 --- /dev/null +++ b/tools/perf/util/copyfile.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/copyfile.h" +#include "util/namespaces.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi) +{ + int err = -1; + char *line = NULL; + size_t n; + FILE *from_fp, *to_fp; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + from_fp = fopen(from, "r"); + nsinfo__mountns_exit(&nsc); + if (from_fp == NULL) + goto out; + + to_fp = fopen(to, "w"); + if (to_fp == NULL) + goto out_fclose_from; + + while (getline(&line, &n, from_fp) > 0) + if (fputs(line, to_fp) == EOF) + goto out_fclose_to; + err = 0; +out_fclose_to: + fclose(to_fp); + free(line); +out_fclose_from: + fclose(from_fp); +out: + return err; +} + +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +{ + void *ptr; + loff_t pgoff; + + pgoff = off_in & ~(page_size - 1); + off_in -= pgoff; + + ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff); + if (ptr == MAP_FAILED) + return -1; + + while (size) { + ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out); + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + break; + + size -= ret; + off_in += ret; + off_out += ret; + } + munmap(ptr, off_in + size); + + return size ? -1 : 0; +} + +static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, + struct nsinfo *nsi) +{ + int fromfd, tofd; + struct stat st; + int err; + char *tmp = NULL, *ptr = NULL; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + err = stat(from, &st); + nsinfo__mountns_exit(&nsc); + if (err) + goto out; + err = -1; + + /* extra 'x' at the end is to reserve space for '.' */ + if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { + tmp = NULL; + goto out; + } + ptr = strrchr(tmp, '/'); + if (!ptr) + goto out; + ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1); + *ptr = '.'; + + tofd = mkstemp(tmp); + if (tofd < 0) + goto out; + + if (fchmod(tofd, mode)) + goto out_close_to; + + if (st.st_size == 0) { /* /proc? do it slowly... */ + err = slow_copyfile(from, tmp, nsi); + goto out_close_to; + } + + nsinfo__mountns_enter(nsi, &nsc); + fromfd = open(from, O_RDONLY); + nsinfo__mountns_exit(&nsc); + if (fromfd < 0) + goto out_close_to; + + err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size); + + close(fromfd); +out_close_to: + close(tofd); + if (!err) + err = link(tmp, to); + unlink(tmp); +out: + free(tmp); + return err; +} + +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi) +{ + return copyfile_mode_ns(from, to, 0755, nsi); +} + +int copyfile_mode(const char *from, const char *to, mode_t mode) +{ + return copyfile_mode_ns(from, to, mode, NULL); +} + +int copyfile(const char *from, const char *to) +{ + return copyfile_mode(from, to, 0755); +} diff --git a/tools/perf/util/copyfile.h b/tools/perf/util/copyfile.h new file mode 100644 index 000000000000..e85d2f22f3cc --- /dev/null +++ b/tools/perf/util/copyfile.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef PERF_COPYFILE_H_ +#define PERF_COPYFILE_H_ + +#include +#include +#include + +struct nsinfo; + +int copyfile(const char *from, const char *to); +int copyfile_mode(const char *from, const char *to, mode_t mode); +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); + +#endif // PERF_COPYFILE_H_ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6fbfdf8bf61f..66f4be1df573 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -17,7 +17,7 @@ #include "machine.h" #include "vdso.h" #include "debug.h" -#include "util.h" +#include "util/copyfile.h" #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index cb6fa4c98470..5eda6e19c947 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -2,10 +2,7 @@ #include "util.h" #include "debug.h" #include "event.h" -#include "namespaces.h" -#include #include -#include #include #include #include @@ -233,138 +230,6 @@ out: return list; } -static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi) -{ - int err = -1; - char *line = NULL; - size_t n; - FILE *from_fp, *to_fp; - struct nscookie nsc; - - nsinfo__mountns_enter(nsi, &nsc); - from_fp = fopen(from, "r"); - nsinfo__mountns_exit(&nsc); - if (from_fp == NULL) - goto out; - - to_fp = fopen(to, "w"); - if (to_fp == NULL) - goto out_fclose_from; - - while (getline(&line, &n, from_fp) > 0) - if (fputs(line, to_fp) == EOF) - goto out_fclose_to; - err = 0; -out_fclose_to: - fclose(to_fp); - free(line); -out_fclose_from: - fclose(from_fp); -out: - return err; -} - -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) -{ - void *ptr; - loff_t pgoff; - - pgoff = off_in & ~(page_size - 1); - off_in -= pgoff; - - ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff); - if (ptr == MAP_FAILED) - return -1; - - while (size) { - ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out); - if (ret < 0 && errno == EINTR) - continue; - if (ret <= 0) - break; - - size -= ret; - off_in += ret; - off_out += ret; - } - munmap(ptr, off_in + size); - - return size ? -1 : 0; -} - -static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, - struct nsinfo *nsi) -{ - int fromfd, tofd; - struct stat st; - int err; - char *tmp = NULL, *ptr = NULL; - struct nscookie nsc; - - nsinfo__mountns_enter(nsi, &nsc); - err = stat(from, &st); - nsinfo__mountns_exit(&nsc); - if (err) - goto out; - err = -1; - - /* extra 'x' at the end is to reserve space for '.' */ - if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { - tmp = NULL; - goto out; - } - ptr = strrchr(tmp, '/'); - if (!ptr) - goto out; - ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1); - *ptr = '.'; - - tofd = mkstemp(tmp); - if (tofd < 0) - goto out; - - if (fchmod(tofd, mode)) - goto out_close_to; - - if (st.st_size == 0) { /* /proc? do it slowly... */ - err = slow_copyfile(from, tmp, nsi); - goto out_close_to; - } - - nsinfo__mountns_enter(nsi, &nsc); - fromfd = open(from, O_RDONLY); - nsinfo__mountns_exit(&nsc); - if (fromfd < 0) - goto out_close_to; - - err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size); - - close(fromfd); -out_close_to: - close(tofd); - if (!err) - err = link(tmp, to); - unlink(tmp); -out: - free(tmp); - return err; -} - -int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi) -{ - return copyfile_mode_ns(from, to, 0755, nsi); -} - -int copyfile_mode(const char *from, const char *to, mode_t mode) -{ - return copyfile_mode_ns(from, to, mode, NULL); -} - -int copyfile(const char *from, const char *to) -{ - return copyfile_mode(from, to, 0755); -} - size_t hex_width(u64 v) { size_t n = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b78b73e5bb32..9969b8b46f7c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -17,7 +17,6 @@ void usage(const char *err) __noreturn; void die(const char *err, ...) __noreturn __printf(1, 2); struct dirent; -struct nsinfo; struct strlist; int mkdir_p(char *path, mode_t mode); @@ -25,10 +24,6 @@ int rm_rf(const char *path); int rm_rf_perf_data(const char *path); struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); bool lsdir_no_dot_filter(const char *name, struct dirent *d); -int copyfile(const char *from, const char *to); -int copyfile_mode(const char *from, const char *to, mode_t mode); -int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); size_t hex_width(u64 v); From 90a48843a18663a25aa70c6d717d38a4443e5e29 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 Sep 2019 15:12:42 +0200 Subject: [PATCH 312/345] KVM: selftests: fix ucall on x86 After commit e8bb4755eea2("KVM: selftests: Split ucall.c into architecture specific files") selftests which use ucall on x86 started segfaulting and apparently it's gcc to blame: it "optimizes" ucall() function throwing away va_start/va_end part because it thinks the structure is not being used. Previously, it couldn't do that because the there was also MMIO version and the decision which particular implementation to use was done at runtime. With older gccs it's possible to solve the problem by adding 'volatile' to 'struct ucall' but at least with gcc-8.3 this trick doesn't work. 'memory' clobber seems to do the job. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/ucall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 4bfc9a90b1de..da4d89ad5419 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_end(va); asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) From 4b526de50e39b38cd828396267379183c7c21354 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2019 13:41:06 -0700 Subject: [PATCH 313/345] KVM: x86: Check kvm_rebooting in kvm_spurious_fault() Explicitly check kvm_rebooting in kvm_spurious_fault() prior to invoking BUG(), as opposed to assuming the caller has already done so. Letting kvm_spurious_fault() be called "directly" will allow VMX to better optimize its low level assembly flows. As a happy side effect, kvm_spurious_fault() no longer needs to be marked as a dead end since it doesn't unconditionally BUG(). Acked-by: Paolo Bonzini Cc: Josh Poimboeuf Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 3 ++- tools/objtool/check.c | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c5ed92482e2c..de0c9d8097c2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1534,7 +1534,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void __noreturn kvm_spurious_fault(void); +asmlinkage void kvm_spurious_fault(void); /* * Hardware virtualization extension instructions may fault if a diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9a3d8efe1c2..0ed07d8d2caa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -360,7 +360,8 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base); asmlinkage __visible void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ - BUG(); + if (!kvm_rebooting) + BUG(); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 176f2f084060..044c9a3cb247 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "do_task_dead", "__module_put_and_exit", "complete_and_exit", - "kvm_spurious_fault", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", From 52a9fcbc73a30375cad8d9e7b519e1ad97b0880a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2019 13:41:07 -0700 Subject: [PATCH 314/345] KVM: VMX: Optimize VMX instruction error and fault handling Rework the VMX instruction helpers using asm-goto to branch directly to error/fault "handlers" in lieu of using __ex(), i.e. the generic ____kvm_handle_fault_on_reboot(). Branching directly to fault handling code during fixup avoids the extra JMP that is inserted after every VMX instruction when using the generic "fault on reboot" (see commit 3901336ed9887, "x86/kvm: Don't call kvm_spurious_fault() from .fixup"). Opportunistically clean up the helpers so that they all have consistent error handling and messages. Leave the usage of ____kvm_handle_fault_on_reboot() (via __ex()) in kvm_cpu_vmxoff() and nested_vmx_check_vmentry_hw() as is. The VMXOFF case is not a fast path, i.e. the cleanliness of __ex() is worth the JMP, and the extra JMP in nested_vmx_check_vmentry_hw() is unavoidable. Note, VMREAD cannot get the asm-goto treatment as output operands aren't compatible with GCC's asm-goto due to internal compiler restrictions. Acked-by: Paolo Bonzini Cc: Josh Poimboeuf Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/ops.h | 72 +++++++++++++++++++++++------------------- arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++++++ 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 2200fb698dd0..79e25d49d4d9 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -14,6 +14,12 @@ #define __ex_clear(x, reg) \ ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) +void vmwrite_error(unsigned long field, unsigned long value); +void vmclear_error(struct vmcs *vmcs, u64 phys_addr); +void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); +void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); +void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); + static __always_inline void vmcs_check16(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, @@ -103,21 +109,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) return __vmcs_readl(field); } -static noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); - dump_stack(); -} +#define vmx_asm1(insn, op1, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) + +#define vmx_asm2(insn, op1, op2, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1, op2 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { - bool error; - - asm volatile (__ex("vmwrite %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(field), "rm"(value)); - if (unlikely(error)) - vmwrite_error(field, value); + vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value); } static __always_inline void vmcs_write16(unsigned long field, u16 value) @@ -182,28 +206,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) static inline void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; - asm volatile (__ex("vmclear %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", - vmcs, phys_addr); + vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr); } static inline void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex("vmptrld %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", - vmcs, phys_addr); + vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr); } static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) @@ -213,11 +227,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; - bool error; - asm volatile (__ex("invvpid %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva); } static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) @@ -225,11 +236,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) struct { u64 eptp, gpa; } operand = {eptp, gpa}; - bool error; - asm volatile (__ex("invept %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a7c9922e3905..52c7614b5ecd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -343,6 +343,40 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit void vmx_vmexit(void); +#define vmx_insn_failed(fmt...) \ +do { \ + WARN_ONCE(1, fmt); \ + pr_warn_ratelimited(fmt); \ +} while (0) + +noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) +{ + vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + ext, vpid, gva); +} + +noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) +{ + vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + ext, eptp, gpa); +} + static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* From 6e2020977e3e692a8bb31258c1f8251521f4fdb7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2019 13:41:08 -0700 Subject: [PATCH 315/345] KVM: VMX: Add error handling to VMREAD helper Now that VMREAD flows require a taken branch, courtesy of commit 3901336ed9887 ("x86/kvm: Don't call kvm_spurious_fault() from .fixup") bite the bullet and add full error handling to VMREAD, i.e. replace the JMP added by __ex()/____kvm_handle_fault_on_reboot() with a hinted Jcc. To minimize the code footprint, add a helper function, vmread_error(), to handle both faults and failures so that the inline flow has a single CALL. Acked-by: Paolo Bonzini Cc: Josh Poimboeuf Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/ops.h | 21 +++++++++++++++++---- arch/x86/kvm/vmx/vmx.c | 8 ++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 79e25d49d4d9..45eaedee2ac0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -11,9 +11,8 @@ #include "vmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) -#define __ex_clear(x, reg) \ - ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) +asmlinkage void vmread_error(unsigned long field, bool fault); void vmwrite_error(unsigned long field, unsigned long value); void vmclear_error(struct vmcs *vmcs, u64 phys_addr); void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); @@ -68,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (__ex_clear("vmread %1, %0", "%k0") - : "=r"(value) : "r"(field)); + asm volatile("1: vmread %2, %1\n\t" + ".byte 0x3e\n\t" /* branch taken hint */ + "ja 3f\n\t" + "mov %2, %%" _ASM_ARG1 "\n\t" + "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t" + "2: call vmread_error\n\t" + "xor %k1, %k1\n\t" + "3:\n\t" + + ".pushsection .fixup, \"ax\"\n\t" + "4: mov %2, %%" _ASM_ARG1 "\n\t" + "mov $1, %%" _ASM_ARG2 "\n\t" + "jmp 2b\n\t" + ".popsection\n\t" + _ASM_EXTABLE(1b, 4b) + : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc"); return value; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 52c7614b5ecd..1852706e6acc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -349,6 +349,14 @@ do { \ pr_warn_ratelimited(fmt); \ } while (0) +asmlinkage void vmread_error(unsigned long field, bool fault) +{ + if (fault) + kvm_spurious_fault(); + else + vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); +} + noinline void vmwrite_error(unsigned long field, unsigned long value) { vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", From 98cd382d509061a9c6ae4cac38ff0721be05adef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2019 13:41:09 -0700 Subject: [PATCH 316/345] KVM: x86: Drop ____kvm_handle_fault_on_reboot() Remove the variation of __kvm_handle_fault_on_reboot() that accepts a post-fault cleanup instruction now that its sole user (VMREAD) uses a different method for handling faults. Acked-by: Paolo Bonzini Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index de0c9d8097c2..202f4d4a892a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1542,7 +1542,7 @@ asmlinkage void kvm_spurious_fault(void); * Usually after catching the fault we just panic; during reboot * instead the instruction is ignored. */ -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ +#define __kvm_handle_fault_on_reboot(insn) \ "666: \n\t" \ insn "\n\t" \ "jmp 668f \n\t" \ @@ -1551,16 +1551,12 @@ asmlinkage void kvm_spurious_fault(void); "668: \n\t" \ ".pushsection .fixup, \"ax\" \n\t" \ "700: \n\t" \ - cleanup_insn "\n\t" \ "cmpb $0, kvm_rebooting\n\t" \ "je 667b \n\t" \ "jmp 668b \n\t" \ ".popsection \n\t" \ _ASM_EXTABLE(666b, 700b) -#define __kvm_handle_fault_on_reboot(insn) \ - ____kvm_handle_fault_on_reboot(insn, "") - #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); From f209a26dd5a5038c53097b5c38e313b8cd042adb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Jul 2019 13:41:10 -0700 Subject: [PATCH 317/345] KVM: x86: Don't check kvm_rebooting in __kvm_handle_fault_on_reboot() Remove the kvm_rebooting check from VMX/SVM instruction exception fixup now that kvm_spurious_fault() conditions its BUG() on !kvm_rebooting. Because the 'cleanup_insn' functionally is also gone, deferring to kvm_spurious_fault() means __kvm_handle_fault_on_reboot() can eliminate its .fixup code entirely and have its exception table entry branch directly to the call to kvm_spurious_fault(). Cc: Paolo Bonzini Cc: Josh Poimboeuf Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 202f4d4a892a..23edf56cf577 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1549,13 +1549,7 @@ asmlinkage void kvm_spurious_fault(void); "667: \n\t" \ "call kvm_spurious_fault \n\t" \ "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n\t" \ - "700: \n\t" \ - "cmpb $0, kvm_rebooting\n\t" \ - "je 667b \n\t" \ - "jmp 668b \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 700b) + _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); From cab01850277a827c57270bba48be1d8fe312643d Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 25 Sep 2019 15:30:35 +0200 Subject: [PATCH 318/345] KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386 The following was reported on i386: arch/x86/kvm/vmx/vmx.c: In function 'hv_enable_direct_tlbflush': arch/x86/kvm/vmx/vmx.c:503:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] pr_debugs() in this function are more or less useless, let's just remove them. evmcs->hv_vm_id can use 'unsigned long' instead of 'u64'. Also, simplify the code a little bit. Reported-by: kbuild test robot Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1852706e6acc..d4575ffb3cec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -537,23 +537,19 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) * Synthetic VM-Exit is not enabled in current code and so All * evmcs in singe VM shares same assist page. */ - if (!*p_hv_pa_pg) { + if (!*p_hv_pa_pg) *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!*p_hv_pa_pg) - return -ENOMEM; - pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n", - (u64)&vcpu->kvm); - } + + if (!*p_hv_pa_pg) + return -ENOMEM; evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; evmcs->partition_assist_page = __pa(*p_hv_pa_pg); - evmcs->hv_vm_id = (u64)vcpu->kvm; + evmcs->hv_vm_id = (unsigned long)vcpu->kvm; evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; - pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n", - (u64)vcpu->kvm); return 0; } From fd3edd4a9066f28de99a16685a586d68a9f551f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2019 18:33:53 +0200 Subject: [PATCH 319/345] KVM: nVMX: cleanup and fix host 64-bit mode checks KVM was incorrectly checking vmcs12->host_ia32_efer even if the "load IA32_EFER" exit control was reset. Also, some checks were not using the new CC macro for tracing. Cleanup everything so that the vCPU's 64-bit mode is determined directly from EFER_LMA and the VMCS checks are based on that, which matches section 26.2.4 of the SDM. Cc: Sean Christopherson Cc: Krish Sadhukhan Fixes: 5845038c111db27902bc220a4f70070fe945871c Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 50 +++++++++++++++------------------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 70d59d9304f2..41abc62c9a8a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2664,8 +2664,23 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; +#ifdef CONFIG_X86_64 + ia32e = !!(vcpu->arch.efer & EFER_LMA); +#else + ia32e = false; +#endif + + if (ia32e) { + if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || + CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + return -EINVAL; + } else { + if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || + CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || + CC((vmcs12->host_rip) >> 32)) + return -EINVAL; + } if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || @@ -2684,35 +2699,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu))) - return -EINVAL; - - if (!(vmcs12->host_ia32_efer & EFER_LMA) && - ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || - (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE))) { - return -EINVAL; - } - - if ((vmcs12->host_ia32_efer & EFER_LMA) && - !(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) { - return -EINVAL; - } - - if (!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) && - ((vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || - (vmcs12->host_cr4 & X86_CR4_PCIDE) || - (((vmcs12->host_rip) >> 32) & 0xffffffff))) { - return -EINVAL; - } - - if ((vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) && - ((!(vmcs12->host_cr4 & X86_CR4_PAE)) || - (is_noncanonical_address(vmcs12->host_rip, vcpu)))) { - return -EINVAL; - } -#else - if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE || - vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) + CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; #endif From 9620bc361ac6e292ad2d6997b2f59f41f4e17862 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 25 Sep 2019 15:06:59 -0300 Subject: [PATCH 320/345] perf evsel: Remove need for symbol_conf in evsel_fprintf.c So that we an later link it to the python binding without having to drag the symbol object files. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-8823tveyasocnuoelq4qopwf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-sched.c | 2 +- tools/perf/builtin-script.c | 6 ++++-- tools/perf/builtin-trace.c | 2 +- tools/perf/util/evsel.h | 7 ++++--- tools/perf/util/evsel_fprintf.c | 14 ++++++-------- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 079e67a36904..f9706306fea0 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2055,7 +2055,7 @@ static void timehist_print_sample(struct perf_sched *sched, EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | EVSEL__PRINT_CALLCHAIN_ARROW | EVSEL__PRINT_SKIP_IGNORED, - &callchain_cursor, stdout); + &callchain_cursor, symbol_conf.bt_stop_list, stdout); out: printf("\n"); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 22c1d114014c..0d43e2e5afff 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1325,7 +1325,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample, } else printed += fprintf(fp, "\n"); - printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp); + printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, + symbol_conf.bt_stop_list, fp); } /* print branch_to information */ @@ -1867,7 +1868,8 @@ static void process_event(struct perf_script *script, cursor = &callchain_cursor; fputc(cursor ? '\n' : ' ', fp); - sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp); + sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, + symbol_conf.bt_stop_list, fp); } if (PRINT_FIELD(IREGS)) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c52c3120a811..318225c8d7a7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2076,7 +2076,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam EVSEL__PRINT_DSO | EVSEL__PRINT_UNKNOWN_AS_ADDR; - return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); + return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output); } static const char *errno_to_name(struct evsel *evsel, int err) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a5b29ac10da0..4a4c64833893 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -428,12 +428,13 @@ int perf_evsel__fprintf(struct evsel *evsel, struct callchain_cursor; int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, - unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp); + unsigned int print_opts, struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int left_alignment, unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp); + struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); bool perf_evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index a8cbdf4c2753..756b1e852db7 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -102,7 +102,7 @@ out: int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, - FILE *fp) + struct strlist *bt_stop_list, FILE *fp) { int printed = 0; struct callchain_cursor_node *node; @@ -175,10 +175,8 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, printed += fprintf(fp, "\n"); /* Add srccode here too? */ - if (symbol_conf.bt_stop_list && - node->sym && - strlist__has_entry(symbol_conf.bt_stop_list, - node->sym->name)) { + if (bt_stop_list && node->sym && + strlist__has_entry(bt_stop_list, node->sym->name)) { break; } @@ -193,7 +191,7 @@ next: int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int left_alignment, unsigned int print_opts, - struct callchain_cursor *cursor, FILE *fp) + struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) { int printed = 0; int print_ip = print_opts & EVSEL__PRINT_IP; @@ -204,8 +202,8 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR; if (cursor != NULL) { - printed += sample__fprintf_callchain(sample, left_alignment, - print_opts, cursor, fp); + printed += sample__fprintf_callchain(sample, left_alignment, print_opts, + cursor, bt_stop_list, fp); } else { printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " "); From ca1252779f48ece225c6003e01c675abb91cf1b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2019 15:41:51 -0300 Subject: [PATCH 321/345] perf evsel: Introduce evsel_fprintf.h We already had evsel_fprintf.c, add its counterpart, so that we can reduce evsel.h a bit more. We needed a new perf_event_attr_fprintf.c file so as to have a separate object to link with the python binding in tools/perf/util/python-ext-sources and not drag symbol_conf, etc into the python binding. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-06bdmt1062d9unzgqmxwlv88@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-evlist.c | 1 + tools/perf/builtin-sched.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/builtin-trace.c | 2 + tools/perf/util/Build | 1 + tools/perf/util/evsel.c | 153 +--------------------- tools/perf/util/evsel.h | 51 +------- tools/perf/util/evsel_fprintf.c | 1 + tools/perf/util/evsel_fprintf.h | 50 +++++++ tools/perf/util/header.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 148 +++++++++++++++++++++ tools/perf/util/python-ext-sources | 1 + 12 files changed, 218 insertions(+), 193 deletions(-) create mode 100644 tools/perf/util/evsel_fprintf.h create mode 100644 tools/perf/util/perf_event_attr_fprintf.c diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 60509ce4dd28..440501994931 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -10,6 +10,7 @@ #include "perf.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/parse-events.h" #include #include "util/session.h" diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index f9706306fea0..5cacc4f84c8d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -6,6 +6,7 @@ #include "util/cpumap.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0d43e2e5afff..286fc70d7402 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -17,6 +17,7 @@ #include "util/trace-event.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/evswitch.h" #include "util/sort.h" #include "util/data.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 318225c8d7a7..bb5130d02155 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -28,6 +28,8 @@ #include "util/dso.h" #include "util/env.h" #include "util/event.h" +#include "util/evsel.h" +#include "util/evsel_fprintf.h" #include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4d1894e38a81..8dcfca1a882f 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -11,6 +11,7 @@ perf-y += event.o perf-y += evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o +perf-y += perf_event_attr_fprintf.o perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9c284d2adcea..6323b0c60f6c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -30,6 +30,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "util/evsel_fprintf.h" #include "evlist.h" #include #include "thread_map.h" @@ -1443,152 +1444,6 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) return fd; } -struct bit_names { - int bit; - const char *name; -}; - -static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) -{ - bool first_bit = true; - int i = 0; - - do { - if (value & bits[i].bit) { - buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); - first_bit = false; - } - } while (bits[++i].name != NULL); -} - -static void __p_sample_type(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_##n, #n } - struct bit_names bits[] = { - bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), - bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), - bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), - bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), - bit_name(WEIGHT), bit_name(PHYS_ADDR), - { .name = NULL, } - }; -#undef bit_name - __p_bits(buf, size, value, bits); -} - -static void __p_branch_sample_type(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } - struct bit_names bits[] = { - bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), - bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), - bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), - bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), - bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - { .name = NULL, } - }; -#undef bit_name - __p_bits(buf, size, value, bits); -} - -static void __p_read_format(char *buf, size_t size, u64 value) -{ -#define bit_name(n) { PERF_FORMAT_##n, #n } - struct bit_names bits[] = { - bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), - { .name = NULL, } - }; -#undef bit_name - __p_bits(buf, size, value, bits); -} - -#define BUF_SIZE 1024 - -#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) -#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) -#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) -#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) -#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) -#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) - -#define PRINT_ATTRn(_n, _f, _p) \ -do { \ - if (attr->_f) { \ - _p(attr->_f); \ - ret += attr__fprintf(fp, _n, buf, priv);\ - } \ -} while (0) - -#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) - -int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, - attr__fprintf_f attr__fprintf, void *priv) -{ - char buf[BUF_SIZE]; - int ret = 0; - - PRINT_ATTRf(type, p_unsigned); - PRINT_ATTRf(size, p_unsigned); - PRINT_ATTRf(config, p_hex); - PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); - PRINT_ATTRf(sample_type, p_sample_type); - PRINT_ATTRf(read_format, p_read_format); - - PRINT_ATTRf(disabled, p_unsigned); - PRINT_ATTRf(inherit, p_unsigned); - PRINT_ATTRf(pinned, p_unsigned); - PRINT_ATTRf(exclusive, p_unsigned); - PRINT_ATTRf(exclude_user, p_unsigned); - PRINT_ATTRf(exclude_kernel, p_unsigned); - PRINT_ATTRf(exclude_hv, p_unsigned); - PRINT_ATTRf(exclude_idle, p_unsigned); - PRINT_ATTRf(mmap, p_unsigned); - PRINT_ATTRf(comm, p_unsigned); - PRINT_ATTRf(freq, p_unsigned); - PRINT_ATTRf(inherit_stat, p_unsigned); - PRINT_ATTRf(enable_on_exec, p_unsigned); - PRINT_ATTRf(task, p_unsigned); - PRINT_ATTRf(watermark, p_unsigned); - PRINT_ATTRf(precise_ip, p_unsigned); - PRINT_ATTRf(mmap_data, p_unsigned); - PRINT_ATTRf(sample_id_all, p_unsigned); - PRINT_ATTRf(exclude_host, p_unsigned); - PRINT_ATTRf(exclude_guest, p_unsigned); - PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); - PRINT_ATTRf(exclude_callchain_user, p_unsigned); - PRINT_ATTRf(mmap2, p_unsigned); - PRINT_ATTRf(comm_exec, p_unsigned); - PRINT_ATTRf(use_clockid, p_unsigned); - PRINT_ATTRf(context_switch, p_unsigned); - PRINT_ATTRf(write_backward, p_unsigned); - PRINT_ATTRf(namespaces, p_unsigned); - PRINT_ATTRf(ksymbol, p_unsigned); - PRINT_ATTRf(bpf_event, p_unsigned); - PRINT_ATTRf(aux_output, p_unsigned); - - PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); - PRINT_ATTRf(bp_type, p_unsigned); - PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); - PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); - PRINT_ATTRf(branch_sample_type, p_branch_sample_type); - PRINT_ATTRf(sample_regs_user, p_hex); - PRINT_ATTRf(sample_stack_user, p_unsigned); - PRINT_ATTRf(clockid, p_signed); - PRINT_ATTRf(sample_regs_intr, p_hex); - PRINT_ATTRf(aux_watermark, p_unsigned); - PRINT_ATTRf(sample_max_stack, p_unsigned); - - return ret; -} - -static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, - void *priv __maybe_unused) -{ - return fprintf(fp, " %-32s %s\n", name, val); -} - static void perf_evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx) @@ -1659,6 +1514,12 @@ static bool ignore_missing_thread(struct evsel *evsel, return true; } +static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __maybe_unused) +{ + return fprintf(fp, " %-32s %s\n", name, val); +} + static void display_attr(struct perf_event_attr *attr) { if (verbose >= 2) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a4c64833893..48183b5f5f83 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -13,12 +12,6 @@ #include "symbol_conf.h" #include -struct addr_location; -struct evsel; -union perf_event; - -struct cgroup; - /* * The 'struct perf_evsel_config_term' is used to pass event * specific configuration data to perf_evsel__config routine. @@ -62,7 +55,11 @@ struct perf_evsel_config_term { bool weak; }; +struct bpf_object; +struct cgroup; +struct perf_counts; struct perf_stat_evsel; +union perf_event; typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); @@ -71,9 +68,6 @@ enum perf_tool_event { PERF_TOOL_DURATION_TIME = 1, }; -struct bpf_object; -struct perf_counts; - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -404,38 +398,6 @@ static inline bool perf_evsel__is_clock(struct evsel *evsel) perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -struct perf_attr_details { - bool freq; - bool verbose; - bool event_group; - bool force; - bool trace_fields; -}; - -int perf_evsel__fprintf(struct evsel *evsel, - struct perf_attr_details *details, FILE *fp); - -#define EVSEL__PRINT_IP (1<<0) -#define EVSEL__PRINT_SYM (1<<1) -#define EVSEL__PRINT_DSO (1<<2) -#define EVSEL__PRINT_SYMOFFSET (1<<3) -#define EVSEL__PRINT_ONELINE (1<<4) -#define EVSEL__PRINT_SRCLINE (1<<5) -#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) -#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) -#define EVSEL__PRINT_SKIP_IGNORED (1<<8) - -struct callchain_cursor; - -int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, - unsigned int print_opts, struct callchain_cursor *cursor, - struct strlist *bt_stop_list, FILE *fp); - -int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, - int left_alignment, unsigned int print_opts, - struct callchain_cursor *cursor, - struct strlist *bt_stop_list, FILE *fp); - bool perf_evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, @@ -468,11 +430,6 @@ static inline bool evsel__has_callchain(const struct evsel *evsel) return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; } -typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); - -int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, - attr__fprintf_f attr__fprintf, void *priv); - struct perf_env *perf_evsel__env(struct evsel *evsel); int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 756b1e852db7..028df7afb0dc 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -4,6 +4,7 @@ #include #include #include "evsel.h" +#include "util/evsel_fprintf.h" #include "util/event.h" #include "callchain.h" #include "map.h" diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h new file mode 100644 index 000000000000..47e6c8456bb1 --- /dev/null +++ b/tools/perf/util/evsel_fprintf.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_EVSEL_FPRINTF_H +#define __PERF_EVSEL_FPRINTF_H 1 + +#include +#include + +struct evsel; + +struct perf_attr_details { + bool freq; + bool verbose; + bool event_group; + bool force; + bool trace_fields; +}; + +int perf_evsel__fprintf(struct evsel *evsel, + struct perf_attr_details *details, FILE *fp); + +#define EVSEL__PRINT_IP (1<<0) +#define EVSEL__PRINT_SYM (1<<1) +#define EVSEL__PRINT_DSO (1<<2) +#define EVSEL__PRINT_SYMOFFSET (1<<3) +#define EVSEL__PRINT_ONELINE (1<<4) +#define EVSEL__PRINT_SRCLINE (1<<5) +#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6) +#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7) +#define EVSEL__PRINT_SKIP_IGNORED (1<<8) + +struct addr_location; +struct perf_event_attr; +struct perf_sample; +struct callchain_cursor; +struct strlist; + +int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, + unsigned int print_opts, struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); + +int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, + int left_alignment, unsigned int print_opts, + struct callchain_cursor *cursor, + struct strlist *bt_stop_list, FILE *fp); + +typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv); +#endif // __PERF_EVSEL_H diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3b24b4974c5f..86d9396cb131 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -25,6 +25,7 @@ #include "dso.h" #include "evlist.h" #include "evsel.h" +#include "util/evsel_fprintf.h" #include "header.h" #include "memswap.h" #include "trace-event.h" diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c new file mode 100644 index 000000000000..d4ad3f04923a --- /dev/null +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include "util/evsel_fprintf.h" + +struct bit_names { + int bit; + const char *name; +}; + +static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) +{ + bool first_bit = true; + int i = 0; + + do { + if (value & bits[i].bit) { + buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); +} + +static void __p_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), + bit_name(WEIGHT), bit_name(PHYS_ADDR), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_branch_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n } + struct bit_names bits[] = { + bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY), + bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL), + bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), + bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), + bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_read_format(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +#define BUF_SIZE 1024 + +#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) +#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) +#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) +#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) +#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) + +#define PRINT_ATTRn(_n, _f, _p) \ +do { \ + if (attr->_f) { \ + _p(attr->_f); \ + ret += attr__fprintf(fp, _n, buf, priv);\ + } \ +} while (0) + +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv) +{ + char buf[BUF_SIZE]; + int ret = 0; + + PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRf(size, p_unsigned); + PRINT_ATTRf(config, p_hex); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRf(sample_type, p_sample_type); + PRINT_ATTRf(read_format, p_read_format); + + PRINT_ATTRf(disabled, p_unsigned); + PRINT_ATTRf(inherit, p_unsigned); + PRINT_ATTRf(pinned, p_unsigned); + PRINT_ATTRf(exclusive, p_unsigned); + PRINT_ATTRf(exclude_user, p_unsigned); + PRINT_ATTRf(exclude_kernel, p_unsigned); + PRINT_ATTRf(exclude_hv, p_unsigned); + PRINT_ATTRf(exclude_idle, p_unsigned); + PRINT_ATTRf(mmap, p_unsigned); + PRINT_ATTRf(comm, p_unsigned); + PRINT_ATTRf(freq, p_unsigned); + PRINT_ATTRf(inherit_stat, p_unsigned); + PRINT_ATTRf(enable_on_exec, p_unsigned); + PRINT_ATTRf(task, p_unsigned); + PRINT_ATTRf(watermark, p_unsigned); + PRINT_ATTRf(precise_ip, p_unsigned); + PRINT_ATTRf(mmap_data, p_unsigned); + PRINT_ATTRf(sample_id_all, p_unsigned); + PRINT_ATTRf(exclude_host, p_unsigned); + PRINT_ATTRf(exclude_guest, p_unsigned); + PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); + PRINT_ATTRf(exclude_callchain_user, p_unsigned); + PRINT_ATTRf(mmap2, p_unsigned); + PRINT_ATTRf(comm_exec, p_unsigned); + PRINT_ATTRf(use_clockid, p_unsigned); + PRINT_ATTRf(context_switch, p_unsigned); + PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); + PRINT_ATTRf(ksymbol, p_unsigned); + PRINT_ATTRf(bpf_event, p_unsigned); + PRINT_ATTRf(aux_output, p_unsigned); + + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + PRINT_ATTRf(bp_type, p_unsigned); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(branch_sample_type, p_branch_sample_type); + PRINT_ATTRf(sample_regs_user, p_hex); + PRINT_ATTRf(sample_stack_user, p_unsigned); + PRINT_ATTRf(clockid, p_signed); + PRINT_ATTRf(sample_regs_intr, p_hex); + PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); + + return ret; +} diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index c6dd478956f1..9af183860fbd 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/python.c util/cap.c util/evlist.c util/evsel.c +util/perf_event_attr_fprintf.c util/cpumap.c util/memswap.c util/mmap.c From bd70462062f36bec9d93d3900addfca4f8ec718f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2019 15:45:21 -0300 Subject: [PATCH 322/345] perf evlist: Remove unused perf_evlist__fprintf() method Ditch it, noone is using it, one more stdio.h include in a hot header. Fix the fallout in parse-events.y, where we end up using a FILE pointer, I think due to YYDEBUG being set and in some places, like Amazon Linux 1 we don't get stdio.h included by luck, like in most other places, add a explicit stdio.h include directive. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-37k5q0lhdbo2hvvfbnnzn7og@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 13 ------------- tools/perf/util/evlist.h | 3 --- tools/perf/util/parse-events.y | 1 + 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d4c7fd125ce9..84c42790dab3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1421,19 +1421,6 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); } -size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp) -{ - struct evsel *evsel; - size_t printed = 0; - - evlist__for_each_entry(evlist, evsel) { - printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", - perf_evsel__name(evsel)); - } - - return printed + fprintf(fp, "\n"); -} - int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 130d44d691b8..7cfe75522ba5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include "events_stats.h" @@ -249,8 +248,6 @@ static inline struct evsel *evlist__last(struct evlist *evlist) return container_of(evsel, struct evsel, core); } -size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp); - int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index f1c36ed1cf36..65809ad67808 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -9,6 +9,7 @@ #define YYDEBUG 1 #include +#include #include #include #include From 95be9d197da6f9006f6a70a0d141498ea2488858 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2019 15:56:14 -0300 Subject: [PATCH 323/345] perf evsel: Move config terms to a separate header Further reducing the size of util/evsel.h. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-20zr7di9eynm0272mtjfdhfc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 1 + tools/perf/builtin-top.c | 1 + tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 43 -------------------------- tools/perf/util/evsel_config.h | 50 +++++++++++++++++++++++++++++++ tools/perf/util/parse-events.c | 1 + 6 files changed, 54 insertions(+), 43 deletions(-) create mode 100644 tools/perf/util/evsel_config.h diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2e4de211d881..ede040cf82ad 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -23,6 +23,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/evsel_config.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" #include // page_size diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 30d8eb614377..1f60124eb19b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -27,6 +27,7 @@ #include "util/dso.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/evsel_config.h" #include "util/event.h" #include "util/machine.h" #include "util/map.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6323b0c60f6c..5591af81a070 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -30,6 +30,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "util/evsel_config.h" #include "util/evsel_fprintf.h" #include "evlist.h" #include diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 48183b5f5f83..ddc5ee6f6592 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -12,49 +12,6 @@ #include "symbol_conf.h" #include -/* - * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. - * It is allocated within event parsing and attached to - * perf_evsel::config_terms list head. -*/ -enum term_type { - PERF_EVSEL__CONFIG_TERM_PERIOD, - PERF_EVSEL__CONFIG_TERM_FREQ, - PERF_EVSEL__CONFIG_TERM_TIME, - PERF_EVSEL__CONFIG_TERM_CALLGRAPH, - PERF_EVSEL__CONFIG_TERM_STACK_USER, - PERF_EVSEL__CONFIG_TERM_INHERIT, - PERF_EVSEL__CONFIG_TERM_MAX_STACK, - PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, - PERF_EVSEL__CONFIG_TERM_OVERWRITE, - PERF_EVSEL__CONFIG_TERM_DRV_CFG, - PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_PERCORE, - PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, -}; - -struct perf_evsel_config_term { - struct list_head list; - enum term_type type; - union { - u64 period; - u64 freq; - bool time; - char *callgraph; - char *drv_cfg; - u64 stack_user; - int max_stack; - bool inherit; - bool overwrite; - char *branch; - unsigned long max_events; - bool percore; - bool aux_output; - } val; - bool weak; -}; - struct bpf_object; struct cgroup; struct perf_counts; diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h new file mode 100644 index 000000000000..8a7648037c18 --- /dev/null +++ b/tools/perf/util/evsel_config.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_EVSEL_CONFIG_H +#define __PERF_EVSEL_CONFIG_H 1 + +#include +#include + +/* + * The 'struct perf_evsel_config_term' is used to pass event + * specific configuration data to perf_evsel__config routine. + * It is allocated within event parsing and attached to + * perf_evsel::config_terms list head. +*/ +enum evsel_term_type { + PERF_EVSEL__CONFIG_TERM_PERIOD, + PERF_EVSEL__CONFIG_TERM_FREQ, + PERF_EVSEL__CONFIG_TERM_TIME, + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, + PERF_EVSEL__CONFIG_TERM_STACK_USER, + PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, + PERF_EVSEL__CONFIG_TERM_OVERWRITE, + PERF_EVSEL__CONFIG_TERM_DRV_CFG, + PERF_EVSEL__CONFIG_TERM_BRANCH, + PERF_EVSEL__CONFIG_TERM_PERCORE, + PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, +}; + +struct perf_evsel_config_term { + struct list_head list; + enum evsel_term_type type; + union { + u64 period; + u64 freq; + bool time; + char *callgraph; + char *drv_cfg; + u64 stack_user; + int max_stack; + bool inherit; + bool overwrite; + char *branch; + unsigned long max_events; + bool percore; + bool aux_output; + } val; + bool weak; +}; +#endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d69ff746cda5..50737046fa63 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -34,6 +34,7 @@ #include "asm/bug.h" #include "util/parse-branch-options.h" #include "metricgroup.h" +#include "util/evsel_config.h" #include "util/mmap.h" #define MAX_NAME_LEN 100 From 252a2fdc742bc34b94da203282773952d3b54279 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Sep 2019 16:07:59 -0300 Subject: [PATCH 324/345] perf tools: Replace needless mmap.h with what is needed, event.h The perf_sample struct definition and the event_attr_init() are in util/event.h, but some places were getting it thru an otherwise needless util/mmap.h header, fix it by including util/event.h directly. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-p1anwyjdbbvghrkl9dlxv7h5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- tools/perf/util/parse-events.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 69d0a1991b29..e830eadfca2a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -34,7 +34,7 @@ #include "bpf-event.h" #include "block-range.h" #include "string2.h" -#include "util/mmap.h" +#include "util/event.h" #include "arch/common.h" #include #include diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 50737046fa63..b5e2adef49de 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -35,7 +35,7 @@ #include "util/parse-branch-options.h" #include "metricgroup.h" #include "util/evsel_config.h" -#include "util/mmap.h" +#include "util/event.h" #define MAX_NAME_LEN 100 From 6f6473c37d34b00676a152ec7e60770c78ed7c2f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 23 Sep 2019 16:33:39 -0700 Subject: [PATCH 325/345] perf stat: Fix free memory access / memory leaks in metrics Make sure to not free the name passed in by the caller, but free all the allocated ids when parsing expressions. The loop at the end knows that the first entry shouldn't be freed, so make sure the caller name is the first entry. Fixes % perf stat -M IpB,IpCall,IpTB,IPC,Retiring_SMT,Frontend_Bound_SMT,Kernel_Utilization,CPU_Utilization --metric-only -a -I 1000 sleep 2 valgrind: 1.009943231 ==21527== Invalid read of size 1 ==21527== at 0x483CB74: strcmp (vg_replace_strmem.c:849) ==21527== by 0x582CF8: collect_all_aliases (stat-display.c:554) ==21527== by 0x582EB3: collect_data (stat-display.c:577) ==21527== by 0x583A32: print_counter_aggr (stat-display.c:806) ==21527== by 0x584FAD: perf_evlist__print_counters (stat-display.c:1200) ==21527== by 0x45133A: print_counters (builtin-stat.c:655) ==21527== by 0x450629: process_interval (builtin-stat.c:353) ==21527== by 0x450FBD: __run_perf_stat (builtin-stat.c:564) ==21527== by 0x451285: run_perf_stat (builtin-stat.c:636) ==21527== by 0x454619: cmd_stat (builtin-stat.c:1966) ==21527== by 0x4D557D: run_builtin (perf.c:310) ==21527== by 0x4D57EA: handle_internal_command (perf.c:362) ==21527== Address 0x12826cd0 is 0 bytes inside a block of size 25 free'd ==21527== at 0x4839A0C: free (vg_replace_malloc.c:540) ==21527== by 0x627041: __zfree (zalloc.c:13) ==21527== by 0x57F66A: generic_metric (stat-shadow.c:814) ==21527== by 0x580B21: perf_stat__print_shadow_stats (stat-shadow.c:1057) ==21527== by 0x58418E: print_metric_headers (stat-display.c:943) ==21527== by 0x5844BC: print_interval (stat-display.c:1004) ==21527== by 0x584DEB: perf_evlist__print_counters (stat-display.c:1172) ==21527== by 0x45133A: print_counters (builtin-stat.c:655) ==21527== by 0x450629: process_interval (builtin-stat.c:353) ==21527== by 0x450FBD: __run_perf_stat (builtin-stat.c:564) ==21527== by 0x451285: run_perf_stat (builtin-stat.c:636) ==21527== by 0x454619: cmd_stat (builtin-stat.c:1966) ==21527== Block was alloc'd at ==21527== at 0x483880B: malloc (vg_replace_malloc.c:309) ==21527== by 0x51677DE: strdup (in /usr/lib64/libc-2.29.so) ==21527== by 0x506457: parse_events_name (parse-events.c:1754) ==21527== by 0x5550BB: parse_events_parse (parse-events.y:214) ==21527== by 0x50694D: parse_events__scanner (parse-events.c:1887) ==21527== by 0x506AEF: parse_events (parse-events.c:1927) ==21527== by 0x521D8B: metricgroup__parse_groups (metricgroup.c:527) ==21527== by 0x45156F: parse_metric_groups (builtin-stat.c:721) ==21527== by 0x6228A9: get_value (parse-options.c:243) ==21527== by 0x62363F: parse_short_opt (parse-options.c:348) ==21527== by 0x62363F: parse_options_step (parse-options.c:536) ==21527== by 0x62363F: parse_options_subcommand (parse-options.c:651) ==21527== by 0x453C1D: cmd_stat (builtin-stat.c:1718) ==21527== by 0x4D557D: run_builtin (perf.c:310) and also a leak report. Committer testing: Before: # perf stat -M IpB,IpCall,IpTB,IPC,Retiring_SMT,Frontend_Bound_SMT,Kernel_Utilization,CPU_Utilization --metric-only -a -I 1000 sleep 2 # time CPU_Utilization 1.000470810 free(): double free detected in tcache 2 Aborted (core dumped) # After: # perf stat -M IpB,IpCall,IpTB,IPC,Retiring_SMT,Frontend_Bound_SMT,Kernel_Utilization,CPU_Utilization --metric-only -a -I 1000 sleep 2 # time CPU_Utilization 1.000494752 0.1 2.001105112 0.1 # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20190923233339.25326-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 70c87fdb2a43..2c41d47f6f83 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -738,6 +738,8 @@ static void generic_metric(struct perf_stat_config *config, char *n, *pn; expr__ctx_init(&pctx); + /* Must be first id entry */ + expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -776,8 +778,6 @@ static void generic_metric(struct perf_stat_config *config, expr__add_id(&pctx, n, avg_stats(stats)*scale); } - expr__add_id(&pctx, name, avg); - if (!metric_events[i]) { const char *p = metric_expr; From 7834fa948bebe648e8ce03393fb6b213c33f0a3a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 23 Sep 2019 16:33:37 -0700 Subject: [PATCH 326/345] perf evlist: Fix access of freed id arrays I'm not fully sure if this is the correct fix, but without this I get crashes on more complex perf stat metric usages. The problem is that part of the state gets freed when a weak group fails, but then is later still used. Just don't free the ids, we're going to reuse them anyways on the weak group retry. For example: % perf stat -M IpB,IpCall,IpTB,IPC,Retiring_SMT,Frontend_Bound_SMT,Kernel_Utilization,CPU_Utilization --metric-only -a -I 1000 sleep 2 crashes and gives in valgrind: =21527== Invalid write of size 8 ==21527== at 0x4EE582: hlist_add_head (list.h:644) ==21527== by 0x4EFD3C: perf_evlist__id_hash (evlist.c:477) ==21527== by 0x4EFD99: perf_evlist__id_add (evlist.c:483) ==21527== by 0x4EFF15: perf_evlist__id_add_fd (evlist.c:524) ==21527== by 0x4FC693: store_evsel_ids (evsel.c:2969) ==21527== by 0x4FC76C: perf_evsel__store_ids (evsel.c:2986) ==21527== by 0x450DA7: __run_perf_stat (builtin-stat.c:519) ==21527== by 0x451285: run_perf_stat (builtin-stat.c:636) ==21527== by 0x454619: cmd_stat (builtin-stat.c:1966) ==21527== by 0x4D557D: run_builtin (perf.c:310) ==21527== by 0x4D57EA: handle_internal_command (perf.c:362) ==21527== by 0x4D5931: run_argv (perf.c:406) ==21527== Address 0x12e3f008 is 104 bytes inside a block of size 2,056 free'd ==21527== at 0x4839A0C: free (vg_replace_malloc.c:540) ==21527== by 0x627139: xyarray__delete (xyarray.c:32) ==21527== by 0x4F6BE4: perf_evsel__free_id (evsel.c:1253) ==21527== by 0x4FA11F: evsel__close (evsel.c:1994) ==21527== by 0x4F30A3: perf_evlist__reset_weak_group (evlist.c:1783) ==21527== by 0x450B47: __run_perf_stat (builtin-stat.c:466) ==21527== by 0x451285: run_perf_stat (builtin-stat.c:636) ==21527== by 0x454619: cmd_stat (builtin-stat.c:1966) ==21527== by 0x4D557D: run_builtin (perf.c:310) ==21527== by 0x4D57EA: handle_internal_command (perf.c:362) ==21527== by 0x4D5931: run_argv (perf.c:406) ==21527== by 0x4D5CAE: main (perf.c:531) ==21527== Block was alloc'd at ==21527== at 0x483AB1A: calloc (vg_replace_malloc.c:762) ==21527== by 0x627024: zalloc (zalloc.c:8) ==21527== by 0x627088: xyarray__new (xyarray.c:10) ==21527== by 0x4F6B20: perf_evsel__alloc_id (evsel.c:1237) ==21527== by 0x4FC74E: perf_evsel__store_ids (evsel.c:2983) ==21527== by 0x450DA7: __run_perf_stat (builtin-stat.c:519) ==21527== by 0x451285: run_perf_stat (builtin-stat.c:636) ==21527== by 0x454619: cmd_stat (builtin-stat.c:1966) ==21527== by 0x4D557D: run_builtin (perf.c:310) ==21527== by 0x4D57EA: handle_internal_command (perf.c:362) ==21527== by 0x4D5931: run_argv (perf.c:406) ==21527== by 0x4D5CAE: main (perf.c:531) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20190923233339.25326-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 84c42790dab3..d277a98e62df 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1659,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, is_open = false; if (c2->leader == leader) { if (is_open) - evsel__close(c2); + perf_evsel__close(&evsel->core); c2->leader = c2; c2->core.nr_members = 0; } From 28b951760cebdf1de0d32f38f325b667c2494564 Mon Sep 17 00:00:00 2001 From: Mamatha Inamdar Date: Mon, 9 Sep 2019 12:33:33 +0530 Subject: [PATCH 327/345] perf vendor events: Remove P8 HW events which are not supported This patch is to remove following hardware events from JSON file which are not supported on POWER8. pm_l3_p0_grp_pump pm_l3_p0_lco_data pm_l3_p0_lco_no_data pm_l3_p0_lco_rty Note: Unfortunately power8 event list is not publicly available. Fixes: c3b4d5c4afb0 ("perf vendor events: Remove P8 HW events which are not supported") Signed-off-by: Mamatha Inamdar Acked-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20190909065624.11956.3992.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power8/other.json | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index 9dc2f6b70354..b2a3df07fbc4 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -1775,30 +1775,6 @@ "BriefDescription": "L3 Load Prefetches", "PublicDescription": "" }, - {, - "EventCode": "0xa29084", - "EventName": "PM_L3_P0_GRP_PUMP", - "BriefDescription": "L3 pf sent with grp scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x528084", - "EventName": "PM_L3_P0_LCO_DATA", - "BriefDescription": "lco sent with data port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x518080", - "EventName": "PM_L3_P0_LCO_NO_DATA", - "BriefDescription": "dataless l3 lco sent port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0xa4908c", - "EventName": "PM_L3_P0_LCO_RTY", - "BriefDescription": "L3 LCO received retry port 0", - "PublicDescription": "" - }, {, "EventCode": "0x84908d", "EventName": "PM_L3_PF0_ALLOC", From 61bf4ee29d5a44399bacec9cda9ef314acdbecda Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 9 Sep 2019 13:41:15 +0200 Subject: [PATCH 328/345] perf jvmti: Include JVMTI support for s390 Enable JVMTI support for s390 perf tool chain. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20190909114116.50469-3-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 1 + tools/perf/util/genelf.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index cb198787570a..6ac8887be7c9 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 # # Syscall table generation for perf diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index b72440bf9a79..d4137559be05 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -35,6 +35,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__sparc__) #define GEN_ELF_ARCH EM_SPARC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__s390x__) +#define GEN_ELF_ARCH EM_S390 +#define GEN_ELF_CLASS ELFCLASS64 #else #error "unsupported architecture" #endif From 815c1560bf8fd522b8d93a1d727868b910c1cc24 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 9 Sep 2019 13:41:16 +0200 Subject: [PATCH 329/345] perf build: Add detection of java-11-openjdk-devel package With Java 11 there is no seperate JRE anymore. Details: https://coderanch.com/t/701603/java/JRE-JDK Therefore the detection of the JRE needs to be adapted. This change works for s390 and x86. I have not tested other platforms. Committer testing: Continues to work with the OpenJDK 8: $ rm -f ~acme/lib64/libperf-jvmti.so $ rpm -qa | grep jdk-devel java-1.8.0-openjdk-devel-1.8.0.222.b10-0.fc30.x86_64 $ git log --oneline -1 a51937170f33 (HEAD -> perf/core) perf build: Add detection of java-11-openjdk-devel package $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; make -C tools/perf O=/tmp/build/perf install > /dev/null 2>1 $ ls -la ~acme/lib64/libperf-jvmti.so -rwxr-xr-x. 1 acme acme 230744 Sep 24 16:46 /home/acme/lib64/libperf-jvmti.so $ Suggested-by: Andreas Krebbel Signed-off-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20190909114116.50469-4-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a269d78456b6..46f7fba2306c 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -924,7 +924,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g') endif endif ifndef JDIR From d6840d87b2d148e19e244ad2b44d28ba07f437a0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 25 Sep 2019 09:27:05 -0300 Subject: [PATCH 330/345] perf parser: Remove needless include directives They go on accumulating there like the debug.h one, that was introduced here: f23610245c1a ("perf list: Add debug support for outputing alias string") But then, when that need is removed via: 2073ad3326b7 ("perf tools: Factor out PMU matching in parser") The thing stays there, so continue the house cleaning spree... list.h not needed, no macros from there are used, and 'struct list_head' is in linux/types.h, ditto for util.h, no need for that as well. Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-zkxr3mf6inun8m5mbnil4u0d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 65809ad67808..48126ae4cd13 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -11,12 +11,9 @@ #include #include #include -#include #include -#include "util.h" #include "pmu.h" #include "evsel.h" -#include "debug.h" #include "parse-events.h" #include "parse-events-bison.h" From 104c307147ad379617472dd91a5bcb368d72bd6d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 24 Sep 2019 23:23:56 -0500 Subject: [PATCH 331/345] drm/amd/display: prevent memory leak In dcn*_create_resource_pool the allocated memory should be released if construct pool fails. Reviewed-by: Harry Wentland Signed-off-by: Navid Emamdoost Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index afc61055eca1..1787b9bf800a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -1091,6 +1091,7 @@ struct resource_pool *dce100_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index c66fe170e1e8..318e9c2e2ca8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1462,6 +1462,7 @@ struct resource_pool *dce110_create_resource_pool( if (construct(num_virtual_links, dc, pool, asic_id)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 2b3a2917c168..83e1878161c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -1342,6 +1342,7 @@ struct resource_pool *dce112_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 236c4c0324b1..8b85e5274bba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -1208,6 +1208,7 @@ struct resource_pool *dce120_create_resource_pool( if (construct(num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 5a89e462e7cc..59305e411a66 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1570,6 +1570,7 @@ struct resource_pool *dcn10_create_resource_pool( if (construct(init_data->num_virtual_links, dc, pool)) return &pool->base; + kfree(pool); BREAK_TO_DEBUGGER(); return NULL; } From 284b94be1925dbe035ce5218d8b5c197321262c7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Sep 2019 06:23:54 +0800 Subject: [PATCH 332/345] blk-mq: move lockdep_assert_held() into elevator_exit Commit c48dac137a62 ("block: don't hold q->sysfs_lock in elevator_init_mq") removes q->sysfs_lock from elevator_init_mq(), but forgot to deal with lockdep_assert_held() called in blk_mq_sched_free_requests() which is run in failure path of elevator_init_mq(). blk_mq_sched_free_requests() is called in the following 3 functions: elevator_init_mq() elevator_exit() blk_cleanup_queue() In blk_cleanup_queue(), blk_mq_sched_free_requests() is followed exactly by 'mutex_lock(&q->sysfs_lock)'. So moving the lockdep_assert_held() from blk_mq_sched_free_requests() into elevator_exit() for fixing the report by syzbot. Reported-by: syzbot+da3b7677bb913dc1b737@syzkaller.appspotmail.com Fixed: c48dac137a62 ("block: don't hold q->sysfs_lock in elevator_init_mq") Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 -- block/blk.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c9d183d6c499..ca22afd47b3d 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - lockdep_assert_held(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i); diff --git a/block/blk.h b/block/blk.h index ed347f7a97b1..25773d668ec0 100644 --- a/block/blk.h +++ b/block/blk.h @@ -194,6 +194,8 @@ void elv_unregister_queue(struct request_queue *q); static inline void elevator_exit(struct request_queue *q, struct elevator_queue *e) { + lockdep_assert_held(&q->sysfs_lock); + blk_mq_sched_free_requests(q); __elevator_exit(q, e); } From b89f625e28d44552083f43752f62d8621ded0a04 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 23 Sep 2019 23:12:09 +0800 Subject: [PATCH 333/345] block: don't release queue's sysfs lock during switching elevator cecf5d87ff20 ("block: split .sysfs_lock into two locks") starts to release & acquire sysfs_lock before registering/un-registering elevator queue during switching elevator for avoiding potential deadlock from showing & storing 'queue/iosched' attributes and removing elevator's kobject. Turns out there isn't such deadlock because 'q->sysfs_lock' isn't required in .show & .store of queue/iosched's attributes, and just elevator's sysfs lock is acquired in elv_iosched_store() and elv_iosched_show(). So it is safe to hold queue's sysfs lock when registering/un-registering elevator queue. The biggest issue is that commit cecf5d87ff20 assumes that concurrent write on 'queue/scheduler' can't happen. However, this assumption isn't true, because kernfs_fop_write() only guarantees that concurrent write aren't called on the same open file, but the write could be from different open on the file. So we can't release & re-acquire queue's sysfs lock during switching elevator, otherwise use-after-free on elevator could be triggered. Fixes the issue by not releasing queue's sysfs lock during switching elevator. Fixes: cecf5d87ff20 ("block: split .sysfs_lock into two locks") Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Greg KH Cc: Mike Snitzer Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 13 ++++--------- block/elevator.c | 31 +------------------------------ 2 files changed, 5 insertions(+), 39 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b82736c781c5..962fc0c44381 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -989,13 +989,11 @@ int blk_register_queue(struct gendisk *disk) blk_mq_debugfs_register(q); } - /* - * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator - * switch won't happen at all. - */ + mutex_lock(&q->sysfs_lock); if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); blk_trace_remove_sysfs(dev); @@ -1005,7 +1003,6 @@ int blk_register_queue(struct gendisk *disk) has_elevator = true; } - mutex_lock(&q->sysfs_lock); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); wbt_enable_default(q); blk_throtl_register_queue(q); @@ -1062,12 +1059,10 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - /* - * q->kobj has been removed, so it is safe to check if elevator - * exists without holding q->sysfs_lock. - */ + mutex_lock(&q->sysfs_lock); if (q->elevator) elv_unregister_queue(q); + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/block/elevator.c b/block/elevator.c index bba10e83478a..5437059c9261 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent) if (uevent) kobject_uevent(&e->kobj, KOBJ_ADD); - mutex_lock(&q->sysfs_lock); e->registered = 1; - mutex_unlock(&q->sysfs_lock); } return error; } @@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q) kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); - mutex_lock(&q->sysfs_lock); e->registered = 0; /* Re-enable throttling in case elevator disabled it */ wbt_enable_default(q); - mutex_unlock(&q->sysfs_lock); } } @@ -590,32 +586,11 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock); if (q->elevator) { - if (q->elevator->registered) { - mutex_unlock(&q->sysfs_lock); - - /* - * Concurrent elevator switch can't happen becasue - * sysfs write is always exclusively on same file. - * - * Also the elevator queue won't be freed after - * sysfs_lock is released becasue kobject_del() in - * blk_unregister_queue() waits for completion of - * .store & .show on its attributes. - */ + if (q->elevator->registered) elv_unregister_queue(q); - mutex_lock(&q->sysfs_lock); - } ioc_clear_queue(q); elevator_exit(q, q->elevator); - - /* - * sysfs_lock may be dropped, so re-check if queue is - * unregistered. If yes, don't switch to new elevator - * any more - */ - if (!blk_queue_registered(q)) - return 0; } ret = blk_mq_init_sched(q, new_e); @@ -623,11 +598,7 @@ int elevator_switch_mq(struct request_queue *q, goto out; if (new_e) { - mutex_unlock(&q->sysfs_lock); - ret = elv_register_queue(q, true); - - mutex_lock(&q->sysfs_lock); if (ret) { elevator_exit(q, q->elevator); goto out; From 25d41e4aadb0788b4fae8a8fca90f437b9ebd727 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2019 16:02:07 -0700 Subject: [PATCH 334/345] iocost: better trace vrate changes vrate_adj tracepoint traces vrate changes; however, it does so only when busy_level is non-zero. busy_level turning to zero can sometimes be as interesting an event. This patch also enables vrate_adj tracepoint on other vrate related events - busy_level changes and non-zero nr_lagging. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-iocost.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 3b39deb8b9f8..32d4d6d86a99 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1343,7 +1343,7 @@ static void ioc_timer_fn(struct timer_list *timer) u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; u32 missed_ppm[2], rq_wait_pct; u64 period_vtime; - int i; + int prev_busy_level, i; /* how were the latencies during the period? */ ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct); @@ -1531,6 +1531,7 @@ skip_surplus_transfers: * and experiencing shortages but not surpluses, we're too stingy * and should increase vtime rate. */ + prev_busy_level = ioc->busy_level; if (rq_wait_pct > RQ_WAIT_BUSY_PCT || missed_ppm[READ] > ppm_rthr || missed_ppm[WRITE] > ppm_wthr) { @@ -1592,6 +1593,10 @@ skip_surplus_transfers: atomic64_set(&ioc->vtime_rate, vrate); ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP( ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); + } else if (ioc->busy_level != prev_busy_level || nr_lagging) { + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), + &missed_ppm, rq_wait_pct, nr_lagging, + nr_shortages, nr_surpluses); } ioc_refresh_params(ioc, false); From 7cd806a9a953f234b9865c30028f47fd738ce375 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2019 16:03:09 -0700 Subject: [PATCH 335/345] iocost: improve nr_lagging handling Some IOs may span multiple periods. As latencies are collected on completion, the inbetween periods won't register them and may incorrectly decide to increase vrate. nr_lagging tracks these IOs to avoid those situations. Currently, whenever there are IOs which are spanning from the previous period, busy_level is reset to 0 if negative thus suppressing vrate increase. This has the following two problems. * When latency target percentiles aren't set, vrate adjustment should only be governed by queue depth depletion; however, the current code keeps nr_lagging active which pulls in latency results and can keep down vrate unexpectedly. * When lagging condition is detected, it resets the entire negative busy_level. This turned out to be way too aggressive on some devices which sometimes experience extended latencies on a small subset of commands. In addition, a lagging IO will be accounted as latency target miss on completion anyway and resetting busy_level amplifies its impact unnecessarily. This patch fixes the above two problems by disabling nr_lagging counting when latency target percentiles aren't set and blocking vrate increases when there are lagging IOs while leaving busy_level as-is. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-iocost.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 32d4d6d86a99..10160de62e3b 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1407,7 +1407,8 @@ static void ioc_timer_fn(struct timer_list *timer) * comparing vdone against period start. If lagging behind * IOs from past periods, don't increase vrate. */ - if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) && + if ((ppm_rthr != MILLION || ppm_wthr != MILLION) && + !atomic_read(&iocg_to_blkg(iocg)->use_delay) && time_after64(vtime, vdone) && time_after64(vtime, now.vnow - MAX_LAGGING_PERIODS * period_vtime) && @@ -1537,21 +1538,23 @@ skip_surplus_transfers: missed_ppm[WRITE] > ppm_wthr) { ioc->busy_level = max(ioc->busy_level, 0); ioc->busy_level++; - } else if (nr_lagging) { - ioc->busy_level = max(ioc->busy_level, 0); - } else if (nr_shortages && !nr_surpluses && - rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && + } else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 && missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 && missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) { - ioc->busy_level = min(ioc->busy_level, 0); - ioc->busy_level--; + /* take action iff there is contention */ + if (nr_shortages && !nr_lagging) { + ioc->busy_level = min(ioc->busy_level, 0); + /* redistribute surpluses first */ + if (!nr_surpluses) + ioc->busy_level--; + } } else { ioc->busy_level = 0; } ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); - if (ioc->busy_level) { + if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { u64 vrate = atomic64_read(&ioc->vtime_rate); u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; From 7afcccafa59fb63b58f863a6c5e603a34625955b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Sep 2019 16:03:35 -0700 Subject: [PATCH 336/345] iocost: bump up default latency targets for hard disks The default hard disk param sets latency targets at 50ms. As the default target percentiles are zero, these don't directly regulate vrate; however, they're still used to calculate the period length - 100ms in this case. This is excessively low. A SATA drive with QD32 saturated with random IOs can easily reach avg completion latency of several hundred msecs. A period duration which is substantially lower than avg completion latency can lead to wildly fluctuating vrate. Let's bump up the default latency targets to 250ms so that the period duration is sufficiently long. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-iocost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 10160de62e3b..2a3db80c1dce 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -529,8 +529,8 @@ struct iocg_wake_ctx { static const struct ioc_params autop[] = { [AUTOP_HDD] = { .qos = { - [QOS_RLAT] = 50000, /* 50ms */ - [QOS_WLAT] = 50000, + [QOS_RLAT] = 250000, /* 250ms */ + [QOS_WLAT] = 250000, [QOS_MIN] = VRATE_MIN_PPM, [QOS_MAX] = VRATE_MAX_PPM, }, From bda521624e75c665c407b3d9cece6e7a28178cd8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Sep 2019 13:47:15 -0600 Subject: [PATCH 337/345] io_uring: make CQ ring wakeups be more efficient For batched IO, it's not uncommon for waiters to ask for more than 1 IO to complete before being woken up. This is a problem with wait_event() since tasks will get woken for every IO that completes, re-check condition, then go back to sleep. For batch counts on the order of what you do for high IOPS, that can result in 10s of extra wakeups for the waiting task. Add a private wake function that checks for the wake up count criteria being met before calling autoremove_wake_function(). Pavel reports that one test case he has runs 40% faster with proper batching of wakeups. Reported-by: Pavel Begunkov Tested-by: Pavel Begunkov Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 66 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9b84232e5cc4..c934f91c51e9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2768,6 +2768,38 @@ out: return submit; } +struct io_wait_queue { + struct wait_queue_entry wq; + struct io_ring_ctx *ctx; + unsigned to_wait; + unsigned nr_timeouts; +}; + +static inline bool io_should_wake(struct io_wait_queue *iowq) +{ + struct io_ring_ctx *ctx = iowq->ctx; + + /* + * Wake up if we have enough events, or if a timeout occured since we + * started waiting. For timeouts, we always want to return to userspace, + * regardless of event count. + */ + return io_cqring_events(ctx->rings) >= iowq->to_wait || + atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; +} + +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, + wq); + + if (!io_should_wake(iowq)) + return -1; + + return autoremove_wake_function(curr, mode, wake_flags, key); +} + /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -2775,8 +2807,16 @@ out: static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { + struct io_wait_queue iowq = { + .wq = { + .private = current, + .func = io_wake_function, + .entry = LIST_HEAD_INIT(iowq.wq.entry), + }, + .ctx = ctx, + .to_wait = min_events, + }; struct io_rings *rings = ctx->rings; - unsigned nr_timeouts; int ret; if (io_cqring_events(rings) >= min_events) @@ -2795,15 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - nr_timeouts = atomic_read(&ctx->cq_timeouts); - /* - * Return if we have enough events, or if a timeout occured since - * we started waiting. For timeouts, we always want to return to - * userspace. - */ - ret = wait_event_interruptible(ctx->wait, - io_cqring_events(rings) >= min_events || - atomic_read(&ctx->cq_timeouts) != nr_timeouts); + ret = 0; + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + do { + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + TASK_INTERRUPTIBLE); + if (io_should_wake(&iowq)) + break; + schedule(); + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } while (1); + finish_wait(&ctx->wait, &iowq.wq); + restore_saved_sigmask_unless(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR; From 715d14da670e353c8c827f97e61ba00969929e33 Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Wed, 25 Sep 2019 22:32:33 +0800 Subject: [PATCH 338/345] pwm: mediatek: Add MT7629 compatible string This adds pwm support for MT7629, and separate mt7629 compatible string from mt7622 Signed-off-by: Sam Shih Signed-off-by: Thierry Reding --- drivers/pwm/pwm-mediatek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 7782fadbc116..b94e0d09c300 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -297,6 +297,11 @@ static const struct pwm_mediatek_of_data mt7628_pwm_data = { .pwm45_fixup = true, }; +static const struct pwm_mediatek_of_data mt7629_pwm_data = { + .num_pwms = 1, + .pwm45_fixup = false, +}; + static const struct pwm_mediatek_of_data mt8516_pwm_data = { .num_pwms = 5, .pwm45_fixup = false, @@ -307,6 +312,7 @@ static const struct of_device_id pwm_mediatek_of_match[] = { { .compatible = "mediatek,mt7622-pwm", .data = &mt7622_pwm_data }, { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data }, { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data }, + { .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data }, { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data }, { }, }; From 8f960106c150a9733f5d7408b975c0a687617961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 23 Sep 2019 10:49:35 +0200 Subject: [PATCH 339/345] MAINTAINERS: Add a selection of PWM related keywords to the PWM entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just a small subset of the relevant functions, but should at least catch all new code as every consumer has to call pwm_apply_state() (or the legacy function pwm_config()) and every PWM provider has to implement pwm_ops. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..778b55ca4bb3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13086,6 +13086,7 @@ F: drivers/video/backlight/pwm_bl.c F: include/linux/pwm_backlight.h F: drivers/gpio/gpio-mvebu.c F: Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +K: pwm_(config|apply_state|ops) PXA GPIO DRIVER M: Robert Jarzmik From 6f736909f0a4e04f9ab9744ec7b1f2f0b10c2db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 23 Sep 2019 10:49:36 +0200 Subject: [PATCH 340/345] MAINTAINERS: Add patchwork link for PWM entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This instance collects patches and Thierry updates the patches' status there, so I consider it used and suitable to document it officially. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 778b55ca4bb3..f0ab9b2f595c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13078,6 +13078,7 @@ M: Thierry Reding L: linux-pwm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git +Q: https://patchwork.ozlabs.org/project/linux-pwm/list/ F: Documentation/driver-api/pwm.rst F: Documentation/devicetree/bindings/pwm/ F: include/linux/pwm.h From da635e7abe3f4ec9a8270ca4f5ba946d1a43f678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 23 Sep 2019 10:49:37 +0200 Subject: [PATCH 341/345] MAINTAINERS: Add myself as reviewer for the PWM subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I spend some time in the nearer past reviewing PWM patches. Honor this by adding me as a reviewer. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f0ab9b2f595c..6bd99e075c40 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13075,6 +13075,7 @@ F: drivers/media/rc/pwm-ir-tx.c PWM SUBSYSTEM M: Thierry Reding +R: Uwe Kleine-König L: linux-pwm@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git From 26acf400d2dcc72c7e713e1f55db47ad92010cc2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Sep 2019 14:36:48 -0300 Subject: [PATCH 342/345] perf unwind: Fix libunwind build failure on i386 systems Naresh Kamboju reported, that on the i386 build pr_err() doesn't get defined properly due to header ordering: perf-in.o: In function `libunwind__x86_reg_id': tools/perf/util/libunwind/../../arch/x86/util/unwind-libunwind.c:109: undefined reference to `pr_err' Reported-by: Naresh Kamboju Signed-off-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 05920e3edf7a..47357973b55b 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include "../../util/debug.h" #ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" -#include "../../util/debug.h" #endif #ifdef HAVE_ARCH_X86_64_SUPPORT From 2af2783f2ea4f273598557b247e320509247df80 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 17 Sep 2019 20:04:27 +0800 Subject: [PATCH 343/345] rq-qos: get rid of redundant wbt_update_limits() We have updated limits after calling wbt_set_min_lat(). No need to update again. Reviewed-by: Bob Liu Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 962fc0c44381..46f5198be017 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -482,7 +482,6 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, blk_mq_quiesce_queue(q); wbt_set_min_lat(q, val); - wbt_update_limits(q); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); From 8d6996630c03d7ceeabe2611378fea5ca1c3f1b3 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 27 Sep 2019 16:19:55 +0800 Subject: [PATCH 344/345] block: fix null pointer dereference in blk_mq_rq_timed_out() We got a null pointer deference BUG_ON in blk_mq_rq_timed_out() as following: [ 108.825472] BUG: kernel NULL pointer dereference, address: 0000000000000040 [ 108.827059] PGD 0 P4D 0 [ 108.827313] Oops: 0000 [#1] SMP PTI [ 108.827657] CPU: 6 PID: 198 Comm: kworker/6:1H Not tainted 5.3.0-rc8+ #431 [ 108.829503] Workqueue: kblockd blk_mq_timeout_work [ 108.829913] RIP: 0010:blk_mq_check_expired+0x258/0x330 [ 108.838191] Call Trace: [ 108.838406] bt_iter+0x74/0x80 [ 108.838665] blk_mq_queue_tag_busy_iter+0x204/0x450 [ 108.839074] ? __switch_to_asm+0x34/0x70 [ 108.839405] ? blk_mq_stop_hw_queue+0x40/0x40 [ 108.839823] ? blk_mq_stop_hw_queue+0x40/0x40 [ 108.840273] ? syscall_return_via_sysret+0xf/0x7f [ 108.840732] blk_mq_timeout_work+0x74/0x200 [ 108.841151] process_one_work+0x297/0x680 [ 108.841550] worker_thread+0x29c/0x6f0 [ 108.841926] ? rescuer_thread+0x580/0x580 [ 108.842344] kthread+0x16a/0x1a0 [ 108.842666] ? kthread_flush_work+0x170/0x170 [ 108.843100] ret_from_fork+0x35/0x40 The bug is caused by the race between timeout handle and completion for flush request. When timeout handle function blk_mq_rq_timed_out() try to read 'req->q->mq_ops', the 'req' have completed and reinitiated by next flush request, which would call blk_rq_init() to clear 'req' as 0. After commit 12f5b93145 ("blk-mq: Remove generation seqeunce"), normal requests lifetime are protected by refcount. Until 'rq->ref' drop to zero, the request can really be free. Thus, these requests cannot been reused before timeout handle finish. However, flush request has defined .end_io and rq->end_io() is still called even if 'rq->ref' doesn't drop to zero. After that, the 'flush_rq' can be reused by the next flush request handle, resulting in null pointer deference BUG ON. We fix this problem by covering flush request with 'rq->ref'. If the refcount is not zero, flush_end_io() return and wait the last holder recall it. To record the request status, we add a new entry 'rq_status', which will be used in flush_end_io(). Cc: Christoph Hellwig Cc: Keith Busch Cc: Bart Van Assche Cc: stable@vger.kernel.org # v4.18+ Reviewed-by: Ming Lei Reviewed-by: Bob Liu Signed-off-by: Yufen Yu ------- v2: - move rq_status from struct request to struct blk_flush_queue v3: - remove unnecessary '{}' pair. v4: - let spinlock to protect 'fq->rq_status' v5: - move rq_status after flush_running_idx member of struct blk_flush_queue Signed-off-by: Jens Axboe --- block/blk-flush.c | 10 ++++++++++ block/blk-mq.c | 5 ++++- block/blk.h | 7 +++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index aedd9320e605..1eec9cbe5a0a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -214,6 +214,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); + + if (!refcount_dec_and_test(&flush_rq->ref)) { + fq->rq_status = error; + spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + return; + } + + if (fq->rq_status != BLK_STS_OK) + error = fq->rq_status; + hctx = flush_rq->mq_hctx; if (!q->elevator) { blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); diff --git a/block/blk-mq.c b/block/blk-mq.c index 29275f5a996f..6e3b15f70cd7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -918,7 +918,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - if (refcount_dec_and_test(&rq->ref)) + + if (is_flush_rq(rq, hctx)) + rq->end_io(rq, 0); + else if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); return true; diff --git a/block/blk.h b/block/blk.h index 25773d668ec0..47fba9362e60 100644 --- a/block/blk.h +++ b/block/blk.h @@ -19,6 +19,7 @@ struct blk_flush_queue { unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; + blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; @@ -47,6 +48,12 @@ static inline void __blk_get_queue(struct request_queue *q) kobject_get(&q->kobj); } +static inline bool +is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx) +{ + return hctx->fq->flush_rq == req; +} + struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); From ea1e2bbec6edb34fd13032bdfc80276526e87967 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 27 Sep 2019 17:18:05 +0100 Subject: [PATCH 345/345] keys: Add Jarkko Sakkinen as co-maintainer To address a major procedural concern on Linus's part the keyrings needs a co-maintainer. Suggested-by: David Howells Signed-off-by: Jarkko Sakkinen Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a97f1be63b9d..d9ebe514708b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9060,6 +9060,7 @@ F: include/keys/trusted.h KEYS/KEYRINGS: M: David Howells +M: Jarkko Sakkinen L: keyrings@vger.kernel.org S: Maintained F: Documentation/security/keys/core.rst